sportdb-formats 1.1.0 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -34
  3. data/Rakefile +3 -6
  4. data/lib/sportdb/formats.rb +54 -70
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +141 -0
  7. data/lib/sportdb/formats/event/event_reader.rb +183 -0
  8. data/lib/sportdb/formats/league/league_index.rb +22 -18
  9. data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
  10. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  11. data/lib/sportdb/formats/match/match_parser.rb +47 -18
  12. data/lib/sportdb/formats/package.rb +59 -11
  13. data/lib/sportdb/formats/team/club_index.rb +13 -11
  14. data/lib/sportdb/formats/team/club_index_history.rb +134 -0
  15. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  16. data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
  17. data/lib/sportdb/formats/version.rb +1 -1
  18. data/test/helper.rb +50 -81
  19. data/test/test_club_index_history.rb +107 -0
  20. data/test/test_club_reader_history.rb +212 -0
  21. data/test/test_datafile_package.rb +1 -1
  22. metadata +11 -81
  23. data/lib/sportdb/formats/config.rb +0 -40
  24. data/lib/sportdb/formats/match/match_parser_csv.rb +0 -321
  25. data/lib/sportdb/formats/name_helper.rb +0 -84
  26. data/lib/sportdb/formats/score/score_formats.rb +0 -220
  27. data/lib/sportdb/formats/score/score_parser.rb +0 -202
  28. data/lib/sportdb/formats/season_utils.rb +0 -27
  29. data/lib/sportdb/formats/structs/country.rb +0 -31
  30. data/lib/sportdb/formats/structs/group.rb +0 -18
  31. data/lib/sportdb/formats/structs/league.rb +0 -37
  32. data/lib/sportdb/formats/structs/match.rb +0 -151
  33. data/lib/sportdb/formats/structs/matchlist.rb +0 -220
  34. data/lib/sportdb/formats/structs/round.rb +0 -25
  35. data/lib/sportdb/formats/structs/season.rb +0 -123
  36. data/lib/sportdb/formats/structs/standings.rb +0 -268
  37. data/lib/sportdb/formats/structs/team.rb +0 -150
  38. data/lib/sportdb/formats/structs/team_usage.rb +0 -88
  39. data/test/test_clubs.rb +0 -40
  40. data/test/test_conf.rb +0 -65
  41. data/test/test_csv_match_parser.rb +0 -114
  42. data/test/test_csv_match_parser_utils.rb +0 -20
  43. data/test/test_csv_reader.rb +0 -31
  44. data/test/test_match.rb +0 -30
  45. data/test/test_match_auto.rb +0 -72
  46. data/test/test_match_auto_champs.rb +0 -45
  47. data/test/test_match_auto_euro.rb +0 -37
  48. data/test/test_match_auto_relegation.rb +0 -41
  49. data/test/test_match_auto_worldcup.rb +0 -61
  50. data/test/test_match_champs.rb +0 -27
  51. data/test/test_match_eng.rb +0 -26
  52. data/test/test_match_euro.rb +0 -27
  53. data/test/test_match_worldcup.rb +0 -27
  54. data/test/test_name_helper.rb +0 -67
  55. data/test/test_scores.rb +0 -122
  56. data/test/test_season.rb +0 -62
@@ -54,6 +54,7 @@ class ClubIndex
54
54
  ## normalize( name )
55
55
 
56
56
  def strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
57
+ ## change/rename to strip_wiki_qualifier or such - why? why not?
57
58
  ## note: strip disambiguationn qualifier from wikipedia page name if present
58
59
  ## note: only remove year and foot... for now
59
60
  ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
@@ -178,22 +179,24 @@ class ClubIndex
178
179
  ## todo/fix/check: use rename to find_canon or find_canonical() or something??
179
180
  ## remove (getting used?) - why? why not?
180
181
  def []( name ) ## lookup by canoncial name only; todo/fix: add find alias why? why not?
182
+ puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
181
183
  @clubs[ name ]
182
184
  end
183
185
 
184
186
 
185
- ## todo/fix/check: return empty array if no match!!!
186
- ## and NOT nil (add || []) - why? why not?
187
187
  def match( name )
188
+ # note: returns empty array (e.g. []) if no match and NOT nil
188
189
  name = normalize( name )
189
- m = @clubs_by_name[ name ]
190
+ m = @clubs_by_name[ name ] || []
190
191
 
191
192
  ## no match - retry with unaccented variant if different
192
193
  ## e.g. example is Preussen Münster (with mixed accent and unaccented letters) that would go unmatched for now
193
194
  ## Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
194
- if m.nil?
195
+ if m.empty?
195
196
  name2 = unaccent( name )
196
- m = @clubs_by_name[ name2 ] if name2 != name
197
+ if name2 != name
198
+ m = @clubs_by_name[ name2 ] || []
199
+ end
197
200
  end
198
201
  m
199
202
  end
@@ -227,10 +230,8 @@ class ClubIndex
227
230
  country = country( country )
228
231
 
229
232
  ## note: match must for now always include name
230
- if m ## filter by country
231
- m = m.select { |club| club.country.key == country.key }
232
- m = nil if m.empty? ## note: reset to nil if no more matches
233
- end
233
+ ## filter by country
234
+ m = m.select { |club| club.country.key == country.key }
234
235
  end
235
236
  m
236
237
  end
@@ -263,7 +264,7 @@ class ClubIndex
263
264
 
264
265
  m = match_by( name: name, country: country )
265
266
 
266
- if m.nil?
267
+ if m.empty?
267
268
  ## (re)try with second country - quick hacks for known leagues
268
269
  ## todo/fix: add league flag to activate!!! - why? why not
269
270
  m = match_by( name: name, country: 'wal' ) if country.key == 'eng'
@@ -272,6 +273,7 @@ class ClubIndex
272
273
  m = match_by( name: name, country: 'mc' ) if country.key == 'fr'
273
274
  m = match_by( name: name, country: 'li' ) if country.key == 'ch'
274
275
  m = match_by( name: name, country: 'ca' ) if country.key == 'us'
276
+ m = match_by( name: name, country: 'nz' ) if country.key == 'au'
275
277
  end
276
278
  else ## try "global" search - no country passed in
277
279
  m = match( name )
@@ -279,7 +281,7 @@ class ClubIndex
279
281
 
280
282
 
281
283
  club = nil
282
- if m.nil?
284
+ if m.empty?
283
285
  ## puts "** !!! WARN !!! no match for club >#{name}<"
284
286
  elsif m.size > 1
285
287
  puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
@@ -0,0 +1,134 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class ClubHistoryIndex
8
+
9
+ def self.build( path )
10
+ pack = Package.new( path ) ## lets us use direcotry or zip archive
11
+
12
+ recs = []
13
+ pack.each_clubs_history do |entry|
14
+ recs += ClubHistoryReader.parse( entry.read )
15
+ end
16
+ recs
17
+
18
+ index = new
19
+ index.add( recs )
20
+ index
21
+ end
22
+
23
+
24
+
25
+ def catalog() Import.catalog; end
26
+
27
+ ## note: keep name history for now separate from
28
+ ## from club struct - why? why not?
29
+ ## later yes, yes, yes, merge name history into club struct!!!!!
30
+ ##
31
+ ## for now the name history is experimental
32
+
33
+
34
+ def initialize
35
+ @clubs = {} ## clubs (indexed) by canonical name
36
+ @errors = []
37
+ end
38
+
39
+ attr_reader :errors
40
+ def errors?() @errors.empty? == false; end
41
+
42
+ def mappings() @clubs; end ## todo/check: rename to records or histories or something - why? why not?
43
+
44
+
45
+ def add_history( club_rec, keyword, season, args )
46
+ ## note use season obj for now (and NOT key) - why? why not?
47
+ rec = @clubs[ club_rec.name ] ||= []
48
+
49
+ rec << [season, [keyword, args]]
50
+
51
+ ## note: always keep records sorted by season_key for now
52
+ ## check if 2010 and 2010/11 is in order using alpha sort?? (see argentina)
53
+ rec.sort! { |l,r| r[0] <=> l[0] }
54
+ end
55
+
56
+
57
+ def add( rec_or_recs ) ## add club record / alt_names
58
+ recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
59
+
60
+ recs.each do |rec|
61
+
62
+ keyword = rec[0]
63
+ season_key = rec[1]
64
+ args = rec[2..-1] ## get rest of args e.g. one, two or more
65
+
66
+ ## note: for now only add (re)name history season records,
67
+ ## that is, skip MERGE and BANKRUPT for now
68
+ ## and incl. only RENAME, REFORM, MOVE for now
69
+ next if ['MERGE', 'BANKRUPT'].include?( keyword )
70
+
71
+
72
+ name_old = strip_geo( args[0][0] ) ## note: strip optional geo part from name
73
+ name_new = strip_geo( args[1][0] )
74
+
75
+ country_old = args[0][1]
76
+ country_new = args[1][1]
77
+
78
+ club_old = catalog.clubs.find_by!( name: name_old, country: country_old )
79
+ club_new = catalog.clubs.find_by!( name: name_new, country: country_new )
80
+
81
+ ## note use season obj for now (and NOT key) - why? why not?
82
+ season = Season.parse( season_key )
83
+
84
+ ## todo/check:
85
+ ## check if club_old and club_new reference different club record!!
86
+ ## examples - RB II -> Liefering ?? or
87
+ ## FC Pasching -> OOE Juniors ??
88
+ ## Austria Salzburg -> RB Salburg ??
89
+ ## for now always add name history to both - why? why not?
90
+
91
+ add_history( club_old, keyword, season, args )
92
+ ## note: allow for now different club references
93
+ ## but maybe warn later - why? why not?
94
+ ## add history to both for now
95
+ add_history( club_new, keyword, season, args ) if club_old != club_new
96
+ end # each rec
97
+ end # method add
98
+
99
+
100
+ #### todo/check: move as method to club struct later - to always use club reference
101
+ ## returns (simply) name as string for now or nil - why? why not?
102
+ #
103
+ # history entry example
104
+ # Arsenal FC"=>
105
+ # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
106
+ # [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
107
+ # [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
108
+ def find_name_by( name:, season: )
109
+ recs = @clubs[ name ]
110
+ if recs
111
+ season = Season( season ) ## make sure season is a season obj (and NOT a string)
112
+ ## check season records for name; use linear search (assume only few records)
113
+ recs.each do |rec|
114
+ if season >= rec[0]
115
+ return strip_geo( rec[1][1][1][0] ) # use second arg
116
+ end
117
+ end
118
+ ## if we get here use last name
119
+ strip_geo( recs[-1][1][1][0][0] ) # use first arg
120
+ else
121
+ nil
122
+ end
123
+ end
124
+
125
+ ##################
126
+ ## helpers
127
+ def strip_geo( name )
128
+ ## e.g. Arsenal, London => Arsenal
129
+ name.split(',')[0].strip
130
+ end
131
+ end # class ClubHistoryIndex
132
+
133
+ end # module Import
134
+ end # module SportDb
@@ -0,0 +1,203 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class ClubHistoryReader
9
+
10
+ def catalog() Import.catalog; end
11
+
12
+
13
+
14
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
15
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
16
+ parse( txt )
17
+ end
18
+
19
+ def self.parse( txt )
20
+ new( txt ).parse
21
+ end
22
+
23
+ def initialize( txt )
24
+ @txt = txt
25
+ end
26
+
27
+
28
+ ###
29
+ ## RENAME/RENAMED
30
+ ## MOVE/MOVED
31
+ ## BANKRUPT/BANKRUPTED
32
+ ## REFORM/REFORMED
33
+ ## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
34
+
35
+
36
+ KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
37
+ MOVED?|
38
+ BANKRUPT(?:ED)?|
39
+ REFORM(?:ED)?|
40
+ MERGED?
41
+ )
42
+ [ ]+
43
+ (?<text>.*) # rest of text
44
+ $
45
+ }x
46
+
47
+
48
+ def parse
49
+ recs = []
50
+ last_rec = nil
51
+
52
+ last_country = nil
53
+ last_season = nil
54
+ last_keyword = nil
55
+ last_teams = []
56
+
57
+ OutlineReader.parse( @txt ).each do |node|
58
+ if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
59
+ heading_level = node[0][1].to_i
60
+ heading = node[1]
61
+
62
+ puts "heading #{heading_level} >#{heading}<"
63
+
64
+
65
+ if heading_level == 1
66
+ ## assume country in heading; allow all "formats" supported by parse e.g.
67
+ ## Österreich • Austria (at)
68
+ ## Österreich • Austria
69
+ ## Austria
70
+ ## Deutschland (de) • Germany
71
+ country = catalog.countries.parse( heading )
72
+ ## check country code - MUST exist for now!!!!
73
+ if country.nil?
74
+ puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
75
+ exit 1
76
+ end
77
+ puts " country >#{heading}< => #{country.name}, #{country.key}"
78
+ last_country = country
79
+ last_season = nil ## reset "lower levels" - season & keyword
80
+ last_keyword = nil
81
+ elsif heading_level == 2
82
+ ## assume season
83
+ season = Season.parse( heading )
84
+ puts " season >#{heading}< => #{season.key}"
85
+ last_season = season ## reset "lowwer levels" - keyword
86
+ last_keyword = nil
87
+ else
88
+ puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
89
+ exit 1
90
+ end
91
+
92
+ elsif node[0] == :p ## paragraph with (text) lines
93
+ if last_country.nil?
94
+ puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
95
+ exit 1
96
+ end
97
+ if last_season.nil?
98
+ puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
99
+ exit 1
100
+ end
101
+
102
+ lines = node[1]
103
+ lines.each do |line|
104
+ if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
105
+ keyword = m[:keyword]
106
+ line = m[:text].strip
107
+
108
+ puts " keyword #{keyword}"
109
+ last_keyword = case keyword ## "normalize" keywords
110
+ when 'BANKRUPT', 'BANKRUPTED'
111
+ 'BANKRUPT'
112
+ when 'RENAME', 'RENAMED'
113
+ 'RENAME'
114
+ when 'REFORM', 'REFORMED'
115
+ 'REFORM'
116
+ when 'MOVE', 'MOVED'
117
+ 'MOVE'
118
+ when 'MERGE', 'MERGED'
119
+ 'MERGE'
120
+ else
121
+ puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
122
+ exit 1
123
+ end
124
+
125
+ last_teams = []
126
+ end
127
+
128
+ if last_keyword.nil?
129
+ puts "!!! ERROR [club history reader] - line with keyword expected - got:"
130
+ puts line
131
+ exit 1
132
+ end
133
+
134
+ if last_keyword == 'BANKRUPT'
135
+ ## requires / expects one team in one line
136
+ recs << [ last_keyword, last_season.key,
137
+ [ squish(line), last_country.key ]
138
+ ]
139
+ elsif last_keyword == 'RENAME' ||
140
+ last_keyword == 'REFORM' ||
141
+ last_keyword == 'MOVE'
142
+ ## requires / expects two teams in one line (separated by ⇒ or such)
143
+ teams = line.split( '⇒' )
144
+ if teams.size != 2
145
+ puts "!!! ERROR [club history reader] - expected two teams - got:"
146
+ pp teams
147
+ exit 1
148
+ end
149
+ teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
150
+ recs << [ last_keyword, last_season.key,
151
+ [ teams[0], last_country.key ],
152
+ [ teams[1], last_country.key ]
153
+ ]
154
+ elsif last_keyword == 'MERGE'
155
+ ## check if line starts with separator
156
+ ## otherwise collect to be merged teams
157
+ if line.start_with?( '⇒' )
158
+ if last_teams.size < 2
159
+ puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
160
+ pp last_teams
161
+ exit 1
162
+ end
163
+ ## auto-add country to all teams
164
+ teams = last_teams.map {|team| [team, last_country.key]}
165
+ recs << [ last_keyword, last_season.key,
166
+ teams,
167
+ [ squish(line.sub('⇒','').strip), last_country.key ]
168
+ ]
169
+
170
+ last_teams = []
171
+ else
172
+ last_teams << squish(line)
173
+ end
174
+ else
175
+ puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
176
+ exit 1
177
+ end
178
+ end # each line (in paragraph)
179
+ else
180
+ puts "** !!! ERROR [club history reader] - unknown line type:"
181
+ pp node
182
+ exit 1
183
+ end
184
+ end
185
+
186
+ recs
187
+ end # method read
188
+
189
+
190
+ ###############
191
+ ## helper
192
+
193
+ def squish( str )
194
+ ## colapse all whitespace to one
195
+ str.gsub( /[ ]+/,' ' )
196
+ end
197
+
198
+
199
+ end # class ClubHistoryReader
200
+
201
+
202
+ end ## module Import
203
+ end ## module SportDb
@@ -36,17 +36,16 @@ class ClubPropsReader
36
36
 
37
37
  ## find / match club by (canocial) name
38
38
  m = catalog.clubs.match( name )
39
- if m && m.size > 1
39
+ if m.size > 1
40
40
  puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
41
41
  pp rec
42
42
  pp m
43
43
 
44
44
  ## todo/fix: try filter by canonical name if more than one match
45
45
  m = m.select { |club| club.name == name }
46
- m = nil if m.empty? ## note: reset to nil if no more matches
47
46
  end
48
47
 
49
- if m.nil?
48
+ if m.empty?
50
49
  puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
51
50
  pp rec
52
51
  exit 1
@@ -62,13 +61,29 @@ class ClubPropsReader
62
61
  ## todo/fix: only updated "on-demand" from in-memory struct/records!!!!
63
62
 
64
63
  ## update attributes
65
- club_rec.key = rec['Key'] if rec['Key']
66
- club_rec.code = rec['Code'] if rec['Code']
64
+ club_rec.key = rec['Key'] if is_not_na?( rec['Key'] )
65
+ club_rec.code = rec['Code'] if is_not_na?( rec['Code'] )
67
66
  ## todo/fix: add (some) more props e.g. address, web, etc.
68
67
  end
69
68
  end
70
69
  end # method parse
71
70
 
71
+
72
+ ## allow various values for nil or n/a (not available/applicable) for now
73
+ ## add more or less - why? why not?
74
+ def is_not_na?( col ) !is_na?( col); end ## check: find a better name - why? why not?
75
+
76
+ NA_VARIANTS = ['-', '--', '---',
77
+ '?', '??', '???',
78
+ '_', '__', '___',
79
+ 'na', 'n/a',
80
+ 'nil', 'null']
81
+
82
+ def is_na?( col )
83
+ col.nil? || col.empty? || NA_VARIANTS.include?( col.downcase )
84
+ end
85
+
86
+
72
87
  end # class ClubPropsReader
73
88
 
74
89
  end ## module Import