sportdb-formats 1.0.5 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +8 -11
  3. data/Rakefile +1 -1
  4. data/lib/sportdb/formats.rb +19 -0
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +141 -0
  7. data/lib/sportdb/formats/event/event_reader.rb +183 -0
  8. data/lib/sportdb/formats/league/league_index.rb +22 -18
  9. data/lib/sportdb/formats/league/league_outline_reader.rb +27 -7
  10. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  11. data/lib/sportdb/formats/match/mapper.rb +63 -63
  12. data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
  13. data/lib/sportdb/formats/match/match_parser.rb +141 -193
  14. data/lib/sportdb/formats/match/match_parser_csv.rb +169 -25
  15. data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
  16. data/lib/sportdb/formats/name_helper.rb +4 -1
  17. data/lib/sportdb/formats/package.rb +57 -9
  18. data/lib/sportdb/formats/parser_helper.rb +11 -2
  19. data/lib/sportdb/formats/score/score_formats.rb +19 -0
  20. data/lib/sportdb/formats/score/score_parser.rb +10 -2
  21. data/lib/sportdb/formats/season_utils.rb +0 -11
  22. data/lib/sportdb/formats/structs/group.rb +5 -12
  23. data/lib/sportdb/formats/structs/match.rb +7 -1
  24. data/lib/sportdb/formats/structs/round.rb +6 -13
  25. data/lib/sportdb/formats/structs/season.rb +114 -45
  26. data/lib/sportdb/formats/structs/standings.rb +30 -9
  27. data/lib/sportdb/formats/structs/team.rb +8 -2
  28. data/lib/sportdb/formats/team/club_index.rb +13 -11
  29. data/lib/sportdb/formats/team/club_index_history.rb +138 -0
  30. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  31. data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
  32. data/lib/sportdb/formats/version.rb +2 -2
  33. data/test/helper.rb +48 -81
  34. data/test/test_club_index_history.rb +107 -0
  35. data/test/test_club_reader_history.rb +212 -0
  36. data/test/test_country_reader.rb +2 -2
  37. data/test/test_datafile_package.rb +1 -1
  38. data/test/test_match_status_parser.rb +49 -0
  39. data/test/test_regex.rb +25 -7
  40. data/test/test_scores.rb +2 -0
  41. data/test/test_season.rb +68 -19
  42. metadata +12 -15
  43. data/test/test_conf.rb +0 -65
  44. data/test/test_csv_match_parser.rb +0 -114
  45. data/test/test_csv_match_parser_utils.rb +0 -20
  46. data/test/test_match_auto.rb +0 -72
  47. data/test/test_match_auto_champs.rb +0 -45
  48. data/test/test_match_auto_euro.rb +0 -37
  49. data/test/test_match_auto_worldcup.rb +0 -61
  50. data/test/test_match_champs.rb +0 -27
  51. data/test/test_match_eng.rb +0 -26
  52. data/test/test_match_euro.rb +0 -27
  53. data/test/test_match_worldcup.rb +0 -27
@@ -11,10 +11,9 @@ class Team
11
11
  ## todo: use just names for alt_names - why? why not?
12
12
  attr_accessor :key, :name, :alt_names,
13
13
  :code, ## code == abbreviation e.g. ARS etc.
14
- :year, :year_end, ## todo/fix: change year_end to end_year (like in season)!!!
14
+ :year, :year_end, ## todo/fix: change year to start_year and year_end to end_year (like in season)!!!
15
15
  :country
16
16
 
17
- alias_method :title, :name ## add alias/compat - why? why not
18
17
 
19
18
  def names
20
19
  ## todo/check: add alt_names_auto too? - why? why not?
@@ -66,6 +65,13 @@ class Team
66
65
  end
67
66
 
68
67
 
68
+ ## add convenience lookup helper / method for name by season for now
69
+ ## use clubs history - for now kept separate from struct - why? why not?
70
+ def name_by_season( season )
71
+ ## note: returns / fallback to "regular" name if no records found in history
72
+ SportDb::Import.catalog.clubs_history.find_name_by( name: name, season: season ) || name
73
+ end
74
+
69
75
  ## helper methods for import only
70
76
  ## check for duplicates
71
77
  include NameHelper
@@ -54,6 +54,7 @@ class ClubIndex
54
54
  ## normalize( name )
55
55
 
56
56
  def strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
57
+ ## change/rename to strip_wiki_qualifier or such - why? why not?
57
58
  ## note: strip disambiguation qualifier from wikipedia page name if present
58
59
  ## note: only remove year and foot... for now
59
60
  ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
@@ -178,22 +179,24 @@ class ClubIndex
178
179
  ## todo/fix/check: use rename to find_canon or find_canonical() or something??
179
180
  ## remove (getting used?) - why? why not?
180
181
  def []( name ) ## lookup by canoncial name only; todo/fix: add find alias why? why not?
182
+ puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
181
183
  @clubs[ name ]
182
184
  end
183
185
 
184
186
 
185
- ## todo/fix/check: return empty array if no match!!!
186
- ## and NOT nil (add || []) - why? why not?
187
187
  def match( name )
188
+ # note: returns empty array (e.g. []) if no match and NOT nil
188
189
  name = normalize( name )
189
- m = @clubs_by_name[ name ]
190
+ m = @clubs_by_name[ name ] || []
190
191
 
191
192
  ## no match - retry with unaccented variant if different
192
193
  ## e.g. example is Preussen Münster (with mixed accent and unaccented letters) that would go unmatched for now
193
194
  ## Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
194
- if m.nil?
195
+ if m.empty?
195
196
  name2 = unaccent( name )
196
- m = @clubs_by_name[ name2 ] if name2 != name
197
+ if name2 != name
198
+ m = @clubs_by_name[ name2 ] || []
199
+ end
197
200
  end
198
201
  m
199
202
  end
@@ -227,10 +230,8 @@ class ClubIndex
227
230
  country = country( country )
228
231
 
229
232
  ## note: match must for now always include name
230
- if m ## filter by country
231
- m = m.select { |club| club.country.key == country.key }
232
- m = nil if m.empty? ## note: reset to nil if no more matches
233
- end
233
+ ## filter by country
234
+ m = m.select { |club| club.country.key == country.key }
234
235
  end
235
236
  m
236
237
  end
@@ -263,7 +264,7 @@ class ClubIndex
263
264
 
264
265
  m = match_by( name: name, country: country )
265
266
 
266
- if m.nil?
267
+ if m.empty?
267
268
  ## (re)try with second country - quick hacks for known leagues
268
269
  ## todo/fix: add league flag to activate!!! - why? why not
269
270
  m = match_by( name: name, country: 'wal' ) if country.key == 'eng'
@@ -272,6 +273,7 @@ class ClubIndex
272
273
  m = match_by( name: name, country: 'mc' ) if country.key == 'fr'
273
274
  m = match_by( name: name, country: 'li' ) if country.key == 'ch'
274
275
  m = match_by( name: name, country: 'ca' ) if country.key == 'us'
276
+ m = match_by( name: name, country: 'nz' ) if country.key == 'au'
275
277
  end
276
278
  else ## try "global" search - no country passed in
277
279
  m = match( name )
@@ -279,7 +281,7 @@ class ClubIndex
279
281
 
280
282
 
281
283
  club = nil
282
- if m.nil?
284
+ if m.empty?
283
285
  ## puts "** !!! WARN !!! no match for club >#{name}<"
284
286
  elsif m.size > 1
285
287
  puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
@@ -0,0 +1,138 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
## Index of club name histories, keyed by the (canonical) club name.
##
## Every entry in the index is a list of history records, kept sorted
## newest season first:
##
##   "Arsenal FC" =>
##     [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
##      [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]]]
##
## note: the name history is kept separate from the club struct for now
##       (experimental).
class ClubHistoryIndex

  ## Builds an index from all club history datafiles found in the
  ## package at path (Package lets us use a directory or a zip archive).
  def self.build( path )
    pack = Package.new( path )

    recs = []
    pack.each_clubs_history do |entry|
      recs.concat( ClubHistoryReader.parse( entry.read ) )
    end

    index = new
    index.add( recs )
    index
  end


  def catalog() Import.catalog; end


  def initialize
    @clubs  = {}   ## history records (indexed) by canonical club name
    @errors = []
  end

  attr_reader :errors
  def errors?() !@errors.empty?; end

  def mappings() @clubs; end   ## todo/check: rename to records or histories or something - why? why not?


  ## Appends one history record to the club's list and re-sorts the list.
  ##   club_rec - object responding to name (used as the index key)
  ##   keyword  - history keyword string, e.g. 'RENAME'
  ##   season   - season object (and NOT the season key); must be comparable
  ##   args     - the [name, country] pairs of the record (old first, new second)
  def add_history( club_rec, keyword, season, args )
    history = @clubs[ club_rec.name ] ||= []

    history << [season, [keyword, args]]

    ## note: always keep records sorted by season - newest first;
    ##       find_name_by relies on this order
    history.sort! { |l,r| r[0] <=> l[0] }
  end


  ## Adds one record or a list of records as produced by ClubHistoryReader, e.g.
  ##   ['RENAME', '1927/28', ['The Arsenal FC, London', 'eng'], ['Arsenal FC', 'eng']]
  ##
  ## note: only (re)name records (RENAME, REFORM, MOVE) get added for now;
  ##       MERGE and BANKRUPT records are skipped.
  def add( rec_or_recs )
    ## note: a single record is an array too, so checking for Array alone
    ##   cannot tell one record from a list of records;
    ##   a record always starts with its keyword string - a list never does
    single = !rec_or_recs.is_a?( Array ) || rec_or_recs[0].is_a?( String )
    recs   = single ? [rec_or_recs] : rec_or_recs

    recs.each do |rec|
      keyword    = rec[0]
      season_key = rec[1]
      args       = rec[2..-1]    ## get rest of args e.g. one, two or more

      next if ['MERGE', 'BANKRUPT'].include?( keyword )

      name_old = strip_geo( args[0][0] )  ## note: strip optional geo part from name
      name_new = strip_geo( args[1][0] )

      country_old = args[0][1]
      country_new = args[1][1]

      club_old = catalog.clubs.find_by!( name: name_old, country: country_old )
      club_new = catalog.clubs.find_by!( name: name_new, country: country_new )

      ## note use season obj for now (and NOT key) - why? why not?
      season = Season.new( season_key )

      ## todo/check:
      ##   check if club_old and club_new reference different club record!!
      ##    examples - RB II -> Liefering ??  or
      ##               FC Pasching -> OOE Juniors ??
      ##               Austria Salzburg -> RB Salburg ??
      ##   for now always add name history to both - why? why not?
      add_history( club_old, keyword, season, args )
      add_history( club_new, keyword, season, args )  if club_old != club_new
    end
  end


  ## Looks up the name a club used in the given season.
  ##   name   - canonical club name (index key)
  ##   season - season object or anything accepted by Season.new
  ##
  ## Returns the name as a string (geo part stripped)
  ## or nil if the club has no history records.
  def find_name_by( name:, season: )
    recs = @clubs[ name ]
    return nil if recs.nil?

    season_obj = season( season )   ## make sure season is a season obj (and NOT a string)

    ## records are sorted newest first - the first record at or before the
    ##  requested season holds the name in use (its second / "new" arg);
    ##  use linear search (assume only few records)
    hit = recs.find { |rec| season_obj >= rec[0] }
    if hit
      strip_geo( hit[1][1][1][0] )
    else
      ## season is older than all records - use the oldest record's first / "old" arg
      strip_geo( recs[-1][1][1][0][0] )
    end
  end


  ##################
  ## helpers

  ## Returns season as is if it is a season object - otherwise wraps it.
  def season( season )
    season.is_a?( Season ) ? season : Season.new( season )
  end

  ## Strips the optional geo part, e.g. Arsenal, London => Arsenal
  ## note: returns an empty string for an empty (or comma-only) name
  def strip_geo( name )
    name.split( ',' )[0].to_s.strip
  end
end # class ClubHistoryIndex
136
+
137
+ end # module Import
138
+ end # module SportDb
@@ -0,0 +1,203 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
## Reads club history datafiles in outline format and turns them into
## plain record arrays:
##
##   ['RENAME',   season_key, [old_name, country_key], [new_name, country_key]]   (same for REFORM, MOVE)
##   ['BANKRUPT', season_key, [name, country_key]]
##   ['MERGE',    season_key, [[name, country_key], ...], [new_name, country_key]]
##
## Expected layout: heading 1 = country, heading 2 = season,
## followed by paragraphs with keyword lines.
##
## note: on any format error the reader prints a message and exits (exit 1).
class ClubHistoryReader

  def catalog() Import.catalog; end


  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end


  ###
  ## RENAME/RENAMED
  ## MOVE/MOVED
  ## BANKRUPT/BANKRUPTED
  ## REFORM/REFORMED
  ## MERGE/MERGED  - allow + or ++ or +++ or ; for "inline" - why? why not?

  KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
                                    MOVED?|
                                    BANKRUPT(?:ED)?|
                                    REFORM(?:ED)?|
                                    MERGED?
                         )
                         [ ]+
                         (?<text>.*)    # rest of text
                         $
                       }x

  ## maps all keyword spellings accepted by KEYWORD_LINE_RE to the "normalized" keyword
  KEYWORDS = {
    'BANKRUPT' => 'BANKRUPT', 'BANKRUPTED' => 'BANKRUPT',
    'RENAME'   => 'RENAME',   'RENAMED'    => 'RENAME',
    'REFORM'   => 'REFORM',   'REFORMED'   => 'REFORM',
    'MOVE'     => 'MOVE',     'MOVED'      => 'MOVE',
    'MERGE'    => 'MERGE',    'MERGED'     => 'MERGE',
  }.freeze


  ## Parses the text passed to new and returns the list of records
  ## (see the class comment for the record layouts).
  def parse
    recs = []

    last_country = nil
    last_season  = nil
    last_keyword = nil
    last_teams   = []    ## teams collected so far for a (multi-line) MERGE

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        heading_level = node[0][1].to_i    ## e.g. :h2 => 2
        heading       = node[1]

        puts "heading #{heading_level} >#{heading}<"


        if heading_level == 1
          ## assume country in heading; allow all "formats" supported by parse e.g.
          ##   Österreich • Austria (at)
          ##   Österreich • Austria
          ##   Austria
          ##   Deutschland (de) • Germany
          country = catalog.countries.parse( heading )
          ## check country code - MUST exist for now!!!!
          if country.nil?
            puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
            exit 1
          end
          puts "  country >#{heading}< => #{country.name}, #{country.key}"
          last_country = country
          last_season  = nil    ## reset "lower levels" - season & keyword
          last_keyword = nil
        elsif heading_level == 2
          ## assume season
          season = Season.new( heading )
          puts "  season >#{heading}< => #{season.key}"
          last_season  = season
          last_keyword = nil    ## reset "lower levels" - keyword
        else
          puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
          exit 1
        end

      elsif node[0] == :p   ## paragraph with (text) lines
        if last_country.nil?
          puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
          exit 1
        end
        if last_season.nil?
          puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
          exit 1
        end

        lines = node[1]
        lines.each do |line|
          if m=line.match(KEYWORD_LINE_RE)    ## extract keyword and continue
            keyword = m[:keyword]
            line    = m[:text].strip

            puts "    keyword #{keyword}"
            ## note: the regex only lets known keywords through;
            ##       the error block is kept as a safety net
            last_keyword = KEYWORDS.fetch( keyword ) do
              puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
              exit 1
            end

            last_teams = []   ## a new keyword always starts a new team list
          end

          if last_keyword.nil?
            puts "!!! ERROR [club history reader] - line with keyword expected - got:"
            puts line
            exit 1
          end

          if last_keyword == 'BANKRUPT'
            ## requires / expects one team in one line
            recs << [ last_keyword, last_season.key,
                      [ squish(line), last_country.key ]
                    ]
          elsif last_keyword == 'RENAME' ||
                last_keyword == 'REFORM' ||
                last_keyword == 'MOVE'
            ## requires / expects two teams in one line (separated by ⇒ or such)
            teams = line.split( '⇒' )
            if teams.size != 2
              puts "!!! ERROR [club history reader] - expected two teams - got:"
              pp teams
              exit 1
            end
            teams = teams.map {|team| squish(team) }
            recs << [ last_keyword, last_season.key,
                      [ teams[0], last_country.key ],
                      [ teams[1], last_country.key ]
                    ]
          elsif last_keyword == 'MERGE'
            ## check if line starts with separator
            ##  otherwise collect to be merged teams
            if line.start_with?( '⇒' )
              if last_teams.size < 2
                puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
                pp last_teams
                exit 1
              end
              ## auto-add country to all teams
              teams = last_teams.map {|team| [team, last_country.key]}
              recs << [ last_keyword, last_season.key,
                        teams,
                        [ squish(line.sub('⇒','')), last_country.key ]
                      ]

              last_teams = []
            else
              last_teams << squish(line)
            end
          else
            puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
            exit 1
          end
        end  # each line (in paragraph)
      else
        puts "** !!! ERROR [club history reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    recs
  end  # method parse


  ###############
  ## helper

  ## Collapses every run of whitespace (spaces, tabs, etc.) to one space
  ## and trims leading / trailing whitespace, so that all team names get
  ## stored in the same normalized form whatever line type they come from.
  def squish( str )
    str.gsub( /\s+/, ' ' ).strip
  end


end # class ClubHistoryReader
200
+
201
+
202
+ end ## module Import
203
+ end ## module SportDb
@@ -36,17 +36,16 @@ class ClubPropsReader
36
36
 
37
37
  ## find / match club by (canonical) name
38
38
  m = catalog.clubs.match( name )
39
- if m && m.size > 1
39
+ if m.size > 1
40
40
  puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
41
41
  pp rec
42
42
  pp m
43
43
 
44
44
  ## todo/fix: try filter by canonical name if more than one match
45
45
  m = m.select { |club| club.name == name }
46
- m = nil if m.empty? ## note: reset to nil if no more matches
47
46
  end
48
47
 
49
- if m.nil?
48
+ if m.empty?
50
49
  puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
51
50
  pp rec
52
51
  exit 1