sportdb-formats 1.0.5 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +8 -11
  3. data/Rakefile +1 -1
  4. data/lib/sportdb/formats.rb +19 -0
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +141 -0
  7. data/lib/sportdb/formats/event/event_reader.rb +183 -0
  8. data/lib/sportdb/formats/league/league_index.rb +22 -18
  9. data/lib/sportdb/formats/league/league_outline_reader.rb +27 -7
  10. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  11. data/lib/sportdb/formats/match/mapper.rb +63 -63
  12. data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
  13. data/lib/sportdb/formats/match/match_parser.rb +141 -193
  14. data/lib/sportdb/formats/match/match_parser_csv.rb +169 -25
  15. data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
  16. data/lib/sportdb/formats/name_helper.rb +4 -1
  17. data/lib/sportdb/formats/package.rb +57 -9
  18. data/lib/sportdb/formats/parser_helper.rb +11 -2
  19. data/lib/sportdb/formats/score/score_formats.rb +19 -0
  20. data/lib/sportdb/formats/score/score_parser.rb +10 -2
  21. data/lib/sportdb/formats/season_utils.rb +0 -11
  22. data/lib/sportdb/formats/structs/group.rb +5 -12
  23. data/lib/sportdb/formats/structs/match.rb +7 -1
  24. data/lib/sportdb/formats/structs/round.rb +6 -13
  25. data/lib/sportdb/formats/structs/season.rb +114 -45
  26. data/lib/sportdb/formats/structs/standings.rb +30 -9
  27. data/lib/sportdb/formats/structs/team.rb +8 -2
  28. data/lib/sportdb/formats/team/club_index.rb +13 -11
  29. data/lib/sportdb/formats/team/club_index_history.rb +138 -0
  30. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  31. data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
  32. data/lib/sportdb/formats/version.rb +2 -2
  33. data/test/helper.rb +48 -81
  34. data/test/test_club_index_history.rb +107 -0
  35. data/test/test_club_reader_history.rb +212 -0
  36. data/test/test_country_reader.rb +2 -2
  37. data/test/test_datafile_package.rb +1 -1
  38. data/test/test_match_status_parser.rb +49 -0
  39. data/test/test_regex.rb +25 -7
  40. data/test/test_scores.rb +2 -0
  41. data/test/test_season.rb +68 -19
  42. metadata +12 -15
  43. data/test/test_conf.rb +0 -65
  44. data/test/test_csv_match_parser.rb +0 -114
  45. data/test/test_csv_match_parser_utils.rb +0 -20
  46. data/test/test_match_auto.rb +0 -72
  47. data/test/test_match_auto_champs.rb +0 -45
  48. data/test/test_match_auto_euro.rb +0 -37
  49. data/test/test_match_auto_worldcup.rb +0 -61
  50. data/test/test_match_champs.rb +0 -27
  51. data/test/test_match_eng.rb +0 -26
  52. data/test/test_match_euro.rb +0 -27
  53. data/test/test_match_worldcup.rb +0 -27
@@ -95,36 +95,40 @@ class LeagueIndex
95
95
  end # method add
96
96
 
97
97
 
98
+ ## helper to always convert (possible) country key to existing country record
99
+ ## todo: make private - why? why not?
100
+ def country( country )
101
+ if country.is_a?( String ) || country.is_a?( Symbol )
102
+ ## note: use own "global" countries index setting for ClubIndex - why? why not?
103
+ rec = catalog.countries.find( country.to_s )
104
+ if rec.nil?
105
+ puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
106
+ exit 1
107
+ end
108
+ rec
109
+ else
110
+ country ## (re)use country struct - no need to run lookup again
111
+ end
112
+ end
113
+
114
+
98
115
  def match( name )
99
- ## todo/check: return empty array if no match!!! and NOT nil (add || []) - why? why not?
116
+ ## note: returns empty array if no match and NOT nil
100
117
  name = normalize( name )
101
- @leagues_by_name[ name ]
118
+ @leagues_by_name[ name ] || []
102
119
  end
103
120
 
104
-
105
121
  def match_by( name:, country: )
106
122
  ## note: match must for now always include name
107
123
  m = match( name )
108
- if m ## filter by country
124
+ if country ## filter by country
109
125
  ## note: country assumes / allows the country key or fifa code for now
110
-
111
126
  ## note: allow passing in of country struct too
112
- country_rec = if country.is_a?( Country )
113
- country ## (re)use country struct - no need to run lookup again
114
- else
115
- ## note: use own "global" countries index setting for ClubIndex - why? why not?
116
- rec = catalog.countries.find( country )
117
- if rec.nil?
118
- puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
119
- exit 1
120
- end
121
- rec
122
- end
127
+ country_rec = country( country )
123
128
 
124
129
  ## note: also skip international leagues & cups (e.g. champions league etc.) for now - why? why not?
125
130
  m = m.select { |league| league.country &&
126
131
  league.country.key == country_rec.key }
127
- m = nil if m.empty? ## note: reset to nil if no more matches
128
132
  end
129
133
  m
130
134
  end
@@ -144,7 +148,7 @@ class LeagueIndex
144
148
  m = match( name )
145
149
  # pp m
146
150
 
147
- if m.nil?
151
+ if m.empty?
148
152
  ## fall through/do nothing
149
153
  elsif m.size > 1
150
154
  puts "** !!! ERROR - ambigious league name; too many leagues (#{m.size}) found:"
@@ -121,14 +121,34 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
121
121
  values
122
122
  end
123
123
 
124
- def check_stage( name )
125
- known_stages = ['regular season',
126
- 'championship round',
127
- 'relegation round',
128
- 'play-offs'
129
- ]
130
124
 
131
- if known_stages.include?( name.downcase )
125
+ # note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
126
+ KNOWN_STAGES = [
127
+ 'Regular Season',
128
+ 'Regular Stage',
129
+ 'Championship Round',
130
+ 'Championship Playoff', # or Championship play-off
131
+ 'Relegation Round',
132
+ 'Relegation Playoff',
133
+ 'Play-offs',
134
+ 'Playoff Stage',
135
+ 'Grunddurchgang',
136
+ 'Finaldurchgang - Qualifikationsgruppe',
137
+ 'Finaldurchgang - Qualifikation',
138
+ 'Finaldurchgang - Meistergruppe',
139
+ 'Finaldurchgang - Meister',
140
+ 'EL Play-off',
141
+ 'Europa League Play-off',
142
+ 'Europa-League-Play-offs',
143
+ 'Playoffs - Championship',
144
+ 'Playoffs - Relegation',
145
+ 'Finals',
146
+ ].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
147
+
148
+
149
+ def check_stage( name )
150
+ # note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
151
+ if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
132
152
  ## everything ok
133
153
  else
134
154
  puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
@@ -118,12 +118,18 @@ def parse
118
118
  alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
119
119
  end
120
120
  alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
121
+
122
+ ## auto-add with country prepended
123
+ ## e.g. England Premier League, Austria Bundesliga etc.
124
+ ## todo/check: also add variants with country alt name if present!!!
125
+ ## todo/check: exclude cups or such from country + league name auto-add - why? why not?
126
+ alt_names_auto << "#{country.name} #{league_name}"
121
127
  else ## assume int'l (no country) e.g. champions league, etc.
122
128
  ## only auto-add key (e.g. CL, EL, etc.)
123
129
  alt_names_auto << league_key.upcase.gsub('.', ' ') ## note: no country code (prefix/leading) used
124
130
  end
125
131
 
126
- pp alt_names_auto
132
+ ## pp alt_names_auto
127
133
 
128
134
  ## prepend country key/code if country present
129
135
  ## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
@@ -7,21 +7,21 @@ module SportDb
7
7
  ## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
8
8
 
9
9
 
10
- class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why not??
10
+ class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
11
11
 
12
12
  include Logging
13
13
 
14
- attr_reader :known_titles ## rename to mapping or mappings or just titles - why? why not?
14
+ attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
15
15
 
16
16
  ########
17
17
  ## key: e.g. augsburg
18
- ## title: e.g. FC Augsburg
19
- ## length (of title(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
20
- MappingStruct = Struct.new( :key, :title, :length, :pattern) ## todo/check: use (rename to) TitleStruct - why? why not??
18
+ ## name: e.g. FC Augsburg
19
+ ## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
20
+ MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
21
21
 
22
22
  ######
23
23
  ## convenience helper - (auto)build ActiveRecord-like team records/structs
24
- Record = Struct.new( :key, :title, :synonyms )
24
+ Record = Struct.new( :key, :name, :alt_names )
25
25
  def build_records( txt_or_lines )
26
26
  recs = []
27
27
 
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
44
44
  values = line.split( '|' )
45
45
  values = values.map { |value| value.strip }
46
46
 
47
- title = values[0]
47
+ name = values[0]
48
48
  ## note: quick hack - auto-generate key, that is, downcase and remove all non a-z chars
49
- key = title.downcase.gsub( /[^a-z]/, '' )
50
- synonyms = values.size > 1 ? values[1..-1].join( '|' ) : nil
49
+ key = name.downcase.gsub( /[^a-z]/, '' )
50
+ alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
51
51
 
52
- recs << Record.new( key, title, synonyms )
52
+ recs << Record.new( key, name, alt_names )
53
53
  end
54
54
  recs
55
55
  end
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
63
63
  (records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
64
64
 
65
65
  ## build mapping lookup table
66
- @known_titles = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (title/name=>record)
67
- build_title_table_for_mapping( records_or_mapping )
66
+ @known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
67
+ build_name_table_for_mapping( records_or_mapping )
68
68
  else ## assume array of records
69
- build_title_table_for_records( records_or_mapping )
69
+ build_name_table_for_records( records_or_mapping )
70
70
  end
71
71
 
72
72
  ## build lookup hash by record (e.g. team/club/etc.) key
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
85
85
 
86
86
 
87
87
 
88
- def map_titles!( line ) ## rename to just map! - why?? why not???
88
+ def map_names!( line ) ## rename to just map! - why?? why not???
89
89
  begin
90
- found = map_title_for!( @tag, line, @known_titles )
90
+ found = map_name_for!( @tag, line, @known_names )
91
91
  end while found
92
92
  end
93
93
 
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
110
110
 
111
111
 
112
112
  private
113
- def build_title_table_for_mapping( mapping )
114
- known_titles = []
113
+ def build_name_table_for_mapping( mapping )
114
+ known_names = []
115
115
 
116
- mapping.each do |title, rec|
116
+ mapping.each do |name, rec|
117
117
  m = MappingStruct.new
118
118
  m.key = rec.key
119
- m.title = title
120
- m.length = title.length
121
- m.pattern = Regexp.escape( title ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
119
+ m.name = name
120
+ m.length = name.length
121
+ m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
122
122
 
123
- known_titles << m
123
+ known_names << m
124
124
  end
125
125
 
126
126
  ## note: sort here by length (largest goes first - best match)
127
- known_titles = known_titles.sort { |l,r| r.length <=> l.length }
128
- known_titles
127
+ known_names = known_names.sort { |l,r| r.length <=> l.length }
128
+ known_names
129
129
  end
130
130
 
131
- def build_title_table_for_records( records )
131
+ def build_name_table_for_records( records )
132
132
 
133
- ## build known tracks table w/ synonyms e.g.
133
+ ## build known tracks table w/ alt names e.g.
134
134
  #
135
135
  # [[ 'wolfsburg', 'VfL Wolfsburg'],
136
136
  # [ 'augsburg', 'FC Augsburg'],
@@ -138,65 +138,65 @@ private
138
138
  # [ 'augsburg', 'Augi3' ],
139
139
  # [ 'stuttgart', 'VfB Stuttgart']]
140
140
 
141
- known_titles = []
141
+ known_names = []
142
142
 
143
143
  records.each_with_index do |rec,index|
144
144
 
145
- title_candidates = []
146
- title_candidates << rec.title
145
+ name_candidates = []
146
+ name_candidates << rec.name
147
147
 
148
- title_candidates += rec.synonyms.split('|') if rec.synonyms && !rec.synonyms.empty?
148
+ name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
149
149
 
150
150
 
151
- ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
152
- # make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
151
+ ## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
152
+ # make subname optional by adding name w/o subname e.g. Grand Prix Japan
153
153
 
154
- titles = []
155
- title_candidates.each do |t|
156
- titles << t
154
+ names = []
155
+ name_candidates.each do |t|
156
+ names << t
157
157
  if t =~ /\(.+\)/
158
- extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
158
+ extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
159
159
  # note: strip leading and trailing whitespace too!
160
160
  # -- todo: add squish or something if () is inline e.g. leaves two spaces?
161
- extra_title.strip!
162
- titles << extra_title
161
+ extra_name.strip!
162
+ names << extra_name
163
163
  end
164
164
  end
165
165
 
166
- titles.each do |t|
166
+ names.each do |name|
167
167
  m = MappingStruct.new
168
168
  m.key = rec.key
169
- m.title = t
170
- m.length = t.length
169
+ m.name = name
170
+ m.length = name.length
171
171
  ## note: escape for regex plus allow subs for special chars/accents
172
- m.pattern = title_esc_regex( t )
172
+ m.pattern = name_esc_regex( name )
173
173
 
174
- known_titles << m
174
+ known_names << m
175
175
  end
176
176
 
177
- logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
177
+ logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
178
178
 
179
179
  ## note: only include code field - if defined
180
180
  if rec.respond_to?(:code) && rec.code && !rec.code.empty?
181
181
  m = MappingStruct.new
182
182
  m.key = rec.key
183
- m.title = rec.code
183
+ m.name = rec.code
184
184
  m.length = rec.code.length
185
185
  m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
186
186
 
187
- known_titles << m
187
+ known_names << m
188
188
  end
189
189
  end
190
190
 
191
191
  ## note: sort here by length (largest goes first - best match)
192
192
  # exclude code and key (key should always go last)
193
- known_titles = known_titles.sort { |l,r| r.length <=> l.length }
194
- known_titles
193
+ known_names = known_names.sort { |l,r| r.length <=> l.length }
194
+ known_names
195
195
  end
196
196
 
197
197
 
198
198
 
199
- def map_title_for!( tag, line, mappings )
199
+ def map_name_for!( tag, line, mappings )
200
200
  mappings.each do |mapping|
201
201
  key = mapping.key
202
202
  pattern = mapping.pattern
@@ -234,9 +234,9 @@ private
234
234
 
235
235
 
236
236
  ####
237
- # title helper cut-n-paste copy from TextUtils
237
+ # name helper cut-n-paste copy from TextUtils
238
238
  ## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
239
- def title_esc_regex( title_unescaped )
239
+ def name_esc_regex( name_unescaped )
240
240
 
241
241
  ## escape regex special chars e.g.
242
242
  # . to \. and
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
257
257
  # e.g. Club Atlético Colón (Santa Fe)
258
258
  # e.g. Bauer Anton (????)
259
259
 
260
- ## NB: cannot use Regexp.escape! will escape space '' to '\ '
261
- ## title = Regexp.escape( title_unescaped )
262
- title = title_unescaped.gsub( '.', '\.' )
263
- title = title.gsub( '(', '\(' )
264
- title = title.gsub( ')', '\)' )
265
- title = title.gsub( '?', '\?' )
266
- title = title.gsub( '*', '\*' )
267
- title = title.gsub( '+', '\+' )
268
- title = title.gsub( '$', '\$' )
269
- title = title.gsub( '^', '\^' )
260
+ ## note: cannot use Regexp.escape! will escape space ' ' to '\ '
261
+ ## name = Regexp.escape( name_unescaped )
262
+ name = name_unescaped.gsub( '.', '\.' )
263
+ name = name.gsub( '(', '\(' )
264
+ name = name.gsub( ')', '\)' )
265
+ name = name.gsub( '?', '\?' )
266
+ name = name.gsub( '*', '\*' )
267
+ name = name.gsub( '+', '\+' )
268
+ name = name.gsub( '$', '\$' )
269
+ name = name.gsub( '^', '\^' )
270
270
 
271
271
  ## match accented char with or without accents
272
272
  ## add (ü|ue) etc.
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
309
309
  ## collect some more (real-world) examples first!!!!!
310
310
 
311
311
  alternatives.each do |alt|
312
- title = title.gsub( alt[0], alt[1] )
312
+ name = name.gsub( alt[0], alt[1] )
313
313
  end
314
314
 
315
- title
315
+ name
316
316
  end
317
317
 
318
318
  end # class MapperV2
@@ -16,7 +16,7 @@ class TeamMapper
16
16
  end
17
17
 
18
18
  def map_teams!( line )
19
- @mapper.map_titles!( line )
19
+ @mapper.map_names!( line )
20
20
  end
21
21
  end # class TeamMapper
22
22
 
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
82
82
  # team1 team2 - match (will get new auto-matchday! not last round)
83
83
  @last_round = nil
84
84
 
85
- title, pos = find_group_title_and_pos!( line )
85
+ name = find_group_name!( line )
86
86
 
87
- logger.debug " title: >#{title}<"
88
- logger.debug " pos: >#{pos}<"
87
+ logger.debug " name: >#{name}<"
89
88
  logger.debug " line: >#{line}<"
90
89
 
91
- group = @groups[ title ]
90
+ group = @groups[ name ]
92
91
  if group.nil?
93
- puts "!! ERROR - no group def found for >#{title}<"
92
+ puts "!! ERROR - no group def found for >#{name}<"
94
93
  exit 1
95
94
  end
96
95
 
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
104
103
  @mapper_teams.map_teams!( line )
105
104
  teams = @mapper_teams.find_teams!( line )
106
105
 
107
- title, pos = find_group_title_and_pos!( line )
106
+ name = find_group_name!( line )
108
107
 
109
108
  logger.debug " line: >#{line}<"
110
109
 
111
- group = Import::Group.new( pos: pos,
112
- title: title,
113
- teams: teams.map {|team| team.title } )
110
+ ## todo/check/fix: add back group key - why? why not?
111
+ group = Import::Group.new( name: name,
112
+ teams: teams.map {|team| team.name } )
114
113
 
115
- @groups[ title ] = group
114
+ @groups[ name ] = group
116
115
  end
117
116
 
118
117
 
119
- def find_group_title_and_pos!( line )
118
+ def find_group_name!( line )
120
119
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
121
120
  ## nb: (?:) = is for non-capturing group(ing)
122
121
 
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
125
124
 
126
125
  ## todo:
127
126
  ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
128
- regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/
127
+ regex = /\b
128
+ (?:
129
+ (Group | Gruppe | Grupo)
130
+ [ ]+
131
+ (\d+ | [A-Z]+)
132
+ )
133
+ \b/x
129
134
 
130
135
  m = regex.match( line )
131
136
 
132
- return [nil,nil] if m.nil?
133
-
134
- pos = case m[1]
135
- when 'A' then 1
136
- when 'B' then 2
137
- when 'C' then 3
138
- when 'D' then 4
139
- when 'E' then 5
140
- when 'F' then 6
141
- when 'G' then 7
142
- when 'H' then 8
143
- when 'I' then 9
144
- when 'J' then 10
145
- when 'K' then 11
146
- when 'L' then 12
147
- when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
148
- else m[1].to_i
149
- end
137
+ return nil if m.nil?
150
138
 
151
- title = m[0]
139
+ name = m[0]
152
140
 
153
- logger.debug " title: >#{title}<"
154
- logger.debug " pos: >#{pos}<"
141
+ logger.debug " name: >#{name}<"
155
142
 
156
- line.sub!( regex, '[GROUP.TITLE+POS]' )
143
+ line.sub!( name, '[GROUP.NAME]' )
157
144
 
158
- [title,pos]
145
+ name
159
146
  end
160
147
 
161
148
 
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
180
167
  end_date = end_date.to_date
181
168
 
182
169
 
183
- pos = find_round_pos!( line )
184
- title = find_round_def_title!( line )
185
- # NB: use extracted round title for knockout check
186
- knockout_flag = is_knockout_round?( title )
170
+ name = find_round_def_name!( line )
171
+ # NB: use extracted round name for knockout check
172
+ knockout_flag = is_knockout_round?( name )
187
173
 
188
174
 
189
175
  logger.debug " start_date: #{start_date}"
190
176
  logger.debug " end_date: #{end_date}"
191
- logger.debug " pos: #{pos}"
192
- logger.debug " title: >#{title}<"
177
+ logger.debug " name: >#{name}<"
193
178
  logger.debug " knockout_flag: #{knockout_flag}"
194
179
 
195
180
  logger.debug " line: >#{line}<"
196
181
 
197
- #######################################
198
- # todo/fix: add auto flag is false !!!! - why? why not?
199
- round = Import::Round.new( pos: pos,
200
- title: title,
182
+ round = Import::Round.new( name: name,
201
183
  start_date: start_date,
202
184
  end_date: end_date,
203
185
  knockout: knockout_flag,
204
186
  auto: false )
205
187
 
206
- @rounds[ title ] = round
188
+ @rounds[ name ] = round
207
189
  end
208
190
 
209
191
 
210
192
 
211
- def find_round_pos!( line )
212
- # pass #1) extract optional round pos from line
213
- # e.g. (1) - must start line
214
- regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
215
-
216
- # pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
217
- # note: /\b(\d{1,3})\b/
218
- # will match -12
219
- # thus, use space required - will NOT match -2 e.g. Group-2 Play-off
220
- # note: allow 1. Runde n
221
- # 1^ Giornata
222
- regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
223
-
224
- if line =~ regex_pos
225
- logger.debug " pos: >#{$1}<"
226
-
227
- line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
228
- return $1.to_i
229
- elsif line =~ regex_num
230
- ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
231
- ## NB: do NOT remove pos from string (will get removed by round title)
232
-
233
- num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
234
-
235
- #### fix:
236
- # use/make keywords required
237
- # e.g. Round of 16 -> should NOT match 16!
238
- # Spiel um Platz 3 (or 5) etc -> should NOT match 3!
239
- # Round 16 - ok
240
- # thus, check for required keywords
241
-
242
- ## quick hack for round of 16
243
- # todo: mask match e.g. Round of xxx ... and try again - might include something
244
- # reuse pattern for Group XX Replays for example
245
- if line =~ /^\s*Round of \d{1,3}\b/
246
- return nil
247
- end
248
-
249
- logger.debug " pos: >#{num}<"
250
- return num
251
- else
252
- ## fix: add logger.warn no round pos found in line
253
- return nil
254
- end
255
- end # method find_round_pos!
256
-
257
- def find_round_def_title!( line )
258
- # assume everything before pipe (\) is the round title
259
- # strip [ROUND.POS], todo:?? [ROUND.TITLE2]
193
+ def find_round_def_name!( line )
194
+ # assume everything before pipe (|) is the round name
195
+ # strip [ROUND.POS], todo:?? [ROUND.NAME2]
260
196
 
261
- # todo/fix: add title2 w/ // or / why? why not?
197
+ # todo/fix: add name2 w/ // or / why? why not?
262
198
  # -- strip / or / chars
263
199
 
264
200
  buf = line.dup
265
- logger.debug " find_round_def_title! line-before: >>#{buf}<<"
201
+ logger.debug " find_round_def_name! line-before: >>#{buf}<<"
266
202
 
267
203
  ## cut-off everything after (including) pipe (|)
268
204
  buf = buf[ 0...buf.index('|') ]
269
-
270
- # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
271
- buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
272
- # remove leading and trailing whitespace
273
205
  buf.strip!
274
206
 
275
- logger.debug " find_round_def_title! line-after: >>#{buf}<<"
207
+ logger.debug " find_round_def_name! line-after: >>#{buf}<<"
276
208
 
277
- logger.debug " title: >>#{buf}<<"
278
- line.sub!( buf, '[ROUND.TITLE]' )
209
+ logger.debug " name: >>#{buf}<<"
210
+ line.sub!( buf, '[ROUND.NAME]' )
279
211
 
280
212
  buf
281
213
  end
282
214
 
283
- def find_round_header_title!( line )
284
- # assume everything left is the round title
285
- # extract all other items first (round title2, round pos, group title n pos, etc.)
286
215
 
287
- ## todo/fix:
288
- ## cleanup method
289
- ## use buf.index( '//' ) to split string (see found_round_def)
290
- ## why? simpler why not?
291
- ## - do we currently allow groups if title2 present? add example if it works?
216
+ ## split by || or |||
217
+ ## or ++ or +++
218
+ ## or -- or ---
219
+ ## or // or ///
220
+ ## note: allow Final | First Leg as ONE name same as
221
+ ## Final - First Leg or
222
+ ## Final, First Leg
223
+ ## for cut-off always MUST be two or more chars
224
+ ##
225
+ ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
226
+ ## todo/fix: move to parser utils and add a method split_name or such?
227
+ HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
228
+ (?:\|{2,} |
229
+ \+{2,} |
230
+ -{2,} |
231
+ \/{2,}
232
+ )
233
+ [ ]* ## allow (strip) trailing spaces
234
+ /x
235
+
236
+ def find_round_header_name!( line )
237
+ # assume everything left is the round name
238
+ # extract all other items first (round name2, round pos, group name n pos, etc.)
292
239
 
293
240
  buf = line.dup
294
- logger.debug " find_round_header_title! line-before: >>#{buf}<<"
241
+ logger.debug " find_round_header_name! line-before: >>#{buf}<<"
242
+
243
+
244
+ parts = buf.split( HEADER_SEP_RE )
245
+ buf = parts[0]
295
246
 
296
- buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
297
247
  buf.strip! # remove leading and trailing whitespace
298
248
 
299
- logger.debug " find_round_title! line-after: >>#{buf}<<"
249
+ logger.debug " find_round_name! line-after: >>#{buf}<<"
300
250
 
301
- ### bingo - assume what's left is the round title
251
+ ### bingo - assume what's left is the round name
302
252
 
303
- logger.debug " title: >>#{buf}<<"
304
- line.sub!( buf, '[ROUND.TITLE]' )
253
+ logger.debug " name: >>#{buf}<<"
254
+ line.sub!( buf, '[ROUND.NAME]' )
305
255
 
306
256
  buf
307
257
  end
308
258
 
259
+ ## quick hack- collect all "fillwords" by language!!!!
260
+ ## change later and add to sportdb-langs!!!!
261
+ ##
262
+ ## strip all "fillwords" e.g.:
263
+ ## Nachtrag/Postponed/Addition/Supplemento names
264
+ ##
265
+ ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
266
+ ROUND_EXTRA_WORDS_RE = /\b(?:
267
+ Nachtrag | ## de
268
+ Postponed | ## en
269
+ Addition | ## en
270
+ Supplemento ## es
271
+ )
272
+ \b/ix
309
273
 
310
274
  def parse_round_header( line )
311
275
  logger.debug "parsing round header line: >#{line}<"
312
276
 
313
- ## todo/check/fix:
314
- # make sure Round of 16 will not return pos 16 -- how? possible?
315
- # add unit test too to verify
316
- pos = find_round_pos!( line )
317
-
318
- title = find_round_header_title!( line )
277
+ name = find_round_header_name!( line )
319
278
 
320
279
  logger.debug " line: >#{line}<"
321
280
 
281
+ name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
282
+ name = name.strip
322
283
 
323
- round = @rounds[ title ]
284
+ round = @rounds[ name ]
324
285
  if round.nil? ## auto-add / create if missing
325
- round = Import::Round.new( pos: pos,
326
- title: title )
327
- @rounds[ title ] = round
286
+ ## todo/check: add num (was pos) if present - why? why not?
287
+ round = Import::Round.new( name: name )
288
+ @rounds[ name ] = round
328
289
  end
329
290
 
330
291
  ## todo/check: if pos match (MUST always match for now)
331
292
  @last_round = round
332
293
  @last_group = nil # note: reset group to no group - why? why not?
333
-
334
-
335
- ## NB: dummy/placeholder start_at, end_at date
336
- ## replace/patch after adding all games for round
337
-
338
- =begin
339
- round_attribs = {
340
- title: title,
341
- title2: title2,
342
- knockout: knockout_flag
343
- }
344
-
345
- if pos > 999000
346
- # no pos (e.g. will get autonumbered later) - try match by title for now
347
- # e.g. lets us use title 'Group Replays', for example, multiple times
348
- @round = Round.find_by_event_id_and_title( @event.id, title )
349
- else
350
- @round = Round.find_by_event_id_and_pos( @event.id, pos )
351
- end
352
-
353
- if @round.present?
354
- logger.debug "update round #{@round.id}:"
355
- else
356
- logger.debug "create round:"
357
- @round = Round.new
358
-
359
- round_attribs = round_attribs.merge( {
360
- event_id: @event.id,
361
- pos: pos,
362
- start_at: Date.parse('1911-11-11'),
363
- end_at: Date.parse('1911-11-11')
364
- })
365
- end
366
-
367
- logger.debug round_attribs.to_json
368
-
369
- @round.update_attributes!( round_attribs )
370
-
371
- @patch_round_ids_pos << @round.id if pos > 999000
372
- ### store list of round ids for patching start_at/end_at at the end
373
- @patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
374
- =end
375
294
  end
376
295
 
377
296
 
@@ -383,6 +302,11 @@ class MatchParser ## simple match parser for team match schedules
383
302
  ScoreFormats.find!( line )
384
303
  end
385
304
 
305
+ def find_status!( line )
306
+ StatusParser.find!( line )
307
+ end
308
+
309
+
386
310
  def try_parse_game( line )
387
311
  # note: clone line; for possible test do NOT modify in place for now
388
312
  # note: returns true if parsed, false if no match
@@ -410,6 +334,10 @@ class MatchParser ## simple match parser for team match schedules
410
334
  return false
411
335
  end
412
336
 
337
+ ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
338
+ ## or [cancelled] or [postponed] etc.
339
+ status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
340
+
413
341
  ## pos = find_game_pos!( line )
414
342
 
415
343
  date = find_date!( line, start: @start )
@@ -434,20 +362,23 @@ class MatchParser ## simple match parser for team match schedules
434
362
  if @last_round
435
363
  round = @last_round
436
364
  else
437
- ## find (first) matching round by date
438
- @rounds.values.each do |round_rec|
439
- ## note: convert date to date only (no time) with to_date!!!
440
- if (round_rec.start_date && round_rec.end_date) &&
441
- (date.to_date >= round_rec.start_date &&
442
- date.to_date <= round_rec.end_date)
443
- round = round_rec
444
- break
365
+ ## find (first) matching round by date if rounds / matchdays defined
366
+ ## if not rounds / matchdays defined - YES, allow matches WITHOUT rounds!!!
367
+ if @rounds.size > 0
368
+ @rounds.values.each do |round_rec|
369
+ ## note: convert date to date only (no time) with to_date!!!
370
+ if (round_rec.start_date && round_rec.end_date) &&
371
+ (date.to_date >= round_rec.start_date &&
372
+ date.to_date <= round_rec.end_date)
373
+ round = round_rec
374
+ break
375
+ end
376
+ end
377
+ if round.nil?
378
+ puts "!! ERROR - no matching round found for match date:"
379
+ pp date
380
+ exit 1
445
381
  end
446
- end
447
- if round.nil?
448
- puts "!! ERROR - no matching round found for match date:"
449
- pp date
450
- exit 1
451
382
  end
452
383
  end
453
384
 
@@ -457,12 +388,12 @@ class MatchParser ## simple match parser for team match schedules
457
388
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
458
389
 
459
390
  @matches << Import::Match.new( date: date,
460
- team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.title)
461
- team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.title)
391
+ team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
392
+ team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
462
393
  score: score,
463
- round: round ? round.title : nil, ## note: for now always use string (assume unique canonical name for event)
464
- group: @last_group ? @last_group.title : nil ) ## note: for now always use string (assume unique canonical name for event)
465
-
394
+ round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
395
+ group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
396
+ status: status )
466
397
  ### todo: cache team lookups in hash?
467
398
 
468
399
  =begin
@@ -517,7 +448,7 @@ class MatchParser ## simple match parser for team match schedules
517
448
 
518
449
  round_attribs = {
519
450
  event_id: @event.id,
520
- title: "Matchday #{date.to_date}",
451
+ name: "Matchday #{date.to_date}",
521
452
  pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
522
453
  start_at: date.to_date,
523
454
  end_at: date.to_date
@@ -541,7 +472,7 @@ class MatchParser ## simple match parser for team match schedules
541
472
  end
542
473
 
543
474
  ## note: will crash (round.pos) if round is nil
544
- logger.debug( " using round #{round.pos} >#{round.title}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
475
+ logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
545
476
  else
546
477
  ## use round from last round header
547
478
  round = @round
@@ -644,12 +575,29 @@ class MatchParser ## simple match parser for team match schedules
644
575
 
645
576
  if date && team1.nil? && team2.nil?
646
577
  logger.debug( "date header line found: >#{line}<")
647
- logger.debug( " date: #{date}")
578
+ logger.debug( " date: #{date} with start: #{@start}")
648
579
 
649
580
  @last_date = date # keep a reference for later use
650
- return true
581
+
582
+ ### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
583
+ ## find a better way??
584
+ ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
585
+ ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
586
+ ### todo/check: just turn on for 2019/20 season or always? why? why not?
587
+
588
+ ## todo/fix: add switch back to old @start_org
589
+ ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
590
+ if @start.month != 1
591
+ if date.year == @start.year+1
592
+ logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
593
+ @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
594
+ @start = Date.new( @start.year+1, 1, 1 )
595
+ end
596
+ end
597
+
598
+ true
651
599
  else
652
- return false
600
+ false
653
601
  end
654
602
  end
655
603