sportdb-formats 1.0.3 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4a27c364d2003ece8da886c892d34d595ffcffcc
4
- data.tar.gz: 81530dfec5decf5d9476a3f2be90bcbeb0195824
3
+ metadata.gz: 4cdd2bc410771494ed506a24d384ca3c8b1c9684
4
+ data.tar.gz: 066f5288da503a00efe280369f57d6cd65bf4bf7
5
5
  SHA512:
6
- metadata.gz: 4a2aabb65968939d55f2000c64e4ac5df4cbedfe6b3786fd12ff86d26bd5e6af12c32ea129b59a7ddf30736afa6f093e0e4f2e97f2c6f0b23bffd6901a83ca91
7
- data.tar.gz: 63f4ca42ebb537173334a931a02331b8cbdc27d9e953052c8126d9e683e54a3103eb496103fea400283c6b0b08fa7c6e738466f215e26f9567a3b822a80c172d
6
+ metadata.gz: f61edee9495047fc49dfb5720c7afc1e0e316e5fab024d8fc0b1bd5fcdad70524f6e22d24d8fad0aa679380565e0a0c7e36fae79c170655f8a7a496dee170aca
7
+ data.tar.gz: b378202d2c8152ac46386618d3c69c03d802b1439e74175f8d84fad304ce111edb0bb1cd848fc969848ce55e0cf8de15a46bec322351d5d199652ed16d41b164
@@ -63,6 +63,7 @@ test/test_match.rb
63
63
  test/test_match_auto.rb
64
64
  test/test_match_auto_champs.rb
65
65
  test/test_match_auto_euro.rb
66
+ test/test_match_auto_relegation.rb
66
67
  test/test_match_auto_worldcup.rb
67
68
  test/test_match_champs.rb
68
69
  test/test_match_eng.rb
@@ -30,11 +30,11 @@ class CountryIndex
30
30
  ## auto-fill countries
31
31
  ## pp recs
32
32
  recs.each do |rec|
33
- ## rec e.g. { key:'af', fifa:'AFG', name:'Afghanistan'}
33
+ ## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
34
34
 
35
35
  @countries << rec
36
36
 
37
- ## add codes lookups - key, fifa, ...
37
+ ## add codes lookups - key, code, ...
38
38
  if @countries_by_code[ rec.key ]
39
39
  puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
40
40
  exit 1
@@ -42,13 +42,13 @@ class CountryIndex
42
42
  @countries_by_code[ rec.key ] = rec
43
43
  end
44
44
 
45
- ## add fifa code (only) if different from key
46
- if rec.key != rec.fifa.downcase
47
- if @countries_by_code[ rec.fifa.downcase ]
48
- puts "** !! ERROR !! country code (fifa) >#{rec.fifa.downcase}< already exits!!"
45
+ ## add code (only) if different from key
46
+ if rec.key != rec.code.downcase
47
+ if @countries_by_code[ rec.code.downcase ]
48
+ puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
49
49
  exit 1
50
50
  else
51
- @countries_by_code[ rec.fifa.downcase ] = rec
51
+ @countries_by_code[ rec.code.downcase ] = rec
52
52
  end
53
53
  end
54
54
 
@@ -43,11 +43,31 @@ def parse
43
43
  # e.g. East Germany (-1989) => East Germany (-1989)
44
44
  values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
45
  last_country.alt_names += values
46
+ elsif line =~ /^-[ ]*(\d{4})
47
+ [ ]+
48
+ (.+)$
49
+ /x ## check for historic lines e.g. -1989
50
+ year = $1.to_i
51
+ parts = $2.split( /=>|⇒/ )
52
+ values = parts[0].split( ',' )
53
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
+
55
+ name = values[0]
56
+ code = values[1]
57
+
58
+ last_country = country = Country.new( name: "#{name} (-#{year})",
59
+ code: code )
60
+ ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
+
62
+ countries << country
63
+ ## todo/fix: add reference to country today (in parts[1] !!!!)
46
64
  else
47
65
  ## assume "regular" line
48
66
  ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
49
67
  ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
50
- if line =~ /^([a-z]{2,4})[ ]+(.+)$/
68
+ if line =~ /^([a-z]{2,4})
69
+ [ ]+
70
+ (.+)$/x
51
71
  key = $1
52
72
  values = $2.split( ',' )
53
73
  ## strip and squish (white)spaces
@@ -60,14 +80,14 @@ def parse
60
80
  geos = split_geo( values[0] )
61
81
  name = geos[0] ## note: ignore all other geos for now
62
82
 
63
- ## note: allow fifa country codes upto 4 (!!) e.g. Northern Cyprus
64
- fifa = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
83
+ ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
+ code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
65
85
  values[1]
66
86
  else
67
87
  if values[1]
68
- puts "** !!! ERROR !!! wrong fifa code format >#{values[1]}<; expected three (or four)-letter all up-case"
88
+ puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
69
89
  else
70
- puts "** !!! ERROR !!! missing fifa code for (canonical) country name"
90
+ puts "** !!! ERROR !!! missing code for (canonical) country name"
71
91
  end
72
92
  exit 1
73
93
  end
@@ -80,7 +100,7 @@ def parse
80
100
 
81
101
  last_country = country = Country.new( key: key,
82
102
  name: name,
83
- fifa: fifa,
103
+ code: code,
84
104
  tags: tags )
85
105
  countries << country
86
106
  else
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
121
121
  values
122
122
  end
123
123
 
124
- def check_stage( name )
125
- known_stages = ['regular season',
126
- 'championship round',
127
- 'relegation round',
128
- 'play-offs'
129
- ]
130
124
 
131
- if known_stages.include?( name.downcase )
125
+ # note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
126
+ KNOWN_STAGES = [
127
+ 'Regular Season',
128
+ 'Regular Stage',
129
+ 'Championship Round',
130
+ 'Championship Playoff',
131
+ 'Relegation Round',
132
+ 'Relegation Playoff',
133
+ 'Play-offs',
134
+ 'Playoff Stage',
135
+ 'Grunddurchgang',
136
+ 'Finaldurchgang - Qualifikationsgruppe',
137
+ 'Finaldurchgang - Qualifikation',
138
+ 'Finaldurchgang - Meistergruppe',
139
+ 'Finaldurchgang - Meister',
140
+ 'EL Play-off',
141
+ 'Europa League Play-off',
142
+ 'Europa-League-Play-offs',
143
+ ].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
144
+
145
+
146
+ def check_stage( name )
147
+ # note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
148
+ if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
132
149
  ## everything ok
133
150
  else
134
151
  puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
@@ -113,9 +113,9 @@ def parse
113
113
  ## add a list of (auto-)excluded country codes with conflicts? why? why not?
114
114
  ## cl - a) Chile b) Champions League
115
115
  alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
116
- if country.key.upcase != country.fifa
117
- alt_names_auto << "#{country.fifa} #{league_key.upcase.gsub('.', ' ')}"
118
- alt_names_auto << "#{country.fifa}" if league_key == '1' ## add shortcut for top level 1 (just country key)
116
+ if country.key.upcase != country.code
117
+ alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
118
+ alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
119
119
  end
120
120
  alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
121
121
  else ## assume int'l (no country) e.g. champions league, etc.
@@ -7,21 +7,21 @@ module SportDb
7
7
  ## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
8
8
 
9
9
 
10
- class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why not??
10
+ class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
11
11
 
12
12
  include Logging
13
13
 
14
- attr_reader :known_titles ## rename to mapping or mappings or just titles - why? why not?
14
+ attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
15
15
 
16
16
  ########
17
17
  ## key: e.g. augsburg
18
- ## title: e.g. FC Augsburg
19
- ## length (of title(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
20
- MappingStruct = Struct.new( :key, :title, :length, :pattern) ## todo/check: use (rename to) TitleStruct - why? why not??
18
+ ## name: e.g. FC Augsburg
19
+ ## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
20
+ MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
21
21
 
22
22
  ######
23
23
  ## convenience helper - (auto)build ActiveRecord-like team records/structs
24
- Record = Struct.new( :key, :title, :synonyms )
24
+ Record = Struct.new( :key, :name, :alt_names )
25
25
  def build_records( txt_or_lines )
26
26
  recs = []
27
27
 
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
44
44
  values = line.split( '|' )
45
45
  values = values.map { |value| value.strip }
46
46
 
47
- title = values[0]
47
+ name = values[0]
48
48
  ## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
49
- key = title.downcase.gsub( /[^a-z]/, '' )
50
- synonyms = values.size > 1 ? values[1..-1].join( '|' ) : nil
49
+ key = name.downcase.gsub( /[^a-z]/, '' )
50
+ alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
51
51
 
52
- recs << Record.new( key, title, synonyms )
52
+ recs << Record.new( key, name, alt_names )
53
53
  end
54
54
  recs
55
55
  end
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
63
63
  (records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
64
64
 
65
65
  ## build mapping lookup table
66
- @known_titles = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (title/name=>record)
67
- build_title_table_for_mapping( records_or_mapping )
66
+ @known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
67
+ build_name_table_for_mapping( records_or_mapping )
68
68
  else ## assume array of records
69
- build_title_table_for_records( records_or_mapping )
69
+ build_name_table_for_records( records_or_mapping )
70
70
  end
71
71
 
72
72
  ## build lookup hash by record (e.g. team/club/etc.) key
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
85
85
 
86
86
 
87
87
 
88
- def map_titles!( line ) ## rename to just map! - why?? why not???
88
+ def map_names!( line ) ## rename to just map! - why?? why not???
89
89
  begin
90
- found = map_title_for!( @tag, line, @known_titles )
90
+ found = map_name_for!( @tag, line, @known_names )
91
91
  end while found
92
92
  end
93
93
 
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
110
110
 
111
111
 
112
112
  private
113
- def build_title_table_for_mapping( mapping )
114
- known_titles = []
113
+ def build_name_table_for_mapping( mapping )
114
+ known_names = []
115
115
 
116
- mapping.each do |title, rec|
116
+ mapping.each do |name, rec|
117
117
  m = MappingStruct.new
118
118
  m.key = rec.key
119
- m.title = title
120
- m.length = title.length
121
- m.pattern = Regexp.escape( title ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
119
+ m.name = name
120
+ m.length = name.length
121
+ m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
122
122
 
123
- known_titles << m
123
+ known_names << m
124
124
  end
125
125
 
126
126
  ## note: sort here by length (largest goes first - best match)
127
- known_titles = known_titles.sort { |l,r| r.length <=> l.length }
128
- known_titles
127
+ known_names = known_names.sort { |l,r| r.length <=> l.length }
128
+ known_names
129
129
  end
130
130
 
131
- def build_title_table_for_records( records )
131
+ def build_name_table_for_records( records )
132
132
 
133
- ## build known tracks table w/ synonyms e.g.
133
+ ## build known tracks table w/ alt names e.g.
134
134
  #
135
135
  # [[ 'wolfsbrug', 'VfL Wolfsburg'],
136
136
  # [ 'augsburg', 'FC Augsburg'],
@@ -138,65 +138,65 @@ private
138
138
  # [ 'augsburg', 'Augi3' ],
139
139
  # [ 'stuttgart', 'VfB Stuttgart']]
140
140
 
141
- known_titles = []
141
+ known_names = []
142
142
 
143
143
  records.each_with_index do |rec,index|
144
144
 
145
- title_candidates = []
146
- title_candidates << rec.title
145
+ name_candidates = []
146
+ name_candidates << rec.name
147
147
 
148
- title_candidates += rec.synonyms.split('|') if rec.synonyms && !rec.synonyms.empty?
148
+ name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
149
149
 
150
150
 
151
- ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
152
- # make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
151
+ ## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
152
+ # make subname optional by adding name w/o subname e.g. Grand Prix Japan
153
153
 
154
- titles = []
155
- title_candidates.each do |t|
156
- titles << t
154
+ names = []
155
+ name_candidates.each do |t|
156
+ names << t
157
157
  if t =~ /\(.+\)/
158
- extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
158
+ extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
159
159
  # note: strip leading n trailing withspaces too!
160
160
  # -- todo: add squish or something if () is inline e.g. leaves two spaces?
161
- extra_title.strip!
162
- titles << extra_title
161
+ extra_name.strip!
162
+ names << extra_name
163
163
  end
164
164
  end
165
165
 
166
- titles.each do |t|
166
+ names.each do |name|
167
167
  m = MappingStruct.new
168
168
  m.key = rec.key
169
- m.title = t
170
- m.length = t.length
169
+ m.name = name
170
+ m.length = name.length
171
171
  ## note: escape for regex plus allow subs for special chars/accents
172
- m.pattern = title_esc_regex( t )
172
+ m.pattern = name_esc_regex( name )
173
173
 
174
- known_titles << m
174
+ known_names << m
175
175
  end
176
176
 
177
- logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
177
+ logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
178
178
 
179
179
  ## note: only include code field - if defined
180
180
  if rec.respond_to?(:code) && rec.code && !rec.code.empty?
181
181
  m = MappingStruct.new
182
182
  m.key = rec.key
183
- m.title = rec.code
183
+ m.name = rec.code
184
184
  m.length = rec.code.length
185
185
  m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
186
186
 
187
- known_titles << m
187
+ known_names << m
188
188
  end
189
189
  end
190
190
 
191
191
  ## note: sort here by length (largest goes first - best match)
192
192
  # exclude code and key (key should always go last)
193
- known_titles = known_titles.sort { |l,r| r.length <=> l.length }
194
- known_titles
193
+ known_names = known_names.sort { |l,r| r.length <=> l.length }
194
+ known_names
195
195
  end
196
196
 
197
197
 
198
198
 
199
- def map_title_for!( tag, line, mappings )
199
+ def map_name_for!( tag, line, mappings )
200
200
  mappings.each do |mapping|
201
201
  key = mapping.key
202
202
  pattern = mapping.pattern
@@ -234,9 +234,9 @@ private
234
234
 
235
235
 
236
236
  ####
237
- # title helper cut-n-paste copy from TextUtils
237
+ # name helper cut-n-paste copy from TextUtils
238
238
  ## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
239
- def title_esc_regex( title_unescaped )
239
+ def name_esc_regex( name_unescaped )
240
240
 
241
241
  ## escape regex special chars e.g.
242
242
  # . to \. and
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
257
257
  # e.g. Club Atlético Colón (Santa Fe)
258
258
  # e.g. Bauer Anton (????)
259
259
 
260
- ## NB: cannot use Regexp.escape! will escape space '' to '\ '
261
- ## title = Regexp.escape( title_unescaped )
262
- title = title_unescaped.gsub( '.', '\.' )
263
- title = title.gsub( '(', '\(' )
264
- title = title.gsub( ')', '\)' )
265
- title = title.gsub( '?', '\?' )
266
- title = title.gsub( '*', '\*' )
267
- title = title.gsub( '+', '\+' )
268
- title = title.gsub( '$', '\$' )
269
- title = title.gsub( '^', '\^' )
260
+ ## note: cannot use Regexp.escape! will escape space '' to '\ '
261
+ ## name = Regexp.escape( name_unescaped )
262
+ name = name_unescaped.gsub( '.', '\.' )
263
+ name = name.gsub( '(', '\(' )
264
+ name = name.gsub( ')', '\)' )
265
+ name = name.gsub( '?', '\?' )
266
+ name = name.gsub( '*', '\*' )
267
+ name = name.gsub( '+', '\+' )
268
+ name = name.gsub( '$', '\$' )
269
+ name = name.gsub( '^', '\^' )
270
270
 
271
271
  ## match accented char with or without accents
272
272
  ## add (ü|ue) etc.
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
309
309
  ## collect some more (real-world) examples first!!!!!
310
310
 
311
311
  alternatives.each do |alt|
312
- title = title.gsub( alt[0], alt[1] )
312
+ name = name.gsub( alt[0], alt[1] )
313
313
  end
314
314
 
315
- title
315
+ name
316
316
  end
317
317
 
318
318
  end # class MapperV2
@@ -16,7 +16,7 @@ class TeamMapper
16
16
  end
17
17
 
18
18
  def map_teams!( line )
19
- @mapper.map_titles!( line )
19
+ @mapper.map_names!( line )
20
20
  end
21
21
  end # class TeamMapper
22
22
 
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
82
82
  # team1 team2 - match (will get new auto-matchday! not last round)
83
83
  @last_round = nil
84
84
 
85
- title, pos = find_group_title_and_pos!( line )
85
+ name = find_group_name!( line )
86
86
 
87
- logger.debug " title: >#{title}<"
88
- logger.debug " pos: >#{pos}<"
87
+ logger.debug " name: >#{name}<"
89
88
  logger.debug " line: >#{line}<"
90
89
 
91
- group = @groups[ title ]
90
+ group = @groups[ name ]
92
91
  if group.nil?
93
- puts "!! ERROR - no group def found for >#{title}<"
92
+ puts "!! ERROR - no group def found for >#{name}<"
94
93
  exit 1
95
94
  end
96
95
 
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
104
103
  @mapper_teams.map_teams!( line )
105
104
  teams = @mapper_teams.find_teams!( line )
106
105
 
107
- title, pos = find_group_title_and_pos!( line )
106
+ name = find_group_name!( line )
108
107
 
109
108
  logger.debug " line: >#{line}<"
110
109
 
111
- group = Import::Group.new( pos: pos,
112
- title: title,
113
- teams: teams.map {|team| team.title } )
110
+ ## todo/check/fix: add back group key - why? why not?
111
+ group = Import::Group.new( name: name,
112
+ teams: teams.map {|team| team.name } )
114
113
 
115
- @groups[ title ] = group
114
+ @groups[ name ] = group
116
115
  end
117
116
 
118
117
 
119
- def find_group_title_and_pos!( line )
118
+ def find_group_name!( line )
120
119
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
121
120
  ## nb: (?:) = is for non-capturing group(ing)
122
121
 
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
125
124
 
126
125
  ## todo:
127
126
  ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
128
- regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/
127
+ regex = /\b
128
+ (?:
129
+ (Group | Gruppe | Grupo)
130
+ [ ]+
131
+ (\d+ | [A-Z]+)
132
+ )
133
+ \b/x
129
134
 
130
135
  m = regex.match( line )
131
136
 
132
- return [nil,nil] if m.nil?
133
-
134
- pos = case m[1]
135
- when 'A' then 1
136
- when 'B' then 2
137
- when 'C' then 3
138
- when 'D' then 4
139
- when 'E' then 5
140
- when 'F' then 6
141
- when 'G' then 7
142
- when 'H' then 8
143
- when 'I' then 9
144
- when 'J' then 10
145
- when 'K' then 11
146
- when 'L' then 12
147
- when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
148
- else m[1].to_i
149
- end
150
-
151
- title = m[0]
152
-
153
- logger.debug " title: >#{title}<"
154
- logger.debug " pos: >#{pos}<"
155
-
156
- line.sub!( regex, '[GROUP.TITLE+POS]' )
157
-
158
- [title,pos]
137
+ return nil if m.nil?
138
+
139
+ name = m[0]
140
+
141
+ logger.debug " name: >#{name}<"
142
+
143
+ line.sub!( name, '[GROUP.NAME]' )
144
+
145
+ name
159
146
  end
160
147
 
161
148
 
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
180
167
  end_date = end_date.to_date
181
168
 
182
169
 
183
- pos = find_round_pos!( line )
184
- title = find_round_def_title!( line )
185
- # NB: use extracted round title for knockout check
186
- knockout_flag = is_knockout_round?( title )
170
+ name = find_round_def_name!( line )
171
+ # NB: use extracted round name for knockout check
172
+ knockout_flag = is_knockout_round?( name )
187
173
 
188
174
 
189
175
  logger.debug " start_date: #{start_date}"
190
176
  logger.debug " end_date: #{end_date}"
191
- logger.debug " pos: #{pos}"
192
- logger.debug " title: >#{title}<"
177
+ logger.debug " name: >#{name}<"
193
178
  logger.debug " knockout_flag: #{knockout_flag}"
194
179
 
195
180
  logger.debug " line: >#{line}<"
196
181
 
197
- #######################################
198
- # todo/fix: add auto flag is false !!!! - why? why not?
199
- round = Import::Round.new( pos: pos,
200
- title: title,
182
+ round = Import::Round.new( name: name,
201
183
  start_date: start_date,
202
184
  end_date: end_date,
203
185
  knockout: knockout_flag,
204
186
  auto: false )
205
187
 
206
- @rounds[ title ] = round
188
+ @rounds[ name ] = round
207
189
  end
208
190
 
209
191
 
210
192
 
211
- def find_round_pos!( line )
212
- # pass #1) extract optional round pos from line
213
- # e.g. (1) - must start line
214
- regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
215
-
216
- # pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
217
- # note: /\b(\d{1,3})\b/
218
- # will match -12
219
- # thus, use space required - will NOT match -2 e.g. Group-2 Play-off
220
- # note: allow 1. Runde n
221
- # 1^ Giornata
222
- regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
223
-
224
- if line =~ regex_pos
225
- logger.debug " pos: >#{$1}<"
226
-
227
- line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
228
- return $1.to_i
229
- elsif line =~ regex_num
230
- ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
231
- ## NB: do NOT remove pos from string (will get removed by round title)
232
-
233
- num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
234
-
235
- #### fix:
236
- # use/make keywords required
237
- # e.g. Round of 16 -> should NOT match 16!
238
- # Spiel um Platz 3 (or 5) etc -> should NOT match 3!
239
- # Round 16 - ok
240
- # thus, check for required keywords
193
+ def find_round_def_name!( line )
194
+ # assume everything before pipe (\) is the round name
195
+ # strip [ROUND.POS], todo:?? [ROUND.NAME2]
241
196
 
242
- ## quick hack for round of 16
243
- # todo: mask match e.g. Round of xxx ... and try again - might include something
244
- # reuse pattern for Group XX Replays for example
245
- if line =~ /^\s*Round of \d{1,3}\b/
246
- return nil
247
- end
248
-
249
- logger.debug " pos: >#{num}<"
250
- return num
251
- else
252
- ## fix: add logger.warn no round pos found in line
253
- return nil
254
- end
255
- end # method find_round_pos!
256
-
257
- def find_round_def_title!( line )
258
- # assume everything before pipe (\) is the round title
259
- # strip [ROUND.POS], todo:?? [ROUND.TITLE2]
260
-
261
- # todo/fix: add title2 w/ // or / why? why not?
197
+ # todo/fix: add name2 w/ // or / why? why not?
262
198
  # -- strip / or / chars
263
199
 
264
200
  buf = line.dup
265
- logger.debug " find_round_def_title! line-before: >>#{buf}<<"
201
+ logger.debug " find_round_def_name! line-before: >>#{buf}<<"
266
202
 
267
203
  ## cut-off everything after (including) pipe (|)
268
204
  buf = buf[ 0...buf.index('|') ]
269
-
270
- # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
271
- buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
272
- # remove leading and trailing whitespace
273
205
  buf.strip!
274
206
 
275
- logger.debug " find_round_def_title! line-after: >>#{buf}<<"
207
+ logger.debug " find_round_def_name! line-after: >>#{buf}<<"
276
208
 
277
- logger.debug " title: >>#{buf}<<"
278
- line.sub!( buf, '[ROUND.TITLE]' )
209
+ logger.debug " name: >>#{buf}<<"
210
+ line.sub!( buf, '[ROUND.NAME]' )
279
211
 
280
212
  buf
281
213
  end
282
214
 
283
- def find_round_header_title!( line )
284
- # assume everything left is the round title
285
- # extract all other items first (round title2, round pos, group title n pos, etc.)
286
215
 
287
- ## todo/fix:
288
- ## cleanup method
289
- ## use buf.index( '//' ) to split string (see found_round_def)
290
- ## why? simpler why not?
291
- ## - do we currently allow groups if title2 present? add example if it works?
216
+ ## split by or || or |||
217
+ ## or ++ or +++
218
+ ## or -- or ---
219
+ ## or // or ///
220
+ ## note: allow Final | First Leg as ONE name same as
221
+ ## Final - First Leg or
222
+ ## Final, First Leg
223
+ ## for cut-off always MUST be more than two chars
224
+ ##
225
+ ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
226
+ ## todo/fix: move to parser utils and add a method split_name or such?
227
+ HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
228
+ (?:\|{2,} |
229
+ \+{2,} |
230
+ -{2,} |
231
+ \/{2,}
232
+ )
233
+ [ ]* ## allow (strip) trailing spaces
234
+ /x
235
+
236
+ def find_round_header_name!( line )
237
+ # assume everything left is the round name
238
+ # extract all other items first (round name2, round pos, group name n pos, etc.)
292
239
 
293
240
  buf = line.dup
294
- logger.debug " find_round_header_title! line-before: >>#{buf}<<"
241
+ logger.debug " find_round_header_name! line-before: >>#{buf}<<"
242
+
243
+
244
+ parts = buf.split( HEADER_SEP_RE )
245
+ buf = parts[0]
295
246
 
296
- buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
297
247
  buf.strip! # remove leading and trailing whitespace
298
248
 
299
- logger.debug " find_round_title! line-after: >>#{buf}<<"
249
+ logger.debug " find_round_name! line-after: >>#{buf}<<"
300
250
 
301
- ### bingo - assume what's left is the round title
251
+ ### bingo - assume what's left is the round name
302
252
 
303
- logger.debug " title: >>#{buf}<<"
304
- line.sub!( buf, '[ROUND.TITLE]' )
253
+ logger.debug " name: >>#{buf}<<"
254
+ line.sub!( buf, '[ROUND.NAME]' )
305
255
 
306
256
  buf
307
257
  end
308
258
 
259
+ ## quick hack- collect all "fillwords" by language!!!!
260
+ ## change later and add to sportdb-langs!!!!
261
+ ##
262
+ ## strip all "fillwords" e.g.:
263
+ ## Nachtrag/Postponed/Addition/Supplemento names
264
+ ##
265
+ ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
266
+ ROUND_EXTRA_WORDS_RE = /\b(?:
267
+ Nachtrag | ## de
268
+ Postponed | ## en
269
+ Addition | ## en
270
+ Supplemento ## es
271
+ )
272
+ \b/ix
309
273
 
310
274
  def parse_round_header( line )
311
275
  logger.debug "parsing round header line: >#{line}<"
312
276
 
313
- ## todo/check/fix:
314
- # make sure Round of 16 will not return pos 16 -- how? possible?
315
- # add unit test too to verify
316
- pos = find_round_pos!( line )
317
-
318
- title = find_round_header_title!( line )
277
+ name = find_round_header_name!( line )
319
278
 
320
279
  logger.debug " line: >#{line}<"
321
280
 
281
+ name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
282
+ name = name.strip
322
283
 
323
- round = @rounds[ title ]
284
+ round = @rounds[ name ]
324
285
  if round.nil? ## auto-add / create if missing
325
- round = Import::Round.new( pos: pos,
326
- title: title )
327
- @rounds[ title ] = round
286
+ ## todo/check: add num (was pos) if present - why? why not?
287
+ round = Import::Round.new( name: name )
288
+ @rounds[ name ] = round
328
289
  end
329
290
 
330
291
  ## todo/check: if pos match (MUST always match for now)
331
292
  @last_round = round
332
293
  @last_group = nil # note: reset group to no group - why? why not?
333
-
334
-
335
- ## NB: dummy/placeholder start_at, end_at date
336
- ## replace/patch after adding all games for round
337
-
338
- =begin
339
- round_attribs = {
340
- title: title,
341
- title2: title2,
342
- knockout: knockout_flag
343
- }
344
-
345
- if pos > 999000
346
- # no pos (e.g. will get autonumbered later) - try match by title for now
347
- # e.g. lets us use title 'Group Replays', for example, multiple times
348
- @round = Round.find_by_event_id_and_title( @event.id, title )
349
- else
350
- @round = Round.find_by_event_id_and_pos( @event.id, pos )
351
- end
352
-
353
- if @round.present?
354
- logger.debug "update round #{@round.id}:"
355
- else
356
- logger.debug "create round:"
357
- @round = Round.new
358
-
359
- round_attribs = round_attribs.merge( {
360
- event_id: @event.id,
361
- pos: pos,
362
- start_at: Date.parse('1911-11-11'),
363
- end_at: Date.parse('1911-11-11')
364
- })
365
- end
366
-
367
- logger.debug round_attribs.to_json
368
-
369
- @round.update_attributes!( round_attribs )
370
-
371
- @patch_round_ids_pos << @round.id if pos > 999000
372
- ### store list of round ids for patching start_at/end_at at the end
373
- @patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
374
- =end
375
294
  end
376
295
 
377
296
 
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
457
376
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
458
377
 
459
378
  @matches << Import::Match.new( date: date,
460
- team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.title)
461
- team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.title)
379
+ team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
380
+ team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
462
381
  score: score,
463
- round: round ? round.title : nil, ## note: for now always use string (assume unique canonical name for event)
464
- group: @last_group ? @last_group.title : nil ) ## note: for now always use string (assume unique canonical name for event)
382
+ round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
383
+ group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
465
384
 
466
385
  ### todo: cache team lookups in hash?
467
386
 
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
517
436
 
518
437
  round_attribs = {
519
438
  event_id: @event.id,
520
- title: "Matchday #{date.to_date}",
439
+ name: "Matchday #{date.to_date}",
521
440
  pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
522
441
  start_at: date.to_date,
523
442
  end_at: date.to_date
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
541
460
  end
542
461
 
543
462
  ## note: will crash (round.pos) if round is nil
544
- logger.debug( " using round #{round.pos} >#{round.title}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
463
+ logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
545
464
  else
546
465
  ## use round from last round header
547
466
  round = @round