sportdb-formats 1.0.3 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4a27c364d2003ece8da886c892d34d595ffcffcc
4
- data.tar.gz: 81530dfec5decf5d9476a3f2be90bcbeb0195824
3
+ metadata.gz: 4cdd2bc410771494ed506a24d384ca3c8b1c9684
4
+ data.tar.gz: 066f5288da503a00efe280369f57d6cd65bf4bf7
5
5
  SHA512:
6
- metadata.gz: 4a2aabb65968939d55f2000c64e4ac5df4cbedfe6b3786fd12ff86d26bd5e6af12c32ea129b59a7ddf30736afa6f093e0e4f2e97f2c6f0b23bffd6901a83ca91
7
- data.tar.gz: 63f4ca42ebb537173334a931a02331b8cbdc27d9e953052c8126d9e683e54a3103eb496103fea400283c6b0b08fa7c6e738466f215e26f9567a3b822a80c172d
6
+ metadata.gz: f61edee9495047fc49dfb5720c7afc1e0e316e5fab024d8fc0b1bd5fcdad70524f6e22d24d8fad0aa679380565e0a0c7e36fae79c170655f8a7a496dee170aca
7
+ data.tar.gz: b378202d2c8152ac46386618d3c69c03d802b1439e74175f8d84fad304ce111edb0bb1cd848fc969848ce55e0cf8de15a46bec322351d5d199652ed16d41b164
@@ -63,6 +63,7 @@ test/test_match.rb
63
63
  test/test_match_auto.rb
64
64
  test/test_match_auto_champs.rb
65
65
  test/test_match_auto_euro.rb
66
+ test/test_match_auto_relegation.rb
66
67
  test/test_match_auto_worldcup.rb
67
68
  test/test_match_champs.rb
68
69
  test/test_match_eng.rb
@@ -30,11 +30,11 @@ class CountryIndex
30
30
  ## auto-fill countries
31
31
  ## pp recs
32
32
  recs.each do |rec|
33
- ## rec e.g. { key:'af', fifa:'AFG', name:'Afghanistan'}
33
+ ## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
34
34
 
35
35
  @countries << rec
36
36
 
37
- ## add codes lookups - key, fifa, ...
37
+ ## add codes lookups - key, code, ...
38
38
  if @countries_by_code[ rec.key ]
39
39
  puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
40
40
  exit 1
@@ -42,13 +42,13 @@ class CountryIndex
42
42
  @countries_by_code[ rec.key ] = rec
43
43
  end
44
44
 
45
- ## add fifa code (only) if different from key
46
- if rec.key != rec.fifa.downcase
47
- if @countries_by_code[ rec.fifa.downcase ]
48
- puts "** !! ERROR !! country code (fifa) >#{rec.fifa.downcase}< already exits!!"
45
+ ## add code (only) if different from key
46
+ if rec.key != rec.code.downcase
47
+ if @countries_by_code[ rec.code.downcase ]
48
+ puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
49
49
  exit 1
50
50
  else
51
- @countries_by_code[ rec.fifa.downcase ] = rec
51
+ @countries_by_code[ rec.code.downcase ] = rec
52
52
  end
53
53
  end
54
54
 
@@ -43,11 +43,31 @@ def parse
43
43
  # e.g. East Germany (-1989) => East Germany (-1989)
44
44
  values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
45
  last_country.alt_names += values
46
+ elsif line =~ /^-[ ]*(\d{4})
47
+ [ ]+
48
+ (.+)$
49
+ /x ## check for historic lines e.g. -1989
50
+ year = $1.to_i
51
+ parts = $2.split( /=>|⇒/ )
52
+ values = parts[0].split( ',' )
53
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
+
55
+ name = values[0]
56
+ code = values[1]
57
+
58
+ last_country = country = Country.new( name: "#{name} (-#{year})",
59
+ code: code )
60
+ ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
+
62
+ countries << country
63
+ ## todo/fix: add reference to country today (in parts[1] !!!!)
46
64
  else
47
65
  ## assume "regular" line
48
66
  ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
49
67
  ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
50
- if line =~ /^([a-z]{2,4})[ ]+(.+)$/
68
+ if line =~ /^([a-z]{2,4})
69
+ [ ]+
70
+ (.+)$/x
51
71
  key = $1
52
72
  values = $2.split( ',' )
53
73
  ## strip and squish (white)spaces
@@ -60,14 +80,14 @@ def parse
60
80
  geos = split_geo( values[0] )
61
81
  name = geos[0] ## note: ignore all other geos for now
62
82
 
63
- ## note: allow fifa country codes upto 4 (!!) e.g. Northern Cyprus
64
- fifa = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
83
+ ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
+ code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
65
85
  values[1]
66
86
  else
67
87
  if values[1]
68
- puts "** !!! ERROR !!! wrong fifa code format >#{values[1]}<; expected three (or four)-letter all up-case"
88
+ puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
69
89
  else
70
- puts "** !!! ERROR !!! missing fifa code for (canonical) country name"
90
+ puts "** !!! ERROR !!! missing code for (canonical) country name"
71
91
  end
72
92
  exit 1
73
93
  end
@@ -80,7 +100,7 @@ def parse
80
100
 
81
101
  last_country = country = Country.new( key: key,
82
102
  name: name,
83
- fifa: fifa,
103
+ code: code,
84
104
  tags: tags )
85
105
  countries << country
86
106
  else
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
121
121
  values
122
122
  end
123
123
 
124
- def check_stage( name )
125
- known_stages = ['regular season',
126
- 'championship round',
127
- 'relegation round',
128
- 'play-offs'
129
- ]
130
124
 
131
- if known_stages.include?( name.downcase )
125
+ # note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
126
+ KNOWN_STAGES = [
127
+ 'Regular Season',
128
+ 'Regular Stage',
129
+ 'Championship Round',
130
+ 'Championship Playoff',
131
+ 'Relegation Round',
132
+ 'Relegation Playoff',
133
+ 'Play-offs',
134
+ 'Playoff Stage',
135
+ 'Grunddurchgang',
136
+ 'Finaldurchgang - Qualifikationsgruppe',
137
+ 'Finaldurchgang - Qualifikation',
138
+ 'Finaldurchgang - Meistergruppe',
139
+ 'Finaldurchgang - Meister',
140
+ 'EL Play-off',
141
+ 'Europa League Play-off',
142
+ 'Europa-League-Play-offs',
143
+ ].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
144
+
145
+
146
+ def check_stage( name )
147
+ # note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
148
+ if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
132
149
  ## everything ok
133
150
  else
134
151
  puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
@@ -113,9 +113,9 @@ def parse
113
113
  ## add a list of (auto-)excluded country codes with conflicts? why? why not?
114
114
  ## cl - a) Chile b) Champions League
115
115
  alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
116
- if country.key.upcase != country.fifa
117
- alt_names_auto << "#{country.fifa} #{league_key.upcase.gsub('.', ' ')}"
118
- alt_names_auto << "#{country.fifa}" if league_key == '1' ## add shortcut for top level 1 (just country key)
116
+ if country.key.upcase != country.code
117
+ alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
118
+ alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
119
119
  end
120
120
  alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
121
121
  else ## assume int'l (no country) e.g. champions league, etc.
@@ -7,21 +7,21 @@ module SportDb
7
7
  ## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
8
8
 
9
9
 
10
- class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why not??
10
+ class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
11
11
 
12
12
  include Logging
13
13
 
14
- attr_reader :known_titles ## rename to mapping or mappings or just titles - why? why not?
14
+ attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
15
15
 
16
16
  ########
17
17
  ## key: e.g. augsburg
18
- ## title: e.g. FC Augsburg
19
- ## length (of title(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
20
- MappingStruct = Struct.new( :key, :title, :length, :pattern) ## todo/check: use (rename to) TitleStruct - why? why not??
18
+ ## name: e.g. FC Augsburg
19
+ ## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
20
+ MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
21
21
 
22
22
  ######
23
23
  ## convenience helper - (auto)build ActiveRecord-like team records/structs
24
- Record = Struct.new( :key, :title, :synonyms )
24
+ Record = Struct.new( :key, :name, :alt_names )
25
25
  def build_records( txt_or_lines )
26
26
  recs = []
27
27
 
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
44
44
  values = line.split( '|' )
45
45
  values = values.map { |value| value.strip }
46
46
 
47
- title = values[0]
47
+ name = values[0]
48
48
  ## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
49
- key = title.downcase.gsub( /[^a-z]/, '' )
50
- synonyms = values.size > 1 ? values[1..-1].join( '|' ) : nil
49
+ key = name.downcase.gsub( /[^a-z]/, '' )
50
+ alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
51
51
 
52
- recs << Record.new( key, title, synonyms )
52
+ recs << Record.new( key, name, alt_names )
53
53
  end
54
54
  recs
55
55
  end
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
63
63
  (records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
64
64
 
65
65
  ## build mapping lookup table
66
- @known_titles = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (title/name=>record)
67
- build_title_table_for_mapping( records_or_mapping )
66
+ @known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
67
+ build_name_table_for_mapping( records_or_mapping )
68
68
  else ## assume array of records
69
- build_title_table_for_records( records_or_mapping )
69
+ build_name_table_for_records( records_or_mapping )
70
70
  end
71
71
 
72
72
  ## build lookup hash by record (e.g. team/club/etc.) key
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
85
85
 
86
86
 
87
87
 
88
- def map_titles!( line ) ## rename to just map! - why?? why not???
88
+ def map_names!( line ) ## rename to just map! - why?? why not???
89
89
  begin
90
- found = map_title_for!( @tag, line, @known_titles )
90
+ found = map_name_for!( @tag, line, @known_names )
91
91
  end while found
92
92
  end
93
93
 
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
110
110
 
111
111
 
112
112
  private
113
- def build_title_table_for_mapping( mapping )
114
- known_titles = []
113
+ def build_name_table_for_mapping( mapping )
114
+ known_names = []
115
115
 
116
- mapping.each do |title, rec|
116
+ mapping.each do |name, rec|
117
117
  m = MappingStruct.new
118
118
  m.key = rec.key
119
- m.title = title
120
- m.length = title.length
121
- m.pattern = Regexp.escape( title ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
119
+ m.name = name
120
+ m.length = name.length
121
+ m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
122
122
 
123
- known_titles << m
123
+ known_names << m
124
124
  end
125
125
 
126
126
  ## note: sort here by length (largest goes first - best match)
127
- known_titles = known_titles.sort { |l,r| r.length <=> l.length }
128
- known_titles
127
+ known_names = known_names.sort { |l,r| r.length <=> l.length }
128
+ known_names
129
129
  end
130
130
 
131
- def build_title_table_for_records( records )
131
+ def build_name_table_for_records( records )
132
132
 
133
- ## build known tracks table w/ synonyms e.g.
133
+ ## build known tracks table w/ alt names e.g.
134
134
  #
135
135
  # [[ 'wolfsbrug', 'VfL Wolfsburg'],
136
136
  # [ 'augsburg', 'FC Augsburg'],
@@ -138,65 +138,65 @@ private
138
138
  # [ 'augsburg', 'Augi3' ],
139
139
  # [ 'stuttgart', 'VfB Stuttgart']]
140
140
 
141
- known_titles = []
141
+ known_names = []
142
142
 
143
143
  records.each_with_index do |rec,index|
144
144
 
145
- title_candidates = []
146
- title_candidates << rec.title
145
+ name_candidates = []
146
+ name_candidates << rec.name
147
147
 
148
- title_candidates += rec.synonyms.split('|') if rec.synonyms && !rec.synonyms.empty?
148
+ name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
149
149
 
150
150
 
151
- ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
152
- # make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
151
+ ## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
152
+ # make subname optional by adding name w/o subname e.g. Grand Prix Japan
153
153
 
154
- titles = []
155
- title_candidates.each do |t|
156
- titles << t
154
+ names = []
155
+ name_candidates.each do |t|
156
+ names << t
157
157
  if t =~ /\(.+\)/
158
- extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
158
+ extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
159
159
  # note: strip leading n trailing withspaces too!
160
160
  # -- todo: add squish or something if () is inline e.g. leaves two spaces?
161
- extra_title.strip!
162
- titles << extra_title
161
+ extra_name.strip!
162
+ names << extra_name
163
163
  end
164
164
  end
165
165
 
166
- titles.each do |t|
166
+ names.each do |name|
167
167
  m = MappingStruct.new
168
168
  m.key = rec.key
169
- m.title = t
170
- m.length = t.length
169
+ m.name = name
170
+ m.length = name.length
171
171
  ## note: escape for regex plus allow subs for special chars/accents
172
- m.pattern = title_esc_regex( t )
172
+ m.pattern = name_esc_regex( name )
173
173
 
174
- known_titles << m
174
+ known_names << m
175
175
  end
176
176
 
177
- logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
177
+ logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
178
178
 
179
179
  ## note: only include code field - if defined
180
180
  if rec.respond_to?(:code) && rec.code && !rec.code.empty?
181
181
  m = MappingStruct.new
182
182
  m.key = rec.key
183
- m.title = rec.code
183
+ m.name = rec.code
184
184
  m.length = rec.code.length
185
185
  m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
186
186
 
187
- known_titles << m
187
+ known_names << m
188
188
  end
189
189
  end
190
190
 
191
191
  ## note: sort here by length (largest goes first - best match)
192
192
  # exclude code and key (key should always go last)
193
- known_titles = known_titles.sort { |l,r| r.length <=> l.length }
194
- known_titles
193
+ known_names = known_names.sort { |l,r| r.length <=> l.length }
194
+ known_names
195
195
  end
196
196
 
197
197
 
198
198
 
199
- def map_title_for!( tag, line, mappings )
199
+ def map_name_for!( tag, line, mappings )
200
200
  mappings.each do |mapping|
201
201
  key = mapping.key
202
202
  pattern = mapping.pattern
@@ -234,9 +234,9 @@ private
234
234
 
235
235
 
236
236
  ####
237
- # title helper cut-n-paste copy from TextUtils
237
+ # name helper cut-n-paste copy from TextUtils
238
238
  ## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
239
- def title_esc_regex( title_unescaped )
239
+ def name_esc_regex( name_unescaped )
240
240
 
241
241
  ## escape regex special chars e.g.
242
242
  # . to \. and
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
257
257
  # e.g. Club Atlético Colón (Santa Fe)
258
258
  # e.g. Bauer Anton (????)
259
259
 
260
- ## NB: cannot use Regexp.escape! will escape space '' to '\ '
261
- ## title = Regexp.escape( title_unescaped )
262
- title = title_unescaped.gsub( '.', '\.' )
263
- title = title.gsub( '(', '\(' )
264
- title = title.gsub( ')', '\)' )
265
- title = title.gsub( '?', '\?' )
266
- title = title.gsub( '*', '\*' )
267
- title = title.gsub( '+', '\+' )
268
- title = title.gsub( '$', '\$' )
269
- title = title.gsub( '^', '\^' )
260
+ ## note: cannot use Regexp.escape! will escape space '' to '\ '
261
+ ## name = Regexp.escape( name_unescaped )
262
+ name = name_unescaped.gsub( '.', '\.' )
263
+ name = name.gsub( '(', '\(' )
264
+ name = name.gsub( ')', '\)' )
265
+ name = name.gsub( '?', '\?' )
266
+ name = name.gsub( '*', '\*' )
267
+ name = name.gsub( '+', '\+' )
268
+ name = name.gsub( '$', '\$' )
269
+ name = name.gsub( '^', '\^' )
270
270
 
271
271
  ## match accented char with or without accents
272
272
  ## add (ü|ue) etc.
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
309
309
  ## collect some more (real-world) examples first!!!!!
310
310
 
311
311
  alternatives.each do |alt|
312
- title = title.gsub( alt[0], alt[1] )
312
+ name = name.gsub( alt[0], alt[1] )
313
313
  end
314
314
 
315
- title
315
+ name
316
316
  end
317
317
 
318
318
  end # class MapperV2
@@ -16,7 +16,7 @@ class TeamMapper
16
16
  end
17
17
 
18
18
  def map_teams!( line )
19
- @mapper.map_titles!( line )
19
+ @mapper.map_names!( line )
20
20
  end
21
21
  end # class TeamMapper
22
22
 
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
82
82
  # team1 team2 - match (will get new auto-matchday! not last round)
83
83
  @last_round = nil
84
84
 
85
- title, pos = find_group_title_and_pos!( line )
85
+ name = find_group_name!( line )
86
86
 
87
- logger.debug " title: >#{title}<"
88
- logger.debug " pos: >#{pos}<"
87
+ logger.debug " name: >#{name}<"
89
88
  logger.debug " line: >#{line}<"
90
89
 
91
- group = @groups[ title ]
90
+ group = @groups[ name ]
92
91
  if group.nil?
93
- puts "!! ERROR - no group def found for >#{title}<"
92
+ puts "!! ERROR - no group def found for >#{name}<"
94
93
  exit 1
95
94
  end
96
95
 
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
104
103
  @mapper_teams.map_teams!( line )
105
104
  teams = @mapper_teams.find_teams!( line )
106
105
 
107
- title, pos = find_group_title_and_pos!( line )
106
+ name = find_group_name!( line )
108
107
 
109
108
  logger.debug " line: >#{line}<"
110
109
 
111
- group = Import::Group.new( pos: pos,
112
- title: title,
113
- teams: teams.map {|team| team.title } )
110
+ ## todo/check/fix: add back group key - why? why not?
111
+ group = Import::Group.new( name: name,
112
+ teams: teams.map {|team| team.name } )
114
113
 
115
- @groups[ title ] = group
114
+ @groups[ name ] = group
116
115
  end
117
116
 
118
117
 
119
- def find_group_title_and_pos!( line )
118
+ def find_group_name!( line )
120
119
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
121
120
  ## nb: (?:) = is for non-capturing group(ing)
122
121
 
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
125
124
 
126
125
  ## todo:
127
126
  ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
128
- regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/
127
+ regex = /\b
128
+ (?:
129
+ (Group | Gruppe | Grupo)
130
+ [ ]+
131
+ (\d+ | [A-Z]+)
132
+ )
133
+ \b/x
129
134
 
130
135
  m = regex.match( line )
131
136
 
132
- return [nil,nil] if m.nil?
133
-
134
- pos = case m[1]
135
- when 'A' then 1
136
- when 'B' then 2
137
- when 'C' then 3
138
- when 'D' then 4
139
- when 'E' then 5
140
- when 'F' then 6
141
- when 'G' then 7
142
- when 'H' then 8
143
- when 'I' then 9
144
- when 'J' then 10
145
- when 'K' then 11
146
- when 'L' then 12
147
- when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
148
- else m[1].to_i
149
- end
150
-
151
- title = m[0]
152
-
153
- logger.debug " title: >#{title}<"
154
- logger.debug " pos: >#{pos}<"
155
-
156
- line.sub!( regex, '[GROUP.TITLE+POS]' )
157
-
158
- [title,pos]
137
+ return nil if m.nil?
138
+
139
+ name = m[0]
140
+
141
+ logger.debug " name: >#{name}<"
142
+
143
+ line.sub!( name, '[GROUP.NAME]' )
144
+
145
+ name
159
146
  end
160
147
 
161
148
 
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
180
167
  end_date = end_date.to_date
181
168
 
182
169
 
183
- pos = find_round_pos!( line )
184
- title = find_round_def_title!( line )
185
- # NB: use extracted round title for knockout check
186
- knockout_flag = is_knockout_round?( title )
170
+ name = find_round_def_name!( line )
171
+ # NB: use extracted round name for knockout check
172
+ knockout_flag = is_knockout_round?( name )
187
173
 
188
174
 
189
175
  logger.debug " start_date: #{start_date}"
190
176
  logger.debug " end_date: #{end_date}"
191
- logger.debug " pos: #{pos}"
192
- logger.debug " title: >#{title}<"
177
+ logger.debug " name: >#{name}<"
193
178
  logger.debug " knockout_flag: #{knockout_flag}"
194
179
 
195
180
  logger.debug " line: >#{line}<"
196
181
 
197
- #######################################
198
- # todo/fix: add auto flag is false !!!! - why? why not?
199
- round = Import::Round.new( pos: pos,
200
- title: title,
182
+ round = Import::Round.new( name: name,
201
183
  start_date: start_date,
202
184
  end_date: end_date,
203
185
  knockout: knockout_flag,
204
186
  auto: false )
205
187
 
206
- @rounds[ title ] = round
188
+ @rounds[ name ] = round
207
189
  end
208
190
 
209
191
 
210
192
 
211
- def find_round_pos!( line )
212
- # pass #1) extract optional round pos from line
213
- # e.g. (1) - must start line
214
- regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
215
-
216
- # pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
217
- # note: /\b(\d{1,3})\b/
218
- # will match -12
219
- # thus, use space required - will NOT match -2 e.g. Group-2 Play-off
220
- # note: allow 1. Runde n
221
- # 1^ Giornata
222
- regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
223
-
224
- if line =~ regex_pos
225
- logger.debug " pos: >#{$1}<"
226
-
227
- line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
228
- return $1.to_i
229
- elsif line =~ regex_num
230
- ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
231
- ## NB: do NOT remove pos from string (will get removed by round title)
232
-
233
- num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
234
-
235
- #### fix:
236
- # use/make keywords required
237
- # e.g. Round of 16 -> should NOT match 16!
238
- # Spiel um Platz 3 (or 5) etc -> should NOT match 3!
239
- # Round 16 - ok
240
- # thus, check for required keywords
193
+ def find_round_def_name!( line )
194
+ # assume everything before pipe (\) is the round name
195
+ # strip [ROUND.POS], todo:?? [ROUND.NAME2]
241
196
 
242
- ## quick hack for round of 16
243
- # todo: mask match e.g. Round of xxx ... and try again - might include something
244
- # reuse pattern for Group XX Replays for example
245
- if line =~ /^\s*Round of \d{1,3}\b/
246
- return nil
247
- end
248
-
249
- logger.debug " pos: >#{num}<"
250
- return num
251
- else
252
- ## fix: add logger.warn no round pos found in line
253
- return nil
254
- end
255
- end # method find_round_pos!
256
-
257
- def find_round_def_title!( line )
258
- # assume everything before pipe (\) is the round title
259
- # strip [ROUND.POS], todo:?? [ROUND.TITLE2]
260
-
261
- # todo/fix: add title2 w/ // or / why? why not?
197
+ # todo/fix: add name2 w/ // or / why? why not?
262
198
  # -- strip / or / chars
263
199
 
264
200
  buf = line.dup
265
- logger.debug " find_round_def_title! line-before: >>#{buf}<<"
201
+ logger.debug " find_round_def_name! line-before: >>#{buf}<<"
266
202
 
267
203
  ## cut-off everything after (including) pipe (|)
268
204
  buf = buf[ 0...buf.index('|') ]
269
-
270
- # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
271
- buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
272
- # remove leading and trailing whitespace
273
205
  buf.strip!
274
206
 
275
- logger.debug " find_round_def_title! line-after: >>#{buf}<<"
207
+ logger.debug " find_round_def_name! line-after: >>#{buf}<<"
276
208
 
277
- logger.debug " title: >>#{buf}<<"
278
- line.sub!( buf, '[ROUND.TITLE]' )
209
+ logger.debug " name: >>#{buf}<<"
210
+ line.sub!( buf, '[ROUND.NAME]' )
279
211
 
280
212
  buf
281
213
  end
282
214
 
283
- def find_round_header_title!( line )
284
- # assume everything left is the round title
285
- # extract all other items first (round title2, round pos, group title n pos, etc.)
286
215
 
287
- ## todo/fix:
288
- ## cleanup method
289
- ## use buf.index( '//' ) to split string (see found_round_def)
290
- ## why? simpler why not?
291
- ## - do we currently allow groups if title2 present? add example if it works?
216
+ ## split by or || or |||
217
+ ## or ++ or +++
218
+ ## or -- or ---
219
+ ## or // or ///
220
+ ## note: allow Final | First Leg as ONE name same as
221
+ ## Final - First Leg or
222
+ ## Final, First Leg
223
+ ## for cut-off always MUST be more than two chars
224
+ ##
225
+ ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
226
+ ## todo/fix: move to parser utils and add a method split_name or such?
227
+ HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
228
+ (?:\|{2,} |
229
+ \+{2,} |
230
+ -{2,} |
231
+ \/{2,}
232
+ )
233
+ [ ]* ## allow (strip) trailing spaces
234
+ /x
235
+
236
+ def find_round_header_name!( line )
237
+ # assume everything left is the round name
238
+ # extract all other items first (round name2, round pos, group name n pos, etc.)
292
239
 
293
240
  buf = line.dup
294
- logger.debug " find_round_header_title! line-before: >>#{buf}<<"
241
+ logger.debug " find_round_header_name! line-before: >>#{buf}<<"
242
+
243
+
244
+ parts = buf.split( HEADER_SEP_RE )
245
+ buf = parts[0]
295
246
 
296
- buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
297
247
  buf.strip! # remove leading and trailing whitespace
298
248
 
299
- logger.debug " find_round_title! line-after: >>#{buf}<<"
249
+ logger.debug " find_round_name! line-after: >>#{buf}<<"
300
250
 
301
- ### bingo - assume what's left is the round title
251
+ ### bingo - assume what's left is the round name
302
252
 
303
- logger.debug " title: >>#{buf}<<"
304
- line.sub!( buf, '[ROUND.TITLE]' )
253
+ logger.debug " name: >>#{buf}<<"
254
+ line.sub!( buf, '[ROUND.NAME]' )
305
255
 
306
256
  buf
307
257
  end
308
258
 
259
+ ## quick hack- collect all "fillwords" by language!!!!
260
+ ## change later and add to sportdb-langs!!!!
261
+ ##
262
+ ## strip all "fillwords" e.g.:
263
+ ## Nachtrag/Postponed/Addition/Supplemento names
264
+ ##
265
+ ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
266
+ ROUND_EXTRA_WORDS_RE = /\b(?:
267
+ Nachtrag | ## de
268
+ Postponed | ## en
269
+ Addition | ## en
270
+ Supplemento ## es
271
+ )
272
+ \b/ix
309
273
 
310
274
  def parse_round_header( line )
311
275
  logger.debug "parsing round header line: >#{line}<"
312
276
 
313
- ## todo/check/fix:
314
- # make sure Round of 16 will not return pos 16 -- how? possible?
315
- # add unit test too to verify
316
- pos = find_round_pos!( line )
317
-
318
- title = find_round_header_title!( line )
277
+ name = find_round_header_name!( line )
319
278
 
320
279
  logger.debug " line: >#{line}<"
321
280
 
281
+ name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
282
+ name = name.strip
322
283
 
323
- round = @rounds[ title ]
284
+ round = @rounds[ name ]
324
285
  if round.nil? ## auto-add / create if missing
325
- round = Import::Round.new( pos: pos,
326
- title: title )
327
- @rounds[ title ] = round
286
+ ## todo/check: add num (was pos) if present - why? why not?
287
+ round = Import::Round.new( name: name )
288
+ @rounds[ name ] = round
328
289
  end
329
290
 
330
291
  ## todo/check: if pos match (MUST always match for now)
331
292
  @last_round = round
332
293
  @last_group = nil # note: reset group to no group - why? why not?
333
-
334
-
335
- ## NB: dummy/placeholder start_at, end_at date
336
- ## replace/patch after adding all games for round
337
-
338
- =begin
339
- round_attribs = {
340
- title: title,
341
- title2: title2,
342
- knockout: knockout_flag
343
- }
344
-
345
- if pos > 999000
346
- # no pos (e.g. will get autonumbered later) - try match by title for now
347
- # e.g. lets us use title 'Group Replays', for example, multiple times
348
- @round = Round.find_by_event_id_and_title( @event.id, title )
349
- else
350
- @round = Round.find_by_event_id_and_pos( @event.id, pos )
351
- end
352
-
353
- if @round.present?
354
- logger.debug "update round #{@round.id}:"
355
- else
356
- logger.debug "create round:"
357
- @round = Round.new
358
-
359
- round_attribs = round_attribs.merge( {
360
- event_id: @event.id,
361
- pos: pos,
362
- start_at: Date.parse('1911-11-11'),
363
- end_at: Date.parse('1911-11-11')
364
- })
365
- end
366
-
367
- logger.debug round_attribs.to_json
368
-
369
- @round.update_attributes!( round_attribs )
370
-
371
- @patch_round_ids_pos << @round.id if pos > 999000
372
- ### store list of round ids for patching start_at/end_at at the end
373
- @patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
374
- =end
375
294
  end
376
295
 
377
296
 
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
457
376
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
458
377
 
459
378
  @matches << Import::Match.new( date: date,
460
- team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.title)
461
- team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.title)
379
+ team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
380
+ team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
462
381
  score: score,
463
- round: round ? round.title : nil, ## note: for now always use string (assume unique canonical name for event)
464
- group: @last_group ? @last_group.title : nil ) ## note: for now always use string (assume unique canonical name for event)
382
+ round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
383
+ group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
465
384
 
466
385
  ### todo: cache team lookups in hash?
467
386
 
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
517
436
 
518
437
  round_attribs = {
519
438
  event_id: @event.id,
520
- title: "Matchday #{date.to_date}",
439
+ name: "Matchday #{date.to_date}",
521
440
  pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
522
441
  start_at: date.to_date,
523
442
  end_at: date.to_date
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
541
460
  end
542
461
 
543
462
  ## note: will crash (round.pos) if round is nil
544
- logger.debug( " using round #{round.pos} >#{round.title}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
463
+ logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
545
464
  else
546
465
  ## use round from last round header
547
466
  round = @round