sportdb-formats 1.0.4 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,7 +16,7 @@ class TeamMapper
16
16
  end
17
17
 
18
18
  def map_teams!( line )
19
- @mapper.map_titles!( line )
19
+ @mapper.map_names!( line )
20
20
  end
21
21
  end # class TeamMapper
22
22
 
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
82
82
  # team1 team2 - match (will get new auto-matchday! not last round)
83
83
  @last_round = nil
84
84
 
85
- title, pos = find_group_title_and_pos!( line )
85
+ name = find_group_name!( line )
86
86
 
87
- logger.debug " title: >#{title}<"
88
- logger.debug " pos: >#{pos}<"
87
+ logger.debug " name: >#{name}<"
89
88
  logger.debug " line: >#{line}<"
90
89
 
91
- group = @groups[ title ]
90
+ group = @groups[ name ]
92
91
  if group.nil?
93
- puts "!! ERROR - no group def found for >#{title}<"
92
+ puts "!! ERROR - no group def found for >#{name}<"
94
93
  exit 1
95
94
  end
96
95
 
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
104
103
  @mapper_teams.map_teams!( line )
105
104
  teams = @mapper_teams.find_teams!( line )
106
105
 
107
- title, pos = find_group_title_and_pos!( line )
106
+ name = find_group_name!( line )
108
107
 
109
108
  logger.debug " line: >#{line}<"
110
109
 
111
- group = Import::Group.new( pos: pos,
112
- title: title,
113
- teams: teams.map {|team| team.title } )
110
+ ## todo/check/fix: add back group key - why? why not?
111
+ group = Import::Group.new( name: name,
112
+ teams: teams.map {|team| team.name } )
114
113
 
115
- @groups[ title ] = group
114
+ @groups[ name ] = group
116
115
  end
117
116
 
118
117
 
119
- def find_group_title_and_pos!( line )
118
+ def find_group_name!( line )
120
119
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
121
120
  ## nb: (?:) = is for non-capturing group(ing)
122
121
 
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
125
124
 
126
125
  ## todo:
127
126
  ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
128
- regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/
127
+ regex = /\b
128
+ (?:
129
+ (Group | Gruppe | Grupo)
130
+ [ ]+
131
+ (\d+ | [A-Z]+)
132
+ )
133
+ \b/x
129
134
 
130
135
  m = regex.match( line )
131
136
 
132
- return [nil,nil] if m.nil?
133
-
134
- pos = case m[1]
135
- when 'A' then 1
136
- when 'B' then 2
137
- when 'C' then 3
138
- when 'D' then 4
139
- when 'E' then 5
140
- when 'F' then 6
141
- when 'G' then 7
142
- when 'H' then 8
143
- when 'I' then 9
144
- when 'J' then 10
145
- when 'K' then 11
146
- when 'L' then 12
147
- when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
148
- else m[1].to_i
149
- end
150
-
151
- title = m[0]
152
-
153
- logger.debug " title: >#{title}<"
154
- logger.debug " pos: >#{pos}<"
155
-
156
- line.sub!( regex, '[GROUP.TITLE+POS]' )
157
-
158
- [title,pos]
137
+ return nil if m.nil?
138
+
139
+ name = m[0]
140
+
141
+ logger.debug " name: >#{name}<"
142
+
143
+ line.sub!( name, '[GROUP.NAME]' )
144
+
145
+ name
159
146
  end
160
147
 
161
148
 
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
180
167
  end_date = end_date.to_date
181
168
 
182
169
 
183
- pos = find_round_pos!( line )
184
- title = find_round_def_title!( line )
185
- # NB: use extracted round title for knockout check
186
- knockout_flag = is_knockout_round?( title )
170
+ name = find_round_def_name!( line )
171
+ # NB: use extracted round name for knockout check
172
+ knockout_flag = is_knockout_round?( name )
187
173
 
188
174
 
189
175
  logger.debug " start_date: #{start_date}"
190
176
  logger.debug " end_date: #{end_date}"
191
- logger.debug " pos: #{pos}"
192
- logger.debug " title: >#{title}<"
177
+ logger.debug " name: >#{name}<"
193
178
  logger.debug " knockout_flag: #{knockout_flag}"
194
179
 
195
180
  logger.debug " line: >#{line}<"
196
181
 
197
- #######################################
198
- # todo/fix: add auto flag is false !!!! - why? why not?
199
- round = Import::Round.new( pos: pos,
200
- title: title,
182
+ round = Import::Round.new( name: name,
201
183
  start_date: start_date,
202
184
  end_date: end_date,
203
185
  knockout: knockout_flag,
204
186
  auto: false )
205
187
 
206
- @rounds[ title ] = round
188
+ @rounds[ name ] = round
207
189
  end
208
190
 
209
191
 
210
192
 
211
- def find_round_pos!( line )
212
- # pass #1) extract optional round pos from line
213
- # e.g. (1) - must start line
214
- regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
215
-
216
- # pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
217
- # note: /\b(\d{1,3})\b/
218
- # will match -12
219
- # thus, use space required - will NOT match -2 e.g. Group-2 Play-off
220
- # note: allow 1. Runde n
221
- # 1^ Giornata
222
- regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
193
+ def find_round_def_name!( line )
194
+ # assume everything before pipe (|) is the round name
195
+ # strip [ROUND.POS], todo:?? [ROUND.NAME2]
223
196
 
224
- if line =~ regex_pos
225
- logger.debug " pos: >#{$1}<"
226
-
227
- line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
228
- return $1.to_i
229
- elsif line =~ regex_num
230
- ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
231
- ## NB: do NOT remove pos from string (will get removed by round title)
232
-
233
- num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
234
-
235
- #### fix:
236
- # use/make keywords required
237
- # e.g. Round of 16 -> should NOT match 16!
238
- # Spiel um Platz 3 (or 5) etc -> should NOT match 3!
239
- # Round 16 - ok
240
- # thus, check for required keywords
241
-
242
- ## quick hack for round of 16
243
- # todo: mask match e.g. Round of xxx ... and try again - might include something
244
- # reuse pattern for Group XX Replays for example
245
- if line =~ /^\s*Round of \d{1,3}\b/
246
- return nil
247
- end
248
-
249
- logger.debug " pos: >#{num}<"
250
- return num
251
- else
252
- ## fix: add logger.warn no round pos found in line
253
- return nil
254
- end
255
- end # method find_round_pos!
256
-
257
- def find_round_def_title!( line )
258
- # assume everything before pipe (\) is the round title
259
- # strip [ROUND.POS], todo:?? [ROUND.TITLE2]
260
-
261
- # todo/fix: add title2 w/ // or / why? why not?
197
+ # todo/fix: add name2 w/ // or / why? why not?
262
198
  # -- strip / or / chars
263
199
 
264
200
  buf = line.dup
265
- logger.debug " find_round_def_title! line-before: >>#{buf}<<"
201
+ logger.debug " find_round_def_name! line-before: >>#{buf}<<"
266
202
 
267
203
  ## cut-off everything after (including) pipe (|)
268
204
  buf = buf[ 0...buf.index('|') ]
269
-
270
- # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
271
- buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
272
- # remove leading and trailing whitespace
273
205
  buf.strip!
274
206
 
275
- logger.debug " find_round_def_title! line-after: >>#{buf}<<"
207
+ logger.debug " find_round_def_name! line-after: >>#{buf}<<"
276
208
 
277
- logger.debug " title: >>#{buf}<<"
278
- line.sub!( buf, '[ROUND.TITLE]' )
209
+ logger.debug " name: >>#{buf}<<"
210
+ line.sub!( buf, '[ROUND.NAME]' )
279
211
 
280
212
  buf
281
213
  end
282
214
 
283
- def find_round_header_title!( line )
284
- # assume everything left is the round title
285
- # extract all other items first (round title2, round pos, group title n pos, etc.)
286
215
 
287
- ## todo/fix:
288
- ## cleanup method
289
- ## use buf.index( '//' ) to split string (see found_round_def)
290
- ## why? simpler why not?
291
- ## - do we currently allow groups if title2 present? add example if it works?
216
+ ## split by || or |||
217
+ ## or ++ or +++
218
+ ## or -- or ---
219
+ ## or // or ///
220
+ ## note: allow Final | First Leg as ONE name same as
221
+ ## Final - First Leg or
222
+ ## Final, First Leg
223
+ ## for cut-off always MUST be at least two chars
224
+ ##
225
+ ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
226
+ ## todo/fix: move to parser utils and add a method split_name or such?
227
+ HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
228
+ (?:\|{2,} |
229
+ \+{2,} |
230
+ -{2,} |
231
+ \/{2,}
232
+ )
233
+ [ ]* ## allow (strip) trailing spaces
234
+ /x
235
+
236
+ def find_round_header_name!( line )
237
+ # assume everything left is the round name
238
+ # extract all other items first (round name2, round pos, group name n pos, etc.)
292
239
 
293
240
  buf = line.dup
294
- logger.debug " find_round_header_title! line-before: >>#{buf}<<"
241
+ logger.debug " find_round_header_name! line-before: >>#{buf}<<"
242
+
243
+
244
+ parts = buf.split( HEADER_SEP_RE )
245
+ buf = parts[0]
295
246
 
296
- buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
297
247
  buf.strip! # remove leading and trailing whitespace
298
248
 
299
- logger.debug " find_round_title! line-after: >>#{buf}<<"
249
+ logger.debug " find_round_name! line-after: >>#{buf}<<"
300
250
 
301
- ### bingo - assume what's left is the round title
251
+ ### bingo - assume what's left is the round name
302
252
 
303
- logger.debug " title: >>#{buf}<<"
304
- line.sub!( buf, '[ROUND.TITLE]' )
253
+ logger.debug " name: >>#{buf}<<"
254
+ line.sub!( buf, '[ROUND.NAME]' )
305
255
 
306
256
  buf
307
257
  end
308
258
 
259
+ ## quick hack- collect all "fillwords" by language!!!!
260
+ ## change later and add to sportdb-langs!!!!
261
+ ##
262
+ ## strip all "fillwords" e.g.:
263
+ ## Nachtrag/Postponed/Addition/Supplemento names
264
+ ##
265
+ ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
266
+ ROUND_EXTRA_WORDS_RE = /\b(?:
267
+ Nachtrag | ## de
268
+ Postponed | ## en
269
+ Addition | ## en
270
+ Supplemento ## es
271
+ )
272
+ \b/ix
309
273
 
310
274
  def parse_round_header( line )
311
275
  logger.debug "parsing round header line: >#{line}<"
312
276
 
313
- ## todo/check/fix:
314
- # make sure Round of 16 will not return pos 16 -- how? possible?
315
- # add unit test too to verify
316
- pos = find_round_pos!( line )
317
-
318
- title = find_round_header_title!( line )
277
+ name = find_round_header_name!( line )
319
278
 
320
279
  logger.debug " line: >#{line}<"
321
280
 
281
+ name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
282
+ name = name.strip
322
283
 
323
- round = @rounds[ title ]
284
+ round = @rounds[ name ]
324
285
  if round.nil? ## auto-add / create if missing
325
- round = Import::Round.new( pos: pos,
326
- title: title )
327
- @rounds[ title ] = round
286
+ ## todo/check: add num (was pos) if present - why? why not?
287
+ round = Import::Round.new( name: name )
288
+ @rounds[ name ] = round
328
289
  end
329
290
 
330
291
  ## todo/check: if pos match (MUST always match for now)
331
292
  @last_round = round
332
293
  @last_group = nil # note: reset group to no group - why? why not?
333
-
334
-
335
- ## NB: dummy/placeholder start_at, end_at date
336
- ## replace/patch after adding all games for round
337
-
338
- =begin
339
- round_attribs = {
340
- title: title,
341
- title2: title2,
342
- knockout: knockout_flag
343
- }
344
-
345
- if pos > 999000
346
- # no pos (e.g. will get autonumbered later) - try match by title for now
347
- # e.g. lets us use title 'Group Replays', for example, multiple times
348
- @round = Round.find_by_event_id_and_title( @event.id, title )
349
- else
350
- @round = Round.find_by_event_id_and_pos( @event.id, pos )
351
- end
352
-
353
- if @round.present?
354
- logger.debug "update round #{@round.id}:"
355
- else
356
- logger.debug "create round:"
357
- @round = Round.new
358
-
359
- round_attribs = round_attribs.merge( {
360
- event_id: @event.id,
361
- pos: pos,
362
- start_at: Date.parse('1911-11-11'),
363
- end_at: Date.parse('1911-11-11')
364
- })
365
- end
366
-
367
- logger.debug round_attribs.to_json
368
-
369
- @round.update_attributes!( round_attribs )
370
-
371
- @patch_round_ids_pos << @round.id if pos > 999000
372
- ### store list of round ids for patching start_at/end_at at the end
373
- @patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
374
- =end
375
294
  end
376
295
 
377
296
 
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
457
376
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
458
377
 
459
378
  @matches << Import::Match.new( date: date,
460
- team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.title)
461
- team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.title)
379
+ team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
380
+ team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
462
381
  score: score,
463
- round: round ? round.title : nil, ## note: for now always use string (assume unique canonical name for event)
464
- group: @last_group ? @last_group.title : nil ) ## note: for now always use string (assume unique canonical name for event)
382
+ round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
383
+ group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
465
384
 
466
385
  ### todo: cache team lookups in hash?
467
386
 
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
517
436
 
518
437
  round_attribs = {
519
438
  event_id: @event.id,
520
- title: "Matchday #{date.to_date}",
439
+ name: "Matchday #{date.to_date}",
521
440
  pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
522
441
  start_at: date.to_date,
523
442
  end_at: date.to_date
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
541
460
  end
542
461
 
543
462
  ## note: will crash (round.pos) if round is nil
544
- logger.debug( " using round #{round.pos} >#{round.title}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
463
+ logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
545
464
  else
546
465
  ## use round from last round header
547
466
  round = @round
@@ -644,12 +563,29 @@ class MatchParser ## simple match parser for team match schedules
644
563
 
645
564
  if date && team1.nil? && team2.nil?
646
565
  logger.debug( "date header line found: >#{line}<")
647
- logger.debug( " date: #{date}")
566
+ logger.debug( " date: #{date} with start: #{@start}")
648
567
 
649
568
  @last_date = date # keep a reference for later use
650
- return true
569
+
570
+ ### quick "corona" hack - support seasons going beyond 12 months (see swiss league 2019/20 and others!!)
571
+ ## find a better way??
572
+ ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
573
+ ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
574
+ ### todo/check: just turn on for 2019/20 season or always? why? why not?
575
+
576
+ ## todo/fix: add switch back to old @start_org
577
+ ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
578
+ if @start.month != 1
579
+ if date.year == @start.year+1
580
+ logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
581
+ @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
582
+ @start = Date.new( @start.year+1, 1, 1 )
583
+ end
584
+ end
585
+
586
+ true
651
587
  else
652
- return false
588
+ false
653
589
  end
654
590
  end
655
591
 
@@ -95,11 +95,14 @@ module SportDb
95
95
  headers_mapping[:score] = find_header( headers, ['FT'] )
96
96
  headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
97
 
98
- headers_mapping[:round] = find_header( headers, ['Round'] )
98
+ headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
99
99
 
100
100
  ## optional headers - note: find_header returns nil if header NOT found
101
101
  header_stage = find_header( headers, ['Stage'] )
102
102
  headers_mapping[:stage] = header_stage if header_stage
103
+
104
+ header_league = find_header( headers, ['League'] )
105
+ headers_mapping[:league] = header_league if header_league
103
106
  else
104
107
  ## else try footballdata.uk and others
105
108
  headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
@@ -167,7 +170,10 @@ module SportDb
167
170
 
168
171
 
169
172
  ## check if data present - if not skip (might be empty row)
170
- if team1.nil? && team2.nil?
173
+ ## note: (old classic) csv reader returns nil for empty fields
174
+ ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
175
+ if (team1.nil? || team1.empty?) &&
176
+ (team2.nil? || team2.empty?)
171
177
  puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
172
178
  pp row
173
179
  next
@@ -182,9 +188,11 @@ module SportDb
182
188
  col = row[ headers_mapping[ :date ]]
183
189
  col = col.strip # make sure not leading or trailing spaces left over
184
190
 
185
- if col.empty? || col == '-' || col == '?'
186
- ## note: allow missing / unknown date for match
187
- date = nil
191
+ if col.empty? ||
192
+ col =~ /^-{1,}$/ || # e.g. - or ---
193
+ col =~ /^\?{1,}$/ # e.g. ? or ???
194
+ ## note: allow missing / unknown date for match
195
+ date = nil
188
196
  else
189
197
  ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
190
198
  col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
@@ -199,6 +207,8 @@ module SportDb
199
207
  date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
200
208
  elsif col =~ /^\d{1,2} \w{3} \d{4}$/
201
209
  date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
210
+ elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
211
+ date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
202
212
  else
203
213
  puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
204
214
  ## todo/fix: add to errors/warns list - why? why not?
@@ -211,6 +221,9 @@ module SportDb
211
221
  end
212
222
 
213
223
 
224
+ ##
225
+ ## todo/fix: round might not always be just a simple integer number!!!
226
+ ## might be text such as Final | Leg 1 or such!!!!
214
227
  round = nil
215
228
  ## check for (optional) round / matchday
216
229
  if headers_mapping[ :round ]
@@ -283,13 +296,17 @@ module SportDb
283
296
  end
284
297
  end
285
298
 
299
+ league = nil
300
+ league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
301
+
286
302
 
287
303
  match = Import::Match.new( date: date,
288
304
  team1: team1, team2: team2,
289
305
  score1: score1, score2: score2,
290
306
  score1i: score1i, score2i: score2i,
291
307
  round: round,
292
- stage: stage )
308
+ stage: stage,
309
+ league: league )
293
310
  matches << match
294
311
  end
295
312