sportdb-formats 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +5 -0
  3. data/Rakefile +2 -2
  4. data/lib/sportdb/formats.rb +17 -5
  5. data/lib/sportdb/formats/country/country_index.rb +7 -7
  6. data/lib/sportdb/formats/country/country_reader.rb +26 -6
  7. data/lib/sportdb/formats/datafile_package.rb +10 -7
  8. data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
  9. data/lib/sportdb/formats/league/league_reader.rb +3 -3
  10. data/lib/sportdb/formats/match/mapper.rb +63 -63
  11. data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
  12. data/lib/sportdb/formats/match/match_parser.rb +99 -180
  13. data/lib/sportdb/formats/match/match_parser_csv.rb +321 -0
  14. data/lib/sportdb/formats/package.rb +165 -11
  15. data/lib/sportdb/formats/parser_helper.rb +11 -2
  16. data/lib/sportdb/formats/score/score_formats.rb +41 -1
  17. data/lib/sportdb/formats/score/score_parser.rb +6 -0
  18. data/lib/sportdb/formats/structs/country.rb +6 -3
  19. data/lib/sportdb/formats/structs/group.rb +5 -12
  20. data/lib/sportdb/formats/structs/round.rb +6 -13
  21. data/lib/sportdb/formats/structs/standings.rb +30 -9
  22. data/lib/sportdb/formats/structs/team.rb +1 -2
  23. data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
  24. data/lib/sportdb/formats/version.rb +4 -2
  25. data/test/helper.rb +2 -1
  26. data/test/test_club_reader_props.rb +2 -2
  27. data/test/test_country_index.rb +4 -4
  28. data/test/test_country_reader.rb +34 -4
  29. data/test/test_csv_match_parser.rb +114 -0
  30. data/test/test_csv_match_parser_utils.rb +20 -0
  31. data/test/test_csv_reader.rb +5 -5
  32. data/test/test_datafile.rb +0 -32
  33. data/test/test_datafile_package.rb +46 -0
  34. data/test/test_match_auto_relegation.rb +41 -0
  35. data/test/test_package.rb +60 -28
  36. data/test/test_package_match.rb +27 -3
  37. data/test/test_regex.rb +25 -7
  38. data/test/test_scores.rb +58 -49
  39. metadata +9 -4
@@ -16,7 +16,7 @@ class TeamMapper
16
16
  end
17
17
 
18
18
  def map_teams!( line )
19
- @mapper.map_titles!( line )
19
+ @mapper.map_names!( line )
20
20
  end
21
21
  end # class TeamMapper
22
22
 
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
82
82
  # team1 team2 - match (will get new auto-matchday! not last round)
83
83
  @last_round = nil
84
84
 
85
- title, pos = find_group_title_and_pos!( line )
85
+ name = find_group_name!( line )
86
86
 
87
- logger.debug " title: >#{title}<"
88
- logger.debug " pos: >#{pos}<"
87
+ logger.debug " name: >#{name}<"
89
88
  logger.debug " line: >#{line}<"
90
89
 
91
- group = @groups[ title ]
90
+ group = @groups[ name ]
92
91
  if group.nil?
93
- puts "!! ERROR - no group def found for >#{title}<"
92
+ puts "!! ERROR - no group def found for >#{name}<"
94
93
  exit 1
95
94
  end
96
95
 
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
104
103
  @mapper_teams.map_teams!( line )
105
104
  teams = @mapper_teams.find_teams!( line )
106
105
 
107
- title, pos = find_group_title_and_pos!( line )
106
+ name = find_group_name!( line )
108
107
 
109
108
  logger.debug " line: >#{line}<"
110
109
 
111
- group = Import::Group.new( pos: pos,
112
- title: title,
113
- teams: teams.map {|team| team.title } )
110
+ ## todo/check/fix: add back group key - why? why not?
111
+ group = Import::Group.new( name: name,
112
+ teams: teams.map {|team| team.name } )
114
113
 
115
- @groups[ title ] = group
114
+ @groups[ name ] = group
116
115
  end
117
116
 
118
117
 
119
- def find_group_title_and_pos!( line )
118
+ def find_group_name!( line )
120
119
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
121
120
  ## nb: (?:) = is for non-capturing group(ing)
122
121
 
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
125
124
 
126
125
  ## todo:
127
126
  ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
128
- regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/
127
+ regex = /\b
128
+ (?:
129
+ (Group | Gruppe | Grupo)
130
+ [ ]+
131
+ (\d+ | [A-Z]+)
132
+ )
133
+ \b/x
129
134
 
130
135
  m = regex.match( line )
131
136
 
132
- return [nil,nil] if m.nil?
133
-
134
- pos = case m[1]
135
- when 'A' then 1
136
- when 'B' then 2
137
- when 'C' then 3
138
- when 'D' then 4
139
- when 'E' then 5
140
- when 'F' then 6
141
- when 'G' then 7
142
- when 'H' then 8
143
- when 'I' then 9
144
- when 'J' then 10
145
- when 'K' then 11
146
- when 'L' then 12
147
- when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
148
- else m[1].to_i
149
- end
150
-
151
- title = m[0]
152
-
153
- logger.debug " title: >#{title}<"
154
- logger.debug " pos: >#{pos}<"
155
-
156
- line.sub!( regex, '[GROUP.TITLE+POS]' )
157
-
158
- [title,pos]
137
+ return nil if m.nil?
138
+
139
+ name = m[0]
140
+
141
+ logger.debug " name: >#{name}<"
142
+
143
+ line.sub!( name, '[GROUP.NAME]' )
144
+
145
+ name
159
146
  end
160
147
 
161
148
 
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
180
167
  end_date = end_date.to_date
181
168
 
182
169
 
183
- pos = find_round_pos!( line )
184
- title = find_round_def_title!( line )
185
- # NB: use extracted round title for knockout check
186
- knockout_flag = is_knockout_round?( title )
170
+ name = find_round_def_name!( line )
171
+ # NB: use extracted round name for knockout check
172
+ knockout_flag = is_knockout_round?( name )
187
173
 
188
174
 
189
175
  logger.debug " start_date: #{start_date}"
190
176
  logger.debug " end_date: #{end_date}"
191
- logger.debug " pos: #{pos}"
192
- logger.debug " title: >#{title}<"
177
+ logger.debug " name: >#{name}<"
193
178
  logger.debug " knockout_flag: #{knockout_flag}"
194
179
 
195
180
  logger.debug " line: >#{line}<"
196
181
 
197
- #######################################
198
- # todo/fix: add auto flag is false !!!! - why? why not?
199
- round = Import::Round.new( pos: pos,
200
- title: title,
182
+ round = Import::Round.new( name: name,
201
183
  start_date: start_date,
202
184
  end_date: end_date,
203
185
  knockout: knockout_flag,
204
186
  auto: false )
205
187
 
206
- @rounds[ title ] = round
188
+ @rounds[ name ] = round
207
189
  end
208
190
 
209
191
 
210
192
 
211
- def find_round_pos!( line )
212
- # pass #1) extract optional round pos from line
213
- # e.g. (1) - must start line
214
- regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
215
-
216
- # pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
217
- # note: /\b(\d{1,3})\b/
218
- # will match -12
219
- # thus, use space required - will NOT match -2 e.g. Group-2 Play-off
220
- # note: allow 1. Runde n
221
- # 1^ Giornata
222
- regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
223
-
224
- if line =~ regex_pos
225
- logger.debug " pos: >#{$1}<"
226
-
227
- line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
228
- return $1.to_i
229
- elsif line =~ regex_num
230
- ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
231
- ## NB: do NOT remove pos from string (will get removed by round title)
232
-
233
- num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
234
-
235
- #### fix:
236
- # use/make keywords required
237
- # e.g. Round of 16 -> should NOT match 16!
238
- # Spiel um Platz 3 (or 5) etc -> should NOT match 3!
239
- # Round 16 - ok
240
- # thus, check for required keywords
193
+ def find_round_def_name!( line )
194
+ # assume everything before pipe (|) is the round name
195
+ # strip [ROUND.POS], todo:?? [ROUND.NAME2]
241
196
 
242
- ## quick hack for round of 16
243
- # todo: mask match e.g. Round of xxx ... and try again - might include something
244
- # reuse pattern for Group XX Replays for example
245
- if line =~ /^\s*Round of \d{1,3}\b/
246
- return nil
247
- end
248
-
249
- logger.debug " pos: >#{num}<"
250
- return num
251
- else
252
- ## fix: add logger.warn no round pos found in line
253
- return nil
254
- end
255
- end # method find_round_pos!
256
-
257
- def find_round_def_title!( line )
258
- # assume everything before pipe (\) is the round title
259
- # strip [ROUND.POS], todo:?? [ROUND.TITLE2]
260
-
261
- # todo/fix: add title2 w/ // or / why? why not?
197
+ # todo/fix: add name2 w/ // or / why? why not?
262
198
  # -- strip / or / chars
263
199
 
264
200
  buf = line.dup
265
- logger.debug " find_round_def_title! line-before: >>#{buf}<<"
201
+ logger.debug " find_round_def_name! line-before: >>#{buf}<<"
266
202
 
267
203
  ## cut-off everything after (including) pipe (|)
268
204
  buf = buf[ 0...buf.index('|') ]
269
-
270
- # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
271
- buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
272
- # remove leading and trailing whitespace
273
205
  buf.strip!
274
206
 
275
- logger.debug " find_round_def_title! line-after: >>#{buf}<<"
207
+ logger.debug " find_round_def_name! line-after: >>#{buf}<<"
276
208
 
277
- logger.debug " title: >>#{buf}<<"
278
- line.sub!( buf, '[ROUND.TITLE]' )
209
+ logger.debug " name: >>#{buf}<<"
210
+ line.sub!( buf, '[ROUND.NAME]' )
279
211
 
280
212
  buf
281
213
  end
282
214
 
283
- def find_round_header_title!( line )
284
- # assume everything left is the round title
285
- # extract all other items first (round title2, round pos, group title n pos, etc.)
286
215
 
287
- ## todo/fix:
288
- ## cleanup method
289
- ## use buf.index( '//' ) to split string (see found_round_def)
290
- ## why? simpler why not?
291
- ## - do we currently allow groups if title2 present? add example if it works?
216
+ ## split by || or |||
217
+ ## or ++ or +++
218
+ ## or -- or ---
219
+ ## or // or ///
220
+ ## note: allow Final | First Leg as ONE name same as
221
+ ## Final - First Leg or
222
+ ## Final, First Leg
223
+ ## for cut-off always MUST be two or more chars
224
+ ##
225
+ ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
226
+ ## todo/fix: move to parser utils and add a method split_name or such?
227
+ HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
228
+ (?:\|{2,} |
229
+ \+{2,} |
230
+ -{2,} |
231
+ \/{2,}
232
+ )
233
+ [ ]* ## allow (strip) trailing spaces
234
+ /x
235
+
236
+ def find_round_header_name!( line )
237
+ # assume everything left is the round name
238
+ # extract all other items first (round name2, round pos, group name n pos, etc.)
292
239
 
293
240
  buf = line.dup
294
- logger.debug " find_round_header_title! line-before: >>#{buf}<<"
241
+ logger.debug " find_round_header_name! line-before: >>#{buf}<<"
242
+
243
+
244
+ parts = buf.split( HEADER_SEP_RE )
245
+ buf = parts[0]
295
246
 
296
- buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
297
247
  buf.strip! # remove leading and trailing whitespace
298
248
 
299
- logger.debug " find_round_title! line-after: >>#{buf}<<"
249
+ logger.debug " find_round_name! line-after: >>#{buf}<<"
300
250
 
301
- ### bingo - assume what's left is the round title
251
+ ### bingo - assume what's left is the round name
302
252
 
303
- logger.debug " title: >>#{buf}<<"
304
- line.sub!( buf, '[ROUND.TITLE]' )
253
+ logger.debug " name: >>#{buf}<<"
254
+ line.sub!( buf, '[ROUND.NAME]' )
305
255
 
306
256
  buf
307
257
  end
308
258
 
259
+ ## quick hack- collect all "fillwords" by language!!!!
260
+ ## change later and add to sportdb-langs!!!!
261
+ ##
262
+ ## strip all "fillwords" e.g.:
263
+ ## Nachtrag/Postponed/Addition/Supplemento names
264
+ ##
265
+ ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
266
+ ROUND_EXTRA_WORDS_RE = /\b(?:
267
+ Nachtrag | ## de
268
+ Postponed | ## en
269
+ Addition | ## en
270
+ Supplemento ## es
271
+ )
272
+ \b/ix
309
273
 
310
274
  def parse_round_header( line )
311
275
  logger.debug "parsing round header line: >#{line}<"
312
276
 
313
- ## todo/check/fix:
314
- # make sure Round of 16 will not return pos 16 -- how? possible?
315
- # add unit test too to verify
316
- pos = find_round_pos!( line )
317
-
318
- title = find_round_header_title!( line )
277
+ name = find_round_header_name!( line )
319
278
 
320
279
  logger.debug " line: >#{line}<"
321
280
 
281
+ name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
282
+ name = name.strip
322
283
 
323
- round = @rounds[ title ]
284
+ round = @rounds[ name ]
324
285
  if round.nil? ## auto-add / create if missing
325
- round = Import::Round.new( pos: pos,
326
- title: title )
327
- @rounds[ title ] = round
286
+ ## todo/check: add num (was pos) if present - why? why not?
287
+ round = Import::Round.new( name: name )
288
+ @rounds[ name ] = round
328
289
  end
329
290
 
330
291
  ## todo/check: if pos match (MUST always match for now)
331
292
  @last_round = round
332
293
  @last_group = nil # note: reset group to no group - why? why not?
333
-
334
-
335
- ## NB: dummy/placeholder start_at, end_at date
336
- ## replace/patch after adding all games for round
337
-
338
- =begin
339
- round_attribs = {
340
- title: title,
341
- title2: title2,
342
- knockout: knockout_flag
343
- }
344
-
345
- if pos > 999000
346
- # no pos (e.g. will get autonumbered later) - try match by title for now
347
- # e.g. lets us use title 'Group Replays', for example, multiple times
348
- @round = Round.find_by_event_id_and_title( @event.id, title )
349
- else
350
- @round = Round.find_by_event_id_and_pos( @event.id, pos )
351
- end
352
-
353
- if @round.present?
354
- logger.debug "update round #{@round.id}:"
355
- else
356
- logger.debug "create round:"
357
- @round = Round.new
358
-
359
- round_attribs = round_attribs.merge( {
360
- event_id: @event.id,
361
- pos: pos,
362
- start_at: Date.parse('1911-11-11'),
363
- end_at: Date.parse('1911-11-11')
364
- })
365
- end
366
-
367
- logger.debug round_attribs.to_json
368
-
369
- @round.update_attributes!( round_attribs )
370
-
371
- @patch_round_ids_pos << @round.id if pos > 999000
372
- ### store list of round ids for patching start_at/end_at at the end
373
- @patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
374
- =end
375
294
  end
376
295
 
377
296
 
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
457
376
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
458
377
 
459
378
  @matches << Import::Match.new( date: date,
460
- team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.title)
461
- team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.title)
379
+ team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
380
+ team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
462
381
  score: score,
463
- round: round ? round.title : nil, ## note: for now always use string (assume unique canonical name for event)
464
- group: @last_group ? @last_group.title : nil ) ## note: for now always use string (assume unique canonical name for event)
382
+ round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
383
+ group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
465
384
 
466
385
  ### todo: cache team lookups in hash?
467
386
 
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
517
436
 
518
437
  round_attribs = {
519
438
  event_id: @event.id,
520
- title: "Matchday #{date.to_date}",
439
+ name: "Matchday #{date.to_date}",
521
440
  pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
522
441
  start_at: date.to_date,
523
442
  end_at: date.to_date
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
541
460
  end
542
461
 
543
462
  ## note: will crash (round.pos) if round is nil
544
- logger.debug( " using round #{round.pos} >#{round.title}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
463
+ logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
545
464
  else
546
465
  ## use round from last round header
547
466
  round = @round
@@ -0,0 +1,321 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ class CsvMatchParser
6
+
7
+ #############
8
+ # helpers
9
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
10
+
11
+ ## check if headers incl. season; if yes, it has priority over col mapping
12
+ ## e.g. no need to specify twice (if using headers)
13
+ col = headers[:season] if headers && headers[:season]
14
+
15
+ seasons = Hash.new( 0 ) ## default value is 0
16
+
17
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
18
+ ## use read_csv with block to switch to foreach!!!!
19
+ rows = read_csv( path, sep: sep )
20
+
21
+ rows.each_with_index do |row,i|
22
+ puts "[#{i}] " + row.inspect if i < 2
23
+
24
+ season = row[ col ] ## column name defaults to 'Season'
25
+ seasons[ season ] += 1
26
+ end
27
+
28
+ pp seasons
29
+
30
+ ## note: only return season keys/names (not hash with usage counter)
31
+ seasons.keys
32
+ end
33
+
34
+
35
+ ##########
36
+ # main machinery
37
+
38
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
39
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
40
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
41
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
42
+ ## CsvHash.parse( text, sep: sep )
43
+ ## end
44
+
45
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
46
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
47
+ parse( txt, headers: headers,
48
+ filters: filters,
49
+ converters: converters,
50
+ sep: sep )
51
+ end
52
+
53
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
54
+ new( txt ).parse( headers: headers,
55
+ filters: filters,
56
+ converters: converters,
57
+ sep: sep )
58
+ end
59
+
60
+
61
+ def initialize( txt )
62
+ @txt = txt
63
+ end
64
+
65
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
66
+
67
+ headers_mapping = {}
68
+
69
+ rows = parse_csv( @txt, sep: sep )
70
+
71
+ return [] if rows.empty? ## no rows / empty?
72
+
73
+
74
+ ## fix/todo: use logger!!!!
75
+ ## pp csv
76
+
77
+ if headers ## use user supplied headers if present
78
+ headers_mapping = headers_mapping.merge( headers )
79
+ else
80
+
81
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
82
+ headers = rows[0].keys
83
+ pp headers
84
+
85
+ # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
86
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
87
+
88
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
89
+ ## assume our own football.csv format, see github.com/footballcsv
90
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
91
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
92
+ headers_mapping[:date] = find_header( headers, ['Date'] )
93
+
94
+ ## check for all-in-one full time (ft) and half time (ht) scores?
95
+ headers_mapping[:score] = find_header( headers, ['FT'] )
96
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
+
98
+ headers_mapping[:round] = find_header( headers, ['Round'] )
99
+
100
+ ## optional headers - note: find_header returns nil if header NOT found
101
+ header_stage = find_header( headers, ['Stage'] )
102
+ headers_mapping[:stage] = header_stage if header_stage
103
+ else
104
+ ## else try footballdata.uk and others
105
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
106
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
107
+ headers_mapping[:date] = find_header( headers, ['Date'] )
108
+
109
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
110
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
111
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
112
+
113
+ ## check for half time scores ?
114
+ ## note: HT = Half Time
115
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
116
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
117
+ end
118
+ end
119
+
120
+ pp headers_mapping
121
+
122
+ ### todo/fix: check headers - how?
123
+ ## if present HomeTeam or HT required etc.
124
+ ## issue error/warn if not present
125
+ ##
126
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
127
+ ## exit 1
128
+ ##
129
+
130
+ matches = []
131
+
132
+ rows.each_with_index do |row,i|
133
+
134
+ ## fix/todo: use logger!!!!
135
+ ## puts "[#{i}] " + row.inspect if i < 2
136
+
137
+
138
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
139
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
140
+ skip = false
141
+ filters.each do |header, value|
142
+ if row[ header ] != value ## e.g. row['Season']
143
+ skip = true
144
+ break
145
+ end
146
+ end
147
+ next if skip ## if header values NOT matching
148
+ end
149
+
150
+
151
+ ## note:
152
+ ## add converters after filters for now (why not before filters?)
153
+ if converters ## any converters defined?
154
+ ## convert single proc shortcut to array with single converter
155
+ converters = [converters] if converters.is_a?( Proc )
156
+
157
+ ## assumes array of procs
158
+ converters.each do |converter|
159
+ row = converter.call( row )
160
+ end
161
+ end
162
+
163
+
164
+
165
+ team1 = row[ headers_mapping[ :team1 ]]
166
+ team2 = row[ headers_mapping[ :team2 ]]
167
+
168
+
169
+ ## check if data present - if not skip (might be empty row)
170
+ ## note: (old classic) csv reader returns nil for empty fields
171
+ ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
172
+ if (team1.nil? || team1.empty?) &&
173
+ (team2.nil? || team2.empty?)
174
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
175
+ pp row
176
+ next
177
+ end
178
+
179
+ ## remove possible match played counters e.g. (4) (11) etc.
180
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
181
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
182
+
183
+
184
+
185
+ col = row[ headers_mapping[ :date ]]
186
+ col = col.strip # make sure no leading or trailing spaces are left over
187
+
188
+ if col.empty? ||
189
+ col =~ /^-{1,}$/ || # e.g. - or ---
190
+ col =~ /^\?{1,}$/ # e.g. ? or ???
191
+ ## note: allow missing / unknown date for match
192
+ date = nil
193
+ else
194
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
195
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
196
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
197
+ col = col.strip # make sure no leading or trailing spaces are left over
198
+
199
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
200
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
201
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
202
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
203
+ elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
204
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
205
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
206
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
207
+ elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
208
+ date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
209
+ else
210
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
211
+ ## todo/fix: add to errors/warns list - why? why not?
212
+ exit 1
213
+ end
214
+
215
+ ## todo/check: use date object (keep string?) - why? why not?
216
+ ## todo/fix: yes!! use date object!!!! do NOT use string
217
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
218
+ end
219
+
220
+
221
+ round = nil
222
+ ## check for (optional) round / matchday
223
+ if headers_mapping[ :round ]
224
+ col = row[ headers_mapping[ :round ]]
225
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
226
+ round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
227
+ end
228
+
229
+
230
+ score1 = nil
231
+ score2 = nil
232
+ score1i = nil
233
+ score2i = nil
234
+
235
+ ## check for full time scores ?
236
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
237
+ ft = [ row[ headers_mapping[ :score1 ]],
238
+ row[ headers_mapping[ :score2 ]] ]
239
+
240
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
241
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
242
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
243
+ end
244
+
245
+ ## check for half time scores ?
246
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
247
+ ht = [ row[ headers_mapping[ :score1i ]],
248
+ row[ headers_mapping[ :score2i ]] ]
249
+
250
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
251
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
252
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
253
+ end
254
+
255
+ ## check for all-in-one full time scores?
256
+ if headers_mapping[ :score ]
257
+ ft = row[ headers_mapping[ :score ] ]
258
+ if ft =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
259
+ scores = ft.split( /[\-:]/ )
260
+ score1 = scores[0].to_i
261
+ score2 = scores[1].to_i
262
+ end
263
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
264
+ end
265
+
266
+ if headers_mapping[ :scorei ]
267
+ ht = row[ headers_mapping[ :scorei ] ]
268
+ if ht =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
269
+ scores = ht.split( /[\-:]/) ## allow 1-1 and 1:1
270
+ score1i = scores[0].to_i
271
+ score2i = scores[1].to_i
272
+ end
273
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
274
+ end
275
+
276
+
277
+ ## try some optional headings / columns
278
+ stage = nil
279
+ if headers_mapping[ :stage ]
280
+ col = row[ headers_mapping[ :stage ]]
281
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
282
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
283
+ ## note: allow missing stage for match / defaults to "regular"
284
+ nil
285
+ elsif col == '?'
286
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
287
+ '?' ## todo/check: use unknown and NOT ? - why? why not?
288
+ else
289
+ col
290
+ end
291
+ end
292
+
293
+
294
+ match = Import::Match.new( date: date,
295
+ team1: team1, team2: team2,
296
+ score1: score1, score2: score2,
297
+ score1i: score1i, score2i: score2i,
298
+ round: round,
299
+ stage: stage )
300
+ matches << match
301
+ end
302
+
303
+ ## pp matches
304
+ matches
305
+ end
306
+
307
+
308
+ private
309
+
310
+ def find_header( headers, candidates )
311
+ ## todo/fix: use find_first from Enumerable or similar ?! - why? more idiomatic code?
312
+
313
+ candidates.each do |candidate|
314
+ return candidate if headers.include?( candidate ) ## bingo!!!
315
+ end
316
+ nil ## no matching header found!!!
317
+ end
318
+
319
+ end # class CsvMatchParser
320
+ end # module SportDb
321
+