sportdb-formats 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +5 -0
  3. data/Rakefile +2 -2
  4. data/lib/sportdb/formats.rb +17 -5
  5. data/lib/sportdb/formats/country/country_index.rb +7 -7
  6. data/lib/sportdb/formats/country/country_reader.rb +26 -6
  7. data/lib/sportdb/formats/datafile_package.rb +10 -7
  8. data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
  9. data/lib/sportdb/formats/league/league_reader.rb +3 -3
  10. data/lib/sportdb/formats/match/mapper.rb +63 -63
  11. data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
  12. data/lib/sportdb/formats/match/match_parser.rb +99 -180
  13. data/lib/sportdb/formats/match/match_parser_csv.rb +321 -0
  14. data/lib/sportdb/formats/package.rb +165 -11
  15. data/lib/sportdb/formats/parser_helper.rb +11 -2
  16. data/lib/sportdb/formats/score/score_formats.rb +41 -1
  17. data/lib/sportdb/formats/score/score_parser.rb +6 -0
  18. data/lib/sportdb/formats/structs/country.rb +6 -3
  19. data/lib/sportdb/formats/structs/group.rb +5 -12
  20. data/lib/sportdb/formats/structs/round.rb +6 -13
  21. data/lib/sportdb/formats/structs/standings.rb +30 -9
  22. data/lib/sportdb/formats/structs/team.rb +1 -2
  23. data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
  24. data/lib/sportdb/formats/version.rb +4 -2
  25. data/test/helper.rb +2 -1
  26. data/test/test_club_reader_props.rb +2 -2
  27. data/test/test_country_index.rb +4 -4
  28. data/test/test_country_reader.rb +34 -4
  29. data/test/test_csv_match_parser.rb +114 -0
  30. data/test/test_csv_match_parser_utils.rb +20 -0
  31. data/test/test_csv_reader.rb +5 -5
  32. data/test/test_datafile.rb +0 -32
  33. data/test/test_datafile_package.rb +46 -0
  34. data/test/test_match_auto_relegation.rb +41 -0
  35. data/test/test_package.rb +60 -28
  36. data/test/test_package_match.rb +27 -3
  37. data/test/test_regex.rb +25 -7
  38. data/test/test_scores.rb +58 -49
  39. metadata +9 -4
@@ -16,7 +16,7 @@ class TeamMapper
16
16
  end
17
17
 
18
18
  def map_teams!( line )
19
- @mapper.map_titles!( line )
19
+ @mapper.map_names!( line )
20
20
  end
21
21
  end # class TeamMapper
22
22
 
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
82
82
  # team1 team2 - match (will get new auto-matchday! not last round)
83
83
  @last_round = nil
84
84
 
85
- title, pos = find_group_title_and_pos!( line )
85
+ name = find_group_name!( line )
86
86
 
87
- logger.debug " title: >#{title}<"
88
- logger.debug " pos: >#{pos}<"
87
+ logger.debug " name: >#{name}<"
89
88
  logger.debug " line: >#{line}<"
90
89
 
91
- group = @groups[ title ]
90
+ group = @groups[ name ]
92
91
  if group.nil?
93
- puts "!! ERROR - no group def found for >#{title}<"
92
+ puts "!! ERROR - no group def found for >#{name}<"
94
93
  exit 1
95
94
  end
96
95
 
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
104
103
  @mapper_teams.map_teams!( line )
105
104
  teams = @mapper_teams.find_teams!( line )
106
105
 
107
- title, pos = find_group_title_and_pos!( line )
106
+ name = find_group_name!( line )
108
107
 
109
108
  logger.debug " line: >#{line}<"
110
109
 
111
- group = Import::Group.new( pos: pos,
112
- title: title,
113
- teams: teams.map {|team| team.title } )
110
+ ## todo/check/fix: add back group key - why? why not?
111
+ group = Import::Group.new( name: name,
112
+ teams: teams.map {|team| team.name } )
114
113
 
115
- @groups[ title ] = group
114
+ @groups[ name ] = group
116
115
  end
117
116
 
118
117
 
119
- def find_group_title_and_pos!( line )
118
+ def find_group_name!( line )
120
119
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
121
120
  ## nb: (?:) = is for non-capturing group(ing)
122
121
 
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
125
124
 
126
125
  ## todo:
127
126
  ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
128
- regex = /(?:Group|Gruppe|Grupo)\s+((?:\d{1}|[A-Z]{1,3}))\b/
127
+ regex = /\b
128
+ (?:
129
+ (Group | Gruppe | Grupo)
130
+ [ ]+
131
+ (\d+ | [A-Z]+)
132
+ )
133
+ \b/x
129
134
 
130
135
  m = regex.match( line )
131
136
 
132
- return [nil,nil] if m.nil?
133
-
134
- pos = case m[1]
135
- when 'A' then 1
136
- when 'B' then 2
137
- when 'C' then 3
138
- when 'D' then 4
139
- when 'E' then 5
140
- when 'F' then 6
141
- when 'G' then 7
142
- when 'H' then 8
143
- when 'I' then 9
144
- when 'J' then 10
145
- when 'K' then 11
146
- when 'L' then 12
147
- when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
148
- else m[1].to_i
149
- end
150
-
151
- title = m[0]
152
-
153
- logger.debug " title: >#{title}<"
154
- logger.debug " pos: >#{pos}<"
155
-
156
- line.sub!( regex, '[GROUP.TITLE+POS]' )
157
-
158
- [title,pos]
137
+ return nil if m.nil?
138
+
139
+ name = m[0]
140
+
141
+ logger.debug " name: >#{name}<"
142
+
143
+ line.sub!( name, '[GROUP.NAME]' )
144
+
145
+ name
159
146
  end
160
147
 
161
148
 
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
180
167
  end_date = end_date.to_date
181
168
 
182
169
 
183
- pos = find_round_pos!( line )
184
- title = find_round_def_title!( line )
185
- # NB: use extracted round title for knockout check
186
- knockout_flag = is_knockout_round?( title )
170
+ name = find_round_def_name!( line )
171
+ # NB: use extracted round name for knockout check
172
+ knockout_flag = is_knockout_round?( name )
187
173
 
188
174
 
189
175
  logger.debug " start_date: #{start_date}"
190
176
  logger.debug " end_date: #{end_date}"
191
- logger.debug " pos: #{pos}"
192
- logger.debug " title: >#{title}<"
177
+ logger.debug " name: >#{name}<"
193
178
  logger.debug " knockout_flag: #{knockout_flag}"
194
179
 
195
180
  logger.debug " line: >#{line}<"
196
181
 
197
- #######################################
198
- # todo/fix: add auto flag is false !!!! - why? why not?
199
- round = Import::Round.new( pos: pos,
200
- title: title,
182
+ round = Import::Round.new( name: name,
201
183
  start_date: start_date,
202
184
  end_date: end_date,
203
185
  knockout: knockout_flag,
204
186
  auto: false )
205
187
 
206
- @rounds[ title ] = round
188
+ @rounds[ name ] = round
207
189
  end
208
190
 
209
191
 
210
192
 
211
- def find_round_pos!( line )
212
- # pass #1) extract optional round pos from line
213
- # e.g. (1) - must start line
214
- regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
215
-
216
- # pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
217
- # note: /\b(\d{1,3})\b/
218
- # will match -12
219
- # thus, use space required - will NOT match -2 e.g. Group-2 Play-off
220
- # note: allow 1. Runde n
221
- # 1^ Giornata
222
- regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
223
-
224
- if line =~ regex_pos
225
- logger.debug " pos: >#{$1}<"
226
-
227
- line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
228
- return $1.to_i
229
- elsif line =~ regex_num
230
- ## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
231
- ## NB: do NOT remove pos from string (will get removed by round title)
232
-
233
- num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
234
-
235
- #### fix:
236
- # use/make keywords required
237
- # e.g. Round of 16 -> should NOT match 16!
238
- # Spiel um Platz 3 (or 5) etc -> should NOT match 3!
239
- # Round 16 - ok
240
- # thus, check for required keywords
193
+ def find_round_def_name!( line )
194
+ # assume everything before pipe (|) is the round name
195
+ # strip [ROUND.POS], todo:?? [ROUND.NAME2]
241
196
 
242
- ## quick hack for round of 16
243
- # todo: mask match e.g. Round of xxx ... and try again - might include something
244
- # reuse pattern for Group XX Replays for example
245
- if line =~ /^\s*Round of \d{1,3}\b/
246
- return nil
247
- end
248
-
249
- logger.debug " pos: >#{num}<"
250
- return num
251
- else
252
- ## fix: add logger.warn no round pos found in line
253
- return nil
254
- end
255
- end # method find_round_pos!
256
-
257
- def find_round_def_title!( line )
258
- # assume everything before pipe (\) is the round title
259
- # strip [ROUND.POS], todo:?? [ROUND.TITLE2]
260
-
261
- # todo/fix: add title2 w/ // or / why? why not?
197
+ # todo/fix: add name2 w/ // or / why? why not?
262
198
  # -- strip / or / chars
263
199
 
264
200
  buf = line.dup
265
- logger.debug " find_round_def_title! line-before: >>#{buf}<<"
201
+ logger.debug " find_round_def_name! line-before: >>#{buf}<<"
266
202
 
267
203
  ## cut-off everything after (including) pipe (|)
268
204
  buf = buf[ 0...buf.index('|') ]
269
-
270
- # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
271
- buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
272
- # remove leading and trailing whitespace
273
205
  buf.strip!
274
206
 
275
- logger.debug " find_round_def_title! line-after: >>#{buf}<<"
207
+ logger.debug " find_round_def_name! line-after: >>#{buf}<<"
276
208
 
277
- logger.debug " title: >>#{buf}<<"
278
- line.sub!( buf, '[ROUND.TITLE]' )
209
+ logger.debug " name: >>#{buf}<<"
210
+ line.sub!( buf, '[ROUND.NAME]' )
279
211
 
280
212
  buf
281
213
  end
282
214
 
283
- def find_round_header_title!( line )
284
- # assume everything left is the round title
285
- # extract all other items first (round title2, round pos, group title n pos, etc.)
286
215
 
287
- ## todo/fix:
288
- ## cleanup method
289
- ## use buf.index( '//' ) to split string (see found_round_def)
290
- ## why? simpler why not?
291
- ## - do we currently allow groups if title2 present? add example if it works?
216
+ ## split by || or |||
217
+ ## or ++ or +++
218
+ ## or -- or ---
219
+ ## or // or ///
220
+ ## note: allow Final | First Leg as ONE name same as
221
+ ## Final - First Leg or
222
+ ## Final, First Leg
223
+ ## for cut-off the separator always MUST be two or more chars
224
+ ##
225
+ ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
226
+ ## todo/fix: move to parser utils and add a method split_name or such?
227
+ HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
228
+ (?:\|{2,} |
229
+ \+{2,} |
230
+ -{2,} |
231
+ \/{2,}
232
+ )
233
+ [ ]* ## allow (strip) trailing spaces
234
+ /x
235
+
236
+ def find_round_header_name!( line )
237
+ # assume everything left is the round name
238
+ # extract all other items first (round name2, round pos, group name n pos, etc.)
292
239
 
293
240
  buf = line.dup
294
- logger.debug " find_round_header_title! line-before: >>#{buf}<<"
241
+ logger.debug " find_round_header_name! line-before: >>#{buf}<<"
242
+
243
+
244
+ parts = buf.split( HEADER_SEP_RE )
245
+ buf = parts[0]
295
246
 
296
- buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
297
247
  buf.strip! # remove leading and trailing whitespace
298
248
 
299
- logger.debug " find_round_title! line-after: >>#{buf}<<"
249
+ logger.debug " find_round_name! line-after: >>#{buf}<<"
300
250
 
301
- ### bingo - assume what's left is the round title
251
+ ### bingo - assume what's left is the round name
302
252
 
303
- logger.debug " title: >>#{buf}<<"
304
- line.sub!( buf, '[ROUND.TITLE]' )
253
+ logger.debug " name: >>#{buf}<<"
254
+ line.sub!( buf, '[ROUND.NAME]' )
305
255
 
306
256
  buf
307
257
  end
308
258
 
259
+ ## quick hack- collect all "fillwords" by language!!!!
260
+ ## change later and add to sportdb-langs!!!!
261
+ ##
262
+ ## strip all "fillwords" e.g.:
263
+ ## Nachtrag/Postponed/Addition/Supplemento names
264
+ ##
265
+ ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
266
+ ROUND_EXTRA_WORDS_RE = /\b(?:
267
+ Nachtrag | ## de
268
+ Postponed | ## en
269
+ Addition | ## en
270
+ Supplemento ## es
271
+ )
272
+ \b/ix
309
273
 
310
274
  def parse_round_header( line )
311
275
  logger.debug "parsing round header line: >#{line}<"
312
276
 
313
- ## todo/check/fix:
314
- # make sure Round of 16 will not return pos 16 -- how? possible?
315
- # add unit test too to verify
316
- pos = find_round_pos!( line )
317
-
318
- title = find_round_header_title!( line )
277
+ name = find_round_header_name!( line )
319
278
 
320
279
  logger.debug " line: >#{line}<"
321
280
 
281
+ name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
282
+ name = name.strip
322
283
 
323
- round = @rounds[ title ]
284
+ round = @rounds[ name ]
324
285
  if round.nil? ## auto-add / create if missing
325
- round = Import::Round.new( pos: pos,
326
- title: title )
327
- @rounds[ title ] = round
286
+ ## todo/check: add num (was pos) if present - why? why not?
287
+ round = Import::Round.new( name: name )
288
+ @rounds[ name ] = round
328
289
  end
329
290
 
330
291
  ## todo/check: if pos match (MUST always match for now)
331
292
  @last_round = round
332
293
  @last_group = nil # note: reset group to no group - why? why not?
333
-
334
-
335
- ## NB: dummy/placeholder start_at, end_at date
336
- ## replace/patch after adding all games for round
337
-
338
- =begin
339
- round_attribs = {
340
- title: title,
341
- title2: title2,
342
- knockout: knockout_flag
343
- }
344
-
345
- if pos > 999000
346
- # no pos (e.g. will get autonumbered later) - try match by title for now
347
- # e.g. lets us use title 'Group Replays', for example, multiple times
348
- @round = Round.find_by_event_id_and_title( @event.id, title )
349
- else
350
- @round = Round.find_by_event_id_and_pos( @event.id, pos )
351
- end
352
-
353
- if @round.present?
354
- logger.debug "update round #{@round.id}:"
355
- else
356
- logger.debug "create round:"
357
- @round = Round.new
358
-
359
- round_attribs = round_attribs.merge( {
360
- event_id: @event.id,
361
- pos: pos,
362
- start_at: Date.parse('1911-11-11'),
363
- end_at: Date.parse('1911-11-11')
364
- })
365
- end
366
-
367
- logger.debug round_attribs.to_json
368
-
369
- @round.update_attributes!( round_attribs )
370
-
371
- @patch_round_ids_pos << @round.id if pos > 999000
372
- ### store list of round ids for patching start_at/end_at at the end
373
- @patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
374
- =end
375
294
  end
376
295
 
377
296
 
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
457
376
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
458
377
 
459
378
  @matches << Import::Match.new( date: date,
460
- team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.title)
461
- team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.title)
379
+ team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
380
+ team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
462
381
  score: score,
463
- round: round ? round.title : nil, ## note: for now always use string (assume unique canonical name for event)
464
- group: @last_group ? @last_group.title : nil ) ## note: for now always use string (assume unique canonical name for event)
382
+ round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
383
+ group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
465
384
 
466
385
  ### todo: cache team lookups in hash?
467
386
 
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
517
436
 
518
437
  round_attribs = {
519
438
  event_id: @event.id,
520
- title: "Matchday #{date.to_date}",
439
+ name: "Matchday #{date.to_date}",
521
440
  pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
522
441
  start_at: date.to_date,
523
442
  end_at: date.to_date
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
541
460
  end
542
461
 
543
462
  ## note: will crash (round.pos) if round is nil
544
- logger.debug( " using round #{round.pos} >#{round.title}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
463
+ logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
545
464
  else
546
465
  ## use round from last round header
547
466
  round = @round
@@ -0,0 +1,321 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ class CsvMatchParser
6
+
7
+ #############
8
+ # helpers
9
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
10
+
11
+ ## check if headers incl. season; if yes, it has priority over col mapping
12
+ ## e.g. no need to specify twice (if using headers)
13
+ col = headers[:season] if headers && headers[:season]
14
+
15
+ seasons = Hash.new( 0 ) ## default value is 0
16
+
17
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
18
+ ## use read_csv with block to switch to foreach!!!!
19
+ rows = read_csv( path, sep: sep )
20
+
21
+ rows.each_with_index do |row,i|
22
+ puts "[#{i}] " + row.inspect if i < 2
23
+
24
+ season = row[ col ] ## column name defaults to 'Season'
25
+ seasons[ season ] += 1
26
+ end
27
+
28
+ pp seasons
29
+
30
+ ## note: only return season keys/names (not hash with usage counter)
31
+ seasons.keys
32
+ end
33
+
34
+
35
+ ##########
36
+ # main machinery
37
+
38
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
39
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
40
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
41
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
42
+ ## CsvHash.parse( text, sep: sep )
43
+ ## end
44
+
45
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
46
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
47
+ parse( txt, headers: headers,
48
+ filters: filters,
49
+ converters: converters,
50
+ sep: sep )
51
+ end
52
+
53
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
54
+ new( txt ).parse( headers: headers,
55
+ filters: filters,
56
+ converters: converters,
57
+ sep: sep )
58
+ end
59
+
60
+
61
+ def initialize( txt )
62
+ @txt = txt
63
+ end
64
+
65
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
66
+
67
+ headers_mapping = {}
68
+
69
+ rows = parse_csv( @txt, sep: sep )
70
+
71
+ return [] if rows.empty? ## no rows / empty?
72
+
73
+
74
+ ## fix/todo: use logger!!!!
75
+ ## pp csv
76
+
77
+ if headers ## use user supplied headers if present
78
+ headers_mapping = headers_mapping.merge( headers )
79
+ else
80
+
81
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
82
+ headers = rows[0].keys
83
+ pp headers
84
+
85
+ # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
86
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
87
+
88
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
89
+ ## assume our own football.csv format, see github.com/footballcsv
90
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
91
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
92
+ headers_mapping[:date] = find_header( headers, ['Date'] )
93
+
94
+ ## check for all-in-one full time (ft) and half time (ht) scores?
95
+ headers_mapping[:score] = find_header( headers, ['FT'] )
96
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
+
98
+ headers_mapping[:round] = find_header( headers, ['Round'] )
99
+
100
+ ## optional headers - note: find_header returns nil if header NOT found
101
+ header_stage = find_header( headers, ['Stage'] )
102
+ headers_mapping[:stage] = header_stage if header_stage
103
+ else
104
+ ## else try footballdata.uk and others
105
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
106
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
107
+ headers_mapping[:date] = find_header( headers, ['Date'] )
108
+
109
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
110
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
111
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
112
+
113
+ ## check for half time scores ?
114
+ ## note: HT = Half Time
115
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
116
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
117
+ end
118
+ end
119
+
120
+ pp headers_mapping
121
+
122
+ ### todo/fix: check headers - how?
123
+ ## if present HomeTeam or HT required etc.
124
+ ## issue error/warn if not present
125
+ ##
126
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
127
+ ## exit 1
128
+ ##
129
+
130
+ matches = []
131
+
132
+ rows.each_with_index do |row,i|
133
+
134
+ ## fix/todo: use logger!!!!
135
+ ## puts "[#{i}] " + row.inspect if i < 2
136
+
137
+
138
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
139
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
140
+ skip = false
141
+ filters.each do |header, value|
142
+ if row[ header ] != value ## e.g. row['Season']
143
+ skip = true
144
+ break
145
+ end
146
+ end
147
+ next if skip ## if header values NOT matching
148
+ end
149
+
150
+
151
+ ## note:
152
+ ## add converters after filters for now (why not before filters?)
153
+ if converters ## any converters defined?
154
+ ## convert single proc shortcut to array with single converter
155
+ converters = [converters] if converters.is_a?( Proc )
156
+
157
+ ## assumes array of procs
158
+ converters.each do |converter|
159
+ row = converter.call( row )
160
+ end
161
+ end
162
+
163
+
164
+
165
+ team1 = row[ headers_mapping[ :team1 ]]
166
+ team2 = row[ headers_mapping[ :team2 ]]
167
+
168
+
169
+ ## check if data present - if not skip (might be empty row)
170
+ ## note: (old classic) csv reader returns nil for empty fields
171
+ ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
172
+ if (team1.nil? || team1.empty?) &&
173
+ (team2.nil? || team2.empty?)
174
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
175
+ pp row
176
+ next
177
+ end
178
+
179
+ ## remove possible match played counters e.g. (4) (11) etc.
180
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
181
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
182
+
183
+
184
+
185
+ col = row[ headers_mapping[ :date ]]
186
+ col = col.strip # make sure no leading or trailing spaces left over
187
+
188
+ if col.empty? ||
189
+ col =~ /^-{1,}$/ || # e.g. - or ---
190
+ col =~ /^\?{1,}$/ # e.g. ? or ???
191
+ ## note: allow missing / unknown date for match
192
+ date = nil
193
+ else
194
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
195
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
196
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
197
+ col = col.strip # make sure no leading or trailing spaces left over
198
+
199
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
200
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
201
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
202
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
203
+ elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
204
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
205
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
206
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
207
+ elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
208
+ date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
209
+ else
210
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
211
+ ## todo/fix: add to errors/warns list - why? why not?
212
+ exit 1
213
+ end
214
+
215
+ ## todo/check: use date object (keep string?) - why? why not?
216
+ ## todo/fix: yes!! use date object!!!! do NOT use string
217
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
218
+ end
219
+
220
+
221
+ round = nil
222
+ ## check for (optional) round / matchday
223
+ if headers_mapping[ :round ]
224
+ col = row[ headers_mapping[ :round ]]
225
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
226
+ round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
227
+ end
228
+
229
+
230
+ score1 = nil
231
+ score2 = nil
232
+ score1i = nil
233
+ score2i = nil
234
+
235
+ ## check for full time scores ?
236
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
237
+ ft = [ row[ headers_mapping[ :score1 ]],
238
+ row[ headers_mapping[ :score2 ]] ]
239
+
240
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
241
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
242
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
243
+ end
244
+
245
+ ## check for half time scores ?
246
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
247
+ ht = [ row[ headers_mapping[ :score1i ]],
248
+ row[ headers_mapping[ :score2i ]] ]
249
+
250
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
251
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
252
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
253
+ end
254
+
255
+ ## check for all-in-one full time scores?
256
+ if headers_mapping[ :score ]
257
+ ft = row[ headers_mapping[ :score ] ]
258
+ if ft =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
259
+ scores = ft.split( /[\-:]/ )
260
+ score1 = scores[0].to_i
261
+ score2 = scores[1].to_i
262
+ end
263
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
264
+ end
265
+
266
+ if headers_mapping[ :scorei ]
267
+ ht = row[ headers_mapping[ :scorei ] ]
268
+ if ht =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
269
+ scores = ht.split( /[\-:]/) ## allow 1-1 and 1:1
270
+ score1i = scores[0].to_i
271
+ score2i = scores[1].to_i
272
+ end
273
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
274
+ end
275
+
276
+
277
+ ## try some optional headings / columns
278
+ stage = nil
279
+ if headers_mapping[ :stage ]
280
+ col = row[ headers_mapping[ :stage ]]
281
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
282
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
283
+ ## note: allow missing stage for match / defaults to "regular"
284
+ nil
285
+ elsif col == '?'
286
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
287
+ '?' ## todo/check: use unknown and NOT ? - why? why not?
288
+ else
289
+ col
290
+ end
291
+ end
292
+
293
+
294
+ match = Import::Match.new( date: date,
295
+ team1: team1, team2: team2,
296
+ score1: score1, score2: score2,
297
+ score1i: score1i, score2i: score2i,
298
+ round: round,
299
+ stage: stage )
300
+ matches << match
301
+ end
302
+
303
+ ## pp matches
304
+ matches
305
+ end
306
+
307
+
308
+ private
309
+
310
+ def find_header( headers, candidates )
311
+ ## todo/fix: use find_first from Enumerable or similar ?! - why? more idiomatic code?
312
+
313
+ candidates.each do |candidate|
314
+ return candidate if headers.include?( candidate ) ## bingo!!!
315
+ end
316
+ nil ## no matching header found!!!
317
+ end
318
+
319
+ end # class CsvMatchParser
320
+ end # module SportDb
321
+