sportdb-formats 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -0
- data/Rakefile +2 -2
- data/lib/sportdb/formats.rb +17 -5
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/datafile_package.rb +10 -7
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +99 -180
- data/lib/sportdb/formats/match/match_parser_csv.rb +321 -0
- data/lib/sportdb/formats/package.rb +165 -11
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_formats.rb +41 -1
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
- data/lib/sportdb/formats/version.rb +4 -2
- data/test/helper.rb +2 -1
- data/test/test_club_reader_props.rb +2 -2
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- data/test/test_csv_match_parser.rb +114 -0
- data/test/test_csv_match_parser_utils.rb +20 -0
- data/test/test_csv_reader.rb +5 -5
- data/test/test_datafile.rb +0 -32
- data/test/test_datafile_package.rb +46 -0
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_package.rb +60 -28
- data/test/test_package_match.rb +27 -3
- data/test/test_regex.rb +25 -7
- data/test/test_scores.rb +58 -49
- metadata +9 -4
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
|
|
82
82
|
# team1 team2 - match (will get new auto-matchday! not last round)
|
83
83
|
@last_round = nil
|
84
84
|
|
85
|
-
|
85
|
+
name = find_group_name!( line )
|
86
86
|
|
87
|
-
logger.debug "
|
88
|
-
logger.debug " pos: >#{pos}<"
|
87
|
+
logger.debug " name: >#{name}<"
|
89
88
|
logger.debug " line: >#{line}<"
|
90
89
|
|
91
|
-
group = @groups[
|
90
|
+
group = @groups[ name ]
|
92
91
|
if group.nil?
|
93
|
-
puts "!! ERROR - no group def found for >#{
|
92
|
+
puts "!! ERROR - no group def found for >#{name}<"
|
94
93
|
exit 1
|
95
94
|
end
|
96
95
|
|
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
|
|
104
103
|
@mapper_teams.map_teams!( line )
|
105
104
|
teams = @mapper_teams.find_teams!( line )
|
106
105
|
|
107
|
-
|
106
|
+
name = find_group_name!( line )
|
108
107
|
|
109
108
|
logger.debug " line: >#{line}<"
|
110
109
|
|
111
|
-
group
|
112
|
-
|
113
|
-
teams: teams.map {|team| team.
|
110
|
+
## todo/check/fix: add back group key - why? why not?
|
111
|
+
group = Import::Group.new( name: name,
|
112
|
+
teams: teams.map {|team| team.name } )
|
114
113
|
|
115
|
-
@groups[
|
114
|
+
@groups[ name ] = group
|
116
115
|
end
|
117
116
|
|
118
117
|
|
119
|
-
def
|
118
|
+
def find_group_name!( line )
|
120
119
|
## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
|
121
120
|
## nb: (?:) = is for non-capturing group(ing)
|
122
121
|
|
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
|
|
125
124
|
|
126
125
|
## todo:
|
127
126
|
## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
|
128
|
-
regex =
|
127
|
+
regex = /\b
|
128
|
+
(?:
|
129
|
+
(Group | Gruppe | Grupo)
|
130
|
+
[ ]+
|
131
|
+
(\d+ | [A-Z]+)
|
132
|
+
)
|
133
|
+
\b/x
|
129
134
|
|
130
135
|
m = regex.match( line )
|
131
136
|
|
132
|
-
return
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
when 'G' then 7
|
142
|
-
when 'H' then 8
|
143
|
-
when 'I' then 9
|
144
|
-
when 'J' then 10
|
145
|
-
when 'K' then 11
|
146
|
-
when 'L' then 12
|
147
|
-
when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
|
148
|
-
else m[1].to_i
|
149
|
-
end
|
150
|
-
|
151
|
-
title = m[0]
|
152
|
-
|
153
|
-
logger.debug " title: >#{title}<"
|
154
|
-
logger.debug " pos: >#{pos}<"
|
155
|
-
|
156
|
-
line.sub!( regex, '[GROUP.TITLE+POS]' )
|
157
|
-
|
158
|
-
[title,pos]
|
137
|
+
return nil if m.nil?
|
138
|
+
|
139
|
+
name = m[0]
|
140
|
+
|
141
|
+
logger.debug " name: >#{name}<"
|
142
|
+
|
143
|
+
line.sub!( name, '[GROUP.NAME]' )
|
144
|
+
|
145
|
+
name
|
159
146
|
end
|
160
147
|
|
161
148
|
|
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
|
|
180
167
|
end_date = end_date.to_date
|
181
168
|
|
182
169
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
knockout_flag = is_knockout_round?( title )
|
170
|
+
name = find_round_def_name!( line )
|
171
|
+
# NB: use extracted round name for knockout check
|
172
|
+
knockout_flag = is_knockout_round?( name )
|
187
173
|
|
188
174
|
|
189
175
|
logger.debug " start_date: #{start_date}"
|
190
176
|
logger.debug " end_date: #{end_date}"
|
191
|
-
logger.debug "
|
192
|
-
logger.debug " title: >#{title}<"
|
177
|
+
logger.debug " name: >#{name}<"
|
193
178
|
logger.debug " knockout_flag: #{knockout_flag}"
|
194
179
|
|
195
180
|
logger.debug " line: >#{line}<"
|
196
181
|
|
197
|
-
|
198
|
-
# todo/fix: add auto flag is false !!!! - why? why not?
|
199
|
-
round = Import::Round.new( pos: pos,
|
200
|
-
title: title,
|
182
|
+
round = Import::Round.new( name: name,
|
201
183
|
start_date: start_date,
|
202
184
|
end_date: end_date,
|
203
185
|
knockout: knockout_flag,
|
204
186
|
auto: false )
|
205
187
|
|
206
|
-
@rounds[
|
188
|
+
@rounds[ name ] = round
|
207
189
|
end
|
208
190
|
|
209
191
|
|
210
192
|
|
211
|
-
def
|
212
|
-
#
|
213
|
-
#
|
214
|
-
regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
|
215
|
-
|
216
|
-
# pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
|
217
|
-
# note: /\b(\d{1,3})\b/
|
218
|
-
# will match -12
|
219
|
-
# thus, use space required - will NOT match -2 e.g. Group-2 Play-off
|
220
|
-
# note: allow 1. Runde n
|
221
|
-
# 1^ Giornata
|
222
|
-
regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
|
223
|
-
|
224
|
-
if line =~ regex_pos
|
225
|
-
logger.debug " pos: >#{$1}<"
|
226
|
-
|
227
|
-
line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
|
228
|
-
return $1.to_i
|
229
|
-
elsif line =~ regex_num
|
230
|
-
## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
|
231
|
-
## NB: do NOT remove pos from string (will get removed by round title)
|
232
|
-
|
233
|
-
num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
|
234
|
-
|
235
|
-
#### fix:
|
236
|
-
# use/make keywords required
|
237
|
-
# e.g. Round of 16 -> should NOT match 16!
|
238
|
-
# Spiel um Platz 3 (or 5) etc -> should NOT match 3!
|
239
|
-
# Round 16 - ok
|
240
|
-
# thus, check for required keywords
|
193
|
+
def find_round_def_name!( line )
|
194
|
+
# assume everything before pipe (|) is the round name
|
195
|
+
# strip [ROUND.POS], todo:?? [ROUND.NAME2]
|
241
196
|
|
242
|
-
|
243
|
-
# todo: mask match e.g. Round of xxx ... and try again - might include something
|
244
|
-
# reuse pattern for Group XX Replays for example
|
245
|
-
if line =~ /^\s*Round of \d{1,3}\b/
|
246
|
-
return nil
|
247
|
-
end
|
248
|
-
|
249
|
-
logger.debug " pos: >#{num}<"
|
250
|
-
return num
|
251
|
-
else
|
252
|
-
## fix: add logger.warn no round pos found in line
|
253
|
-
return nil
|
254
|
-
end
|
255
|
-
end # method find_round_pos!
|
256
|
-
|
257
|
-
def find_round_def_title!( line )
|
258
|
-
# assume everything before pipe (\) is the round title
|
259
|
-
# strip [ROUND.POS], todo:?? [ROUND.TITLE2]
|
260
|
-
|
261
|
-
# todo/fix: add title2 w/ // or / why? why not?
|
197
|
+
# todo/fix: add name2 w/ // or / why? why not?
|
262
198
|
# -- strip / or / chars
|
263
199
|
|
264
200
|
buf = line.dup
|
265
|
-
logger.debug "
|
201
|
+
logger.debug " find_round_def_name! line-before: >>#{buf}<<"
|
266
202
|
|
267
203
|
## cut-off everything after (including) pipe (|)
|
268
204
|
buf = buf[ 0...buf.index('|') ]
|
269
|
-
|
270
|
-
# e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
271
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
|
272
|
-
# remove leading and trailing whitespace
|
273
205
|
buf.strip!
|
274
206
|
|
275
|
-
logger.debug "
|
207
|
+
logger.debug " find_round_def_name! line-after: >>#{buf}<<"
|
276
208
|
|
277
|
-
logger.debug "
|
278
|
-
line.sub!( buf, '[ROUND.
|
209
|
+
logger.debug " name: >>#{buf}<<"
|
210
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
279
211
|
|
280
212
|
buf
|
281
213
|
end
|
282
214
|
|
283
|
-
def find_round_header_title!( line )
|
284
|
-
# assume everything left is the round title
|
285
|
-
# extract all other items first (round title2, round pos, group title n pos, etc.)
|
286
215
|
|
287
|
-
##
|
288
|
-
##
|
289
|
-
##
|
290
|
-
##
|
291
|
-
##
|
216
|
+
## split by || or |||
|
217
|
+
## or ++ or +++
|
218
|
+
## or -- or ---
|
219
|
+
## or // or ///
|
220
|
+
## note: allow Final | First Leg as ONE name same as
|
221
|
+
## Final - First Leg or
|
222
|
+
## Final, First Leg
|
223
|
+
## for cut-off always MUST be two or more chars
|
224
|
+
##
|
225
|
+
## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
|
226
|
+
## todo/fix: move to parser utils and add a method split_name or such?
|
227
|
+
HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
|
228
|
+
(?:\|{2,} |
|
229
|
+
\+{2,} |
|
230
|
+
-{2,} |
|
231
|
+
\/{2,}
|
232
|
+
)
|
233
|
+
[ ]* ## allow (strip) trailing spaces
|
234
|
+
/x
|
235
|
+
|
236
|
+
def find_round_header_name!( line )
|
237
|
+
# assume everything left is the round name
|
238
|
+
# extract all other items first (round name2, round pos, group name n pos, etc.)
|
292
239
|
|
293
240
|
buf = line.dup
|
294
|
-
logger.debug "
|
241
|
+
logger.debug " find_round_header_name! line-before: >>#{buf}<<"
|
242
|
+
|
243
|
+
|
244
|
+
parts = buf.split( HEADER_SEP_RE )
|
245
|
+
buf = parts[0]
|
295
246
|
|
296
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
297
247
|
buf.strip! # remove leading and trailing whitespace
|
298
248
|
|
299
|
-
logger.debug "
|
249
|
+
logger.debug " find_round_name! line-after: >>#{buf}<<"
|
300
250
|
|
301
|
-
### bingo - assume what's left is the round
|
251
|
+
### bingo - assume what's left is the round name
|
302
252
|
|
303
|
-
logger.debug "
|
304
|
-
line.sub!( buf, '[ROUND.
|
253
|
+
logger.debug " name: >>#{buf}<<"
|
254
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
305
255
|
|
306
256
|
buf
|
307
257
|
end
|
308
258
|
|
259
|
+
## quick hack- collect all "fillwords" by language!!!!
|
260
|
+
## change later and add to sportdb-langs!!!!
|
261
|
+
##
|
262
|
+
## strip all "fillwords" e.g.:
|
263
|
+
## Nachtrag/Postponed/Addition/Supplemento names
|
264
|
+
##
|
265
|
+
## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
|
266
|
+
ROUND_EXTRA_WORDS_RE = /\b(?:
|
267
|
+
Nachtrag | ## de
|
268
|
+
Postponed | ## en
|
269
|
+
Addition | ## en
|
270
|
+
Supplemento ## es
|
271
|
+
)
|
272
|
+
\b/ix
|
309
273
|
|
310
274
|
def parse_round_header( line )
|
311
275
|
logger.debug "parsing round header line: >#{line}<"
|
312
276
|
|
313
|
-
|
314
|
-
# make sure Round of 16 will not return pos 16 -- how? possible?
|
315
|
-
# add unit test too to verify
|
316
|
-
pos = find_round_pos!( line )
|
317
|
-
|
318
|
-
title = find_round_header_title!( line )
|
277
|
+
name = find_round_header_name!( line )
|
319
278
|
|
320
279
|
logger.debug " line: >#{line}<"
|
321
280
|
|
281
|
+
name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
|
282
|
+
name = name.strip
|
322
283
|
|
323
|
-
round = @rounds[
|
284
|
+
round = @rounds[ name ]
|
324
285
|
if round.nil? ## auto-add / create if missing
|
325
|
-
|
326
|
-
|
327
|
-
@rounds[
|
286
|
+
## todo/check: add num (was pos) if present - why? why not?
|
287
|
+
round = Import::Round.new( name: name )
|
288
|
+
@rounds[ name ] = round
|
328
289
|
end
|
329
290
|
|
330
291
|
## todo/check: if pos match (MUST always match for now)
|
331
292
|
@last_round = round
|
332
293
|
@last_group = nil # note: reset group to no group - why? why not?
|
333
|
-
|
334
|
-
|
335
|
-
## NB: dummy/placeholder start_at, end_at date
|
336
|
-
## replace/patch after adding all games for round
|
337
|
-
|
338
|
-
=begin
|
339
|
-
round_attribs = {
|
340
|
-
title: title,
|
341
|
-
title2: title2,
|
342
|
-
knockout: knockout_flag
|
343
|
-
}
|
344
|
-
|
345
|
-
if pos > 999000
|
346
|
-
# no pos (e.g. will get autonumbered later) - try match by title for now
|
347
|
-
# e.g. lets us use title 'Group Replays', for example, multiple times
|
348
|
-
@round = Round.find_by_event_id_and_title( @event.id, title )
|
349
|
-
else
|
350
|
-
@round = Round.find_by_event_id_and_pos( @event.id, pos )
|
351
|
-
end
|
352
|
-
|
353
|
-
if @round.present?
|
354
|
-
logger.debug "update round #{@round.id}:"
|
355
|
-
else
|
356
|
-
logger.debug "create round:"
|
357
|
-
@round = Round.new
|
358
|
-
|
359
|
-
round_attribs = round_attribs.merge( {
|
360
|
-
event_id: @event.id,
|
361
|
-
pos: pos,
|
362
|
-
start_at: Date.parse('1911-11-11'),
|
363
|
-
end_at: Date.parse('1911-11-11')
|
364
|
-
})
|
365
|
-
end
|
366
|
-
|
367
|
-
logger.debug round_attribs.to_json
|
368
|
-
|
369
|
-
@round.update_attributes!( round_attribs )
|
370
|
-
|
371
|
-
@patch_round_ids_pos << @round.id if pos > 999000
|
372
|
-
### store list of round ids for patching start_at/end_at at the end
|
373
|
-
@patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
|
374
|
-
=end
|
375
294
|
end
|
376
295
|
|
377
296
|
|
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
457
376
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
458
377
|
|
459
378
|
@matches << Import::Match.new( date: date,
|
460
|
-
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.
|
461
|
-
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.
|
379
|
+
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
380
|
+
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
462
381
|
score: score,
|
463
|
-
round: round ? round.
|
464
|
-
group: @last_group ? @last_group.
|
382
|
+
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
383
|
+
group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
|
465
384
|
|
466
385
|
### todo: cache team lookups in hash?
|
467
386
|
|
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
517
436
|
|
518
437
|
round_attribs = {
|
519
438
|
event_id: @event.id,
|
520
|
-
|
439
|
+
name: "Matchday #{date.to_date}",
|
521
440
|
pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
|
522
441
|
start_at: date.to_date,
|
523
442
|
end_at: date.to_date
|
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
541
460
|
end
|
542
461
|
|
543
462
|
## note: will crash (round.pos) if round is nil
|
544
|
-
logger.debug( " using round #{round.pos} >#{round.
|
463
|
+
logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
|
545
464
|
else
|
546
465
|
## use round from last round header
|
547
466
|
round = @round
|
@@ -0,0 +1,321 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
  ##
  # Reads football match records from CSV text (tabular data with a header row)
  # and converts every usable row into an Import::Match.
  #
  # Two header layouts are auto-detected when no explicit +headers+ mapping
  # is passed in:
  #   - the football.csv layout ("Team 1", "Team 2", "FT", "HT", "Round", "Stage")
  #   - the footballdata.uk style layout ("HomeTeam"/"HT"/"Home", "FTHG"/"HG", etc.)
  #
  # note: the actual csv reading is delegated to the parse_csv / read_csv helpers
  #       that are defined outside of this class.
  class CsvMatchParser

    ## supported date layouts (checked in order) => strptime format
    ##   note: \A and \z anchor the WHOLE string (^ and $ only anchor a line)
    DATE_FORMATS = [
      [ /\A\d{2}\/\d{2}\/\d{4}\z/,       '%d/%m/%Y'    ],   # e.g. 17/08/2002
      [ /\A\d{2}\/\d{2}\/\d{2}\z/,       '%d/%m/%y'    ],   # e.g. 17/08/02
      [ /\A\d{4}-\d{2}-\d{2}\z/,         '%Y-%m-%d'    ],   # e.g. 1995-08-04  ("standard" / default)
      [ /\A\d{1,2} \w{3} \d{4}\z/,       '%d %b %Y'    ],   # e.g. 8 Jul 2017
      [ /\A\w{3} \w{3} \d{1,2} \d{4}\z/, '%a %b %d %Y' ],   # e.g. Sat Aug 7 1993
    ].freeze

    NUMBER_RE     = /\A\d{1,2}\z/                  # e.g. 1 or 12 (round, goals)
    SCORE_PAIR_RE = /\A\d{1,2}[\-:]\d{1,2}\z/      # e.g. 1-1 or 1:1


    #############
    # helpers

    ##
    # Collects all distinct season names used in the csv file at +path+.
    #
    # col::     name of the season column (defaults to 'Season')
    # sep::     column separator - passed along as-is to read_csv
    # headers:: optional headers mapping; headers[:season] has priority over col
    #
    # Returns an array of season names (in order of first appearance).
    def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )

      ## check if headers incl. season; if yes, has priority over col mapping
      ##  e.g. no need to specify twice (if using headers)
      col = headers[:season] if headers && headers[:season]

      seasons = Hash.new( 0 )   ## default value is 0

      ## todo/fix: yes, use CsvHash.foreach - why? why not?
      ##   use read_csv with block to switch to foreach!!!!
      rows = read_csv( path, sep: sep )

      rows.each_with_index do |row,i|
        puts "[#{i}] " + row.inspect  if i < 2

        season = row[ col ]    ## column name defaults to 'Season'
        seasons[ season ] += 1
      end

      pp seasons

      ## note: only return season keys/names (not hash with usage counter)
      seasons.keys
    end


    ##########
    # main machinery

    ## Reads the file at +path+ (always as utf-8) and parses it - see #parse.
    def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
      txt = File.open( path, 'r:utf-8' ) {|f| f.read }   ## note: make sure to use (assume) utf-8
      parse( txt, headers:    headers,
                  filters:    filters,
                  converters: converters,
                  sep:        sep )
    end

    ## Parses the csv text +txt+ - shortcut for new( txt ).parse( ... ).
    def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
      new( txt ).parse( headers:    headers,
                        filters:    filters,
                        converters: converters,
                        sep:        sep )
    end


    def initialize( txt )
      @txt = txt
    end

    ##
    # Converts all rows into match records.
    #
    # headers::    optional hash mapping :team1, :team2, :date, :score, :scorei,
    #              :score1, :score2, :score1i, :score2i, :round, :stage to
    #              column names; auto-detected from the first row if missing
    # filters::    optional hash of column name => required value;
    #              rows NOT matching ALL filters get skipped
    # converters:: optional proc (or array of procs); every proc gets called
    #              with the row and MUST return the (converted) row
    # sep::        column separator - passed along as-is to parse_csv
    #
    # Returns an array of Import::Match records (empty if there are no rows).
    # note: an unknown date format stops the program via exit 1.
    def parse( headers: nil, filters: nil, converters: nil, sep: nil )
      rows = parse_csv( @txt, sep: sep )

      return []   if rows.empty?      ## no rows / empty?

      headers_mapping = if headers    ## use user supplied headers if present
                          {}.merge( headers )
                        else
                          ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
                          names = rows[0].keys
                          pp names
                          detect_headers( names )
                        end

      pp headers_mapping

      ## convert single proc shortcut to array with single converter
      ##   note: loop-invariant - do it once up-front (NOT for every row)
      converters = [converters]  if converters.is_a?( Proc )

      matches = []

      rows.each_with_index do |row,i|
        ## filter MUST match if present e.g. row['Season'] == '2017/2018'
        next if filters && filters.any? { |header, value| row[ header ] != value }

        ## note: add converters after filters for now (why not before filters?)
        converters.each { |converter| row = converter.call( row ) }  if converters

        team1 = row[ headers_mapping[ :team1 ]]
        team2 = row[ headers_mapping[ :team2 ]]

        ## check if data present - if not skip (might be empty row)
        ##  note:  (old classic) csv reader returns nil for empty fields
        ##         new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
        if (team1.nil? || team1.empty?) &&
           (team2.nil? || team2.empty?)
          puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
          pp row
          next
        end

        matches << build_match( row, headers_mapping, team1, team2 )
      end

      matches
    end


    private

    ## Returns the first candidate that is included in headers (or nil if none).
    def find_header( headers, candidates )
      candidates.find { |candidate| headers.include?( candidate ) }
    end

    ## Builds the headers mapping by probing the header names of the first row.
    ##   note: keys get always added (with nil if the header is NOT found)
    ##         except for the optional stage.
    def detect_headers( headers )
      # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
      #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now
      if find_header( headers, ['Team 1'] ) && find_header( headers, ['Team 2'] )
        ## assume our own football.csv format, see github.com/footballcsv
        mapping = {
          team1:  find_header( headers, ['Team 1'] ),
          team2:  find_header( headers, ['Team 2'] ),
          date:   find_header( headers, ['Date'] ),
          ## all-in-one full time (ft) and half time (ht) scores
          score:  find_header( headers, ['FT'] ),
          scorei: find_header( headers, ['HT'] ),
          round:  find_header( headers, ['Round'] ),
        }

        ## optional headers - note: find_header returns nil if header NOT found
        header_stage = find_header( headers, ['Stage'] )
        mapping[:stage] = header_stage   if header_stage
        mapping
      else
        ## else try footballdata.uk and others
        {
          team1:   find_header( headers, ['HomeTeam', 'HT', 'Home'] ),
          team2:   find_header( headers, ['AwayTeam', 'AT', 'Away'] ),
          date:    find_header( headers, ['Date'] ),
          ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
          score1:  find_header( headers, ['FTHG', 'HG'] ),
          score2:  find_header( headers, ['FTAG', 'AG'] ),
          ## note: HT = Half Time
          score1i: find_header( headers, ['HTHG'] ),
          score2i: find_header( headers, ['HTAG'] ),
        }
      end
    end

    ## Converts a single (non-empty) row into an Import::Match record.
    def build_match( row, headers_mapping, team1, team2 )
      ## remove possible match played counters e.g. (4) (11) etc.
      ##   note: use to_s - only ONE of the two teams might be nil (missing)
      team1 = team1.to_s.sub( /\(\d+\)/, '' ).strip
      team2 = team2.to_s.sub( /\(\d+\)/, '' ).strip

      date = parse_date( row[ headers_mapping[ :date ]] )

      ## check for (optional) round / matchday
      round = nil
      round = parse_number( row[ headers_mapping[ :round ]] )   if headers_mapping[ :round ]

      score1  = nil
      score2  = nil
      score1i = nil
      score2i = nil

      ## check for full time scores ?
      if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
        score1 = parse_number( row[ headers_mapping[ :score1 ]] )
        score2 = parse_number( row[ headers_mapping[ :score2 ]] )
      end

      ## check for half time scores ?
      if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
        score1i = parse_number( row[ headers_mapping[ :score1i ]] )
        score2i = parse_number( row[ headers_mapping[ :score2i ]] )
      end

      ## check for all-in-one full time scores?
      if headers_mapping[ :score ]
        ft = parse_score_pair( row[ headers_mapping[ :score ]] )
        score1, score2 = ft    if ft
      end

      ## check for all-in-one half time scores?
      if headers_mapping[ :scorei ]
        ht = parse_score_pair( row[ headers_mapping[ :scorei ]] )
        score1i, score2i = ht  if ht
      end

      ## try some optional headings / columns
      stage = nil
      stage = parse_stage( row[ headers_mapping[ :stage ]] )    if headers_mapping[ :stage ]

      Import::Match.new( date:    date,
                         team1:   team1,   team2:   team2,
                         score1:  score1,  score2:  score2,
                         score1i: score1i, score2i: score2i,
                         round:   round,
                         stage:   stage )
    end

    ## Returns the date as a string in the format YYYY-MM-DD or
    ##  nil if missing / unknown (e.g. empty or - or --- or ? or ???).
    ##  note: stops the program via exit 1 on an unknown date format.
    def parse_date( value )
      col = value.to_s.strip   # note: to_s - allow nil (missing column or empty field)

      ## note: allow missing / unknown date for match
      return nil   if col.empty? ||
                      col =~ /\A-{1,}\z/  ||   # e.g. - or ---
                      col =~ /\A\?{1,}\z/      # e.g. ? or ???

      ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
      col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
      col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
      col = col.strip   # make sure not leading or trailing spaces left over

      _, date_fmt = DATE_FORMATS.find { |regex, _| col =~ regex }

      if date_fmt.nil?
        puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
        ## todo/fix: add to errors/warns list - why? why not?
        exit 1
      end

      ## todo/fix: yes!! use date object!!!! do NOT use string
      Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end

    ## Returns an integer for a one or two digit number or
    ##  nil otherwise (e.g. ignore ? or - or such non-numbers for now).
    def parse_number( value )
      value.to_i  if value =~ NUMBER_RE
    end

    ## Returns [score1, score2] for an all-in-one score (e.g. 1-1 or 1:1) or
    ##  nil if the value does NOT match the score format.
    def parse_score_pair( value )
      value.split( /[\-:]/ ).map( &:to_i )  if value =~ SCORE_PAIR_RE
    end

    ## Returns the stage name or nil if missing (e.g. empty or - or n/a).
    ##  note: a missing stage defaults to "regular";
    ##        the explicit unknown marker (?) gets passed along as-is.
    def parse_stage( col )
      return nil  if col.nil? || col.empty? || col == '-' || col == 'n/a'
      col
    end

  end # class CsvMatchParser
end # module SportDb
|
321
|
+
|