sportdb-formats 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -0
- data/Rakefile +2 -2
- data/lib/sportdb/formats.rb +17 -5
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/datafile_package.rb +10 -7
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +99 -180
- data/lib/sportdb/formats/match/match_parser_csv.rb +321 -0
- data/lib/sportdb/formats/package.rb +165 -11
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_formats.rb +41 -1
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
- data/lib/sportdb/formats/version.rb +4 -2
- data/test/helper.rb +2 -1
- data/test/test_club_reader_props.rb +2 -2
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- data/test/test_csv_match_parser.rb +114 -0
- data/test/test_csv_match_parser_utils.rb +20 -0
- data/test/test_csv_reader.rb +5 -5
- data/test/test_datafile.rb +0 -32
- data/test/test_datafile_package.rb +46 -0
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_package.rb +60 -28
- data/test/test_package_match.rb +27 -3
- data/test/test_regex.rb +25 -7
- data/test/test_scores.rb +58 -49
- metadata +9 -4
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
|
|
82
82
|
# team1 team2 - match (will get new auto-matchday! not last round)
|
83
83
|
@last_round = nil
|
84
84
|
|
85
|
-
|
85
|
+
name = find_group_name!( line )
|
86
86
|
|
87
|
-
logger.debug "
|
88
|
-
logger.debug " pos: >#{pos}<"
|
87
|
+
logger.debug " name: >#{name}<"
|
89
88
|
logger.debug " line: >#{line}<"
|
90
89
|
|
91
|
-
group = @groups[
|
90
|
+
group = @groups[ name ]
|
92
91
|
if group.nil?
|
93
|
-
puts "!! ERROR - no group def found for >#{
|
92
|
+
puts "!! ERROR - no group def found for >#{name}<"
|
94
93
|
exit 1
|
95
94
|
end
|
96
95
|
|
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
|
|
104
103
|
@mapper_teams.map_teams!( line )
|
105
104
|
teams = @mapper_teams.find_teams!( line )
|
106
105
|
|
107
|
-
|
106
|
+
name = find_group_name!( line )
|
108
107
|
|
109
108
|
logger.debug " line: >#{line}<"
|
110
109
|
|
111
|
-
group
|
112
|
-
|
113
|
-
teams: teams.map {|team| team.
|
110
|
+
## todo/check/fix: add back group key - why? why not?
|
111
|
+
group = Import::Group.new( name: name,
|
112
|
+
teams: teams.map {|team| team.name } )
|
114
113
|
|
115
|
-
@groups[
|
114
|
+
@groups[ name ] = group
|
116
115
|
end
|
117
116
|
|
118
117
|
|
119
|
-
def
|
118
|
+
def find_group_name!( line )
|
120
119
|
## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
|
121
120
|
## nb: (?:) = is for non-capturing group(ing)
|
122
121
|
|
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
|
|
125
124
|
|
126
125
|
## todo:
|
127
126
|
## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
|
128
|
-
regex =
|
127
|
+
regex = /\b
|
128
|
+
(?:
|
129
|
+
(Group | Gruppe | Grupo)
|
130
|
+
[ ]+
|
131
|
+
(\d+ | [A-Z]+)
|
132
|
+
)
|
133
|
+
\b/x
|
129
134
|
|
130
135
|
m = regex.match( line )
|
131
136
|
|
132
|
-
return
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
when 'G' then 7
|
142
|
-
when 'H' then 8
|
143
|
-
when 'I' then 9
|
144
|
-
when 'J' then 10
|
145
|
-
when 'K' then 11
|
146
|
-
when 'L' then 12
|
147
|
-
when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
|
148
|
-
else m[1].to_i
|
149
|
-
end
|
150
|
-
|
151
|
-
title = m[0]
|
152
|
-
|
153
|
-
logger.debug " title: >#{title}<"
|
154
|
-
logger.debug " pos: >#{pos}<"
|
155
|
-
|
156
|
-
line.sub!( regex, '[GROUP.TITLE+POS]' )
|
157
|
-
|
158
|
-
[title,pos]
|
137
|
+
return nil if m.nil?
|
138
|
+
|
139
|
+
name = m[0]
|
140
|
+
|
141
|
+
logger.debug " name: >#{name}<"
|
142
|
+
|
143
|
+
line.sub!( name, '[GROUP.NAME]' )
|
144
|
+
|
145
|
+
name
|
159
146
|
end
|
160
147
|
|
161
148
|
|
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
|
|
180
167
|
end_date = end_date.to_date
|
181
168
|
|
182
169
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
knockout_flag = is_knockout_round?( title )
|
170
|
+
name = find_round_def_name!( line )
|
171
|
+
# NB: use extracted round name for knockout check
|
172
|
+
knockout_flag = is_knockout_round?( name )
|
187
173
|
|
188
174
|
|
189
175
|
logger.debug " start_date: #{start_date}"
|
190
176
|
logger.debug " end_date: #{end_date}"
|
191
|
-
logger.debug "
|
192
|
-
logger.debug " title: >#{title}<"
|
177
|
+
logger.debug " name: >#{name}<"
|
193
178
|
logger.debug " knockout_flag: #{knockout_flag}"
|
194
179
|
|
195
180
|
logger.debug " line: >#{line}<"
|
196
181
|
|
197
|
-
|
198
|
-
# todo/fix: add auto flag is false !!!! - why? why not?
|
199
|
-
round = Import::Round.new( pos: pos,
|
200
|
-
title: title,
|
182
|
+
round = Import::Round.new( name: name,
|
201
183
|
start_date: start_date,
|
202
184
|
end_date: end_date,
|
203
185
|
knockout: knockout_flag,
|
204
186
|
auto: false )
|
205
187
|
|
206
|
-
@rounds[
|
188
|
+
@rounds[ name ] = round
|
207
189
|
end
|
208
190
|
|
209
191
|
|
210
192
|
|
211
|
-
def
|
212
|
-
#
|
213
|
-
#
|
214
|
-
regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
|
215
|
-
|
216
|
-
# pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
|
217
|
-
# note: /\b(\d{1,3})\b/
|
218
|
-
# will match -12
|
219
|
-
# thus, use space required - will NOT match -2 e.g. Group-2 Play-off
|
220
|
-
# note: allow 1. Runde n
|
221
|
-
# 1^ Giornata
|
222
|
-
regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
|
223
|
-
|
224
|
-
if line =~ regex_pos
|
225
|
-
logger.debug " pos: >#{$1}<"
|
226
|
-
|
227
|
-
line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
|
228
|
-
return $1.to_i
|
229
|
-
elsif line =~ regex_num
|
230
|
-
## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
|
231
|
-
## NB: do NOT remove pos from string (will get removed by round title)
|
232
|
-
|
233
|
-
num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
|
234
|
-
|
235
|
-
#### fix:
|
236
|
-
# use/make keywords required
|
237
|
-
# e.g. Round of 16 -> should NOT match 16!
|
238
|
-
# Spiel um Platz 3 (or 5) etc -> should NOT match 3!
|
239
|
-
# Round 16 - ok
|
240
|
-
# thus, check for required keywords
|
193
|
+
def find_round_def_name!( line )
|
194
|
+
# assume everything before pipe (|) is the round name
|
195
|
+
# strip [ROUND.POS], todo:?? [ROUND.NAME2]
|
241
196
|
|
242
|
-
|
243
|
-
# todo: mask match e.g. Round of xxx ... and try again - might include something
|
244
|
-
# reuse pattern for Group XX Replays for example
|
245
|
-
if line =~ /^\s*Round of \d{1,3}\b/
|
246
|
-
return nil
|
247
|
-
end
|
248
|
-
|
249
|
-
logger.debug " pos: >#{num}<"
|
250
|
-
return num
|
251
|
-
else
|
252
|
-
## fix: add logger.warn no round pos found in line
|
253
|
-
return nil
|
254
|
-
end
|
255
|
-
end # method find_round_pos!
|
256
|
-
|
257
|
-
def find_round_def_title!( line )
|
258
|
-
# assume everything before pipe (\) is the round title
|
259
|
-
# strip [ROUND.POS], todo:?? [ROUND.TITLE2]
|
260
|
-
|
261
|
-
# todo/fix: add title2 w/ // or / why? why not?
|
197
|
+
# todo/fix: add name2 w/ // or / why? why not?
|
262
198
|
# -- strip / or / chars
|
263
199
|
|
264
200
|
buf = line.dup
|
265
|
-
logger.debug "
|
201
|
+
logger.debug " find_round_def_name! line-before: >>#{buf}<<"
|
266
202
|
|
267
203
|
## cut-off everything after (including) pipe (|)
|
268
204
|
buf = buf[ 0...buf.index('|') ]
|
269
|
-
|
270
|
-
# e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
271
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
|
272
|
-
# remove leading and trailing whitespace
|
273
205
|
buf.strip!
|
274
206
|
|
275
|
-
logger.debug "
|
207
|
+
logger.debug " find_round_def_name! line-after: >>#{buf}<<"
|
276
208
|
|
277
|
-
logger.debug "
|
278
|
-
line.sub!( buf, '[ROUND.
|
209
|
+
logger.debug " name: >>#{buf}<<"
|
210
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
279
211
|
|
280
212
|
buf
|
281
213
|
end
|
282
214
|
|
283
|
-
def find_round_header_title!( line )
|
284
|
-
# assume everything left is the round title
|
285
|
-
# extract all other items first (round title2, round pos, group title n pos, etc.)
|
286
215
|
|
287
|
-
##
|
288
|
-
##
|
289
|
-
##
|
290
|
-
##
|
291
|
-
##
|
216
|
+
## split by or || or |||
|
217
|
+
## or ++ or +++
|
218
|
+
## or -- or ---
|
219
|
+
## or // or ///
|
220
|
+
## note: allow Final | First Leg as ONE name same as
|
221
|
+
## Final - First Leg or
|
222
|
+
## Final, First Leg
|
223
|
+
## for cut-off always MUST be more than two chars
|
224
|
+
##
|
225
|
+
## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
|
226
|
+
## todo/fix: move to parser utils and add a method split_name or such?
|
227
|
+
HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
|
228
|
+
(?:\|{2,} |
|
229
|
+
\+{2,} |
|
230
|
+
-{2,} |
|
231
|
+
\/{2,}
|
232
|
+
)
|
233
|
+
[ ]* ## allow (strip) trailing spaces
|
234
|
+
/x
|
235
|
+
|
236
|
+
def find_round_header_name!( line )
|
237
|
+
# assume everything left is the round name
|
238
|
+
# extract all other items first (round name2, round pos, group name n pos, etc.)
|
292
239
|
|
293
240
|
buf = line.dup
|
294
|
-
logger.debug "
|
241
|
+
logger.debug " find_round_header_name! line-before: >>#{buf}<<"
|
242
|
+
|
243
|
+
|
244
|
+
parts = buf.split( HEADER_SEP_RE )
|
245
|
+
buf = parts[0]
|
295
246
|
|
296
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
297
247
|
buf.strip! # remove leading and trailing whitespace
|
298
248
|
|
299
|
-
logger.debug "
|
249
|
+
logger.debug " find_round_name! line-after: >>#{buf}<<"
|
300
250
|
|
301
|
-
### bingo - assume what's left is the round
|
251
|
+
### bingo - assume what's left is the round name
|
302
252
|
|
303
|
-
logger.debug "
|
304
|
-
line.sub!( buf, '[ROUND.
|
253
|
+
logger.debug " name: >>#{buf}<<"
|
254
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
305
255
|
|
306
256
|
buf
|
307
257
|
end
|
308
258
|
|
259
|
+
## quick hack- collect all "fillwords" by language!!!!
|
260
|
+
## change later and add to sportdb-langs!!!!
|
261
|
+
##
|
262
|
+
## strip all "fillwords" e.g.:
|
263
|
+
## Nachtrag/Postponed/Addition/Supplemento names
|
264
|
+
##
|
265
|
+
## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
|
266
|
+
ROUND_EXTRA_WORDS_RE = /\b(?:
|
267
|
+
Nachtrag | ## de
|
268
|
+
Postponed | ## en
|
269
|
+
Addition | ## en
|
270
|
+
Supplemento ## it
|
271
|
+
)
|
272
|
+
\b/ix
|
309
273
|
|
310
274
|
def parse_round_header( line )
|
311
275
|
logger.debug "parsing round header line: >#{line}<"
|
312
276
|
|
313
|
-
|
314
|
-
# make sure Round of 16 will not return pos 16 -- how? possible?
|
315
|
-
# add unit test too to verify
|
316
|
-
pos = find_round_pos!( line )
|
317
|
-
|
318
|
-
title = find_round_header_title!( line )
|
277
|
+
name = find_round_header_name!( line )
|
319
278
|
|
320
279
|
logger.debug " line: >#{line}<"
|
321
280
|
|
281
|
+
name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
|
282
|
+
name = name.strip
|
322
283
|
|
323
|
-
round = @rounds[
|
284
|
+
round = @rounds[ name ]
|
324
285
|
if round.nil? ## auto-add / create if missing
|
325
|
-
|
326
|
-
|
327
|
-
@rounds[
|
286
|
+
## todo/check: add num (was pos) if present - why? why not?
|
287
|
+
round = Import::Round.new( name: name )
|
288
|
+
@rounds[ name ] = round
|
328
289
|
end
|
329
290
|
|
330
291
|
## todo/check: if pos match (MUST always match for now)
|
331
292
|
@last_round = round
|
332
293
|
@last_group = nil # note: reset group to no group - why? why not?
|
333
|
-
|
334
|
-
|
335
|
-
## NB: dummy/placeholder start_at, end_at date
|
336
|
-
## replace/patch after adding all games for round
|
337
|
-
|
338
|
-
=begin
|
339
|
-
round_attribs = {
|
340
|
-
title: title,
|
341
|
-
title2: title2,
|
342
|
-
knockout: knockout_flag
|
343
|
-
}
|
344
|
-
|
345
|
-
if pos > 999000
|
346
|
-
# no pos (e.g. will get autonumbered later) - try match by title for now
|
347
|
-
# e.g. lets us use title 'Group Replays', for example, multiple times
|
348
|
-
@round = Round.find_by_event_id_and_title( @event.id, title )
|
349
|
-
else
|
350
|
-
@round = Round.find_by_event_id_and_pos( @event.id, pos )
|
351
|
-
end
|
352
|
-
|
353
|
-
if @round.present?
|
354
|
-
logger.debug "update round #{@round.id}:"
|
355
|
-
else
|
356
|
-
logger.debug "create round:"
|
357
|
-
@round = Round.new
|
358
|
-
|
359
|
-
round_attribs = round_attribs.merge( {
|
360
|
-
event_id: @event.id,
|
361
|
-
pos: pos,
|
362
|
-
start_at: Date.parse('1911-11-11'),
|
363
|
-
end_at: Date.parse('1911-11-11')
|
364
|
-
})
|
365
|
-
end
|
366
|
-
|
367
|
-
logger.debug round_attribs.to_json
|
368
|
-
|
369
|
-
@round.update_attributes!( round_attribs )
|
370
|
-
|
371
|
-
@patch_round_ids_pos << @round.id if pos > 999000
|
372
|
-
### store list of round ids for patching start_at/end_at at the end
|
373
|
-
@patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
|
374
|
-
=end
|
375
294
|
end
|
376
295
|
|
377
296
|
|
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
457
376
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
458
377
|
|
459
378
|
@matches << Import::Match.new( date: date,
|
460
|
-
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.
|
461
|
-
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.
|
379
|
+
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
380
|
+
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
462
381
|
score: score,
|
463
|
-
round: round ? round.
|
464
|
-
group: @last_group ? @last_group.
|
382
|
+
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
383
|
+
group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
|
465
384
|
|
466
385
|
### todo: cache team lookups in hash?
|
467
386
|
|
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
517
436
|
|
518
437
|
round_attribs = {
|
519
438
|
event_id: @event.id,
|
520
|
-
|
439
|
+
name: "Matchday #{date.to_date}",
|
521
440
|
pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
|
522
441
|
start_at: date.to_date,
|
523
442
|
end_at: date.to_date
|
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
541
460
|
end
|
542
461
|
|
543
462
|
## note: will crash (round.pos) if round is nil
|
544
|
-
logger.debug( " using round #{round.pos} >#{round.
|
463
|
+
logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
|
545
464
|
else
|
546
465
|
## use round from last round header
|
547
466
|
round = @round
|
@@ -0,0 +1,321 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb

##
# Turns tabular (csv) match data into a list of Import::Match records.
#
# The column layout is either passed in by the caller (headers: mapping)
# or auto-detected from the keys of the first row:
#   - "Team 1" + "Team 2" present => football.csv style
#       (Date, FT, HT, Round and optional Stage columns)
#   - otherwise                   => footballdata.uk style and others
#       (HomeTeam/HT/Home, AwayTeam/AT/Away, FTHG/HG, FTAG/AG, HTHG, HTAG)
#
# Note: relies on the helpers read_csv / parse_csv and on Import::Match,
#  all defined elsewhere (outside of this file).
class CsvMatchParser

  ## supported date layouts - tried top to bottom; first matching pattern wins
  DATE_FORMATS = [
    [/^\d{2}\/\d{2}\/\d{4}$/,        '%d/%m/%Y'],     # e.g. 17/08/2002
    [/^\d{2}\/\d{2}\/\d{2}$/,        '%d/%m/%y'],     # e.g. 17/08/02
    [/^\d{4}-\d{2}-\d{2}$/,          '%Y-%m-%d'],     # e.g. 1995-08-04 ("standard" / default date format)
    [/^\d{1,2} \w{3} \d{4}$/,        '%d %b %Y'],     # e.g. 8 Jul 2017
    [/^\w{3} \w{3} \d{1,2} \d{4}$/,  '%a %b %d %Y'],  # e.g. Sat Aug 7 1993
  ].freeze


  #############
  # helpers

  ##
  # Collect all distinct values of the season column of the csv file at path.
  #
  #  path    - file to read (passed on to read_csv)
  #  col     - name of the season column; defaults to 'Season'
  #  sep     - separator passed on to read_csv
  #  headers - optional mapping; headers[:season] (if present) has priority over col
  #             e.g. no need to specify twice (if using headers)
  #
  # Returns the season keys/names only (not the usage counters).
  # Note: prints the first two rows and the usage counters to stdout.
  def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
    col = headers[:season]   if headers && headers[:season]

    seasons = Hash.new( 0 )   ## default value is 0

    ## todo/fix: yes, use CsvHash.foreach - why? why not?
    ##             use read_csv with block to switch to foreach!!!!
    rows = read_csv( path, sep: sep )

    rows.each_with_index do |row,i|
      puts "[#{i}] " + row.inspect  if i < 2

      seasons[ row[ col ] ] += 1
    end

    pp seasons

    seasons.keys
  end


  ##########
  # main machinery

  ## todo/fix: use a generic "global" parse_csv method - why? why not?
  ##   (lets you change the csv library in one place if needed/desired;
  ##    note: do NOT symbolize keys - keep them as is!)

  ##
  # Read the file at path (assumes utf-8) and parse it; see #parse for the options.
  def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
    txt = File.open( path, 'r:utf-8' ) {|f| f.read }
    parse( txt, headers:    headers,
                filters:    filters,
                converters: converters,
                sep:        sep )
  end

  ##
  # Parse the passed in text; see #parse for the options.
  def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
    new( txt ).parse( headers:    headers,
                      filters:    filters,
                      converters: converters,
                      sep:        sep )
  end


  def initialize( txt )
    @txt = txt
  end

  ##
  # Parse the text passed to the constructor.
  #
  #  headers    - optional mapping of :team1, :team2, :date, :score, :scorei,
  #                 :score1, :score2, :score1i, :score2i, :round, :stage
  #                 to column names; if nil the mapping gets auto-detected
  #                 from the first row
  #  filters    - optional column => value pairs; a row is used only if ALL match
  #                 e.g. row['Season'] == '2017/2018'
  #  converters - optional proc (or array of procs); every proc gets called
  #                 with the row and MUST return the (converted) row;
  #                 converters run after the filters
  #  sep        - separator passed on to parse_csv
  #
  # Returns an array of Import::Match (empty if there are no rows).
  # Rows with no team at all are skipped (with a warning on stdout).
  # Note: an unknown date format prints an error and exits the process (exit 1).
  def parse( headers: nil, filters: nil, converters: nil, sep: nil )
    rows = parse_csv( @txt, sep: sep )

    return []   if rows.empty?     ## no rows / empty?

    ## fix/todo: use logger!!!!
    headers_mapping = if headers     ## use user supplied headers if present
                        {}.merge( headers )
                      else
                        ## note: assume all rows have the same columns/fields!!!
                        detect_headers( rows[0].keys )
                      end
    pp headers_mapping

    ### todo/fix: check headers - how?
    ##    if present HomeTeam or HT required etc.
    ##    issue error/warn if not present

    ## convert single proc shortcut to array with single converter
    ##   (does not depend on the row - thus, done once up front)
    converters = [converters]   if converters.is_a?( Proc )

    matches = []

    rows.each_with_index do |row,i|
      ## filter MUST match if present
      next  if filters && !row_matches?( row, filters )

      ## note: add converters after filters for now (why not before filters?)
      if converters    ## assumes array of procs
        converters.each { |converter| row = converter.call( row ) }
      end

      team1 = row[ headers_mapping[ :team1 ]]
      team2 = row[ headers_mapping[ :team2 ]]

      ## check if data present - if not skip (might be empty row)
      if blank?( team1 ) && blank?( team2 )
        puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
        pp row
        next
      end

      team1 = clean_team( team1 )
      team2 = clean_team( team2 )

      date  = parse_date( row[ headers_mapping[ :date ]] )

      ## check for (optional) round / matchday
      ##  todo: issue warning if not ? or - (and just empty string) why? why not
      round = nil
      round = to_small_int( row[ headers_mapping[ :round ]] )   if headers_mapping[ :round ]

      score1  = nil
      score2  = nil
      score1i = nil
      score2i = nil

      ## check for full time scores (in two columns) ?
      if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
        score1 = to_small_int( row[ headers_mapping[ :score1 ]] )
        score2 = to_small_int( row[ headers_mapping[ :score2 ]] )
      end

      ## check for half time scores (in two columns) ?
      if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
        score1i = to_small_int( row[ headers_mapping[ :score1i ]] )
        score2i = to_small_int( row[ headers_mapping[ :score2i ]] )
      end

      ## check for all-in-one full time scores?
      ##  todo/fix: issue warning if non-empty!!! and not matching format!!!!
      if headers_mapping[ :score ]
        pair = split_score( row[ headers_mapping[ :score ]] )
        score1, score2 = pair   if pair
      end

      ## check for all-in-one half time scores?
      if headers_mapping[ :scorei ]
        pair = split_score( row[ headers_mapping[ :scorei ]] )
        score1i, score2i = pair   if pair
      end

      ## try some optional headings / columns
      stage = nil
      stage = parse_stage( row[ headers_mapping[ :stage ]] )   if headers_mapping[ :stage ]

      matches << Import::Match.new( date:    date,
                                    team1:   team1,   team2:   team2,
                                    score1:  score1,  score2:  score2,
                                    score1i: score1i, score2i: score2i,
                                    round:   round,
                                    stage:   stage )
    end

    matches
  end


private

  ## return the first candidate that is included in headers or nil if none is
  def find_header( headers, candidates )
    candidates.find { |candidate| headers.include?( candidate ) }
  end

  ## build the headers mapping from the header names (array of strings) of the first row
  ##  note: prints the header names to stdout
  def detect_headers( headers )
    pp headers

    mapping = {}

    # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
    #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

    if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
      ## assume our own football.csv format, see github.com/footballcsv
      mapping[:team1]  = find_header( headers, ['Team 1'] )
      mapping[:team2]  = find_header( headers, ['Team 2'] )
      mapping[:date]   = find_header( headers, ['Date'] )

      ## check for all-in-one full time (ft) and half time (ht) scores?
      mapping[:score]  = find_header( headers, ['FT'] )
      mapping[:scorei] = find_header( headers, ['HT'] )

      mapping[:round]  = find_header( headers, ['Round'] )

      ## optional headers - note: find_header returns nil if header NOT found
      header_stage = find_header( headers, ['Stage'] )
      mapping[:stage]  = header_stage   if header_stage
    else
      ## else try footballdata.uk and others
      mapping[:team1]  = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
      mapping[:team2]  = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
      mapping[:date]   = find_header( headers, ['Date'] )

      ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
      mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
      mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )

      ## check for half time scores ?   note: HT = Half Time
      mapping[:score1i] = find_header( headers, ['HTHG'] )
      mapping[:score2i] = find_header( headers, ['HTAG'] )
    end

    mapping
  end

  ## true if every filter (column => value pair) equals the value in the row
  def row_matches?( row, filters )
    filters.all? { |header, value| row[ header ] == value }
  end

  ## note: (old classic) csv reader returns nil for empty fields
  ##       new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
  def blank?( value )
    value.nil? || value.empty?
  end

  ## remove possible match played counters e.g. (4) (11) etc.
  ##  note: to_s lets a single missing (nil) team pass through as an empty
  ##        string (same as the modern csv reader) instead of crashing
  def clean_team( name )
    name.to_s.sub( /\(\d+\)/, '' ).strip
  end

  ## convert a date column to a string in the format YYYY-MM-DD
  ##  returns nil for a missing / unknown date, that is, for nil (no date column
  ##  or empty field), an empty string, - or --- or ? or ???
  def parse_date( col )
    col = col.to_s.strip     # make sure not leading or trailing spaces left over

    ## note: allow missing / unknown date for match
    return nil   if col.empty? ||
                    col =~ /^-{1,}$/ ||      # e.g. - or ---
                    col =~ /^\?{1,}$/        # e.g. ? or ???

    ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
    col = col.sub( /\(W?\d{1,2}\)/, '' )     ## e.g. (W11), (4), (21) etc.
    col = col.sub( /\(\w+\)/, '' )           ## e.g. (Fri), (Fr) etc.
    col = col.strip

    _, date_fmt = DATE_FORMATS.find { |regex, _fmt| col =~ regex }

    if date_fmt.nil?
      puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
      ## todo/fix: add to errors/warns list - why? why not?
      exit 1
    end

    ## todo/fix: yes!! use date object!!!! do NOT use string
    Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
  end

  ## convert one or two digits to a number; returns nil otherwise
  ##   e.g. ignore ? or - or such non-numbers for now
  def to_small_int( col )
    col.to_i   if col =~ /^\d{1,2}$/
  end

  ## split an all-in-one score e.g. 1-1 or 1:1 into two numbers;
  ##  returns nil if the format does not match (sanity check)
  def split_score( col )
    col.split( /[\-:]/ ).map( &:to_i )   if col =~ /^\d{1,2}[\-:]\d{1,2}$/
  end

  ## note: allow missing stage for match (nil, empty, - or n/a) / defaults to "regular" (nil)
  ##       the explicit unknown (?) is passed along as is - same as any other value
  ##  todo/check: use unknown and NOT ? - why? why not?
  def parse_stage( col )
    return nil   if col.nil? || col.empty? || col == '-' || col == 'n/a'
    col
  end

end # class CsvMatchParser
end # module SportDb
|
321
|
+
|