sportdb-structs 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +12 -15
- data/README.md +0 -1
- data/Rakefile +2 -5
- data/lib/sportdb/structs/{structs/country.rb → country.rb} +9 -0
- data/lib/sportdb/structs/event_info.rb +65 -0
- data/lib/sportdb/structs/goal.rb +78 -0
- data/lib/sportdb/structs/{structs/group.rb → group.rb} +4 -4
- data/lib/sportdb/structs/{structs/league.rb → league.rb} +29 -0
- data/lib/sportdb/structs/version.rb +1 -1
- data/lib/sportdb/structs.rb +19 -113
- metadata +15 -60
- data/lib/sportdb/structs/config.rb +0 -39
- data/lib/sportdb/structs/goal_parser_csv.rb +0 -28
- data/lib/sportdb/structs/match_parser_csv.rb +0 -490
- data/lib/sportdb/structs/match_status_parser.rb +0 -90
- data/lib/sportdb/structs/structs/goal.rb +0 -231
- /data/lib/sportdb/structs/{structs/ground.rb → ground.rb} +0 -0
- /data/lib/sportdb/structs/{structs/match.rb → match.rb} +0 -0
- /data/lib/sportdb/structs/{structs/matchlist.rb → matchlist.rb} +0 -0
- /data/lib/sportdb/structs/{structs/round.rb → round.rb} +0 -0
- /data/lib/sportdb/structs/{structs/standings.rb → standings.rb} +0 -0
- /data/lib/sportdb/structs/{structs/team.rb → team.rb} +0 -0
- /data/lib/sportdb/structs/{structs/team_usage.rb → team_usage.rb} +0 -0
@@ -1,490 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
class CsvMatchParser
|
4
|
-
|
5
|
-
#############
|
6
|
-
# helpers
|
7
|
-
## Collect the season names used in a csv datafile.
##
##  path    - datafile; handed on to read_csv
##  col     - name of the season column (defaults to 'Season')
##  sep     - column separator; handed on to read_csv
##  headers - optional headers mapping; headers[:season] (if present)
##            has priority over col - no need to specify the column twice
##
## Prints the first two rows and the rows-per-season counters (debug output).
## Returns the season names only (not the hash with the usage counters).
def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
  season_col = (headers && headers[:season]) || col

  usage = Hash.new( 0 )    ## season name => number of rows
  read_csv( path, sep: sep ).each_with_index do |rec, idx|
    puts "[#{idx}] #{rec.inspect}"  if idx < 2
    usage[ rec[ season_col ] ] += 1
  end

  pp usage
  usage.keys
end
|
31
|
-
|
32
|
-
|
33
|
-
##########
|
34
|
-
# main machinery
|
35
|
-
|
36
|
-
## todo/fix: use a generic "global" parse_csv method - why? why not?
|
37
|
-
## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
|
38
|
-
## ## note: do NOT symbolize keys - keep them as is!!!!!!
|
39
|
-
## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
|
40
|
-
## CsvHash.parse( text, sep: sep )
|
41
|
-
## end
|
42
|
-
|
43
|
-
## Read the datafile at path (always opened as utf-8) and parse the text;
## all keyword options get handed on unchanged to parse.
def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
  text = File.open( path, 'r:utf-8', &:read )   ## note: make sure to use (assume) utf-8

  parse( text, headers: headers, filters: filters, converters: converters, sep: sep )
end
|
50
|
-
|
51
|
-
## Parse csv text - shortcut for building a parser for txt
## and calling parse on it with the same keyword options.
def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
  parser = new( txt )
  parser.parse( headers: headers, filters: filters, converters: converters, sep: sep )
end
|
57
|
-
|
58
|
-
|
59
|
-
## Set up a parser for the given csv text;
## the text is kept as is and only read once parse gets called.
def initialize( txt )
  @txt = txt
end
|
62
|
-
|
63
|
-
## Parse the csv text passed in on construction into an array of matches.
##
##  headers    - optional mapping of internal keys (:team1, :team2, :date, :time,
##               :score, :round, ...) to column names; if missing the mapping
##               gets auto-detected from the column names of the first row
##  filters    - optional hash of column name => required value;
##               rows with any non-matching value get skipped
##  converters - optional proc (or array of procs); each gets called with the
##               row and must return the (converted) row
##  sep        - column separator; handed on to parse_csv
##
## Returns an array of Sports::Match (empty if there are no rows).
##
## note: an unknown time, date or score format prints an error and
##        terminates the process via exit 1 (kept as is - callers may rely on it)
## note: the detected headers and the headers mapping get printed (debug output)
def parse( headers: nil, filters: nil, converters: nil, sep: nil )
  rows = parse_csv( @txt, sep: sep )
  return []  if rows.empty?     ## no rows / empty?

  ## fix/todo: use logger!!!!
  if headers     ## use user supplied headers if present
    headers_mapping = {}.merge( headers )
  else
    ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
    headers = rows[0].keys
    pp headers
    headers_mapping = detect_headers_mapping( headers )
  end

  pp headers_mapping

  ## convert single proc shortcut to array with single converter (once - not for every row)
  converters = [converters]  if converters.is_a?( Proc )

  matches = []

  rows.each_with_index do |row,i|
    ## filters MUST all match if present e.g. row['Season'] == '2017/2018'
    next  if filters && filters.any? { |header, value| row[ header ] != value }

    ## note: converters get applied after filters (filters always see the raw row)
    converters.each { |converter| row = converter.call( row ) }  if converters

    team1 = row[ headers_mapping[ :team1 ]]
    team2 = row[ headers_mapping[ :team2 ]]

    ## check if data present - if not skip (might be empty row)
    ##  note: (old classic) csv reader returns nil for empty fields
    ##        new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
    if (team1.nil? || team1.empty?) &&
       (team2.nil? || team2.empty?)
      puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
      pp row
      next
    end

    ## remove possible match played counters e.g. (4) (11) etc.
    team1 = team1.sub( /\(\d+\)/, '' ).strip
    team2 = team2.sub( /\(\d+\)/, '' ).strip

    time = parse_time_text( row[ headers_mapping[ :time ]] )
    date = parse_date_text( row[ headers_mapping[ :date ]] )

    ## value of an optional column; nil if the column is not mapped at all
    column = ->( key ) { headers_mapping[ key ] ? row[ headers_mapping[ key ]] : nil }

    ## todo/fix: round might not always be just a simple integer number!!!
    ##   note: round is always a string for now e.g. "1", "2" too
    round = optional_text( column.call( :round ) )

    score1 = score2 = score1i = score2i = nil

    ## full time scores in two columns? e.g. FTHG / FTAG
    if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
      score1 = to_goals( row[ headers_mapping[ :score1 ]] )
      score2 = to_goals( row[ headers_mapping[ :score2 ]] )
    end

    ## half time scores in two columns? e.g. HTHG / HTAG
    if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
      score1i = to_goals( row[ headers_mapping[ :score1i ]] )
      score2i = to_goals( row[ headers_mapping[ :score2i ]] )
    end

    ## all-in-one full time / half time scores? e.g. 2-1
    score1,  score2  = fetch_score( row, headers_mapping[ :score ],  'ft' )  if headers_mapping[ :score ]
    score1i, score2i = fetch_score( row, headers_mapping[ :scorei ], 'ht' )  if headers_mapping[ :scorei ]

    ## optional scores - extra time (et) and penalties (p/pen)
    score1et = score2et = score1p = score2p = nil
    score1et, score2et = fetch_score( row, headers_mapping[ :score_et ], 'et' )  if headers_mapping[ :score_et ]
    score1p,  score2p  = fetch_score( row, headers_mapping[ :score_p ],  'p' )   if headers_mapping[ :score_p ]

    ## some more optional columns
    ##   note: an explicit unknown stage e.g. ? is kept as is (and NOT turned into nil)
    stage = optional_text( column.call( :stage ) )
    group = optional_text( column.call( :group ) )

    ## check for optional (match) status in notes / comments e.g. AWARDED, CANCELLED, POSTPONED, etc.
    notes  = optional_text( column.call( :notes ) )
    status = notes ? StatusParser.parse( notes ) : nil   # note: returns nil if no (match) status found

    league = column.call( :league )

    attributes = {
      date:     date,
      time:     time,
      team1:    team1,     team2:    team2,
      score1:   score1,    score2:   score2,
      score1i:  score1i,   score2i:  score2i,
      score1et: score1et,  score2et: score2et,
      score1p:  score1p,   score2p:  score2p,
      round:    round,
      stage:    stage,
      group:    group,
      status:   status,
      league:   league
    }

    matches << Sports::Match.new( **attributes )
  end

  matches
end


private   ## note: everything below is a helper for parse only

## Build the headers mapping from the column names found in the datafile.
def detect_headers_mapping( headers )
  mapping = {}

  # note: greece 2001-02 etc. use HT - check CVS reader row['HomeTeam'] may not be nil but an empty string?
  #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now
  if find_header( headers, ['Team 1'] ) && find_header( headers, ['Team 2'] )
    ## assume our own football.csv format, see github.com/footballcsv
    mapping[:team1]  = find_header( headers, ['Team 1'] )
    mapping[:team2]  = find_header( headers, ['Team 2'] )
    mapping[:date]   = find_header( headers, ['Date'] )
    mapping[:time]   = find_header( headers, ['Time'] )

    ## all-in-one full time (ft) and half time (ht) scores
    mapping[:score]  = find_header( headers, ['FT'] )
    mapping[:scorei] = find_header( headers, ['HT'] )

    mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

    ## optional headers - only added if found (find_header returns nil if header NOT found)
    { stage:    ['Stage'],
      group:    ['Group'],
      score_et: ['ET', 'AET'],           ## (after) extra time
      score_p:  ['P', 'PEN'],            ## penalties
      notes:    ['Notes', 'Comments'],
      league:   ['League'],
    }.each do |key, candidates|
      header = find_header( headers, candidates )
      mapping[ key ] = header  if header
    end
  else
    ## else try footballdata.uk and others
    mapping[:team1]   = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
    mapping[:team2]   = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
    mapping[:date]    = find_header( headers, ['Date'] )
    mapping[:time]    = find_header( headers, ['Time'] )

    ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
    mapping[:score1]  = find_header( headers, ['FTHG', 'HG'] )
    mapping[:score2]  = find_header( headers, ['FTAG', 'AG'] )

    ## note: HT = Half Time
    mapping[:score1i] = find_header( headers, ['HTHG'] )
    mapping[:score2i] = find_header( headers, ['HTAG'] )
  end

  mapping
end

## True for the placeholders of a missing / unknown value:
##   empty, - or ---, ? or ???
def unknown_text?( text )
  text.empty? || text =~ /^-{1,}$/ || text =~ /^\?{1,}$/
end

## Turn a time cell into a 'HH:MM' string; nil if missing / unknown.
def parse_time_text( col )
  return nil  if col.nil?

  col = col.strip    # make sure not leading or trailing spaces left over
  ## note: allow missing / unknown time for match
  ##   (was: missing || after col.empty? - placeholders such as - or ? ended in exit 1)
  return nil  if unknown_text?( col )

  time_fmt = if col =~ /^\d{1,2}:\d{2}$/
               '%H:%M'      # e.g. 17:00 or 3:00
             elsif col =~ /^\d{1,2}\.\d{2}$/    ## note: literal dot only (was any char)
               '%H.%M'      # e.g. 17.00 or 3.00
             else
               puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
               ## todo/fix: add to errors/warns list - why? why not?
               exit 1
             end

  ## todo/fix: use time object!!!! do NOT use string
  Time.strptime( col, time_fmt ).strftime( '%H:%M' )
end

## Turn a date cell into a 'YYYY-MM-DD' string; nil if missing / unknown.
def parse_date_text( col )
  return nil  if col.nil?    ## note: no date cell at all (was: NoMethodError on nil.strip)

  col = col.strip    # make sure not leading or trailing spaces left over
  ## note: allow missing / unknown date for match
  return nil  if unknown_text?( col )

  ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
  col = col.sub( /\(W?\d{1,2}\)/, '' )    ## e.g. (W11), (4), (21) etc.
  col = col.sub( /\(\w+\)/, '' )          ## e.g. (Fri), (Fr) etc.
  col = col.strip

  date_fmt = if col =~ /^\d{2}\/\d{2}\/\d{4}$/
               '%d/%m/%Y'      # e.g. 17/08/2002
             elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
               '%d/%m/%y'      # e.g. 17/08/02
             elsif col =~ /^\d{4}-\d{2}-\d{2}$/      ## "standard" / default date format
               '%Y-%m-%d'      # e.g. 1995-08-04
             elsif col =~ /^\d{1,2} \w{3} \d{4}$/
               '%d %b %Y'      # e.g. 8 Jul 2017
             elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
               '%a %b %d %Y'   # e.g. Sat Aug 7 1993
             else
               puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
               ## todo/fix: add to errors/warns list - why? why not?
               exit 1
             end

  ## todo/fix: use date object!!!! do NOT use string
  Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
end

## Return the cell text; nil if the cell is missing or
## marked as not available (empty, - or n/a).
def optional_text( col )
  (col.nil? || col.empty? || col == '-' || col == 'n/a') ? nil : col
end

## Goals from a single score column; nil if not a one or two digit number.
##   todo/fix: issue warning if not ? or - (and just empty string) why? why not
def to_goals( col )
  col =~ /^\d{1,2}$/ ? col.to_i : nil
end

## Parse the all-in-one score in the column header of row;
## prints an error and exits if the score format is invalid.
def fetch_score( row, header, label )
  col   = row[ header ]
  score = parse_score( col )
  return score  if score

  puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
  pp row
  exit 1
end
|
444
|
-
|
445
|
-
|
446
|
-
private
|
447
|
-
|
448
|
-
## Return the first candidate (tried in the order given)
## that is included in headers; nil if no candidate is found.
def find_header( headers, candidates )
  candidates.find { |name| headers.include?( name ) }
end
|
456
|
-
|
457
|
-
########
|
458
|
-
# more helpers
|
459
|
-
#
|
460
|
-
|
461
|
-
## Parse an all-in-one score e.g. 2-1 or 2:1 into [score1, score2].
##
## Returns
##   - [nil,nil]  if there is no score (nil, empty, ?, - or n/a)
##   - [2,1]      two integers for a valid score (one or two digits each)
##   - nil        if the format is invalid / unparseable
##
## A single (optional) note/footnote/endnote marker gets removed first
##   e.g. (*) or (a), (b), or [*], [A], [1], etc.
##   - allow (1) or maybe (*1) in the future - why? why not?
def parse_score( str )
  return [nil,nil]  if str.nil?   ## todo/check: remove nil case - possible? - why? why not?

  str = str.sub( /\( [a-z*] \)
                    |
                  \[ [1-9a-z*] \]
                 /ix, '' ).strip

  return [nil,nil]  if str.empty? || str == '?' || str == '-' || str == 'n/a'

  ## sanity check scores format
  ##   note: use \A and \z to check the complete text
  ##         (^ and $ match per line and let text after a newline slip through)
  m = /\A(\d{1,2})[:-](\d{1,2})\z/.match( str )
  m ? [m[1].to_i, m[2].to_i] : nil    ## note: returns nil if invalid / unparseable format!!!
end # method parse_score
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
end # class CsvMatchParser
|
489
|
-
end # module SportDb
|
490
|
-
|
@@ -1,90 +0,0 @@
|
|
1
|
-
#####################
|
2
|
-
# helpers for parsing & finding match status e.g.
|
3
|
-
# - cancelled / canceled
|
4
|
-
# - awarded
|
5
|
-
# - abandoned
|
6
|
-
# - replay
|
7
|
-
# etc.
|
8
|
-
|
9
|
-
|
10
|
-
module SportDb
|
11
|
-
|
12
|
-
|
13
|
-
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
-
|
15
|
-
## Match status values.
##
## note: use a class as an "enum"-like namespace for now - why? why not?
##   move class into Match e.g. Match::Status - why? why not?
##
## note: the values are frozen - the very same string objects get
##   handed out to every caller and must not get changed by accident
class Status
  CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling) - what to use?
  AWARDED   = 'AWARDED'.freeze
  POSTPONED = 'POSTPONED'.freeze
  ABANDONED = 'ABANDONED'.freeze
  REPLAY    = 'REPLAY'.freeze
end # class Status
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
## Helpers for parsing & finding the (match) status in text.
class StatusParser

  ## Map a status text to one of the Status values.
  ##
  ## Only the start of the text gets checked (ignoring upper/lower case)
  ##   e.g. cancelled / canceled / can., awarded / awd., postponed,
  ##        abandoned / abd., replay
  ##
  ## Returns nil if no status found.
  ##
  ## note: english usage - cancelled (in UK), canceled (in US)
  ## note: use \A (start of text) and not ^ (start of any line) -
  ##       a status word starting a later line of a multi-line note is no match
  def self.parse( str )
    case str
    when /\A(?:cancelled|canceled|can\.)/i  then Status::CANCELLED
    when /\A(?:awarded|awd\.)/i             then Status::AWARDED
    when /\Apostponed/i                     then Status::POSTPONED
    when /\A(?:abandoned|abd\.)/i           then Status::ABANDONED
    when /\Areplay/i                        then Status::REPLAY
    end   # note: no match returns nil
  end


  ## a "protected" text run in square brackets e.g. [cancelled]
  RUN_RE = /\[
               (?<text>[^\]]+)
            \]
           /x

  ## Find the first text run in square brackets that holds a status.
  ##
  ## note: changes line in place - the text run found gets replaced
  ##       with [STATUS.<status>] e.g. [STATUS.CANCELLED]
  ##
  ## Returns the status found; nil if no text run holds a status
  ## (line is left unchanged then).
  def self.find!( line )
    status = nil

    rest = line
    while (m = rest.match( RUN_RE ))
      rest = m.post_match    ## keep on processing rest of line (a.k.a. post match string)

      status = parse( m[:text].strip )
      if status
        line.sub!( m[0], "[STATUS.#{status}]" )
        break
      end
    end # while match

    status
  end # method find!
end # class StatusParser
|
88
|
-
|
89
|
-
end # module SportDb
|
90
|
-
|