sportdb-formats 2.0.2 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -20
- data/Rakefile +2 -7
- data/bin/fbchk +166 -0
- data/lib/sportdb/formats/quick_match_linter.rb +195 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/lib/sportdb/formats.rb +10 -269
- metadata +11 -85
- data/bin/fbx +0 -146
- data/lib/sportdb/formats/country/country_reader.rb +0 -142
- data/lib/sportdb/formats/csv/goal.rb +0 -192
- data/lib/sportdb/formats/csv/goal_parser_csv.rb +0 -28
- data/lib/sportdb/formats/csv/match_parser_csv.rb +0 -490
- data/lib/sportdb/formats/csv/match_status_parser.rb +0 -90
- data/lib/sportdb/formats/datafile.rb +0 -59
- data/lib/sportdb/formats/event/event_reader.rb +0 -119
- data/lib/sportdb/formats/ground/ground_reader.rb +0 -289
- data/lib/sportdb/formats/league/league_outline_reader.rb +0 -176
- data/lib/sportdb/formats/league/league_reader.rb +0 -152
- data/lib/sportdb/formats/match/conf_parser.rb +0 -132
- data/lib/sportdb/formats/match/match_parser.rb +0 -735
- data/lib/sportdb/formats/search/sport.rb +0 -372
- data/lib/sportdb/formats/search/structs.rb +0 -116
- data/lib/sportdb/formats/search/world.rb +0 -157
- data/lib/sportdb/formats/team/club_reader.rb +0 -318
- data/lib/sportdb/formats/team/club_reader_history.rb +0 -203
- data/lib/sportdb/formats/team/club_reader_props.rb +0 -90
- data/lib/sportdb/formats/team/wiki_reader.rb +0 -108
@@ -1,490 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
class CsvMatchParser
|
4
|
-
|
5
|
-
#############
|
6
|
-
# helpers
|
7
|
-
## Collect the distinct values of the season column of a csv file.
##
##  path:    csv file to read (via read_csv)
##  col:     name of the season column; defaults to 'Season'
##  sep:     passed along as-is to read_csv
##  headers: optional headers mapping; if it includes :season
##           that column name wins over col (no need to specify it twice)
##
## Returns the season values in first-seen order
##   (keys only - the usage counter is printed but not returned).
## Note: prints the first two rows and the usage counter for debugging.
def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
  season_col = (headers && headers[:season]) || col

  usage = Hash.new( 0 )    ## season => number of rows; default value is 0

  ## todo/fix: use read_csv with block to switch to foreach - why? why not?
  read_csv( path, sep: sep ).each_with_index do |row, idx|
    puts "[#{idx}] #{row.inspect}"   if idx < 2
    usage[ row[ season_col ] ] += 1
  end

  pp usage

  usage.keys
end
|
31
|
-
|
32
|
-
|
33
|
-
##########
|
34
|
-
# main machinery
|
35
|
-
|
36
|
-
## todo/fix: use a generic "global" parse_csv method - why? why not?
|
37
|
-
## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
|
38
|
-
## ## note: do NOT symbolize keys - keep them as is!!!!!!
|
39
|
-
## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
|
40
|
-
## CsvHash.parse( text, sep: sep )
|
41
|
-
## end
|
42
|
-
|
43
|
-
## Read the csv file at path and parse it into matches.
##   note: the file is always read as (assumed to be) utf-8;
##         all keyword options get passed along as-is to parse.
def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
  text = File.read( path, encoding: 'utf-8' )

  parse( text, headers: headers, filters: filters, converters: converters, sep: sep )
end
|
50
|
-
|
51
|
-
## Convenience shortcut: build a parser for the csv text and run it.
##   all keyword options get passed along as-is to the instance-level parse.
def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
  parser = new( txt )
  parser.parse( headers: headers, filters: filters, converters: converters, sep: sep )
end
|
57
|
-
|
58
|
-
|
59
|
-
## Keep the raw csv text around; parse reads it from @txt later.
def initialize( text )
  @txt = text
end
|
62
|
-
|
63
|
-
## Parse the csv text passed to the constructor into match records.
##
##  headers:    optional hash mapping match attributes (e.g. :team1, :team2,
##              :date, :time, :score, ...) to csv column names;
##              if missing the mapping gets auto-detected from the
##              column names of the first row
##  filters:    optional hash of column name => required value;
##              rows with a different value in any of the columns get skipped
##  converters: optional proc (or array of procs); every proc gets called
##              with the row (after filtering) and returns the row to use
##  sep:        passed along as-is to parse_csv
##
## Returns an array of Sports::Match (empty if there are no rows).
## Note: on an unknown time, date or score format an error gets printed
##       and the process exits (exit 1).
def parse( headers: nil, filters: nil, converters: nil, sep: nil )
  rows = parse_csv( @txt, sep: sep )
  return []   if rows.empty?     ## no rows / empty?

  ## use user supplied headers if present; otherwise auto-detect
  ##   note: assume all rows have the same columns/fields as the first row
  headers_mapping = headers ? {}.merge( headers ) : autodetect_headers( rows[0].keys )

  ## fix/todo: use logger!!!!
  pp headers_mapping

  ## convert single proc shortcut to array with single converter
  ##   (done once up front instead of once per row)
  converters = [converters]   if converters.is_a?( Proc )

  matches = []

  rows.each_with_index do |row,i|
    ## filter MUST match if present e.g. row['Season'] == '2017/2018'
    next   if filters && filters.any? { |header, value| row[ header ] != value }

    ## note: converters run after filters for now (why not before filters?)
    converters.each { |converter| row = converter.call( row ) }   if converters

    team1 = row[ headers_mapping[ :team1 ]]
    team2 = row[ headers_mapping[ :team2 ]]

    ## check if data present - if not skip (might be empty row)
    ##  note: (old classic) csv reader returns nil for empty fields;
    ##        new modern csv reader ALWAYS returns strings
    ##        (and empty strings for data not available (n/a))
    if (team1.nil? || team1.empty?) && (team2.nil? || team2.empty?)
      puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
      pp row
      next
    end

    ## remove possible match played counters e.g. (4) (11) etc.
    team1 = team1.sub( /\(\d+\)/, '' ).strip
    team2 = team2.sub( /\(\d+\)/, '' ).strip

    time = parse_time_column( row[ headers_mapping[ :time ]] )
    date = parse_date_column( row[ headers_mapping[ :date ]] )

    ## note: round is always a string for now e.g. "1", "2" too
    ##       (might be text such as Final | Leg 1 or such)
    round = optional_column( row, headers_mapping[ :round ] )
    stage = optional_column( row, headers_mapping[ :stage ] )
    group = optional_column( row, headers_mapping[ :group ] )

    ## check for optional (match) status in notes / comments
    ##   e.g. AWARDED, CANCELLED, POSTPONED, etc.
    notes  = optional_column( row, headers_mapping[ :notes ] )
    status = notes ? StatusParser.parse( notes ) : nil   # note: returns nil if no (match) status found

    league = headers_mapping[ :league ] ? row[ headers_mapping[ :league ]] : nil

    score1  = score2  = nil
    score1i = score2i = nil

    ## full time scores in two (home/away goals) columns?
    if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
      ft1 = row[ headers_mapping[ :score1 ]]
      ft2 = row[ headers_mapping[ :score2 ]]
      ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
      score1 = ft1.to_i   if ft1 =~ /^\d{1,2}$/
      score2 = ft2.to_i   if ft2 =~ /^\d{1,2}$/
    end

    ## half time scores in two (home/away goals) columns?
    if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
      ht1 = row[ headers_mapping[ :score1i ]]
      ht2 = row[ headers_mapping[ :score2i ]]
      score1i = ht1.to_i   if ht1 =~ /^\d{1,2}$/
      score2i = ht2.to_i   if ht2 =~ /^\d{1,2}$/
    end

    ## all-in-one full time (ft) and half time (ht) scores e.g. 2-1 ?
    if headers_mapping[ :score ]
      score1, score2 = parse_score_column( row, headers_mapping[ :score ], 'ft' )
    end
    if headers_mapping[ :scorei ]
      score1i, score2i = parse_score_column( row, headers_mapping[ :scorei ], 'ht' )
    end

    ## optional scores - extra time (et) and penalties (p/pen)
    score1et = score2et = nil
    score1p  = score2p  = nil
    if headers_mapping[ :score_et ]
      score1et, score2et = parse_score_column( row, headers_mapping[ :score_et ], 'et' )
    end
    if headers_mapping[ :score_p ]
      score1p, score2p = parse_score_column( row, headers_mapping[ :score_p ], 'p' )
    end

    attributes = {
      date:     date,
      time:     time,
      team1:    team1,    team2:    team2,
      score1:   score1,   score2:   score2,
      score1i:  score1i,  score2i:  score2i,
      score1et: score1et, score2et: score2et,
      score1p:  score1p,  score2p:  score2p,
      round:    round,
      stage:    stage,
      group:    group,
      status:   status,
      league:   league
    }

    matches << Sports::Match.new( **attributes )
  end

  matches
end

## (helper) auto-detect the headers mapping from the column names;
##   prints the column names for debugging.
def autodetect_headers( headers )
  pp headers

  mapping = {}

  # note: greece 2001-02 etc. use HT - check csv reader row['HomeTeam'] may not be nil but an empty string?
  #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now
  if find_header( headers, ['Team 1'] ) && find_header( headers, ['Team 2'] )
    ## assume our own football.csv format, see github.com/footballcsv
    mapping[:team1]  = find_header( headers, ['Team 1'] )
    mapping[:team2]  = find_header( headers, ['Team 2'] )
    mapping[:date]   = find_header( headers, ['Date'] )
    mapping[:time]   = find_header( headers, ['Time'] )

    ## all-in-one full time (ft) and half time (ht) scores
    mapping[:score]  = find_header( headers, ['FT'] )
    mapping[:scorei] = find_header( headers, ['HT'] )

    mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

    ## optional headers - only added if found
    ##   note: find_header returns nil if header NOT found
    { stage:    ['Stage'],
      group:    ['Group'],
      score_et: ['ET', 'AET'],          ## (after) extra time
      score_p:  ['P', 'PEN'],           ## penalties
      notes:    ['Notes', 'Comments'],
      league:   ['League'],
    }.each do |key, candidates|
      header = find_header( headers, candidates )
      mapping[key] = header   if header
    end
  else
    ## else try footballdata.uk and others
    mapping[:team1]   = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
    mapping[:team2]   = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
    mapping[:date]    = find_header( headers, ['Date'] )
    mapping[:time]    = find_header( headers, ['Time'] )

    ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
    mapping[:score1]  = find_header( headers, ['FTHG', 'HG'] )
    mapping[:score2]  = find_header( headers, ['FTAG', 'AG'] )

    ## note: HT = Half Time
    mapping[:score1i] = find_header( headers, ['HTHG'] )
    mapping[:score2i] = find_header( headers, ['HTAG'] )
  end

  mapping
end

## (helper) convert a time column to 'HH:MM' or nil if missing / unknown
##   (nil, empty, - or --- , ? or ???); exits on unknown format.
def parse_time_column( col )
  return nil   if col.nil?

  col = col.strip    # make sure no leading or trailing spaces left over
  ## note: allow missing / unknown time for match
  return nil   if col.empty? || col =~ /^-{1,}$/ || col =~ /^\?{1,}$/

  time_fmt = case col
             when /^\d{1,2}:\d{2}$/   then '%H:%M'    # e.g. 17:00 or 3:00
             when /^\d{1,2}\.\d{2}$/  then '%H.%M'    # e.g. 17.00 or 3.00
             else
               puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
               ## todo/fix: add to errors/warns list - why? why not?
               exit 1
             end

  ## todo/check: use time object (keep string?) - why? why not?
  Time.strptime( col, time_fmt ).strftime( '%H:%M' )
end

## (helper) convert a date column to 'YYYY-MM-DD' or nil if missing / unknown
##   (nil, empty, - or --- , ? or ???); exits on unknown format.
def parse_date_column( col )
  return nil   if col.nil?

  col = col.strip    # make sure no leading or trailing spaces left over
  ## note: allow missing / unknown date for match
  return nil   if col.empty? || col =~ /^-{1,}$/ || col =~ /^\?{1,}$/

  ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
  col = col.sub( /\(W?\d{1,2}\)/, '' )    ## e.g. (W11), (4), (21) etc.
  col = col.sub( /\(\w+\)/, '' )          ## e.g. (Fri), (Fr) etc.
  col = col.strip

  date_fmt = case col
             when /^\d{2}\/\d{2}\/\d{4}$/         then '%d/%m/%Y'     # e.g. 17/08/2002
             when /^\d{2}\/\d{2}\/\d{2}$/         then '%d/%m/%y'     # e.g. 17/08/02
             when /^\d{4}-\d{2}-\d{2}$/           then '%Y-%m-%d'     # e.g. 1995-08-04 ("standard" / default)
             when /^\d{1,2} \w{3} \d{4}$/         then '%d %b %Y'     # e.g. 8 Jul 2017
             when /^\w{3} \w{3} \d{1,2} \d{4}$/   then '%a %b %d %Y'  # e.g. Sat Aug 7 1993
             else
               puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
               ## todo/fix: add to errors/warns list - why? why not?
               exit 1
             end

  ## todo/check: use date object (keep string?) - why? why not?
  Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
end

## (helper) value of an optional column;
##   returns nil if no header is mapped or the value is missing (nil, empty, - or n/a).
def optional_column( row, header )
  return nil   unless header

  col = row[ header ]
  (col.nil? || col.empty? || col == '-' || col == 'n/a') ? nil : col
end

## (helper) parse an all-in-one score column e.g. 2-1 into [2,1];
##   label (ft, ht, et or p) is used for the error message only; exits on invalid format.
def parse_score_column( row, header, label )
  col   = row[ header ]
  score = parse_score( col )
  return score   if score

  puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
  pp row
  exit 1
end

private :autodetect_headers, :parse_time_column, :parse_date_column,
        :optional_column, :parse_score_column
|
444
|
-
|
445
|
-
|
446
|
-
private
|
447
|
-
|
448
|
-
## Pick the first candidate that is present in headers.
##   headers:    array of header (column) names
##   candidates: header names to look for (in order of priority)
## Returns the matching candidate or nil if none of the candidates is present.
def find_header( headers, candidates )
  candidates.find { |name| headers.include?( name ) }
end
|
456
|
-
|
457
|
-
########
|
458
|
-
# more helpers
|
459
|
-
#
|
460
|
-
|
461
|
-
## Parse an all-in-one score e.g. 2-1 or 2:1.
##
## Returns
##   - [nil,nil] if the score is missing (nil, empty, ?, - or n/a)
##   - [score1,score2] (integers) if the format is valid
##   - nil if the format is invalid / unparseable
def parse_score( str )
  return [nil,nil]   if str.nil?    ## todo/check: remove nil case - possible? - why? why not?

  ## remove (optional single) note/footnote/endnote marker
  ##   e.g. (*) or (a), (b), or [*], [A], [1], etc.
  ##   - allow (1) or maybe (*1) in the future - why? why not?
  cleaned = str.sub( /\([a-z*]\)|\[[1-9a-z*]\]/i, '' ).strip

  return [nil,nil]   if ['', '?', '-', 'n/a'].include?( cleaned )

  ## sanity check scores format
  ### todo/check: use regex with named capture groups here - why? why not?
  return nil   unless cleaned =~ /^\d{1,2}[:-]\d{1,2}$/

  cleaned.split( /[:-]/ ).first( 2 ).map { |goals| goals.to_i }
end # method parse_score
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
end # class CsvMatchParser
|
489
|
-
end # module SportDb
|
490
|
-
|
@@ -1,90 +0,0 @@
|
|
1
|
-
#####################
|
2
|
-
# helpers for parsing & finding match status e.g.
|
3
|
-
# - cancelled / canceled
|
4
|
-
# - awarded
|
5
|
-
# - abandoned
|
6
|
-
# - replay
|
7
|
-
# etc.
|
8
|
-
|
9
|
-
|
10
|
-
module SportDb
|
11
|
-
|
12
|
-
|
13
|
-
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
-
|
15
|
-
## (Match) status values.
##   note: use a class as an "enum"-like namespace for now - why? why not?
##         move class into Match e.g. Match::Status - why? why not?
##   note: all values are frozen - the same string objects get handed out
##         to every caller, so changing one in-place would change it for all.
class Status
  CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling) - what to use?
  AWARDED   = 'AWARDED'.freeze
  POSTPONED = 'POSTPONED'.freeze
  ABANDONED = 'ABANDONED'.freeze
  REPLAY    = 'REPLAY'.freeze
end # class Status
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
## Helpers for parsing & finding a match status
##   e.g. cancelled / canceled, awarded, postponed, abandoned, replay.
class StatusParser

  ## Map the start of str to a status.
  ##   note: matching is case-insensitive and only looks at the start of str;
  ##         english usage - cancelled (in UK), canceled (in US)
  ## Returns one of the Status values or nil if no match found.
  def self.parse( str )
    _pattern, status = [
      [/^(cancelled|canceled|can\.)/i, Status::CANCELLED],
      [/^(awarded|awd\.)/i,            Status::AWARDED],
      [/^(postponed)/i,                Status::POSTPONED],
      [/^(abandoned|abd\.)/i,          Status::ABANDONED],
      [/^(replay)/i,                   Status::REPLAY],
    ].find { |pattern, _| str =~ pattern }

    status
  end


  ## a "protected" text run block e.g. [cancelled] or [abd. after 60 mins]
  RUN_RE = /\[
              (?<text>[^\]]+)
            \]
           /x

  ## Find the first text run block e.g. [] in line that starts with a status.
  ##   note: changes line in-place (!) - the matching block gets
  ##         replaced by [STATUS.<status>].
  ## Returns the status or nil if no block with a status found.
  def self.find!( line )
    rest = line
    loop do
      m = RUN_RE.match( rest )
      return nil   unless m     ## no more text run blocks - no status found

      rest   = m.post_match     ## keep on processing rest of line (a.k.a. post match string)
      status = parse( m[:text].strip )
      next   unless status

      line.sub!( m[0], "[STATUS.#{status}]" )
      return status
    end
  end # method find!
end # class StatusParser
|
88
|
-
|
89
|
-
end # module SportDb
|
90
|
-
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
|
5
|
-
|
6
|
-
## Read the file at path in one go (shortcut helper).
##   note: always assume utf-8 for now!!!
def self.read( path )
  File.read( path, encoding: 'utf-8' )
end
|
10
|
-
|
11
|
-
|
12
|
-
########################
|
13
|
-
## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
|
14
|
-
## Collects text in a buffer and writes it all at once to path on close.
class Bundle
  def initialize( path )
    @path = path
    @buf  = +''
  end

  ## Add to the bundle:
  ##   - an array is assumed to be datafiles (file paths); every file gets
  ##     read and added (without anything from __END__ onwards)
  ##   - anything else is assumed to be a string (e.g. header, comments, etc.)
  ##     and gets added as is
  ## note: every addition gets followed by a blank line.
  def <<(value)
    if value.is_a?( Array )
      value.each do |datafile|
        ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
        ##   note: add/allow support for __END__; use m-multiline flag
        append( Datafile.read( datafile ).sub( /__END__.*/m, '' ) )
      end
    else
      append( value )
    end
  end
  alias_method :write, :<<

  ## Write the buffer to path (as utf-8).
  ## todo/fix/check: write only on close? or write on every write and use close for close?
  def close
    File.write( @path, @buf, mode: 'w:utf-8' )
  end

  private

  ## add text followed by a blank line to the buffer
  def append( text )
    @buf << text
    @buf << "\n\n"
  end
end # class Bundle
|
45
|
-
|
46
|
-
|
47
|
-
## Bundle up datafiles into a single file at path.
##   datafiles: file paths; every file gets read and added
##              (without anything from __END__ onwards)
##   header:    optional text written first
def self.write_bundle( path, datafiles:, header: nil )
  bundle = Bundle.new( path )
  bundle << header   if header

  datafiles.each do |datafile|
    ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
    ##   note: add/allow support for __END__; use m-multiline flag
    content = read( datafile ).sub( /__END__.*/m, '' )
    bundle << content
  end

  bundle.close
end
|
58
|
-
|
59
|
-
end # module Datafile
|