sportdb-structs 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +4 -0
- data/Rakefile +1 -0
- data/lib/sportdb/csv/goal.rb +192 -0
- data/lib/sportdb/csv/goal_parser_csv.rb +28 -0
- data/lib/sportdb/csv/match_parser_csv.rb +490 -0
- data/lib/sportdb/csv/match_status_parser.rb +63 -0
- data/lib/sportdb/structs/country.rb +101 -5
- data/lib/sportdb/structs/league.rb +148 -15
- data/lib/sportdb/structs/match.rb +17 -15
- data/lib/sportdb/structs/round.rb +3 -2
- data/lib/sportdb/structs/team.rb +12 -9
- data/lib/sportdb/structs/version.rb +2 -2
- data/lib/sportdb/structs.rb +40 -3
- metadata +23 -5
@@ -0,0 +1,490 @@
|
|
1
|
+
|
2
|
+
module SportDb
  # Turns match records stored as CSV text into an array of Sports::Match
  # structs.
  #
  # Two column layouts are auto-detected from the header row when no explicit
  # headers mapping is passed in:
  #   - the football.csv layout ('Team 1', 'Team 2', 'FT', 'HT', ...)
  #   - the footballdata.uk style layout ('HomeTeam', 'AwayTeam', 'FTHG', ...)
  #
  # note: progress / debug information is printed to stdout (puts / pp) and
  #       unparseable time, date or score values terminate the process
  #       via exit 1.
  class CsvMatchParser

    #############
    # helpers

    # Collects the distinct values found in the season column of a CSV file.
    #
    # path    - passed through as-is to read_csv
    # col     - name of the season column (defaults to 'Season')
    # sep     - separator passed through to read_csv
    # headers - optional headers mapping; headers[:season] (if present)
    #           has priority over col
    #
    # Returns an array with the season values in order of first appearance.
    #
    # note: read_csv is defined elsewhere - assumes it returns an enumerable
    #       of rows that can be indexed by column name (todo: confirm)
    def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )

      ## check if headers incl. season - if yes, has priority over col mapping
      ##  e.g. no need to specify twice (if using headers)
      col = headers[:season]   if headers && headers[:season]

      seasons = Hash.new( 0 )   ## season => usage counter; default value is 0

      ## todo/fix: yes, use CsvHash.foreach - why? why not?
      ##   use read_csv with block to switch to foreach!!!!
      rows = read_csv( path, sep: sep )

      rows.each_with_index do |row,i|
        puts "[#{i}] " + row.inspect  if i < 2    ## dump the first two rows only

        season = row[ col ]    ## column name defaults to 'Season'
        seasons[ season ] += 1
      end

      pp seasons

      ## note: only return season keys/names (not hash with usage counter)
      seasons.keys
    end


    ##########
    # main machinery

    ## todo/fix: use a generic "global" parse_csv method - why? why not?
    ##   lets you change the csv library in one place if needed/desired
    ##   note: do NOT symbolize keys - keep them as is!!!!!!

    # Reads the file at path (as utf-8) and parses its content;
    # all keyword arguments are passed through to CsvMatchParser.parse.
    def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
      txt = File.open( path, 'r:utf-8' ) {|f| f.read }   ## note: make sure to use (assume) utf-8
      parse( txt, headers:    headers,
                  filters:    filters,
                  converters: converters,
                  sep:        sep )
    end

    # Parses the CSV text txt; shortcut for new( txt ).parse( ... ).
    def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
      new( txt ).parse( headers:    headers,
                        filters:    filters,
                        converters: converters,
                        sep:        sep )
    end


    # txt - the raw CSV text; kept as-is until #parse gets called
    def initialize( txt )
      @txt = txt
    end

    # Parses the CSV text into an array of Sports::Match structs.
    #
    # headers    - optional hash mapping attribute keys (:team1, :team2, :date,
    #              :time, :score, :scorei, :score1, :score2, :score1i,
    #              :score2i, :score_et, :score_p, :round, :stage, :group,
    #              :notes, :league) to column names;
    #              auto-detected from the first row if nil
    # filters    - optional column name => value pairs; rows where any
    #              column differs from its value get skipped
    # converters - optional proc or array of procs; every proc gets called with
    #              the row and must return the (converted) row;
    #              converters run after the filters
    # sep        - separator passed through to parse_csv
    #
    # Returns [] if there are no rows. Rows with both teams missing get skipped
    # (with a warning).
    #
    # note: terminates the process (exit 1) on an unknown time or date format
    #       or an invalid score.
    def parse( headers: nil, filters: nil, converters: nil, sep: nil )

      headers_mapping = {}

      rows = parse_csv( @txt, sep: sep )

      return []   if rows.empty?      ## no rows / empty?

      ## fix/todo: use logger!!!!

      if headers   ## use user supplied headers if present
        headers_mapping = headers_mapping.merge( headers )
      else
        ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
        headers = rows[0].keys
        pp headers

        headers_mapping = autodetect_headers( headers )
      end

      pp headers_mapping

      ### todo/fix: check headers - how?
      ##   if present HomeTeam or HT required etc.
      ##   issue error/warn if not present

      ## convert single proc shortcut to array with single converter
      ##   (done once up front instead of again for every row)
      converters = [converters]   if converters.is_a?( Proc )

      matches = []

      rows.each_with_index do |row,i|

        ## todo/fix: move to its own (helper) method - filter or such!!!!
        ## all filters MUST match if present e.g. row['Season'] == '2017/2018'
        next  if filters && filters.any? { |header, value| row[ header ] != value }

        ## note:
        ##  add converters after filters for now (why not before filters?)
        if converters    ## assumes array of procs
          converters.each { |converter| row = converter.call( row ) }
        end

        team1 = row[ headers_mapping[ :team1 ]]
        team2 = row[ headers_mapping[ :team2 ]]

        ## check if data present - if not skip (might be empty row)
        ##  note:  (old classic) csv reader returns nil for empty fields
        ##         new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
        if (team1.nil? || team1.empty?) &&
           (team2.nil? || team2.empty?)
          puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
          pp row
          next
        end

        ## remove possible match played counters e.g. (4) (11) etc.
        team1 = team1.sub( /\(\d+\)/, '' ).strip
        team2 = team2.sub( /\(\d+\)/, '' ).strip

        time = parse_time( row[ headers_mapping[ :time ]] )
        date = parse_date( row[ headers_mapping[ :date ]] )

        ## todo/fix: round might not always be just a simple integer number!!!
        ##             might be text such as Final | Leg 1 or such!!!!
        ## note: make round always a string for now!!!! e.g. "1", "2" too!!
        round = optional_text( row, headers_mapping[ :round ] )

        ## full time / half time scores split into two columns (one per team)?
        score1,  score2  = read_score_pair( row, headers_mapping[ :score1 ],
                                                 headers_mapping[ :score2 ] )
        score1i, score2i = read_score_pair( row, headers_mapping[ :score1i ],
                                                 headers_mapping[ :score2i ] )

        ## all-in-one full time (ft) / half time (ht) scores?
        ##   note: if present, overrides the split columns from above
        if headers_mapping[ :score ]
          score1,  score2  = read_score( row, headers_mapping[ :score ],  'ft' )
        end
        if headers_mapping[ :scorei ]
          score1i, score2i = read_score( row, headers_mapping[ :scorei ], 'ht' )
        end

        ## try optional score - extra time (et) and penalties (p/pen)
        score1et, score2et = read_score( row, headers_mapping[ :score_et ], 'et' )
        score1p,  score2p  = read_score( row, headers_mapping[ :score_p ],  'p' )

        ## try some optional headings / columns
        ##   note: an explicit unknown stage ('?') is passed through as is
        stage = optional_text( row, headers_mapping[ :stage ] )
        group = optional_text( row, headers_mapping[ :group ] )

        ## check for optional (match) status in notes / comments
        ##   e.g. AWARDED, CANCELLED, POSTPONED, etc.
        notes  = optional_text( row, headers_mapping[ :notes ] )
        status = notes ? StatusParser.parse( notes ) : nil   # note: returns nil if no (match) status found

        league = nil
        league = row[ headers_mapping[ :league ]]   if headers_mapping[ :league ]

        attributes = {
          date:     date,
          time:     time,
          team1:    team1,     team2:    team2,
          score1:   score1,    score2:   score2,
          score1i:  score1i,   score2i:  score2i,
          score1et: score1et,  score2et: score2et,
          score1p:  score1p,   score2p:  score2p,
          round:    round,
          stage:    stage,
          group:    group,
          status:   status,
          league:   league
        }

        matches << Sports::Match.new( **attributes )
      end

      matches
    end


    private

    # Returns the first candidate that is included in headers
    # or nil if no candidate matches.
    def find_header( headers, candidates )
      ## todo/fix: use find from enumerable or similar ?! - why? more idiomatic code?
      candidates.each do |candidate|
        return candidate   if headers.include?( candidate )  ## bingo!!!
      end
      nil  ## no matching header found!!!
    end

    # Builds the attribute key => column name mapping from the header names
    # of the first row.
    #
    # note: the required keys are always set (value is nil if header not found);
    #       the optional keys of the football.csv layout only if found.
    def autodetect_headers( headers )
      mapping = {}

      # note: greece 2001-02 etc. use HT  -  check CSV reader  row['HomeTeam'] may not be nil but an empty string?
      #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

      if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
        ## assume our own football.csv format, see github.com/footballcsv
        mapping[:team1]  = find_header( headers, ['Team 1'] )
        mapping[:team2]  = find_header( headers, ['Team 2'] )
        mapping[:date]   = find_header( headers, ['Date'] )
        mapping[:time]   = find_header( headers, ['Time'] )

        ## check for all-in-one full time (ft) and half time (ht) scores?
        mapping[:score]  = find_header( headers, ['FT'] )
        mapping[:scorei] = find_header( headers, ['HT'] )

        mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

        ## optional headers - note: find_header returns nil if header NOT found
        { stage:    ['Stage'],
          group:    ['Group'],
          score_et: ['ET', 'AET'],          ## (after) extra time
          score_p:  ['P', 'PEN'],           ## penalties
          notes:    ['Notes', 'Comments'],
          league:   ['League'],
        }.each do |key, candidates|
          header = find_header( headers, candidates )
          mapping[key] = header   if header
        end
      else
        ## else try footballdata.uk and others
        mapping[:team1]   = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
        mapping[:team2]   = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
        mapping[:date]    = find_header( headers, ['Date'] )
        mapping[:time]    = find_header( headers, ['Time'] )

        ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
        mapping[:score1]  = find_header( headers, ['FTHG', 'HG'] )
        mapping[:score2]  = find_header( headers, ['FTAG', 'AG'] )

        ## check for half time scores ?
        ##  note: HT = Half Time
        mapping[:score1i] = find_header( headers, ['HTHG'] )
        mapping[:score2i] = find_header( headers, ['HTAG'] )
      end

      mapping
    end

    # Is the (stripped) value a placeholder for missing / unknown data,
    # that is, empty or dashes only (- or ---) or question marks only (? or ???).
    def unknown_marker?( col )
      col.empty? ||
      col =~ /^-{1,}$/ ||      # e.g.  - or ---
      col =~ /^\?{1,}$/        # e.g. ? or ???
    end

    # Returns the value of the column named header or nil if there is no
    # header or the value is missing (nil, empty, '-' or 'n/a').
    def optional_text( row, header )
      return nil   unless header

      col = row[ header ]
      ## todo: issue warning if not ? or - (and just empty string) why? why not
      missing = col.nil? || col.empty? || col == '-' || col == 'n/a'
      missing ? nil : col
    end

    # Converts a time value to a 'HH:MM' string.
    # Returns nil for nil and for missing / unknown placeholders.
    #
    # note: exits (exit 1) if the time format is unknown.
    def parse_time( col )
      return nil   if col.nil?

      col = col.strip   # make sure no leading or trailing spaces left over

      ## note: allow missing / unknown time for match
      return nil   if unknown_marker?( col )

      if col =~ /^\d{1,2}:\d{2}$/
        time_fmt = '%H:%M'   # e.g. 17:00 or 3:00
      elsif col =~ /^\d{1,2}\.\d{2}$/     ## note: literal dot only
        time_fmt = '%H.%M'   # e.g. 17.00 or 3.00
      else
        puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
        ## todo/fix: add to errors/warns list - why? why not?
        exit 1
      end

      ## todo/check: use time object (keep string?) - why? why not?
      Time.strptime( col, time_fmt ).strftime( '%H:%M' )
    end

    # Converts a date value to a 'YYYY-MM-DD' string.
    # Returns nil for nil and for missing / unknown placeholders.
    #
    # note: exits (exit 1) if the date format is unknown.
    def parse_date( col )
      ## note: nil if there is no date column or the reader returns nil for empty fields
      return nil   if col.nil?

      col = col.strip   # make sure no leading or trailing spaces left over

      ## note: allow missing / unknown date for match
      return nil   if unknown_marker?( col )

      ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
      col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
      col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
      col = col.strip   # make sure no leading or trailing spaces left over

      if col =~ /^\d{2}\/\d{2}\/\d{4}$/
        date_fmt = '%d/%m/%Y'   # e.g. 17/08/2002
      elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
        date_fmt = '%d/%m/%y'   # e.g. 17/08/02
      elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/      ## "standard" / default date format
        date_fmt = '%Y-%m-%d'   # e.g. 1995-08-04
      elsif col =~ /^\d{1,2} \w{3} \d{4}$/
        date_fmt = '%d %b %Y'   # e.g. 8 Jul 2017
      elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
        date_fmt = '%a %b %d %Y'   # e.g. Sat Aug 7 1993
      else
        puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
        ## todo/fix: add to errors/warns list - why? why not?
        exit 1
      end

      ## todo/check: use date object (keep string?) - why? why not?
      ##  todo/fix: yes!! use date object!!!! do NOT use string
      Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end

    # Reads a score split into two columns (one per team).
    # Returns [score1, score2]; a score is nil if not one or two digits.
    # Returns [nil, nil] unless both headers are present.
    def read_score_pair( row, header1, header2 )
      return [nil, nil]   unless header1 && header2

      ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
      [header1, header2].map do |header|
        col = row[ header ]
        col =~ /^\d{1,2}$/ ? col.to_i : nil
      end
    end

    # Reads an all-in-one score column e.g. 2-1.
    # Returns [score1, score2] or [nil, nil] if there is no header.
    # label (ft, ht, et or p) is used in the error message only.
    #
    # note: exits (exit 1) if the score is invalid.
    def read_score( row, header, label )
      return [nil, nil]   unless header

      col   = row[ header ]
      score = parse_score( col )
      unless score
        puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
        pp row
        exit 1
      end
      score
    end


    ########
    # more helpers
    #

    # Parses an all-in-one score e.g. 2-1 or 2:1.
    #
    # Returns [score1, score2] as integers,
    #         [nil, nil] if the score is missing (nil, empty, ?, - or n/a) or
    #         nil if the format is invalid.
    def parse_score( str )
      if str.nil?      ## todo/check: remove nil case - possible? - why? why not?
        [nil,nil]
      else
        ## remove (optional single) note/footnote/endnote markers
        ##   e.g. (*) or (a), (b),
        ##     or [*], [A], [1], etc.
        ##   - allow (1) or maybe (*1) in the future - why? why not?
        str = str.sub( /\( [a-z*] \)
                         |
                        \[ [1-9a-z*] \]
                       /ix, '' ).strip

        if str.empty? || str == '?' || str == '-' || str == 'n/a'
          [nil,nil]
        ### todo/check: use regex with named capture groups here - why? why not?
        elsif str =~ /^\d{1,2}[:-]\d{1,2}$/   ## sanity check scores format
          score = str.split( /[:-]/ )
          [score[0].to_i, score[1].to_i]
        else
          nil  ## note: returns nil if invalid / unparseable format!!!
        end
      end
    end # method parse_score

  end # class CsvMatchParser
end # module SportDb
|
490
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#####################
|
2
|
+
# helpers for parsing & finding match status e.g.
|
3
|
+
# - cancelled / canceled
|
4
|
+
# - awarded
|
5
|
+
# - abandoned
|
6
|
+
# - replay
|
7
|
+
# etc.
|
8
|
+
|
9
|
+
|
10
|
+
module SportDb

  ### todo/fix: move Status inside Match struct - why? why not?

  # "Enum"-like namespace for the known match status values.
  class Status
    # note: use a class as an "enum"-like namespace for now - why? why not?
    #   move class into Match e.g. Match::Status - why? why not?
    CANCELLED = 'CANCELLED'   # canceled (US spelling), cancelled (UK spelling) - what to use?
    AWARDED   = 'AWARDED'
    POSTPONED = 'POSTPONED'
    ABANDONED = 'ABANDONED'
    REPLAY    = 'REPLAY'
  end # class Status


  #
  # todo/fix - move self.parse to class Status e.g.
  #   use Status.parse( str )  NOT StatusParser...

  class StatusParser

    # Maps a note / comment to a match status.
    #
    # The status keyword (or its abbreviation) must be at the start of a line;
    # upper / lower case does not matter. The checks run in the order
    # cancelled, awarded, postponed, abandoned, replay - first hit wins.
    #
    # Returns one of the Status constants or nil if no status found.
    def self.parse( str )
      ## note: english usage - cancelled (in UK), canceled (in US)
      return Status::CANCELLED   if str =~ /^(cancelled|canceled|can\.)/i
      return Status::AWARDED     if str =~ /^(awarded|awd\.)/i
      return Status::POSTPONED   if str =~ /^(postponed)/i
      return Status::ABANDONED   if str =~ /^(abandoned|abd\.)/i
      return Status::REPLAY      if str =~ /^(replay)/i

      nil   # no match
    end

  end # class StatusParser
end # module SportDb
|
63
|
+
|
@@ -11,7 +11,7 @@ class City
|
|
11
11
|
attr_reader :key, :name, :country
|
12
12
|
attr_accessor :alt_names
|
13
13
|
|
14
|
-
def initialize( key: nil,
|
14
|
+
def initialize( key: nil,
|
15
15
|
name:, country: )
|
16
16
|
## note: auto-generate key "on-the-fly" if missing for now - why? why not?
|
17
17
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
@@ -23,6 +23,7 @@ class City
|
|
23
23
|
end # class City
|
24
24
|
|
25
25
|
|
26
|
+
|
26
27
|
class Country
|
27
28
|
|
28
29
|
## note: is read-only/immutable for now - why? why not?
|
@@ -33,9 +34,9 @@ class Country
|
|
33
34
|
def initialize( key: nil, name:, code:, tags: [] )
|
34
35
|
## note: auto-generate key "on-the-fly" if missing for now - why? why not?
|
35
36
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
36
|
-
@key = begin
|
37
|
+
@key = begin
|
37
38
|
if key
|
38
|
-
key
|
39
|
+
key
|
39
40
|
elsif code
|
40
41
|
code.downcase
|
41
42
|
else
|
@@ -47,14 +48,109 @@ class Country
|
|
47
48
|
@tags = tags
|
48
49
|
end
|
49
50
|
|
50
|
-
|
51
|
+
|
52
|
+
#############################
|
53
|
+
### virtual helpers
|
54
|
+
## 1) codes (returns uniq array of all codes in lowercase
|
55
|
+
## incl. key, code and alt_codes in alt_names)
|
56
|
+
## 2) names (returns uniq array of all names - with language tags stripped)
|
57
|
+
##
|
58
|
+
## 3a) adjective/adj - might be nil??
|
59
|
+
## b) adjectives/adjs
|
60
|
+
|
61
|
+
## note - alt_names - returns all-in-one alt names (& codes)
|
62
|
+
|
63
|
+
## note: split names into names AND codes
|
64
|
+
## 1) key plus all lower case names are codes
|
65
|
+
## 2) all upper case names are names AND codes
|
66
|
+
## 3) all other names are names
|
67
|
+
|
68
|
+
## only allow ascii a to z in code & name for now - why? why not?
##   e.g. USA, etc.
##  note: anchored with \A and \z (whole string) and NOT ^ and $ (line);
##        a multi-line string or a trailing newline will NOT match
IS_CODE_N_NAME_RE = %r{\A
                         [A-Z]+
                       \z}x
## must be all lowercase (unicode letters allowed for now - why? why not?)
##   e.g. nirl, a, ö, etc.
IS_CODE_RE        = %r{\A
                         [\p{Ll}]+
                       \z}x
|
78
|
+
|
79
|
+
## Returns a uniq array of all codes in lowercase, that is,
##   the key, the code (downcased) plus all alt names that are
##   either a code'n'name (all uppercase - gets downcased) or a code (all lowercase).
##
##  note: the code is optional (the constructor falls back to the name
##          for the key if there is no code) - skip if nil
def codes
  ## note - "auto-magically" downcase code (and code'n'name matches)!!!
  codes = [@key]
  codes << @code.downcase   if @code

  alt_names.each do |name|
    if IS_CODE_N_NAME_RE.match?( name )
      codes << name.downcase
    elsif IS_CODE_RE.match?( name )
      codes << name
    end
    ## else assume name - do nothing - skip/ignore
  end

  codes.uniq
end
|
93
|
+
|
94
|
+
|
95
|
+
include SportDb::NameHelper # pulls-in strip_lang
|
96
|
+
|
97
|
+
## Returns a uniq array of all names, that is,
##   the name plus all alt names that are NOT a code (all lowercase);
##   code'n'names (all uppercase) get added as is,
##   all others get passed through strip_lang first.
def names
  collected = alt_names.each_with_object( [@name] ) do |alt_name, acc|
    if IS_CODE_N_NAME_RE.match?( alt_name )
      acc << alt_name
    elsif !IS_CODE_RE.match?( alt_name )    ## assume name
      acc << strip_lang( alt_name )
    end
    ## else code - do nothing - skip/ignore
  end

  collected.uniq
end
|
110
|
+
|
111
|
+
## country adjectives (by country key) - quick hack for now inline here
##
## todo - add language marker - why? why not?
##    e.g. Österr. => Österr. [de]
##         Deutsche => Deutsche [de]
##
## todo/fix - add more - see
##   https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations
ADJ = {
  'at'  => ['Österr.', 'Austrian'],
  'de'  => ['Deutsche', 'German'],
  'eng' => ['English'],
  'sco' => ['Scottish'],
  'wal' => ['Welsh'],
  'nir' => ['Northern Irish'],
  'ie'  => ['Irish'],

  'it'  => ['Italian'],
  'sm'  => ['San Marinese'],
  'fr'  => ['French'],
  'hu'  => ['Hungarian'],
  'gr'  => ['Greek'],
  'pt'  => ['Portuguese'],
  'ch'  => ['Swiss'],
  'tr'  => ['Turkish'],
}

## the first adjective listed for the country key
##   note - adjective might be nil!!!
def adjective
  adjectives.first
end

## all adjectives listed for the country key (empty array if none)
def adjectives
  ADJ.fetch( @key ) { [] }
end

alias adj  adjective
alias adjs adjectives
|
145
|
+
|
146
|
+
## Pretty printer (pp) hook - hands a one-line summary to printer.text
##   e.g. <Country: at - Austria (AUT)|Österreich [de], fifa|uefa>
##
##  note: alt names and tags only get added if present (not nil, not empty)
def pretty_print( printer )
  buf = String.new
  buf << "<Country: #{@key} - #{@name} (#{@code})"
  buf << "|#{@alt_names.join('|')}"  if @alt_names && !@alt_names.empty?
  ## note: no closing ) after the tags - there is no matching opening (
  buf << ", #{@tags.join('|')}"      if @tags && !@tags.empty?
  buf << ">"

  printer.text( buf )
end
|
59
155
|
end # class Country
|
60
156
|
|