sportdb-structs 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,490 +0,0 @@
1
-
2
module SportDb
  # Parses football match records from CSV text into Sports::Match objects.
  #
  # Two column layouts are auto-detected when no explicit header mapping
  # is passed in:
  #
  # * the football.csv layout (github.com/footballcsv) using
  #   "Team 1" / "Team 2" with all-in-one "FT" / "HT" score columns
  # * the footballdata.uk style layout using "HomeTeam" / "AwayTeam"
  #   (or "HT"/"AT", "Home"/"Away") with split goal columns
  #   ("FTHG"/"FTAG", "HTHG"/"HTAG")
  #
  # NOTE(review): read_csv, parse_csv, Sports::Match and StatusParser are
  # not defined in this class - they are expected to be provided elsewhere.
  class CsvMatchParser

    #############
    # helpers

    # Collects the distinct values of the season column of a csv file.
    #
    # path    - path of the csv file (handed on to read_csv)
    # col     - name of the season column; defaults to 'Season'
    # sep     - column separator (handed on to read_csv)
    # headers - optional header mapping; headers[:season] has priority
    #           over col (no need to specify the column twice)
    #
    # Returns the season names in order of first appearance
    # (keys only - NOT the usage counters).
    def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
      col = headers[:season] if headers && headers[:season]

      seasons = Hash.new( 0 )   ## usage counter per season; default value is 0

      ## todo/fix: use read_csv with a block to switch to foreach - why? why not?
      rows = read_csv( path, sep: sep )

      rows.each_with_index do |row,i|
        puts "[#{i}] " + row.inspect   if i < 2   ## debug: show first two rows

        seasons[ row[ col ] ] += 1
      end

      pp seasons

      seasons.keys
    end


    ##########
    # main machinery

    # Reads the file at path (assumed to be utf-8) and parses it;
    # see #parse for the keyword arguments and the return value.
    def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
      txt = File.open( path, 'r:utf-8' ) {|f| f.read }   ## note: make sure to use (assume) utf-8
      parse( txt, headers:    headers,
                  filters:    filters,
                  converters: converters,
                  sep:        sep )
    end

    # Parses csv text; shortcut for new( txt ).parse( ... ).
    def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
      new( txt ).parse( headers:    headers,
                        filters:    filters,
                        converters: converters,
                        sep:        sep )
    end


    # txt - the csv text to parse
    def initialize( txt )
      @txt = txt
    end

    # Parses the csv text into match records.
    #
    # headers    - optional hash mapping attribute symbols (:team1, :team2,
    #              :date, :time, :score, :score1, ...) to column names;
    #              if missing the mapping gets auto-detected from the
    #              columns of the first row
    # filters    - optional hash of column name => required value;
    #              rows with any other value get skipped
    # converters - optional proc (or array of procs); every proc gets
    #              called with the row and must return the (new) row;
    #              converters run AFTER the filters
    # sep        - column separator (handed on to parse_csv)
    #
    # Returns an array of Sports::Match (empty if there are no rows).
    # Note: terminates via exit 1 (after printing a message) on an unknown
    # time / date format or on an invalid score.
    def parse( headers: nil, filters: nil, converters: nil, sep: nil )
      rows = parse_csv( @txt, sep: sep )

      return [] if rows.empty?     ## no rows / empty?

      ## fix/todo: use logger!!!!
      headers_mapping = if headers   ## use user supplied headers if present
                          {}.merge( headers )
                        else
                          ## note: assume all rows have the same columns/fields!!!
                          names = rows[0].keys
                          pp names
                          detect_headers_mapping( names )
                        end

      pp headers_mapping

      ### todo/fix: check headers - how?
      ##   e.g. HomeTeam or HT required etc.
      ##   issue error/warn if not present

      ## convert single proc shortcut to array with single converter
      converters = [converters]  if converters.is_a?( Proc )

      matches = []

      rows.each_with_index do |row,i|

        ## filter MUST match if present e.g. row['Season'] == '2017/2018'
        next if filters && filters.any? { |header, value| row[ header ] != value }

        ## note: converters run after filters for now (why not before filters?)
        if converters
          converters.each { |converter| row = converter.call( row ) }
        end

        team1 = row[ headers_mapping[ :team1 ]]
        team2 = row[ headers_mapping[ :team2 ]]

        ## check if data present - if not skip (might be empty row)
        ##  note:  (old classic) csv reader returns nil for empty fields
        ##         new modern csv reader ALWAYS returns strings
        ##           (and empty strings for data not available (n/a))
        if (team1.nil? || team1.empty?) &&
           (team2.nil? || team2.empty?)
          puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
          pp row
          next
        end

        ## remove possible match played counters e.g. (4) (11) etc.
        team1 = team1.sub( /\(\d+\)/, '' ).strip
        team2 = team2.sub( /\(\d+\)/, '' ).strip

        time = parse_time( row[ headers_mapping[ :time ]] )
        date = parse_date( row[ headers_mapping[ :date ]] )

        ## todo/fix: round might not always be just a simple integer number!!!
        ##             might be text such as Final | Leg 1 or such!!!!
        ## note: make round always a string for now!!!! e.g. "1", "2" too!!
        round = nil
        round = optional_value( row[ headers_mapping[ :round ]] )  if headers_mapping[ :round ]

        score1  = nil
        score2  = nil
        score1i = nil
        score2i = nil

        ## check for (split) full time scores ?
        if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
          ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
          score1 = parse_goals( row[ headers_mapping[ :score1 ]] )
          score2 = parse_goals( row[ headers_mapping[ :score2 ]] )
        end

        ## check for (split) half time scores ?
        if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
          score1i = parse_goals( row[ headers_mapping[ :score1i ]] )
          score2i = parse_goals( row[ headers_mapping[ :score2i ]] )
        end

        ## check for all-in-one full time (ft) and half time (ht) scores?
        if headers_mapping[ :score ]
          score1, score2   = parse_score_or_exit( row, headers_mapping[ :score ], 'ft' )
        end

        if headers_mapping[ :scorei ]
          score1i, score2i = parse_score_or_exit( row, headers_mapping[ :scorei ], 'ht' )
        end

        ####
        ## try optional score - extra time (et) and penalties (p/pen)
        score1et = nil
        score2et = nil
        score1p  = nil
        score2p  = nil

        if headers_mapping[ :score_et ]
          score1et, score2et = parse_score_or_exit( row, headers_mapping[ :score_et ], 'et' )
        end

        if headers_mapping[ :score_p ]
          score1p, score2p   = parse_score_or_exit( row, headers_mapping[ :score_p ], 'p' )
        end

        ## try some optional headings / columns
        ##   note: an explicit unknown stage (?) is kept as is for now
        stage = nil
        stage = optional_value( row[ headers_mapping[ :stage ]] )  if headers_mapping[ :stage ]

        group = nil
        group = optional_value( row[ headers_mapping[ :group ]] )  if headers_mapping[ :group ]

        status = nil    ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
        if headers_mapping[ :notes ]
          ## check for optional (match) status in notes / comments
          notes  = optional_value( row[ headers_mapping[ :notes ]] )
          status = StatusParser.parse( notes )  if notes   # note: returns nil if no (match) status found
        end

        league = nil
        league = row[ headers_mapping[ :league ]]  if headers_mapping[ :league ]

        attributes = {
          date:     date,
          time:     time,
          team1:    team1,    team2:    team2,
          score1:   score1,   score2:   score2,
          score1i:  score1i,  score2i:  score2i,
          score1et: score1et, score2et: score2et,
          score1p:  score1p,  score2p:  score2p,
          round:    round,
          stage:    stage,
          group:    group,
          status:   status,
          league:   league
        }

        matches << Sports::Match.new( **attributes )
      end

      matches
    end


    private

    # Returns the first candidate found in headers or nil if none matches.
    def find_header( headers, candidates )
      candidates.find { |candidate| headers.include?( candidate ) }
    end

    # Builds the attribute => column name mapping from the column names.
    #
    # note: greece 2001-02 etc. use HT for the home team
    def detect_headers_mapping( headers )
      mapping = {}

      if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
        ## assume our own football.csv format, see github.com/footballcsv
        mapping[:team1]  = find_header( headers, ['Team 1'] )
        mapping[:team2]  = find_header( headers, ['Team 2'] )
        mapping[:date]   = find_header( headers, ['Date'] )
        mapping[:time]   = find_header( headers, ['Time'] )

        ## all-in-one full time (ft) and half time (ht) scores
        mapping[:score]  = find_header( headers, ['FT'] )
        mapping[:scorei] = find_header( headers, ['HT'] )

        mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

        ## optional headers - only added if present
        { stage:    ['Stage'],
          group:    ['Group'],
          score_et: ['ET', 'AET'],          ## (after) extra time
          score_p:  ['P', 'PEN'],           ## penalties
          notes:    ['Notes', 'Comments'],
          league:   ['League'],
        }.each do |key, candidates|
          header = find_header( headers, candidates )
          mapping[key] = header  if header
        end
      else
        ## else try footballdata.uk and others
        mapping[:team1]   = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
        mapping[:team2]   = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
        mapping[:date]    = find_header( headers, ['Date'] )
        mapping[:time]    = find_header( headers, ['Time'] )

        ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
        mapping[:score1]  = find_header( headers, ['FTHG', 'HG'] )
        mapping[:score2]  = find_header( headers, ['FTAG', 'AG'] )

        ## note: HT = Half Time
        mapping[:score1i] = find_header( headers, ['HTHG'] )
        mapping[:score2i] = find_header( headers, ['HTAG'] )
      end

      mapping
    end

    # True for an empty value or an "unknown" marker e.g. - or --- or ? or ???
    def unknown_marker?( col )
      col.empty? ||
      col =~ /^-{1,}$/ ||
      col =~ /^\?{1,}$/
    end

    # Normalizes a time column to 'HH:MM'.
    #
    # Returns nil for a missing column or an empty / unknown (-, ?) value;
    # exits on an unknown format.
    def parse_time( col )
      return nil if col.nil?

      col = col.strip   # make sure no leading or trailing spaces are left over

      ## note: allow missing / unknown time for match
      return nil if unknown_marker?( col )

      time_fmt = if col =~ /^\d{1,2}:\d{2}$/
                   '%H:%M'     # e.g. 17:00 or 3:00
                 elsif col =~ /^\d{1,2}\.\d{2}$/    ## note: dot MUST be escaped
                   '%H.%M'     # e.g. 17.00 or 3.00
                 else
                   puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
                   ## todo/fix: add to errors/warns list - why? why not?
                   exit 1
                 end

      ## todo/fix: use a time object - do NOT use string - why? why not?
      Time.strptime( col, time_fmt ).strftime( '%H:%M' )
    end

    # Normalizes a date column to 'YYYY-MM-DD'.
    #
    # Returns nil for a missing column or an empty / unknown (-, ?) value;
    # exits on an unknown format.
    def parse_date( col )
      return nil if col.nil?    ## e.g. no date column present

      col = col.strip   # make sure no leading or trailing spaces are left over

      ## note: allow missing / unknown date for match
      return nil if unknown_marker?( col )

      ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
      col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
      col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
      col = col.strip

      date_fmt = case col
                 when /^\d{2}\/\d{2}\/\d{4}$/         then '%d/%m/%Y'     # e.g. 17/08/2002
                 when /^\d{2}\/\d{2}\/\d{2}$/         then '%d/%m/%y'     # e.g. 17/08/02
                 when /^\d{4}-\d{2}-\d{2}$/           then '%Y-%m-%d'     # e.g. 1995-08-04 ("standard" / default)
                 when /^\d{1,2} \w{3} \d{4}$/         then '%d %b %Y'     # e.g. 8 Jul 2017
                 when /^\w{3} \w{3} \d{1,2} \d{4}$/   then '%a %b %d %Y'  # e.g. Sat Aug 7 1993
                 else
                   puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
                   ## todo/fix: add to errors/warns list - why? why not?
                   exit 1
                 end

      ## todo/fix: use a date object - do NOT use string - why? why not?
      Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end

    # Returns col as is or nil if missing / not available
    # (that is, nil or empty or - or n/a).
    def optional_value( col )
      if col.nil? || col.empty? || col == '-' || col == 'n/a'
        nil
      else
        col
      end
    end

    # Converts a one or two digit goals column to an integer;
    # returns nil for everything else e.g. ? or - or empty.
    def parse_goals( col )
      col.to_i  if col =~ /^\d{1,2}$/
    end

    # Parses the all-in-one score in row[ header ];
    # prints the row and exits if the format is invalid.
    #
    # label - score type used in the error message e.g. ft, ht, et or p
    def parse_score_or_exit( row, header, label )
      col   = row[ header ]
      score = parse_score( col )
      return score  if score

      puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
      pp row
      exit 1
    end

    ########
    # more helpers
    #

    # Parses an all-in-one score e.g. 2-1 or 2:1.
    #
    # Returns [score1, score2] as integers,
    # [nil,nil] if the score is missing / unknown (nil, empty, ?, -, n/a)
    # and nil if the format is invalid / unparseable.
    def parse_score( str )
      return [nil,nil] if str.nil?   ## todo/check: remove nil case - possible? - why? why not?

      ## remove (optional single) note/footnote/endnote markers
      ##   e.g. (*) or (a), (b),
      ##     or [*], [A], [1], etc.
      str = str.sub( /\( [a-z*] \)
                       |
                      \[ [1-9a-z*] \]
                    /ix, '' ).strip

      return [nil,nil] if str.empty? || str == '?' || str == '-' || str == 'n/a'

      ## sanity check scores format
      return nil unless str =~ /^\d{1,2}[:-]\d{1,2}$/

      str.split( /[:-]/ ).map( &:to_i )
    end # method parse_score

  end # class CsvMatchParser
end # module SportDb
490
-
@@ -1,90 +0,0 @@
1
- #####################
2
- # helpers for parsing & finding match status e.g.
3
- # - cancelled / canceled
4
- # - awarded
5
- # - abandoned
6
- # - replay
7
- # etc.
8
-
9
-
10
module SportDb


  ### todo/fix: move Status inside Match struct - why? why not?

  # Match status names used as an "enum"-like namespace.
  #
  # note: the strings are frozen - StatusParser hands out these very
  #   objects, thus, an in-place change by a caller would otherwise
  #   change the constant for everybody.
  class Status
    # move class into Match e.g. Match::Status  - why? why not?
    CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling) - what to use?
    AWARDED   = 'AWARDED'.freeze
    POSTPONED = 'POSTPONED'.freeze
    ABANDONED = 'ABANDONED'.freeze
    REPLAY    = 'REPLAY'.freeze
  end # class Status



  # Finds the match status in notes / comments text.
  class StatusParser

    # Maps text starting with a known status word (or abbreviation)
    # to the matching Status constant; upper or lower case does not matter.
    #
    # str - text to check e.g. "cancelled", "awd." or "Abandoned after 60 min";
    #       may be nil
    #
    # Returns the Status constant or nil if no match found.
    def self.parse( str )
      ## note: english usage - cancelled (in UK), canceled (in US)
      if str =~ /^(cancelled|
                   canceled|
                   can\.
                  )/xi
        Status::CANCELLED
      elsif str =~ /^(awarded|
                       awd\.
                      )/xi
        Status::AWARDED
      elsif str =~ /^(postponed
                      )/xi
        Status::POSTPONED
      elsif str =~ /^(abandoned|
                       abd\.
                      )/xi
        Status::ABANDONED
      elsif str =~ /^(replay
                      )/xi
        Status::REPLAY
      else
        # no match
        nil
      end
    end


    # matches a "protected" text run block in square brackets e.g. [cancelled]
    RUN_RE = /\[
                 (?<text>[^\]]+)
              \]
             /x

    # Checks all [] text run blocks in line (left to right) for a match status.
    #
    # The first block with a status gets replaced IN PLACE by
    # [STATUS.<status>] e.g. [abandoned] becomes [STATUS.ABANDONED];
    # note: line gets changed, thus, it must not be frozen.
    #
    # Returns the Status constant or nil if no status found.
    def self.find!( line )
      status = nil

      str = line
      while (m = str.match( RUN_RE ))
        str = m.post_match  ## keep on processing rest of line/str (a.k.a. post match string)

        ## check for status match
        match_str = m[0]    ## keep a copy of the match string (for later sub)
        text      = m[:text].strip

        status = parse( text )

        if status
          line.sub!( match_str, "[STATUS.#{status}]" )
          break
        end
      end  # while match

      status
    end # method find!
  end # class StatusParser

end # module SportDb
90
-