sportdb-formats 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +0 -24
  3. data/Rakefile +2 -5
  4. data/lib/sportdb/formats.rb +39 -74
  5. data/lib/sportdb/formats/event/event_reader.rb +1 -1
  6. data/lib/sportdb/formats/league/league_outline_reader.rb +18 -6
  7. data/lib/sportdb/formats/package.rb +2 -2
  8. data/lib/sportdb/formats/team/club_index_history.rb +2 -6
  9. data/lib/sportdb/formats/team/club_reader_history.rb +1 -1
  10. data/lib/sportdb/formats/team/club_reader_props.rb +18 -2
  11. data/lib/sportdb/formats/version.rb +1 -1
  12. data/test/helper.rb +3 -0
  13. metadata +5 -71
  14. data/lib/sportdb/formats/config.rb +0 -40
  15. data/lib/sportdb/formats/match/match_parser_csv.rb +0 -458
  16. data/lib/sportdb/formats/match/match_status_parser.rb +0 -86
  17. data/lib/sportdb/formats/name_helper.rb +0 -87
  18. data/lib/sportdb/formats/score/score_formats.rb +0 -239
  19. data/lib/sportdb/formats/score/score_parser.rb +0 -204
  20. data/lib/sportdb/formats/season_utils.rb +0 -16
  21. data/lib/sportdb/formats/structs/country.rb +0 -31
  22. data/lib/sportdb/formats/structs/group.rb +0 -18
  23. data/lib/sportdb/formats/structs/league.rb +0 -37
  24. data/lib/sportdb/formats/structs/match.rb +0 -157
  25. data/lib/sportdb/formats/structs/matchlist.rb +0 -220
  26. data/lib/sportdb/formats/structs/round.rb +0 -25
  27. data/lib/sportdb/formats/structs/season.rb +0 -192
  28. data/lib/sportdb/formats/structs/standings.rb +0 -268
  29. data/lib/sportdb/formats/structs/team.rb +0 -157
  30. data/lib/sportdb/formats/structs/team_usage.rb +0 -88
  31. data/test/test_clubs.rb +0 -40
  32. data/test/test_csv_reader.rb +0 -31
  33. data/test/test_match.rb +0 -30
  34. data/test/test_match_status_parser.rb +0 -49
  35. data/test/test_name_helper.rb +0 -67
  36. data/test/test_scores.rb +0 -124
  37. data/test/test_season.rb +0 -111
@@ -1,40 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
- class Configuration
7
- ##
8
- ## todo: allow configure of countries_dir like clubs_dir
9
- ## "fallback" and use a default built-in world/countries.txt
10
-
11
- attr_accessor :catalog
12
-
13
- attr_reader :lang
14
- def lang=(value)
15
- ## check/todo: always use to_sym - why? needed?
16
- DateFormats.lang = value
17
- ScoreFormats.lang = value
18
- SportDb.lang.lang = value
19
-
20
- ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
21
- ## use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
22
- end
23
-
24
- end # class Configuration
25
-
26
-
27
- ## lets you use
28
- ## SportDb::Import.configure do |config|
29
- ## config.lang = 'it'
30
- ## end
31
-
32
- def self.configure() yield( config ); end
33
-
34
- def self.config() @config ||= Configuration.new; end
35
-
36
- ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
37
- def self.catalog() config.catalog; end
38
-
39
- end # module Import
40
- end # module SportDb
@@ -1,458 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- class CsvMatchParser
6
-
7
- #############
8
- # helpers
9
- def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
10
-
11
- ## check if headers incl. season; if yes, it has priority over col mapping
12
- ## e.g. no need to specify twice (if using headers)
13
- col = headers[:season] if headers && headers[:season]
14
-
15
- seasons = Hash.new( 0 ) ## default value is 0
16
-
17
- ## todo/fix: yes, use CsvHash.foreach - why? why not?
18
- ## use read_csv with block to switch to foreach!!!!
19
- rows = read_csv( path, sep: sep )
20
-
21
- rows.each_with_index do |row,i|
22
- puts "[#{i}] " + row.inspect if i < 2
23
-
24
- season = row[ col ] ## column name defaults to 'Season'
25
- seasons[ season ] += 1
26
- end
27
-
28
- pp seasons
29
-
30
- ## note: only return season keys/names (not hash with usage counter)
31
- seasons.keys
32
- end
33
-
34
-
35
- ##########
36
- # main machinery
37
-
38
- ## todo/fix: use a generic "global" parse_csv method - why? why not?
39
- ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
40
- ## ## note: do NOT symbolize keys - keep them as is!!!!!!
41
- ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
42
- ## CsvHash.parse( text, sep: sep )
43
- ## end
44
-
45
- def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
46
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
47
- parse( txt, headers: headers,
48
- filters: filters,
49
- converters: converters,
50
- sep: sep )
51
- end
52
-
53
- def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
54
- new( txt ).parse( headers: headers,
55
- filters: filters,
56
- converters: converters,
57
- sep: sep )
58
- end
59
-
60
-
61
- def initialize( txt )
62
- @txt = txt
63
- end
64
-
65
- def parse( headers: nil, filters: nil, converters: nil, sep: nil )
66
-
67
- headers_mapping = {}
68
-
69
- rows = parse_csv( @txt, sep: sep )
70
-
71
- return [] if rows.empty? ## no rows / empty?
72
-
73
-
74
- ## fix/todo: use logger!!!!
75
- ## pp csv
76
-
77
- if headers ## use user supplied headers if present
78
- headers_mapping = headers_mapping.merge( headers )
79
- else
80
-
81
- ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
82
- headers = rows[0].keys
83
- pp headers
84
-
85
- # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
86
- # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
87
-
88
- if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
89
- ## assume our own football.csv format, see github.com/footballcsv
90
- headers_mapping[:team1] = find_header( headers, ['Team 1'] )
91
- headers_mapping[:team2] = find_header( headers, ['Team 2'] )
92
- headers_mapping[:date] = find_header( headers, ['Date'] )
93
-
94
- ## check for all-in-one full time (ft) and half time (ht) scores?
95
- headers_mapping[:score] = find_header( headers, ['FT'] )
96
- headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
-
98
- headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
99
-
100
- ## optional headers - note: find_header returns nil if header NOT found
101
- header_stage = find_header( headers, ['Stage'] )
102
- headers_mapping[:stage] = header_stage if header_stage
103
-
104
- header_group = find_header( headers, ['Group'] )
105
- headers_mapping[:group] = header_group if header_group
106
-
107
-
108
- header_et = find_header( headers, ['ET', 'AET'] ) ## (after) extra time
109
- headers_mapping[:score_et] = header_et if header_et
110
-
111
- header_p = find_header( headers, ['P', 'PEN'] ) ## penalties
112
- headers_mapping[:score_p] = header_p if header_p
113
-
114
- header_notes = find_header( headers, ['Notes', 'Comments'] )
115
- headers_mapping[:notes] = header_notes if header_notes
116
-
117
-
118
- header_league = find_header( headers, ['League'] )
119
- headers_mapping[:league] = header_league if header_league
120
- else
121
- ## else try footballdata.uk and others
122
- headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
123
- headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
124
- headers_mapping[:date] = find_header( headers, ['Date'] )
125
-
126
- ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
127
- headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
128
- headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
129
-
130
- ## check for half time scores ?
131
- ## note: HT = Half Time
132
- headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
133
- headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
134
- end
135
- end
136
-
137
- pp headers_mapping
138
-
139
- ### todo/fix: check headers - how?
140
- ## if present HomeTeam or HT required etc.
141
- ## issue error/warn if not present
142
- ##
143
- ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
144
- ## exit 1
145
- ##
146
-
147
- matches = []
148
-
149
- rows.each_with_index do |row,i|
150
-
151
- ## fix/todo: use logger!!!!
152
- ## puts "[#{i}] " + row.inspect if i < 2
153
-
154
-
155
- ## todo/fix: move to its own (helper) method - filter or such!!!!
156
- if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
157
- skip = false
158
- filters.each do |header, value|
159
- if row[ header ] != value ## e.g. row['Season']
160
- skip = true
161
- break
162
- end
163
- end
164
- next if skip ## if header values NOT matching
165
- end
166
-
167
-
168
- ## note:
169
- ## add converters after filters for now (why not before filters?)
170
- if converters ## any converters defined?
171
- ## convert single proc shortcut to array with single converter
172
- converters = [converters] if converters.is_a?( Proc )
173
-
174
- ## assumes array of procs
175
- converters.each do |converter|
176
- row = converter.call( row )
177
- end
178
- end
179
-
180
-
181
-
182
- team1 = row[ headers_mapping[ :team1 ]]
183
- team2 = row[ headers_mapping[ :team2 ]]
184
-
185
-
186
- ## check if data present - if not skip (might be empty row)
187
- ## note: (old classic) csv reader returns nil for empty fields
188
- ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
189
- if (team1.nil? || team1.empty?) &&
190
- (team2.nil? || team2.empty?)
191
- puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
192
- pp row
193
- next
194
- end
195
-
196
- ## remove possible match played counters e.g. (4) (11) etc.
197
- team1 = team1.sub( /\(\d+\)/, '' ).strip
198
- team2 = team2.sub( /\(\d+\)/, '' ).strip
199
-
200
-
201
-
202
- col = row[ headers_mapping[ :date ]]
203
- col = col.strip # make sure not leading or trailing spaces left over
204
-
205
- if col.empty? ||
206
- col =~ /^-{1,}$/ || # e.g. - or ---
207
- col =~ /^\?{1,}$/ # e.g. ? or ???
208
- ## note: allow missing / unknown date for match
209
- date = nil
210
- else
211
- ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
212
- col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
213
- col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
214
- col = col.strip # make sure not leading or trailing spaces left over
215
-
216
- if col =~ /^\d{2}\/\d{2}\/\d{4}$/
217
- date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
218
- elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
219
- date_fmt = '%d/%m/%y' # e.g. 17/08/02
220
- elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
221
- date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
222
- elsif col =~ /^\d{1,2} \w{3} \d{4}$/
223
- date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
224
- elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
225
- date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
226
- else
227
- puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
228
- ## todo/fix: add to errors/warns list - why? why not?
229
- exit 1
230
- end
231
-
232
- ## todo/check: use date object (keep string?) - why? why not?
233
- ## todo/fix: yes!! use date object!!!! do NOT use string
234
- date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
235
- end
236
-
237
-
238
- ##
239
- ## todo/fix: round might not always be just a simple integer number!!!
240
- ## might be text such as Final | Leg 1 or such!!!!
241
- round = nil
242
- ## check for (optional) round / matchday
243
- if headers_mapping[ :round ]
244
- col = row[ headers_mapping[ :round ]]
245
- ## todo: issue warning if not ? or - (and just empty string) why? why not
246
- ## (old attic) was: round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
247
-
248
- ## note: make round always a string for now!!!! e.g. "1", "2" too!!
249
- round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
250
- ## note: allow missing round for match / defaults to nil
251
- nil
252
- else
253
- col
254
- end
255
- end
256
-
257
-
258
- score1 = nil
259
- score2 = nil
260
- score1i = nil
261
- score2i = nil
262
-
263
- ## check for full time scores ?
264
- if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
265
- ft = [ row[ headers_mapping[ :score1 ]],
266
- row[ headers_mapping[ :score2 ]] ]
267
-
268
- ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
269
- score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
270
- score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
271
- end
272
-
273
- ## check for half time scores ?
274
- if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
275
- ht = [ row[ headers_mapping[ :score1i ]],
276
- row[ headers_mapping[ :score2i ]] ]
277
-
278
- ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
279
- score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
280
- score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
281
- end
282
-
283
-
284
- ## check for all-in-one full time scores?
285
- if headers_mapping[ :score ]
286
- col = row[ headers_mapping[ :score ]]
287
- score = parse_score( col )
288
- if score
289
- score1 = score[0]
290
- score2 = score[1]
291
- else
292
- puts "!! ERROR - invalid score (ft) format >#{col}<:"
293
- pp row
294
- exit 1
295
- end
296
- end
297
-
298
- if headers_mapping[ :scorei ]
299
- col = row[ headers_mapping[ :scorei ]]
300
- score = parse_score( col )
301
- if score
302
- score1i = score[0]
303
- score2i = score[1]
304
- else
305
- puts "!! ERROR - invalid score (ht) format >#{col}<:"
306
- pp row
307
- exit 1
308
- end
309
- end
310
-
311
- ####
312
- ## try optional score - extra time (et) and penalties (p/pen)
313
- score1et = nil
314
- score2et = nil
315
- score1p = nil
316
- score2p = nil
317
-
318
- if headers_mapping[ :score_et ]
319
- col = row[ headers_mapping[ :score_et ]]
320
- score = parse_score( col )
321
- if score
322
- score1et = score[0]
323
- score2et = score[1]
324
- else
325
- puts "!! ERROR - invalid score (et) format >#{col}<:"
326
- pp row
327
- exit 1
328
- end
329
- end
330
-
331
- if headers_mapping[ :score_p ]
332
- col = row[ headers_mapping[ :score_p ]]
333
- score = parse_score( col )
334
- if score
335
- score1p = score[0]
336
- score2p = score[1]
337
- else
338
- puts "!! ERROR - invalid score (p) format >#{col}<:"
339
- pp row
340
- exit 1
341
- end
342
- end
343
-
344
-
345
- ## try some optional headings / columns
346
- stage = nil
347
- if headers_mapping[ :stage ]
348
- col = row[ headers_mapping[ :stage ]]
349
- ## todo/fix: check can col be nil e.g. col.nil? possible?
350
- stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
351
- ## note: allow missing stage for match / defaults to "regular"
352
- nil
353
- elsif col == '?'
354
- ## note: default explicit unknown to unknown for now AND not regular - why? why not?
355
- '?' ## todo/check: use unkown and NOT ? - why? why not?
356
- else
357
- col
358
- end
359
- end
360
-
361
- group = nil
362
- if headers_mapping[ :group ]
363
- col = row[ headers_mapping[ :group ]]
364
- ## todo/fix: check can col be nil e.g. col.nil? possible?
365
- group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
366
- ## note: allow missing group for match / defaults to nil
367
- nil
368
- else
369
- col
370
- end
371
- end
372
-
373
- status = nil ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
374
- if headers_mapping[ :notes ]
375
- col = row[ headers_mapping[ :notes ]]
376
- ## check for optional (match) status in notes / comments
377
- status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
378
- nil
379
- else
380
- StatusParser.parse( col ) # note: returns nil if no (match) status found
381
- end
382
- end
383
-
384
-
385
- league = nil
386
- league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
387
-
388
-
389
- ## puts 'match attributes:'
390
- attributes = {
391
- date: date,
392
- team1: team1, team2: team2,
393
- score1: score1, score2: score2,
394
- score1i: score1i, score2i: score2i,
395
- score1et: score1et, score2et: score2et,
396
- score1p: score1p, score2p: score2p,
397
- round: round,
398
- stage: stage,
399
- group: group,
400
- status: status,
401
- league: league
402
- }
403
- ## pp attributes
404
-
405
- match = Import::Match.new( **attributes )
406
- matches << match
407
- end
408
-
409
- ## pp matches
410
- matches
411
- end
412
-
413
-
414
- private
415
-
416
- def find_header( headers, candidates )
417
- ## todo/fix: use find_first from enumerable or similar ?! - why? more idiomatic code?
418
-
419
- candidates.each do |candidate|
420
- return candidate if headers.include?( candidate ) ## bingo!!!
421
- end
422
- nil ## no matching header found!!!
423
- end
424
-
425
- ########
426
- # more helpers
427
- #
428
-
429
- def parse_score( str )
430
- if str.nil? ## todo/check: remove nil case - possible? - why? why not?
431
- [nil,nil]
432
- else
433
- ## remove (optional single) note/footnote/endnote markers
434
- ## e.g. (*) or (a), (b),
435
- ## or [*], [A], [1], etc.
436
- ## - allow (1) or maybe (*1) in the future - why? why not?
437
- str = str.sub( /\( [a-z*] \)
438
- |
439
- \[ [1-9a-z*] \]
440
- /ix, '' ).strip
441
-
442
- if str.empty? || str == '?' || str == '-' || str == 'n/a'
443
- [nil,nil]
444
- ### todo/check: use regex with named capture groups here - why? why not?
445
- elsif str =~ /^\d{1,2}[:-]\d{1,2}$/ ## sanity check scores format
446
- score = str.split( /[:-]/ )
447
- [score[0].to_i, score[1].to_i]
448
- else
449
- nil ## note: returns nil if invalid / unparseable format!!!
450
- end
451
- end
452
- end # method parse_score
453
-
454
-
455
-
456
- end # class CsvMatchParser
457
- end # module SportDb
458
-