sportdb-formats 1.1.3 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +0 -24
  3. data/Rakefile +2 -5
  4. data/lib/sportdb/formats.rb +39 -74
  5. data/lib/sportdb/formats/event/event_reader.rb +1 -1
  6. data/lib/sportdb/formats/league/league_outline_reader.rb +18 -6
  7. data/lib/sportdb/formats/package.rb +2 -2
  8. data/lib/sportdb/formats/team/club_index_history.rb +2 -6
  9. data/lib/sportdb/formats/team/club_reader_history.rb +1 -1
  10. data/lib/sportdb/formats/team/club_reader_props.rb +18 -2
  11. data/lib/sportdb/formats/version.rb +1 -1
  12. data/test/helper.rb +3 -0
  13. metadata +5 -71
  14. data/lib/sportdb/formats/config.rb +0 -40
  15. data/lib/sportdb/formats/match/match_parser_csv.rb +0 -458
  16. data/lib/sportdb/formats/match/match_status_parser.rb +0 -86
  17. data/lib/sportdb/formats/name_helper.rb +0 -87
  18. data/lib/sportdb/formats/score/score_formats.rb +0 -239
  19. data/lib/sportdb/formats/score/score_parser.rb +0 -204
  20. data/lib/sportdb/formats/season_utils.rb +0 -16
  21. data/lib/sportdb/formats/structs/country.rb +0 -31
  22. data/lib/sportdb/formats/structs/group.rb +0 -18
  23. data/lib/sportdb/formats/structs/league.rb +0 -37
  24. data/lib/sportdb/formats/structs/match.rb +0 -157
  25. data/lib/sportdb/formats/structs/matchlist.rb +0 -220
  26. data/lib/sportdb/formats/structs/round.rb +0 -25
  27. data/lib/sportdb/formats/structs/season.rb +0 -192
  28. data/lib/sportdb/formats/structs/standings.rb +0 -268
  29. data/lib/sportdb/formats/structs/team.rb +0 -157
  30. data/lib/sportdb/formats/structs/team_usage.rb +0 -88
  31. data/test/test_clubs.rb +0 -40
  32. data/test/test_csv_reader.rb +0 -31
  33. data/test/test_match.rb +0 -30
  34. data/test/test_match_status_parser.rb +0 -49
  35. data/test/test_name_helper.rb +0 -67
  36. data/test/test_scores.rb +0 -124
  37. data/test/test_season.rb +0 -111
@@ -1,40 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
- class Configuration
7
- ##
8
- ## todo: allow configure of countries_dir like clubs_dir
9
- ## "fallback" and use a default built-in world/countries.txt
10
-
11
- attr_accessor :catalog
12
-
13
- attr_reader :lang
14
- def lang=(value)
15
- ## check/todo: always use to_sym - why? needed?
16
- DateFormats.lang = value
17
- ScoreFormats.lang = value
18
- SportDb.lang.lang = value
19
-
20
- ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
21
- ## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
22
- end
23
-
24
- end # class Configuration
25
-
26
-
27
- ## lets you use
28
- ## SportDb::Import.configure do |config|
29
- ## config.lang = 'it'
30
- ## end
31
-
32
- def self.configure() yield( config ); end
33
-
34
- def self.config() @config ||= Configuration.new; end
35
-
36
- ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
37
- def self.catalog() config.catalog; end
38
-
39
- end # module Import
40
- end # module SportDb
@@ -1,458 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- class CsvMatchParser
6
-
7
- #############
8
- # helpers
9
- def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
10
-
11
- ## check if headers incl. season; if yes, it has priority over the col mapping
12
- ## e.g. no need to specify twice (if using headers)
13
- col = headers[:season] if headers && headers[:season]
14
-
15
- seasons = Hash.new( 0 ) ## default value is 0
16
-
17
- ## todo/fix: yes, use CsvHash.foreach - why? why not?
18
- ## use read_csv with block to switch to foreach!!!!
19
- rows = read_csv( path, sep: sep )
20
-
21
- rows.each_with_index do |row,i|
22
- puts "[#{i}] " + row.inspect if i < 2
23
-
24
- season = row[ col ] ## column name defaults to 'Season'
25
- seasons[ season ] += 1
26
- end
27
-
28
- pp seasons
29
-
30
- ## note: only return season keys/names (not hash with usage counter)
31
- seasons.keys
32
- end
33
-
34
-
35
- ##########
36
- # main machinery
37
-
38
- ## todo/fix: use a generic "global" parse_csv method - why? why not?
39
- ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
40
- ## ## note: do NOT symbolize keys - keep them as is!!!!!!
41
- ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
42
- ## CsvHash.parse( text, sep: sep )
43
- ## end
44
-
45
- def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
46
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
47
- parse( txt, headers: headers,
48
- filters: filters,
49
- converters: converters,
50
- sep: sep )
51
- end
52
-
53
- def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
54
- new( txt ).parse( headers: headers,
55
- filters: filters,
56
- converters: converters,
57
- sep: sep )
58
- end
59
-
60
-
61
- def initialize( txt )
62
- @txt = txt
63
- end
64
-
65
- def parse( headers: nil, filters: nil, converters: nil, sep: nil )
66
-
67
- headers_mapping = {}
68
-
69
- rows = parse_csv( @txt, sep: sep )
70
-
71
- return [] if rows.empty? ## no rows / empty?
72
-
73
-
74
- ## fix/todo: use logger!!!!
75
- ## pp csv
76
-
77
- if headers ## use user supplied headers if present
78
- headers_mapping = headers_mapping.merge( headers )
79
- else
80
-
81
- ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
82
- headers = rows[0].keys
83
- pp headers
84
-
85
- # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
86
- # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
87
-
88
- if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
89
- ## assume our own football.csv format, see github.com/footballcsv
90
- headers_mapping[:team1] = find_header( headers, ['Team 1'] )
91
- headers_mapping[:team2] = find_header( headers, ['Team 2'] )
92
- headers_mapping[:date] = find_header( headers, ['Date'] )
93
-
94
- ## check for all-in-one full time (ft) and half time (ht) scores?
95
- headers_mapping[:score] = find_header( headers, ['FT'] )
96
- headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
-
98
- headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
99
-
100
- ## optional headers - note: find_header returns nil if header NOT found
101
- header_stage = find_header( headers, ['Stage'] )
102
- headers_mapping[:stage] = header_stage if header_stage
103
-
104
- header_group = find_header( headers, ['Group'] )
105
- headers_mapping[:group] = header_group if header_group
106
-
107
-
108
- header_et = find_header( headers, ['ET', 'AET'] ) ## (after) extra time
109
- headers_mapping[:score_et] = header_et if header_et
110
-
111
- header_p = find_header( headers, ['P', 'PEN'] ) ## penalties
112
- headers_mapping[:score_p] = header_p if header_p
113
-
114
- header_notes = find_header( headers, ['Notes', 'Comments'] )
115
- headers_mapping[:notes] = header_notes if header_notes
116
-
117
-
118
- header_league = find_header( headers, ['League'] )
119
- headers_mapping[:league] = header_league if header_league
120
- else
121
- ## else try footballdata.uk and others
122
- headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
123
- headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
124
- headers_mapping[:date] = find_header( headers, ['Date'] )
125
-
126
- ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
127
- headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
128
- headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
129
-
130
- ## check for half time scores ?
131
- ## note: HT = Half Time
132
- headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
133
- headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
134
- end
135
- end
136
-
137
- pp headers_mapping
138
-
139
- ### todo/fix: check headers - how?
140
- ## if present HomeTeam or HT required etc.
141
- ## issue error/warn if not present
142
- ##
143
- ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
144
- ## exit 1
145
- ##
146
-
147
- matches = []
148
-
149
- rows.each_with_index do |row,i|
150
-
151
- ## fix/todo: use logger!!!!
152
- ## puts "[#{i}] " + row.inspect if i < 2
153
-
154
-
155
- ## todo/fix: move to its own (helper) method - filter or such!!!!
156
- if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
157
- skip = false
158
- filters.each do |header, value|
159
- if row[ header ] != value ## e.g. row['Season']
160
- skip = true
161
- break
162
- end
163
- end
164
- next if skip ## if header values NOT matching
165
- end
166
-
167
-
168
- ## note:
169
- ## add converters after filters for now (why not before filters?)
170
- if converters ## any converters defined?
171
- ## convert single proc shortcut to array with single converter
172
- converters = [converters] if converters.is_a?( Proc )
173
-
174
- ## assumes array of procs
175
- converters.each do |converter|
176
- row = converter.call( row )
177
- end
178
- end
179
-
180
-
181
-
182
- team1 = row[ headers_mapping[ :team1 ]]
183
- team2 = row[ headers_mapping[ :team2 ]]
184
-
185
-
186
- ## check if data present - if not skip (might be empty row)
187
- ## note: (old classic) csv reader returns nil for empty fields
188
- ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
189
- if (team1.nil? || team1.empty?) &&
190
- (team2.nil? || team2.empty?)
191
- puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
192
- pp row
193
- next
194
- end
195
-
196
- ## remove possible match played counters e.g. (4) (11) etc.
197
- team1 = team1.sub( /\(\d+\)/, '' ).strip
198
- team2 = team2.sub( /\(\d+\)/, '' ).strip
199
-
200
-
201
-
202
- col = row[ headers_mapping[ :date ]]
203
- col = col.strip # make sure not leading or trailing spaces left over
204
-
205
- if col.empty? ||
206
- col =~ /^-{1,}$/ || # e.g. - or ---
207
- col =~ /^\?{1,}$/ # e.g. ? or ???
208
- ## note: allow missing / unknown date for match
209
- date = nil
210
- else
211
- ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
212
- col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
213
- col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
214
- col = col.strip # make sure not leading or trailing spaces left over
215
-
216
- if col =~ /^\d{2}\/\d{2}\/\d{4}$/
217
- date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
218
- elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
219
- date_fmt = '%d/%m/%y' # e.g. 17/08/02
220
- elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
221
- date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
222
- elsif col =~ /^\d{1,2} \w{3} \d{4}$/
223
- date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
224
- elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
225
- date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
226
- else
227
- puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
228
- ## todo/fix: add to errors/warns list - why? why not?
229
- exit 1
230
- end
231
-
232
- ## todo/check: use date object (keep string?) - why? why not?
233
- ## todo/fix: yes!! use date object!!!! do NOT use string
234
- date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
235
- end
236
-
237
-
238
- ##
239
- ## todo/fix: round might not always be just a simple integer number!!!
240
- ## might be text such as Final | Leg 1 or such!!!!
241
- round = nil
242
- ## check for (optional) round / matchday
243
- if headers_mapping[ :round ]
244
- col = row[ headers_mapping[ :round ]]
245
- ## todo: issue warning if not ? or - (and just empty string) why? why not
246
- ## (old attic) was: round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
247
-
248
- ## note: make round always a string for now!!!! e.g. "1", "2" too!!
249
- round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
250
- ## note: allow missing round for match / defaults to nil
251
- nil
252
- else
253
- col
254
- end
255
- end
256
-
257
-
258
- score1 = nil
259
- score2 = nil
260
- score1i = nil
261
- score2i = nil
262
-
263
- ## check for full time scores ?
264
- if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
265
- ft = [ row[ headers_mapping[ :score1 ]],
266
- row[ headers_mapping[ :score2 ]] ]
267
-
268
- ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
269
- score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
270
- score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
271
- end
272
-
273
- ## check for half time scores ?
274
- if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
275
- ht = [ row[ headers_mapping[ :score1i ]],
276
- row[ headers_mapping[ :score2i ]] ]
277
-
278
- ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
279
- score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
280
- score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
281
- end
282
-
283
-
284
- ## check for all-in-one full time scores?
285
- if headers_mapping[ :score ]
286
- col = row[ headers_mapping[ :score ]]
287
- score = parse_score( col )
288
- if score
289
- score1 = score[0]
290
- score2 = score[1]
291
- else
292
- puts "!! ERROR - invalid score (ft) format >#{col}<:"
293
- pp row
294
- exit 1
295
- end
296
- end
297
-
298
- if headers_mapping[ :scorei ]
299
- col = row[ headers_mapping[ :scorei ]]
300
- score = parse_score( col )
301
- if score
302
- score1i = score[0]
303
- score2i = score[1]
304
- else
305
- puts "!! ERROR - invalid score (ht) format >#{col}<:"
306
- pp row
307
- exit 1
308
- end
309
- end
310
-
311
- ####
312
- ## try optional score - extra time (et) and penalties (p/pen)
313
- score1et = nil
314
- score2et = nil
315
- score1p = nil
316
- score2p = nil
317
-
318
- if headers_mapping[ :score_et ]
319
- col = row[ headers_mapping[ :score_et ]]
320
- score = parse_score( col )
321
- if score
322
- score1et = score[0]
323
- score2et = score[1]
324
- else
325
- puts "!! ERROR - invalid score (et) format >#{col}<:"
326
- pp row
327
- exit 1
328
- end
329
- end
330
-
331
- if headers_mapping[ :score_p ]
332
- col = row[ headers_mapping[ :score_p ]]
333
- score = parse_score( col )
334
- if score
335
- score1p = score[0]
336
- score2p = score[1]
337
- else
338
- puts "!! ERROR - invalid score (p) format >#{col}<:"
339
- pp row
340
- exit 1
341
- end
342
- end
343
-
344
-
345
- ## try some optional headings / columns
346
- stage = nil
347
- if headers_mapping[ :stage ]
348
- col = row[ headers_mapping[ :stage ]]
349
- ## todo/fix: check can col be nil e.g. col.nil? possible?
350
- stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
351
- ## note: allow missing stage for match / defaults to "regular"
352
- nil
353
- elsif col == '?'
354
- ## note: default explicit unknown to unknown for now AND not regular - why? why not?
355
- '?' ## todo/check: use unknown and NOT ? - why? why not?
356
- else
357
- col
358
- end
359
- end
360
-
361
- group = nil
362
- if headers_mapping[ :group ]
363
- col = row[ headers_mapping[ :group ]]
364
- ## todo/fix: check can col be nil e.g. col.nil? possible?
365
- group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
366
- ## note: allow missing group for match / defaults to nil
367
- nil
368
- else
369
- col
370
- end
371
- end
372
-
373
- status = nil ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
374
- if headers_mapping[ :notes ]
375
- col = row[ headers_mapping[ :notes ]]
376
- ## check for optional (match) status in notes / comments
377
- status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
378
- nil
379
- else
380
- StatusParser.parse( col ) # note: returns nil if no (match) status found
381
- end
382
- end
383
-
384
-
385
- league = nil
386
- league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
387
-
388
-
389
- ## puts 'match attributes:'
390
- attributes = {
391
- date: date,
392
- team1: team1, team2: team2,
393
- score1: score1, score2: score2,
394
- score1i: score1i, score2i: score2i,
395
- score1et: score1et, score2et: score2et,
396
- score1p: score1p, score2p: score2p,
397
- round: round,
398
- stage: stage,
399
- group: group,
400
- status: status,
401
- league: league
402
- }
403
- ## pp attributes
404
-
405
- match = Import::Match.new( **attributes )
406
- matches << match
407
- end
408
-
409
- ## pp matches
410
- matches
411
- end
412
-
413
-
414
- private
415
-
416
- def find_header( headers, candidates )
417
- ## todo/fix: use find (first match) from Enumerable or similar ?! - why? more idiomatic code?
418
-
419
- candidates.each do |candidate|
420
- return candidate if headers.include?( candidate ) ## bingo!!!
421
- end
422
- nil ## no matching header found!!!
423
- end
424
-
425
- ########
426
- # more helpers
427
- #
428
-
429
- def parse_score( str )
430
- if str.nil? ## todo/check: remove nil case - possible? - why? why not?
431
- [nil,nil]
432
- else
433
- ## remove (optional single) note/footnote/endnote markers
434
- ## e.g. (*) or (a), (b),
435
- ## or [*], [A], [1], etc.
436
- ## - allow (1) or maybe (*1) in the future - why? why not?
437
- str = str.sub( /\( [a-z*] \)
438
- |
439
- \[ [1-9a-z*] \]
440
- /ix, '' ).strip
441
-
442
- if str.empty? || str == '?' || str == '-' || str == 'n/a'
443
- [nil,nil]
444
- ### todo/check: use regex with named capture groups here - why? why not?
445
- elsif str =~ /^\d{1,2}[:-]\d{1,2}$/ ## sanity check scores format
446
- score = str.split( /[:-]/ )
447
- [score[0].to_i, score[1].to_i]
448
- else
449
- nil ## note: returns nil if invalid / unparseable format!!!
450
- end
451
- end
452
- end # method parse_score
453
-
454
-
455
-
456
- end # class CsvMatchParser
457
- end # module SportDb
458
-