sports 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9ff86ce7425d973d8bb3adc8235bc0889c050fbd
4
+ data.tar.gz: 82decba8bbf6f6964612ca0913086224abfdeb1f
5
+ SHA512:
6
+ metadata.gz: '039b38b1e98a51a30390b88dbda7d35035134938af42767f96c7cf0ecf043c70a6ec2c58a99f59e9f0fe98a636755bbff8310def3431ada51a4a142dd668e956'
7
+ data.tar.gz: fe78d06e9c4aa9a44ef1bcc26859da1884fa191b1ea4a28d3bd059c2198f91c875a4182b3a65aa569591ef47e0db92123cc96f5ec331e9fe832fb2a7226bf321
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2020-08-17
2
+
3
+ * Everything is new. First release.
@@ -0,0 +1,28 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/sports.rb
6
+ lib/sports/config.rb
7
+ lib/sports/match_parser_csv.rb
8
+ lib/sports/match_status_parser.rb
9
+ lib/sports/name_helper.rb
10
+ lib/sports/season.rb
11
+ lib/sports/structs/country.rb
12
+ lib/sports/structs/goal.rb
13
+ lib/sports/structs/group.rb
14
+ lib/sports/structs/league.rb
15
+ lib/sports/structs/match.rb
16
+ lib/sports/structs/matchlist.rb
17
+ lib/sports/structs/round.rb
18
+ lib/sports/structs/standings.rb
19
+ lib/sports/structs/team.rb
20
+ lib/sports/structs/team_usage.rb
21
+ lib/sports/version.rb
22
+ test/helper.rb
23
+ test/test_clubs.rb
24
+ test/test_csv_reader.rb
25
+ test/test_match.rb
26
+ test/test_match_status_parser.rb
27
+ test/test_name_helper.rb
28
+ test/test_season.rb
@@ -0,0 +1,26 @@
1
+ # sports - sport data structures for matches, scores, leagues, seasons, rounds, groups, teams, clubs and more
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/sports](https://rubygems.org/gems/sports)
7
+ * rdoc :: [rubydoc.info/gems/sports](http://rubydoc.info/gems/sports)
8
+ * forum :: [opensport](http://groups.google.com/group/opensport)
9
+
10
+
11
+
12
+ ## Usage
13
+
14
+ To be done
15
+
16
+ ## License
17
+
18
+ The `sports` scripts are dedicated to the public domain.
19
+ Use them as you please with no restrictions whatsoever.
20
+
21
+
22
+ ## Questions? Comments?
23
+
24
+ Send them along to the
25
+ [Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
26
+ Thanks!
@@ -0,0 +1,32 @@
1
+ require 'hoe'
2
+ require './lib/sports/version.rb'
3
+
4
## gem specification for the sports gem (built via hoe)
Hoe.spec 'sports' do
  ## one-liner used for both the summary and the (long) description
  tagline = "sports - sport data structures for matches, scores, leagues, seasons, rounds, groups, teams, clubs and more"

  self.version     = Sports::VERSION
  self.summary     = tagline
  self.description = tagline

  self.urls   = ['https://github.com/sportdb/sport.db']
  self.author = 'Gerald Bauer'
  self.email  = 'opensport@googlegroups.com'

  ## note: use markdown files (.md) for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'CHANGELOG.md'

  self.licenses = ['Public Domain']

  ## runtime dependencies - [name, version requirement]
  self.extra_deps = [
    ['alphabets',     '>= 1.0.0'],
    ['date-formats',  '>= 1.0.1'],
    ['score-formats', '>= 0.0.1'],
    ['csvreader',     '>= 1.2.4'],
  ]

  self.spec_extras = { required_ruby_version: '>= 2.2.2' }
end
@@ -0,0 +1,55 @@
1
+ ## 3rd party gems
2
+ require 'alphabets' # downcase_i18n, unaccent, variants, ...
3
+ require 'date/formats' # DateFormats.parse, find!, ...
4
+ require 'score/formats'
5
+ require 'csvreader'
6
+
7
+
8
## Reads the csv file at path via CsvHash.read and returns its result.
##
##  sep             - passed on as the :sep option (only if truthy)
##  symbolize_names - if truthy, passes on header_converters: :symbol
def read_csv( path, sep: nil,
                    symbolize_names: nil )
  candidates = { sep:               sep,
                 header_converters: (:symbol if symbolize_names) }

  ## note: only pass along options that are actually set (drop nil/false)
  opts = candidates.select { |_name, value| value }

  CsvHash.read( path, **opts )
end
16
+
17
## Parses the csv text via CsvHash.parse and returns its result.
##
##  sep             - passed on as the :sep option (only if truthy)
##  symbolize_names - if truthy, passes on header_converters: :symbol
def parse_csv( txt, sep: nil,
                    symbolize_names: nil )
  candidates = { sep:               sep,
                 header_converters: (:symbol if symbolize_names) }

  ## note: only pass along options that are actually set (drop nil/false)
  opts = candidates.select { |_name, value| value }

  CsvHash.parse( txt, **opts )
end
25
+
26
+
27
+ ###
28
+ # our own code
29
+ require 'sports/version' # let version always go first
30
+ require 'sports/config'
31
+ require 'sports/season'
32
+
33
+ require 'sports/name_helper'
34
+
35
+ require 'sports/structs/country'
36
+ require 'sports/structs/league'
37
+ require 'sports/structs/team'
38
+ require 'sports/structs/round'
39
+ require 'sports/structs/group'
40
+ require 'sports/structs/goal'
41
+ require 'sports/structs/match'
42
+ require 'sports/structs/matchlist'
43
+ require 'sports/structs/standings'
44
+ require 'sports/structs/team_usage'
45
+
46
+
47
+ require 'sports/match_status_parser'
48
+ require 'sports/match_parser_csv'
49
+
50
+
51
+
52
+
53
+ puts Sports.banner # say hello - note: writes the banner to stdout every time this file gets loaded (Sports.banner is not defined in this file; presumably in sports/version - verify)
54
+
55
+
@@ -0,0 +1,25 @@
1
+
2
module Sports

  ## Library-wide settings.
  ##
  ## For now the only setting is the language (lang); setting it
  ## forwards the value to DateFormats and ScoreFormats and remembers
  ## it so that it can be read back via #lang.
  class Configuration

    ## returns the value last assigned via lang= (nil if never set)
    attr_reader :lang

    def lang=(value)
      ## check/todo: always use to_sym  - why? needed?
      DateFormats.lang  = value
      ScoreFormats.lang = value

      ## note: remember the value AFTER both libraries accepted it;
      ##   (before this fix the reader always returned nil)
      @lang = value
    end

  end # class Configuration


  ## lets you use
  ##   Sports.configure do |config|
  ##      config.lang = 'it'
  ##   end

  ## yields the shared configuration object to the passed-in block
  def self.configure() yield( config ); end

  ## returns the shared configuration object (created on first use)
  def self.config() @config ||= Configuration.new; end

end # module Sports
@@ -0,0 +1,456 @@
1
+
2
module Sports

  ## Turns csv text with one match per row into an array of Match objects.
  ##
  ## If no headers mapping gets passed in, two layouts are auto-detected
  ## from the header names of the first row:
  ##  - 'Team 1' / 'Team 2' with all-in-one score columns (FT, HT, ET, P),
  ##     that is, our own football.csv format, see github.com/footballcsv
  ##  - otherwise HomeTeam / AwayTeam (or HT / AT, Home / Away) with one
  ##     goal column per team (FTHG, FTAG, HTHG, HTAG), that is,
  ##     footballdata.uk and others
  ##
  ## Note: diagnostics are written to stdout (puts / pp) and an unknown date
  ##  or score format ends the process with exit 1.
  ##
  ## fix/todo: use logger!!!!
  class CsvMatchParser

    ## supported date formats - note: checked in order, first match wins
    DATE_FORMATS = [
      [/^\d{2}\/\d{2}\/\d{4}$/,       '%d/%m/%Y'],     # e.g. 17/08/2002
      [/^\d{2}\/\d{2}\/\d{2}$/,       '%d/%m/%y'],     # e.g. 17/08/02
      [/^\d{4}-\d{2}-\d{2}$/,         '%Y-%m-%d'],     # e.g. 1995-08-04  - "standard" / default date format
      [/^\d{1,2} \w{3} \d{4}$/,       '%d %b %Y'],     # e.g. 8 Jul 2017
      [/^\w{3} \w{3} \d{1,2} \d{4}$/, '%a %b %d %Y'],  # e.g. Sat Aug 7 1993
    ].freeze

    ## optional headers (football.csv format only) - mapping key => candidate header names
    OPTIONAL_HEADERS = {
      stage:    ['Stage'],
      group:    ['Group'],
      score_et: ['ET', 'AET'],           ## (after) extra time
      score_p:  ['P', 'PEN'],            ## penalties
      notes:    ['Notes', 'Comments'],
      league:   ['League'],
    }.freeze

    private_constant :DATE_FORMATS, :OPTIONAL_HEADERS


    #############
    # helpers

    ## Returns the distinct values of the season column of the csv file at path.
    ##
    ##  col     - name of the season column (defaults to 'Season')
    ##  headers - optional headers mapping; headers[:season] has priority over col
    ##              e.g. no need to specify twice (if using headers)
    def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
      col = headers[:season]   if headers && headers[:season]

      ## todo/fix: yes, use CsvHash.foreach - why? why not?
      ##   use read_csv with block to switch to foreach!!!!
      rows = read_csv( path, sep: sep )

      usage = Hash.new( 0 )    ## season => number of rows
      rows.each_with_index do |row,i|
        puts "[#{i}] " + row.inspect   if i < 2
        usage[ row[ col ] ] += 1
      end

      pp usage

      ## note: only return season keys/names (not hash with usage counter)
      usage.keys
    end


    ##########
    # main machinery

    ## Reads the file at path (as utf-8) and parses it; same options as parse.
    def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
      txt = File.open( path, 'r:utf-8', &:read )   ## note: make sure to use (assume) utf-8
      parse( txt, headers: headers, filters: filters, converters: converters, sep: sep )
    end

    ## Shortcut for new( txt ).parse( ... ).
    def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
      new( txt ).parse( headers: headers, filters: filters, converters: converters, sep: sep )
    end


    def initialize( txt )
      @txt = txt
    end

    ## Parses the csv text and returns an array of Match objects
    ##   (or an empty array if there are no rows).
    ##
    ##  headers    - optional mapping (hash) of :team1, :team2, :date, :score, etc.
    ##                 to the header names in use; auto-detected if missing
    ##  filters    - optional hash of header => value; rows where any value
    ##                 differs get skipped e.g. row['Season'] == '2017/2018'
    ##  converters - optional proc or array of procs; each gets called with
    ##                 the row and MUST return the (converted) row;
    ##                 note: runs after the filters
    ##  sep        - passed on to parse_csv
    def parse( headers: nil, filters: nil, converters: nil, sep: nil )
      rows = parse_csv( @txt, sep: sep )
      return []   if rows.empty?      ## no rows / empty?

      headers_mapping = if headers     ## use user supplied headers if present
                          {}.merge( headers )
                        else
                          ## note: assume all rows have the same columns/fields!!!
                          detect_headers( rows[0].keys )
                        end
      pp headers_mapping

      ## convert single proc shortcut to array with single converter
      converters = [converters]   if converters.is_a?( Proc )

      matches = []
      rows.each_with_index do |row,i|
        ## filter MUST match if present
        next if filters && filters.any? { |header, value| row[ header ] != value }

        if converters
          row = converters.reduce( row ) { |rec, converter| converter.call( rec ) }
        end

        team1 = row[ headers_mapping[ :team1 ]]
        team2 = row[ headers_mapping[ :team2 ]]

        ## check if data present - if not skip (might be empty row)
        ##  note: (old classic) csv reader returns nil for empty fields
        ##        new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
        if missing?( team1 ) && missing?( team2 )
          puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
          pp row
          next
        end

        matches << build_match( row, headers_mapping, team1, team2 )
      end

      matches
    end


    private

    ## Returns the first candidate that is included in headers or nil if none is.
    def find_header( headers, candidates )
      candidates.find { |candidate| headers.include?( candidate ) }
    end

    ## Builds the headers mapping from the header names of the first row.
    ##  note: entries for required columns may be nil if no header was found
    def detect_headers( headers )
      pp headers

      mapping = {}
      if find_header( headers, ['Team 1'] ) && find_header( headers, ['Team 2'] )
        ## assume our own football.csv format, see github.com/footballcsv
        mapping[:team1]  = find_header( headers, ['Team 1'] )
        mapping[:team2]  = find_header( headers, ['Team 2'] )
        mapping[:date]   = find_header( headers, ['Date'] )

        ## all-in-one full time (ft) and half time (ht) scores
        mapping[:score]  = find_header( headers, ['FT'] )
        mapping[:scorei] = find_header( headers, ['HT'] )

        mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

        ## optional headers - note: only added if header found
        OPTIONAL_HEADERS.each do |key, candidates|
          header = find_header( headers, candidates )
          mapping[ key ] = header   if header
        end
      else
        ## else try footballdata.uk and others
        mapping[:team1]   = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
        mapping[:team2]   = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
        mapping[:date]    = find_header( headers, ['Date'] )

        ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
        mapping[:score1]  = find_header( headers, ['FTHG', 'HG'] )
        mapping[:score2]  = find_header( headers, ['FTAG', 'AG'] )

        ## note: HT = Half Time
        mapping[:score1i] = find_header( headers, ['HTHG'] )
        mapping[:score2i] = find_header( headers, ['HTAG'] )
      end
      mapping
    end

    ## Builds a single Match from a (filtered and converted) row.
    ##  note: keep the column order (date, round, scores, stage, group, notes, league);
    ##        the date and score checks might exit the process
    def build_match( row, mapping, team1, team2 )
      team1 = clean_team( team1 )
      team2 = clean_team( team2 )

      date  = parse_date( row[ mapping[ :date ]] )

      ## todo/fix: round might not always be just a simple integer number!!!
      ##   note: make round always a string for now!!!! e.g. "1", "2" too!!
      round = optional_text( row, mapping[ :round ] )

      score1 = score2 = score1i = score2i = nil

      ## one column per team for full time / half time scores?
      if mapping[ :score1 ] && mapping[ :score2 ]
        score1 = goals( row[ mapping[ :score1 ]] )
        score2 = goals( row[ mapping[ :score2 ]] )
      end
      if mapping[ :score1i ] && mapping[ :score2i ]
        score1i = goals( row[ mapping[ :score1i ]] )
        score2i = goals( row[ mapping[ :score2i ]] )
      end

      ## all-in-one scores? note: these win over the per-team columns
      score1,  score2  = score_pair( row, mapping[ :score ],  'ft' )   if mapping[ :score ]
      score1i, score2i = score_pair( row, mapping[ :scorei ], 'ht' )   if mapping[ :scorei ]

      ## optional scores - extra time (et) and penalties (p/pen)
      score1et = score2et = score1p = score2p = nil
      score1et, score2et = score_pair( row, mapping[ :score_et ], 'et' )   if mapping[ :score_et ]
      score1p,  score2p  = score_pair( row, mapping[ :score_p ],  'p' )    if mapping[ :score_p ]

      ## note: an explicit unknown stage ('?') is passed along as is
      stage = optional_text( row, mapping[ :stage ] )
      group = optional_text( row, mapping[ :group ] )

      ## check for optional (match) status in notes / comments
      ##   e.g. AWARDED, CANCELLED, POSTPONED, etc.
      notes  = optional_text( row, mapping[ :notes ] )
      status = notes && StatusParser.parse( notes )   # note: returns nil if no (match) status found

      league = mapping[ :league ] ? row[ mapping[ :league ]] : nil

      Match.new( date:     date,
                 team1:    team1,    team2:    team2,
                 score1:   score1,   score2:   score2,
                 score1i:  score1i,  score2i:  score2i,
                 score1et: score1et, score2et: score2et,
                 score1p:  score1p,  score2p:  score2p,
                 round:    round,
                 stage:    stage,
                 group:    group,
                 status:   status,
                 league:   league )
    end

    ## true if the cell holds no data (nil or empty string)
    def missing?( value )
      value.nil? || value.empty?
    end

    ## Strips possible match played counters e.g. (4) (11) etc.
    ##  note: a nil name (only one of the two teams missing) becomes an
    ##        empty string - same result as for an empty cell
    def clean_team( name )
      name.to_s.sub( /\(\d+\)/, '' ).strip
    end

    ## Returns the cell for header or nil if there is no header or
    ##   the cell is marked as not available (nil, empty, - or n/a).
    def optional_text( row, header )
      return nil   unless header

      col = row[ header ]
      col.nil? || col.empty? || col == '-' || col == 'n/a' ? nil : col
    end

    ## Returns the date as a string in the format YYYY-MM-DD or
    ##  nil for a missing / unknown date (nil, empty, - or ?).
    ##  note: prints an error and exits (1) if the date format is unknown
    def parse_date( col )
      ## note: the cell is nil if there is no date column - allow it
      col = col.to_s.strip     # make sure not leading or trailing spaces left over

      return nil   if col.empty? ||
                      col =~ /^-{1,}$/ ||    # e.g. - or ---
                      col =~ /^\?{1,}$/      # e.g. ? or ???

      ## remove possible weekday or weeknumber
      col = col.sub( /\(W?\d{1,2}\)/, '' )   ## e.g. (W11), (4), (21) etc.
               .sub( /\(\w+\)/, '' )         ## e.g. (Fri), (Fr) etc.
               .strip

      _pattern, date_fmt = DATE_FORMATS.find { |pattern, _fmt| col =~ pattern }
      unless date_fmt
        puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
        ## todo/fix: add to errors/warns list - why? why not?
        exit 1
      end

      ## todo/fix: yes!! use date object!!!! do NOT use string
      Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end

    ## Returns the goals as an integer for one or two digits; otherwise nil.
    ##   todo/fix: issue warning if not ? or - (and just empty string) why? why not
    def goals( str )
      str.to_i   if str =~ /^\d{1,2}$/
    end

    ## Returns the all-in-one score in the column header as [score1,score2].
    ##  note: prints an error (incl. the row) and exits (1) if the format is invalid;
    ##        label (ft, ht, et or p) is used for the error message only
    def score_pair( row, header, label )
      col  = row[ header ]
      pair = parse_score( col )
      return pair   if pair

      puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
      pp row
      exit 1
    end

    ## Returns [score1,score2] for scores such as 2-1 or 2:1,
    ##   [nil,nil] if the score is not available (nil, empty, ?, - or n/a)
    ##   and nil if the format is invalid / unparseable.
    def parse_score( str )
      return [nil,nil]   if str.nil?   ## todo/check: remove nil case - possible? - why? why not?

      ## remove (optional single) note/footnote/endnote markers
      ##   e.g. (*) or (a), (b),
      ##     or [*], [A], [1], etc.
      ##   - allow (1) or maybe (*1) in the future - why? why not?
      str = str.sub( /\( [a-z*] \)
                      |
                     \[ [1-9a-z*] \]
                    /ix, '' ).strip

      if str.empty? || str == '?' || str == '-' || str == 'n/a'
        [nil,nil]
      elsif str =~ /^\d{1,2}[:-]\d{1,2}$/    ## sanity check scores format
        str.split( /[:-]/ ).map { |goals| goals.to_i }
      else
        nil
      end
    end

  end # class CsvMatchParser
end   # module Sports
456
+