sportdb-structs 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4603607b4234fbe74e0b57ed548a61a7d899041497d2e0e7f3104396b00e943a
4
- data.tar.gz: 24945611ea43fca109b2a4b993ad6bf913daee4e034762d3797799d5a8a7bea5
3
+ metadata.gz: 6efbd5615829502f2960305080f413c167a199b0c5be71fd2ca46882633bb447
4
+ data.tar.gz: 7d9025f5ac565666527eb87b67a6f94e6607b4d8cca023a5478d601b523144d1
5
5
  SHA512:
6
- metadata.gz: 236bb3e8ac4e58e16b398e2fd9bdceabbd74e527a4aa1ef2af823976f31ef7b63f7342b640829218a2c9c08effd060675db67c4ec24d17497cee8071db4df0db
7
- data.tar.gz: 7bdc16e2b06599fad689166a06bcd6b9aa4ef9ba4ddd0bb6c47a0d6aa68f6b20d9e2184ba0f211d9d9d455da6a7832ec14e9b5455aafe0e3c2c2e0c43f14dd5c
6
+ metadata.gz: 78e8dea8c1842fe14e57ecc4794263c2ab29a17d43bd45c9131aa8889c4e05f026506d43768293679c88ebb563caf47dfca9610f0a1736bb964f325fa945886d
7
+ data.tar.gz: b950d1600926c9c6394c6267a874dcc6fe17ce5cd7f06cfe34244474aaa8bce535ce598142b98b8ef3f8e17cb31c1f6d1003b1eca8331451bdc75fb3542a0ccf
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.4.2
1
+ ### 0.5.0
2
2
 
3
3
  ### 0.0.1 / 2020-08-24
4
4
 
data/Manifest.txt CHANGED
@@ -2,6 +2,10 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
+ lib/sportdb/csv/goal.rb
6
+ lib/sportdb/csv/goal_parser_csv.rb
7
+ lib/sportdb/csv/match_parser_csv.rb
8
+ lib/sportdb/csv/match_status_parser.rb
5
9
  lib/sportdb/structs.rb
6
10
  lib/sportdb/structs/country.rb
7
11
  lib/sportdb/structs/event_info.rb
data/Rakefile CHANGED
@@ -20,6 +20,7 @@ Hoe.spec 'sportdb-structs' do
20
20
  self.licenses = ['Public Domain']
21
21
 
22
22
  self.extra_deps = [
23
+ ['cocos', '>= 0.4.0'],
23
24
  ['alphabets', '>= 1.0.2'],
24
25
  ['score-formats', '>= 0.1.1'],
25
26
  ['season-formats', '>= 0.0.1'],
@@ -0,0 +1,192 @@
1
+
2
+ module Sports
3
+
4
+ ## "free-standing" goal event - for import/export in separate event / goal datafiles
5
+ ## returned by CsvGoalParser and others
6
+ class GoalEvent
7
+
8
+ def self.build( row ) ## rename to parse or such - why? why not?
9
+
10
+ ## split match_id
11
+ team_str, more_str = row['Match'].split( '|' )
12
+ team1_str, team2_str = team_str.split( ' - ' )
13
+
14
+ more_str = more_str.strip
15
+ team1_str = team1_str.strip
16
+ team2_str = team2_str.strip
17
+
18
+ # check if more_str is a date otherwise assume round
19
+ date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i ## Apr 4
20
+ '%b %d'
21
+ elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i ## Apr 4 2019
22
+ '%b %d %Y'
23
+ else
24
+ nil
25
+ end
26
+
27
+ if date_fmt
28
+ date = Date.strptime( more_str, date_fmt )
29
+ round = nil
30
+ else
31
+ date = nil
32
+ round = more_str
33
+ end
34
+
35
+
36
+ values = row['Score'].split('-')
37
+ values = values.map { |value| value.strip }
38
+ score1 = values[0].to_i
39
+ score2 = values[1].to_i
40
+
41
+ minute = nil
42
+ offset = nil
43
+ if m=%r{([0-9]+)
44
+ (?:[ ]+
45
+ \+([0-9]+)
46
+ )?
47
+ ['.]
48
+ $}x.match( row['Minute'])
49
+ minute = m[1].to_i
50
+ offset = m[2] ? m[2].to_i : nil
51
+ else
52
+ puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
53
+ exit 1
54
+ end
55
+
56
+ attributes = {
57
+ team1: team1_str,
58
+ team2: team2_str,
59
+ date: date,
60
+ round: round,
61
+ score1: score1,
62
+ score2: score2,
63
+ minute: minute,
64
+ offset: offset,
65
+ player: row['Player'],
66
+ owngoal: ['(og)', '(o.g.)'].include?( row['Extra']),
67
+ penalty: ['(pen)', '(pen.)'].include?( row['Extra']),
68
+ notes: (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes']
69
+ }
70
+
71
+ new( **attributes )
72
+ end
73
+
74
+
75
+ ## match id
76
+ attr_reader :team1,
77
+ :team2,
78
+ :round, ## optional
79
+ :date ## optional
80
+
81
+ ## main attributes
82
+ attr_reader :score1,
83
+ :score2,
84
+ :player,
85
+ :minute,
86
+ :offset,
87
+ :owngoal,
88
+ :penalty,
89
+ :notes
90
+
91
+
92
+ ## todo/check: or just use match.hash or such if match mapping known - why? why not?
93
+ def match_id
94
+ if round
95
+ "#{@team1} - #{@team2} | #{@round}"
96
+ else
97
+ "#{@team1} - #{@team2} | #{@date}"
98
+ end
99
+ end
100
+
101
+
102
+ def owngoal?() @owngoal==true; end
103
+ def penalty?() @penalty==true; end
104
+
105
+ def initialize( team1:,
106
+ team2:,
107
+ round: nil,
108
+ date: nil,
109
+ score1:,
110
+ score2:,
111
+ player:,
112
+ minute:,
113
+ offset: nil,
114
+ owngoal: false,
115
+ penalty: false,
116
+ notes: nil
117
+ )
118
+ @team1 = team1
119
+ @team2 = team2
120
+ @round = round
121
+ @date = date
122
+
123
+ @score1 = score1
124
+ @score2 = score2
125
+ @player = player
126
+ @minute = minute
127
+ @offset = offset
128
+ @owngoal = owngoal
129
+ @penalty = penalty
130
+ @notes = notes
131
+ end
132
+
133
+
134
+ ## note: lets you normalize teams or such - acts like a Match struct
135
+ def update( **kwargs )
136
+ ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
137
+ @team1 = kwargs[:team1] if kwargs.has_key? :team1
138
+ @team2 = kwargs[:team2] if kwargs.has_key? :team2
139
+ end
140
+ end # class GoalEvent
141
+
142
+
143
+ ### extend "basic" goal struct with goal event build
144
+ class Goal ### nested (non-freestanding) inside match (match is parent)
145
+
146
+ def self.build( events ) ## check/todo - rename to build_from_event/row or such - why? why not?
147
+ ## build an array of goal structs from (csv) recs
148
+ recs = []
149
+
150
+ last_score1 = 0
151
+ last_score2 = 0
152
+
153
+ events.each do |event|
154
+
155
+ if last_score1+1 == event.score1 && last_score2 == event.score2
156
+ team = 1
157
+ elsif last_score2+1 == event.score2 && last_score1 == event.score1
158
+ team = 2
159
+ else
160
+ puts "!! ERROR - unexpected score advance (one goal at a time expected):"
161
+ puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
162
+ exit 1
163
+ end
164
+
165
+ last_score1 = event.score1
166
+ last_score2 = event.score2
167
+
168
+
169
+ attributes = {
170
+ score1: event.score1,
171
+ score2: event.score2,
172
+ team: team,
173
+ minute: event.minute,
174
+ offset: event.offset,
175
+ player: event.player,
176
+ owngoal: event.owngoal,
177
+ penalty: event.penalty,
178
+ notes: event.notes
179
+ }
180
+
181
+ recs << new( **attributes )
182
+ end
183
+
184
+ recs
185
+ end
186
+ end # class Goal
187
+
188
+
189
+ end # module Sports
190
+
191
+
192
+
@@ -0,0 +1,28 @@
1
+
2
+ module SportDb
3
+ class CsvGoalParser
4
+
5
+
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
+ parse( txt )
9
+ end
10
+
11
+ def self.parse( txt )
12
+ new( txt ).parse
13
+ end
14
+
15
+
16
+ def initialize( txt )
17
+ @txt = txt
18
+ end
19
+
20
+ def parse
21
+ rows = parse_csv( @txt )
22
+ recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
+ ## pp recs[0]
24
+ recs
25
+ end
26
+
27
+ end # class CsvGoalParser
28
+ end # module Sports
@@ -0,0 +1,490 @@
1
+
2
+ module SportDb
3
+ class CsvMatchParser
4
+
5
+ #############
6
+ # helpers
7
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
8
+
9
+ ## check if headers incl. season - if yes, it has priority over col mapping
10
+ ## e.g. no need to specify twice (if using headers)
11
+ col = headers[:season] if headers && headers[:season]
12
+
13
+ seasons = Hash.new( 0 ) ## default value is 0
14
+
15
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
16
+ ## use read_csv with block to switch to foreach!!!!
17
+ rows = read_csv( path, sep: sep )
18
+
19
+ rows.each_with_index do |row,i|
20
+ puts "[#{i}] " + row.inspect if i < 2
21
+
22
+ season = row[ col ] ## column name defaults to 'Season'
23
+ seasons[ season ] += 1
24
+ end
25
+
26
+ pp seasons
27
+
28
+ ## note: only return season keys/names (not hash with usage counter)
29
+ seasons.keys
30
+ end
31
+
32
+
33
+ ##########
34
+ # main machinery
35
+
36
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
37
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
38
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
39
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
40
+ ## CsvHash.parse( text, sep: sep )
41
+ ## end
42
+
43
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
44
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
45
+ parse( txt, headers: headers,
46
+ filters: filters,
47
+ converters: converters,
48
+ sep: sep )
49
+ end
50
+
51
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
52
+ new( txt ).parse( headers: headers,
53
+ filters: filters,
54
+ converters: converters,
55
+ sep: sep )
56
+ end
57
+
58
+
59
+ def initialize( txt )
60
+ @txt = txt
61
+ end
62
+
63
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
64
+
65
+ headers_mapping = {}
66
+
67
+ rows = parse_csv( @txt, sep: sep )
68
+
69
+ return [] if rows.empty? ## no rows / empty?
70
+
71
+
72
+ ## fix/todo: use logger!!!!
73
+ ## pp csv
74
+
75
+ if headers ## use user supplied headers if present
76
+ headers_mapping = headers_mapping.merge( headers )
77
+ else
78
+
79
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
80
+ headers = rows[0].keys
81
+ pp headers
82
+
83
+ # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
84
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
85
+
86
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
87
+ ## assume our own football.csv format, see github.com/footballcsv
88
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
89
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
90
+ headers_mapping[:date] = find_header( headers, ['Date'] )
91
+ headers_mapping[:time] = find_header( headers, ['Time'] )
92
+
93
+ ## check for all-in-one full time (ft) and half time (ht) scores?
94
+ headers_mapping[:score] = find_header( headers, ['FT'] )
95
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
96
+
97
+ headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
98
+
99
+ ## optional headers - note: find_header returns nil if header NOT found
100
+ header_stage = find_header( headers, ['Stage'] )
101
+ headers_mapping[:stage] = header_stage if header_stage
102
+
103
+ header_group = find_header( headers, ['Group'] )
104
+ headers_mapping[:group] = header_group if header_group
105
+
106
+
107
+ header_et = find_header( headers, ['ET', 'AET'] ) ## (after) extra time
108
+ headers_mapping[:score_et] = header_et if header_et
109
+
110
+ header_p = find_header( headers, ['P', 'PEN'] ) ## penalties
111
+ headers_mapping[:score_p] = header_p if header_p
112
+
113
+ header_notes = find_header( headers, ['Notes', 'Comments'] )
114
+ headers_mapping[:notes] = header_notes if header_notes
115
+
116
+
117
+ header_league = find_header( headers, ['League'] )
118
+ headers_mapping[:league] = header_league if header_league
119
+ else
120
+ ## else try footballdata.uk and others
121
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
122
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
123
+ headers_mapping[:date] = find_header( headers, ['Date'] )
124
+ headers_mapping[:time] = find_header( headers, ['Time'] )
125
+
126
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
127
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
128
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
129
+
130
+ ## check for half time scores ?
131
+ ## note: HT = Half Time
132
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
133
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
134
+ end
135
+ end
136
+
137
+ pp headers_mapping
138
+
139
+ ### todo/fix: check headers - how?
140
+ ## if present HomeTeam or HT required etc.
141
+ ## issue error/warn if not present
142
+ ##
143
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
144
+ ## exit 1
145
+ ##
146
+
147
+ matches = []
148
+
149
+ rows.each_with_index do |row,i|
150
+
151
+ ## fix/todo: use logger!!!!
152
+ ## puts "[#{i}] " + row.inspect if i < 2
153
+
154
+
155
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
156
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
157
+ skip = false
158
+ filters.each do |header, value|
159
+ if row[ header ] != value ## e.g. row['Season']
160
+ skip = true
161
+ break
162
+ end
163
+ end
164
+ next if skip ## if header values NOT matching
165
+ end
166
+
167
+
168
+ ## note:
169
+ ## add converters after filters for now (why not before filters?)
170
+ if converters ## any converters defined?
171
+ ## convert single proc shortcut to array with single converter
172
+ converters = [converters] if converters.is_a?( Proc )
173
+
174
+ ## assumes array of procs
175
+ converters.each do |converter|
176
+ row = converter.call( row )
177
+ end
178
+ end
179
+
180
+
181
+
182
+ team1 = row[ headers_mapping[ :team1 ]]
183
+ team2 = row[ headers_mapping[ :team2 ]]
184
+
185
+
186
+ ## check if data present - if not skip (might be empty row)
187
+ ## note: (old classic) csv reader returns nil for empty fields
188
+ ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
189
+ if (team1.nil? || team1.empty?) &&
190
+ (team2.nil? || team2.empty?)
191
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
192
+ pp row
193
+ next
194
+ end
195
+
196
+ ## remove possible match played counters e.g. (4) (11) etc.
197
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
198
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
199
+
200
+
201
+
202
+ col = row[ headers_mapping[ :time ]]
203
+
204
+ if col.nil?
205
+ time = nil
206
+ else
207
+ col = col.strip # make sure no leading or trailing spaces left over
208
+
209
+ if col.empty?
210
+ col =~ /^-{1,}$/ || # e.g. - or ---
211
+ col =~ /^\?{1,}$/ # e.g. ? or ???
212
+ ## note: allow missing / unknown time for match
213
+ time = nil
214
+ else
215
+ if col =~ /^\d{1,2}:\d{2}$/
216
+ time_fmt = '%H:%M' # e.g. 17:00 or 3:00
217
+ elsif col =~ /^\d{1,2}.\d{2}$/
218
+ time_fmt = '%H.%M' # e.g. 17.00 or 3.00
219
+ else
220
+ puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
221
+ ## todo/fix: add to errors/warns list - why? why not?
222
+ exit 1
223
+ end
224
+
225
+ ## todo/check: use date object (keep string?) - why? why not?
226
+ ## todo/fix: yes!! use date object!!!! do NOT use string
227
+ time = Time.strptime( col, time_fmt ).strftime( '%H:%M' )
228
+ end
229
+ end
230
+
231
+
232
+
233
+ col = row[ headers_mapping[ :date ]]
234
+ col = col.strip # make sure no leading or trailing spaces left over
235
+
236
+ if col.empty? ||
237
+ col =~ /^-{1,}$/ || # e.g. - or ---
238
+ col =~ /^\?{1,}$/ # e.g. ? or ???
239
+ ## note: allow missing / unknown date for match
240
+ date = nil
241
+ else
242
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
243
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
244
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
245
+ col = col.strip # make sure no leading or trailing spaces left over
246
+
247
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
248
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
249
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
250
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
251
+ elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/ ## "standard" / default date format
252
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
253
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
254
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
255
+ elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
256
+ date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
257
+ else
258
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
259
+ ## todo/fix: add to errors/warns list - why? why not?
260
+ exit 1
261
+ end
262
+
263
+ ## todo/check: use date object (keep string?) - why? why not?
264
+ ## todo/fix: yes!! use date object!!!! do NOT use string
265
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
266
+ end
267
+
268
+
269
+ ##
270
+ ## todo/fix: round might not always be just a simple integer number!!!
271
+ ## might be text such as Final | Leg 1 or such!!!!
272
+ round = nil
273
+ ## check for (optional) round / matchday
274
+ if headers_mapping[ :round ]
275
+ col = row[ headers_mapping[ :round ]]
276
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
277
+ ## (old attic) was: round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
278
+
279
+ ## note: make round always a string for now!!!! e.g. "1", "2" too!!
280
+ round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
281
+ ## note: allow missing round for match / defaults to nil
282
+ nil
283
+ else
284
+ col
285
+ end
286
+ end
287
+
288
+
289
+ score1 = nil
290
+ score2 = nil
291
+ score1i = nil
292
+ score2i = nil
293
+
294
+ ## check for full time scores ?
295
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
296
+ ft = [ row[ headers_mapping[ :score1 ]],
297
+ row[ headers_mapping[ :score2 ]] ]
298
+
299
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
300
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
301
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
302
+ end
303
+
304
+ ## check for half time scores ?
305
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
306
+ ht = [ row[ headers_mapping[ :score1i ]],
307
+ row[ headers_mapping[ :score2i ]] ]
308
+
309
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
310
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
311
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
312
+ end
313
+
314
+
315
+ ## check for all-in-one full time scores?
316
+ if headers_mapping[ :score ]
317
+ col = row[ headers_mapping[ :score ]]
318
+ score = parse_score( col )
319
+ if score
320
+ score1 = score[0]
321
+ score2 = score[1]
322
+ else
323
+ puts "!! ERROR - invalid score (ft) format >#{col}<:"
324
+ pp row
325
+ exit 1
326
+ end
327
+ end
328
+
329
+ if headers_mapping[ :scorei ]
330
+ col = row[ headers_mapping[ :scorei ]]
331
+ score = parse_score( col )
332
+ if score
333
+ score1i = score[0]
334
+ score2i = score[1]
335
+ else
336
+ puts "!! ERROR - invalid score (ht) format >#{col}<:"
337
+ pp row
338
+ exit 1
339
+ end
340
+ end
341
+
342
+ ####
343
+ ## try optional score - extra time (et) and penalties (p/pen)
344
+ score1et = nil
345
+ score2et = nil
346
+ score1p = nil
347
+ score2p = nil
348
+
349
+ if headers_mapping[ :score_et ]
350
+ col = row[ headers_mapping[ :score_et ]]
351
+ score = parse_score( col )
352
+ if score
353
+ score1et = score[0]
354
+ score2et = score[1]
355
+ else
356
+ puts "!! ERROR - invalid score (et) format >#{col}<:"
357
+ pp row
358
+ exit 1
359
+ end
360
+ end
361
+
362
+ if headers_mapping[ :score_p ]
363
+ col = row[ headers_mapping[ :score_p ]]
364
+ score = parse_score( col )
365
+ if score
366
+ score1p = score[0]
367
+ score2p = score[1]
368
+ else
369
+ puts "!! ERROR - invalid score (p) format >#{col}<:"
370
+ pp row
371
+ exit 1
372
+ end
373
+ end
374
+
375
+
376
+ ## try some optional headings / columns
377
+ stage = nil
378
+ if headers_mapping[ :stage ]
379
+ col = row[ headers_mapping[ :stage ]]
380
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
381
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
382
+ ## note: allow missing stage for match / defaults to "regular"
383
+ nil
384
+ elsif col == '?'
385
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
386
+ '?' ## todo/check: use unknown and NOT ? - why? why not?
387
+ else
388
+ col
389
+ end
390
+ end
391
+
392
+ group = nil
393
+ if headers_mapping[ :group ]
394
+ col = row[ headers_mapping[ :group ]]
395
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
396
+ group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
397
+ ## note: allow missing group for match / defaults to nil
398
+ nil
399
+ else
400
+ col
401
+ end
402
+ end
403
+
404
+ status = nil ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
405
+ if headers_mapping[ :notes ]
406
+ col = row[ headers_mapping[ :notes ]]
407
+ ## check for optional (match) status in notes / comments
408
+ status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
409
+ nil
410
+ else
411
+ StatusParser.parse( col ) # note: returns nil if no (match) status found
412
+ end
413
+ end
414
+
415
+
416
+ league = nil
417
+ league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
418
+
419
+
420
+ ## puts 'match attributes:'
421
+ attributes = {
422
+ date: date,
423
+ time: time,
424
+ team1: team1, team2: team2,
425
+ score1: score1, score2: score2,
426
+ score1i: score1i, score2i: score2i,
427
+ score1et: score1et, score2et: score2et,
428
+ score1p: score1p, score2p: score2p,
429
+ round: round,
430
+ stage: stage,
431
+ group: group,
432
+ status: status,
433
+ league: league
434
+ }
435
+ ## pp attributes
436
+
437
+ match = Sports::Match.new( **attributes )
438
+ matches << match
439
+ end
440
+
441
+ ## pp matches
442
+ matches
443
+ end
444
+
445
+
446
+ private
447
+
448
+ def find_header( headers, candidates )
449
+ ## todo/fix: use find from Enumerable or similar ?! - why? more idiomatic code?
450
+
451
+ candidates.each do |candidate|
452
+ return candidate if headers.include?( candidate ) ## bingo!!!
453
+ end
454
+ nil ## no matching header found!!!
455
+ end
456
+
457
+ ########
458
+ # more helpers
459
+ #
460
+
461
+ def parse_score( str )
462
+ if str.nil? ## todo/check: remove nil case - possible? - why? why not?
463
+ [nil,nil]
464
+ else
465
+ ## remove (optional single) note/footnote/endnote markers
466
+ ## e.g. (*) or (a), (b),
467
+ ## or [*], [A], [1], etc.
468
+ ## - allow (1) or maybe (*1) in the future - why? why not?
469
+ str = str.sub( /\( [a-z*] \)
470
+ |
471
+ \[ [1-9a-z*] \]
472
+ /ix, '' ).strip
473
+
474
+ if str.empty? || str == '?' || str == '-' || str == 'n/a'
475
+ [nil,nil]
476
+ ### todo/check: use regex with named capture groups here - why? why not?
477
+ elsif str =~ /^\d{1,2}[:-]\d{1,2}$/ ## sanity check scores format
478
+ score = str.split( /[:-]/ )
479
+ [score[0].to_i, score[1].to_i]
480
+ else
481
+ nil ## note: returns nil if invalid / unparseable format!!!
482
+ end
483
+ end
484
+ end # method parse_score
485
+
486
+
487
+
488
+ end # class CsvMatchParser
489
+ end # module SportDb
490
+
@@ -0,0 +1,63 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
+
13
+ ### todo/fix: move Status inside Match struct - why? why not?
14
+
15
+ class Status
16
+ # note: use a class as an "enum"-like namespace for now - why? why not?
17
+ # move class into Match e.g. Match::Status - why? why not?
18
+ CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
+ AWARDED = 'AWARDED'
20
+ POSTPONED = 'POSTPONED'
21
+ ABANDONED = 'ABANDONED'
22
+ REPLAY = 'REPLAY'
23
+ end # class Status
24
+
25
+
26
+
27
+ #
28
+ # todo/fix - move self.parse to class Status e.g.
29
+ # use Status.parse( str ) NOT StatusParser...
30
+
31
+ class StatusParser
32
+
33
+ def self.parse( str )
34
+ ## note: returns nil if no match found
35
+ ## note: english usage - cancelled (in UK), canceled (in US)
36
+ if str =~ /^(cancelled|
37
+ canceled|
38
+ can\.
39
+ )/xi
40
+ Status::CANCELLED
41
+ elsif str =~ /^(awarded|
42
+ awd\.
43
+ )/xi
44
+ Status::AWARDED
45
+ elsif str =~ /^(postponed
46
+ )/xi
47
+ Status::POSTPONED
48
+ elsif str =~ /^(abandoned|
49
+ abd\.
50
+ )/xi
51
+ Status::ABANDONED
52
+ elsif str =~ /^(replay
53
+ )/xi
54
+ Status::REPLAY
55
+ else
56
+ # no match
57
+ nil
58
+ end
59
+ end
60
+
61
+ end # class StatusParser
62
+ end # module SportDb
63
+
@@ -16,9 +16,34 @@ class LeaguePeriod
16
16
  @start_season = start_season
17
17
  @end_season = end_season
18
18
  end
19
+
20
+
21
+ def pretty_print( printer )
22
+ buf = String.new
23
+ buf << "<LeaguePeriod"
24
+ buf << " #{@key}"
25
+ buf << " (#{@start_season}-#{@end_season})" if @start_season || @end_season
26
+ buf << " -"
27
+ buf << " #{@name}"
28
+ if @name != @qname
29
+ buf << " | #{@qname}"
30
+ else
31
+ buf << "*"
32
+ end
33
+ buf << ">"
34
+
35
+ printer.text( buf )
36
+ end
19
37
  end # class LeaguePeriod
20
38
 
21
39
 
40
+ ################
41
+ # todo: add a type field -
42
+ # add a tier field - why? why not?
43
+ # e.g. league/cup (or national_league, national_cup, intl_cup, etc.?)
44
+ # e.g. 1st-tier, 2nd-tier, etc.
45
+
46
+
22
47
  class League
23
48
  attr_reader :key, :name, :country, :intl
24
49
  attr_accessor :alt_names
@@ -48,6 +73,86 @@ class League
48
73
  alias_method :club?, :clubs?
49
74
  alias_method :national_team?, :national_teams?
50
75
 
76
+
77
+ #############################
78
+ ### virtual helpers
79
+ ## 1) codes (returns uniq array of all codes in lowercase
80
+ ## incl. key, code and alt_codes in alt_names)
81
+ ## 2) names (returns uniq array of all names - with language tags stripped)
82
+ ##
83
+
84
+ ## note: split names into names AND codes
85
+ ## 1) key plus all lower case names are codes
86
+ ## 2) all upper case names are names AND codes
87
+ ## 3) all other names are names
88
+
89
+ ## only allow ascii a to z (why? why not?)
90
+ ## excludes Ö1 or such (what else?)
91
+ ## allow space and dot - why? why not?
92
+ ## e.g. HNL 1
93
+ ## NB I or NB II etc.
94
+ IS_CODE_N_NAME_RE = %r{^
95
+ [\p{Lu}0-9. ]+
96
+ $}x
97
+ ## add space (or /) - why? why not?
98
+ IS_CODE_RE = %r{^
99
+ [\p{Ll}0-9.]+
100
+ $}x
101
+
102
+
103
+ def codes
104
+ ## change/rename to more_codes - why? why not?
105
+ ## get reference (tier/canonical) codes via periods!!!!
106
+
107
+ ## note - "auto-magically" downcase code (and code'n'name matches)!!
108
+ ## note - do NOT include key as code for now!!!
109
+ ##
110
+ ## todo/check - auto-remove space from code - why? why not?
111
+ ## e.g. NB I, NB II, HNL 1 => NBI, NBII, HNL1, etc -
112
+ codes = []
113
+ alt_names.each do |name|
114
+ if IS_CODE_N_NAME_RE.match?( name )
115
+ codes << name.downcase
116
+ elsif IS_CODE_RE.match?( name )
117
+ codes << name
118
+ else ## assume name
119
+ ## do nothing - skip/ignore
120
+ end
121
+ end
122
+ codes.uniq
123
+ end
124
+
125
+
126
+ include SportDb::NameHelper # pulls-in strip_lang
127
+
128
+ def names
129
+ names = [@name]
130
+ alt_names.each do |name|
131
+ if IS_CODE_N_NAME_RE.match?( name )
132
+ names << name
133
+ elsif IS_CODE_RE.match?( name )
134
+ ## do nothing - skip/ignore
135
+ else ## assume name
136
+ names << strip_lang( name )
137
+ end
138
+ end
139
+
140
+ ## report duplicate names - why? why not
141
+ ## check for duplicates - simple check for now - fix/improve
142
+ ## todo/fix: (auto)remove duplicates - why? why not?
143
+ count = names.size
144
+ count_uniq = names.uniq.size
145
+ if count != count_uniq
146
+ puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
147
+ pp names
148
+ pp self
149
+ exit 1
150
+ end
151
+
152
+ names.uniq
153
+ end
154
+
155
+
51
156
  =begin
52
157
  @alt_names=[],
53
158
  @clubs=true,
@@ -66,7 +171,7 @@ class League
66
171
  else
67
172
  " NATIONAL TEAMS"
68
173
  end
69
- buf << ": #{@key} - #{@name}"
174
+ buf << ": #{@name}"
70
175
 
71
176
  if @alt_names && !@alt_names.empty?
72
177
  buf << "|"
@@ -83,10 +188,7 @@ class League
83
188
 
84
189
  printer.text( buf )
85
190
  end
86
-
87
-
88
-
89
-
90
191
  end # class League
91
192
 
193
+
92
194
  end # module Sports
@@ -181,12 +181,14 @@ class Match
181
181
 
182
182
 
183
183
  def as_json
184
- ##
184
+ #####
185
+ ## note - use string keys (NOT symbol for data keys)
186
+ ## for easier json compatibility
185
187
  data = {}
186
188
 
187
189
  ## check round
188
190
  if @round
189
- data[:round ] = if round.is_a?( Integer )
191
+ data['round'] = if round.is_a?( Integer )
190
192
  "Matchday #{@round}"
191
193
  else ## assume string
192
194
  @round
@@ -194,28 +196,28 @@ def as_json
194
196
  end
195
197
 
196
198
 
197
- data[:num] = @num if @num
199
+ data['num'] = @num if @num
198
200
  if @date
199
201
  ## assume 2020-09-19 date format!!
200
- data[:date] = @date.is_a?( String ) ? @date : @date.strftime('%Y-%m-%d')
202
+ data['date'] = @date.is_a?( String ) ? @date : @date.strftime('%Y-%m-%d')
201
203
 
202
- data[:time] = @time if @time
204
+ data['time'] = @time if @time
203
205
  end
204
206
 
205
- data[:team1] = @team1.is_a?( String ) ? @team1 : @team1.name
206
- data[:team2] = @team2.is_a?( String ) ? @team2 : @team2.name
207
+ data['team1'] = @team1.is_a?( String ) ? @team1 : @team1.name
208
+ data['team2'] = @team2.is_a?( String ) ? @team2 : @team2.name
207
209
 
208
- data[:score] = {}
210
+ data['score'] = {}
209
211
 
210
- data[:score][:ht] = [@score1i, @score2i] if @score1i && @score2i
211
- data[:score][:ft] = [@score1, @score2] if @score1 && @score2
212
- data[:score][:et] = [@score1et, @score2et] if @score1et && @score2et
213
- data[:score][:p] = [@score1p, @score2p] if @score1p && @score2p
212
+ data['score']['ht'] = [@score1i, @score2i] if @score1i && @score2i
213
+ data['score']['ft'] = [@score1, @score2] if @score1 && @score2
214
+ data['score']['et'] = [@score1et, @score2et] if @score1et && @score2et
215
+ data['score']['p'] = [@score1p, @score2p] if @score1p && @score2p
214
216
 
215
- data[:status] = @status if @status
217
+ data['status'] = @status if @status
216
218
 
217
- data[:group] = @group if @group
218
- data[:stage] = @stage if @stage
219
+ data['group'] = @group if @group
220
+ data['stage'] = @stage if @stage
219
221
 
220
222
  =begin
221
223
  "round": "Spieltag 1",
@@ -21,13 +21,14 @@ module Sports
21
21
  def pretty_print( printer )
22
22
  ## todo/check - how to display/format key - use () or not - why? why not?
23
23
  buf = String.new
24
- buf << "<Round: "
24
+ buf << "<Round"
25
+ buf << " AUTO" if @auto
26
+ buf << ": "
25
27
  buf << "(#{@num}) " if @num
26
28
  buf << "#{@name}, "
27
29
  buf << "#{@start_date}"
28
30
  buf << " - #{@end_date}" if @start_date != @end_date
29
31
  buf << " (knockout)" if @knockout
30
- buf << " (auto)" if @auto
31
32
  buf << ">"
32
33
 
33
34
  printer.text( buf )
@@ -64,7 +64,7 @@ class Team # shared base for clubs AND natinal_teams
64
64
 
65
65
  def initialize( **kwargs )
66
66
  @alt_names = []
67
-
67
+
68
68
  update( **kwargs ) unless kwargs.empty?
69
69
  end
70
70
 
@@ -89,14 +89,14 @@ class NationalTeam < Team
89
89
  self ## note - MUST return self for chaining
90
90
  end
91
91
 
92
- def pretty_print( printer )
92
+ def pretty_print( printer )
93
93
  buf = String.new
94
94
  buf << "<NationalTeam: #{@name}"
95
95
  ## use code from country or from team ???
96
- buf << " (#{@code})"
96
+ buf << " (#{@code})"
97
97
  buf << ">"
98
98
 
99
- printer.text( buf )
99
+ printer.text( buf )
100
100
  end
101
101
  end # class NationalTeam
102
102
 
@@ -121,8 +121,9 @@ class Club < Team
121
121
  def geos() @a == nil ? @geos : @a.geos; end
122
122
 
123
123
 
124
- def initialize( **kwargs )
125
- super
124
+ def initialize( auto: false, **kwargs )
125
+ @auto = auto
126
+ super( **kwargs ) ## todo/check - use super only or super() - must it always come first?
126
127
  end
127
128
 
128
129
  def update( **kwargs )
@@ -135,13 +136,15 @@ class Club < Team
135
136
  end
136
137
 
137
138
 
138
- def pretty_print( printer )
139
+ def pretty_print( printer )
139
140
  buf = String.new
140
- buf << "<Club: #{@name}"
141
+ buf << "<Club"
142
+ buf << " AUTO" if @auto
143
+ buf << ": #{@name}"
141
144
  buf << " (#{@country.code})" if @country
142
145
  buf << ">"
143
146
 
144
- printer.text( buf )
147
+ printer.text( buf )
145
148
  end
146
149
  end # class Club
147
150
 
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Structs
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 4
7
- PATCH = 2
6
+ MINOR = 5
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -3,6 +3,8 @@ require 'alphabets' # unaccent, downcase_i18n, variants, ...
3
3
  require 'season/formats' # Season.parse, ...
4
4
  require 'score/formats'
5
5
 
6
+ require 'cocos' # pull-in for read_csv & more
7
+
6
8
 
7
9
  ###
8
10
  # our own code
@@ -102,6 +104,38 @@ end # module SportDb
102
104
 
103
105
 
104
106
 
107
+ ###
108
+ # csv (tabular dataset) support / machinery
109
+ require_relative 'csv/match_status_parser'
110
+ require_relative 'csv/goal'
111
+ require_relative 'csv/goal_parser_csv'
112
+ require_relative 'csv/match_parser_csv'
113
+
114
+
115
+ ### add convenience shortcut helpers
116
+ module Sports
117
+ class Match
118
+ def self.read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
119
+ SportDb::CsvMatchParser.read( path,
120
+ headers: headers,
121
+ filters: filters,
122
+ converters: converters,
123
+ sep: sep )
124
+ end
125
+
126
+ def self.parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
127
+ SportDb::CsvMatchParser.parse( txt,
128
+ headers: headers,
129
+ filters: filters,
130
+ converters: converters,
131
+ sep: sep )
132
+ end
133
+ end # class Match
134
+ end # module Sports
135
+
136
+
137
+
138
+
105
139
  #####
106
140
  # note: add Sport and Football convenience alias - why? why not?
107
141
  Sport = Sports
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-structs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-17 00:00:00.000000000 Z
11
+ date: 2024-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cocos
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.4.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.4.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: alphabets
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -78,14 +92,14 @@ dependencies:
78
92
  requirements:
79
93
  - - "~>"
80
94
  - !ruby/object:Gem::Version
81
- version: '4.1'
95
+ version: '4.2'
82
96
  type: :development
83
97
  prerelease: false
84
98
  version_requirements: !ruby/object:Gem::Requirement
85
99
  requirements:
86
100
  - - "~>"
87
101
  - !ruby/object:Gem::Version
88
- version: '4.1'
102
+ version: '4.2'
89
103
  description: sportdb-structs - sport data structures for matches, scores, leagues,
90
104
  seasons, rounds, groups, teams, clubs and more
91
105
  email: gerald.bauer@gmail.com
@@ -100,6 +114,10 @@ files:
100
114
  - Manifest.txt
101
115
  - README.md
102
116
  - Rakefile
117
+ - lib/sportdb/csv/goal.rb
118
+ - lib/sportdb/csv/goal_parser_csv.rb
119
+ - lib/sportdb/csv/match_parser_csv.rb
120
+ - lib/sportdb/csv/match_status_parser.rb
103
121
  - lib/sportdb/structs.rb
104
122
  - lib/sportdb/structs/country.rb
105
123
  - lib/sportdb/structs/event_info.rb
@@ -136,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
136
154
  - !ruby/object:Gem::Version
137
155
  version: '0'
138
156
  requirements: []
139
- rubygems_version: 3.4.10
157
+ rubygems_version: 3.5.22
140
158
  signing_key:
141
159
  specification_version: 4
142
160
  summary: sportdb-structs - sport data structures for matches, scores, leagues, seasons,