sportdb-structs 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4603607b4234fbe74e0b57ed548a61a7d899041497d2e0e7f3104396b00e943a
4
- data.tar.gz: 24945611ea43fca109b2a4b993ad6bf913daee4e034762d3797799d5a8a7bea5
3
+ metadata.gz: 6efbd5615829502f2960305080f413c167a199b0c5be71fd2ca46882633bb447
4
+ data.tar.gz: 7d9025f5ac565666527eb87b67a6f94e6607b4d8cca023a5478d601b523144d1
5
5
  SHA512:
6
- metadata.gz: 236bb3e8ac4e58e16b398e2fd9bdceabbd74e527a4aa1ef2af823976f31ef7b63f7342b640829218a2c9c08effd060675db67c4ec24d17497cee8071db4df0db
7
- data.tar.gz: 7bdc16e2b06599fad689166a06bcd6b9aa4ef9ba4ddd0bb6c47a0d6aa68f6b20d9e2184ba0f211d9d9d455da6a7832ec14e9b5455aafe0e3c2c2e0c43f14dd5c
6
+ metadata.gz: 78e8dea8c1842fe14e57ecc4794263c2ab29a17d43bd45c9131aa8889c4e05f026506d43768293679c88ebb563caf47dfca9610f0a1736bb964f325fa945886d
7
+ data.tar.gz: b950d1600926c9c6394c6267a874dcc6fe17ce5cd7f06cfe34244474aaa8bce535ce598142b98b8ef3f8e17cb31c1f6d1003b1eca8331451bdc75fb3542a0ccf
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.4.2
1
+ ### 0.5.0
2
2
 
3
3
  ### 0.0.1 / 2020-08-24
4
4
 
data/Manifest.txt CHANGED
@@ -2,6 +2,10 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
+ lib/sportdb/csv/goal.rb
6
+ lib/sportdb/csv/goal_parser_csv.rb
7
+ lib/sportdb/csv/match_parser_csv.rb
8
+ lib/sportdb/csv/match_status_parser.rb
5
9
  lib/sportdb/structs.rb
6
10
  lib/sportdb/structs/country.rb
7
11
  lib/sportdb/structs/event_info.rb
data/Rakefile CHANGED
@@ -20,6 +20,7 @@ Hoe.spec 'sportdb-structs' do
20
20
  self.licenses = ['Public Domain']
21
21
 
22
22
  self.extra_deps = [
23
+ ['cocos', '>= 0.4.0'],
23
24
  ['alphabets', '>= 1.0.2'],
24
25
  ['score-formats', '>= 0.1.1'],
25
26
  ['season-formats', '>= 0.0.1'],
@@ -0,0 +1,192 @@
1
+
2
+ module Sports
3
+
4
+ ## "free-standing" goal event - for import/export in separate event / goal datafiles
5
+ ## returned by CsvGoalParser and others
6
class GoalEvent

  ## goal minute - e.g. 45' or 45.
  ##   with optional stoppage time offset - e.g. 90 +3' or 90+3'
  ## note: NOT anchored at the start (as before) - any leading text gets ignored;
  ##       the space before the plus is optional - otherwise 45+2'
  ##       gets (mis)read as minute 2 without offset
  MINUTE_RE = %r{([0-9]+)
                  (?:[ ]*
                      \+[ ]*([0-9]+)
                  )?
                  ['.]
                $}x

  ## Builds a goal event from a (csv) row
  ##   - row is a hash(-like) record with the string keys
  ##     'Match', 'Score', 'Minute', 'Player', 'Extra' and 'Notes'
  ##   - 'Match' is the match id in the format  <team1> - <team2> | <date or round>
  ##
  ## returns a new GoalEvent;
  ##  raises an ArgumentError if the match id has no two teams;
  ##  prints an error and exits (1) on an unsupported minute format
  def self.build( row )   ## rename to parse or such - why? why not?
    ## split match_id
    team_str,  more_str  = row['Match'].to_s.split( '|' )
    team1_str, team2_str = team_str.to_s.split( ' - ' )

    if team1_str.nil? || team2_str.nil?
      raise ArgumentError, "unsupported match (id) format >#{row['Match']}<"
    end

    ## note: the part after the pipe (|) is optional
    more_str = more_str.to_s.strip

    ## check if more_str is a date otherwise assume round
    date_fmt = case more_str
               when /^[A-Z]{3} [0-9]{1,2}$/i          then '%b %d'      ## Apr 4
               when /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i then '%b %d %Y'   ## Apr 4 2019
               end

    if date_fmt
      date  = Date.strptime( more_str, date_fmt )
      round = nil
    else
      date  = nil
      round = more_str.empty? ? nil : more_str
    end

    ## note: to_i turns a missing / non-numeric score into 0
    values = row['Score'].split( '-' ).map( &:strip )
    score1 = values[0].to_i
    score2 = values[1].to_i

    m = MINUTE_RE.match( row['Minute'] )
    if m.nil?
      puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
      exit 1
    end
    minute = m[1].to_i
    offset = m[2] ? m[2].to_i : nil

    new( team1:   team1_str.strip,
         team2:   team2_str.strip,
         date:    date,
         round:   round,
         score1:  score1,
         score2:  score2,
         minute:  minute,
         offset:  offset,
         player:  row['Player'],
         owngoal: ['(og)', '(o.g.)'].include?( row['Extra'] ),
         penalty: ['(pen)', '(pen.)'].include?( row['Extra'] ),
         notes:   (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes'] )
  end


  ## match id
  attr_reader :team1,
              :team2,
              :round,   ## optional
              :date     ## optional

  ## main attributes
  attr_reader :score1,
              :score2,
              :player,
              :minute,
              :offset,
              :owngoal,
              :penalty,
              :notes


  ## Returns the match id as a string - uses the round if present
  ##  otherwise the date
  ## todo/check: or just use match.hash or such if match mapping known - why? why not?
  def match_id
    if round
      "#{@team1} - #{@team2} | #{@round}"
    else
      "#{@team1} - #{@team2} | #{@date}"
    end
  end


  def owngoal?() @owngoal==true; end
  def penalty?() @penalty==true; end

  def initialize( team1:,
                  team2:,
                  round:   nil,
                  date:    nil,
                  score1:,
                  score2:,
                  player:,
                  minute:,
                  offset:  nil,
                  owngoal: false,
                  penalty: false,
                  notes:   nil
                )
    @team1   = team1
    @team2   = team2
    @round   = round
    @date    = date

    @score1  = score1
    @score2  = score2
    @player  = player
    @minute  = minute
    @offset  = offset
    @owngoal = owngoal
    @penalty = penalty
    @notes   = notes
  end


  ## note: lets you use normalize teams or such; acts like a Match struct
  ##   only team1 and team2 get updated - all other keys get ignored
  def update( **kwargs )
    ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
    @team1 = kwargs[:team1]   if kwargs.has_key?( :team1 )
    @team2 = kwargs[:team2]   if kwargs.has_key?( :team2 )

    self   ## note - return self for chaining
  end
end  # class GoalEvent
141
+
142
+
143
+ ### extend "basic" goal struct with goal event build
144
class Goal  ### nested (non-freestanding) inside match (match is parent)

  ## Builds an array of goal structs from (free-standing) goal events
  ##   - events must be in scoring order for ONE match
  ##     and respond to score1, score2, minute, offset, player,
  ##     owngoal, penalty and notes
  ##   - the scoring team (1 or 2) gets derived from the score advance
  ##     starting with 0-0; one goal at a time expected
  ##
  ## prints an error and exits (1) on any other score advance
  def self.build( events )  ## check/todo - rename to build_from_event/row or such - why? why not?
    last_score1 = 0
    last_score2 = 0

    events.map do |event|
      team = if last_score1+1 == event.score1 && last_score2 == event.score2
               1
             elsif last_score2+1 == event.score2 && last_score1 == event.score1
               2
             else
               puts "!! ERROR - unexpected score advance (one goal at a time expected):"
               puts "    #{last_score1}-#{last_score2} => #{event.score1}-#{event.score2}"
               exit 1
             end

      last_score1 = event.score1
      last_score2 = event.score2

      new( score1:  event.score1,
           score2:  event.score2,
           team:    team,
           minute:  event.minute,
           offset:  event.offset,
           player:  event.player,
           owngoal: event.owngoal,
           penalty: event.penalty,
           notes:   event.notes )
    end
  end
end  # class Goal
187
+
188
+
189
+ end # module Sports
190
+
191
+
192
+
@@ -0,0 +1,28 @@
1
+
2
module SportDb
  ## Reads "free-standing" goal events from a csv datafile (or text);
  ##  every csv row gets turned into a Sports::GoalEvent
  class CsvGoalParser

    ## read the datafile at path and parse
    def self.read( path )
      ## note: make sure to use (assume) utf-8
      parse( File.read( path, encoding: 'utf-8' ) )
    end

    ## parse the csv text; returns an array of goal events
    def self.parse( txt )
      parser = new( txt )
      parser.parse
    end


    def initialize( txt )
      @txt = txt
    end

    def parse
      ## note: parse_csv is NOT defined here - gets pulled-in from elsewhere
      parse_csv( @txt ).map do |row|
        Sports::GoalEvent.build( row )
      end
    end

  end # class CsvGoalParser
end # module SportDb
@@ -0,0 +1,490 @@
1
+
2
module SportDb
## Reads matches from a csv datafile (or text) and returns an array of
##  Sports::Match structs; supports the football.csv format
##  (Team 1, Team 2, FT, HT, ...) and the footballdata.uk-style format
##  (HomeTeam, AwayTeam, FTHG, FTAG, ...)
##
##  note: read_csv / parse_csv are NOT defined here - get pulled-in from elsewhere
class CsvMatchParser

  #############
  # helpers

  ## Returns all season names (column values) found in the datafile at path
  ##   - col is the column name; headers[:season] (if present) has priority
  def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
    ## check if headers incl. season; if yes, has priority over col mapping
    ##   e.g. no need to specify twice (if using headers)
    col = headers[:season]   if headers && headers[:season]

    seasons = Hash.new( 0 )   ## default value is 0

    ## todo/fix: yes, use CsvHash.foreach - why? why not?
    ##   use read_csv with block to switch to foreach!!!!
    read_csv( path, sep: sep ).each_with_index do |row,i|
      puts "[#{i}] " + row.inspect  if i < 2

      seasons[ row[ col ] ] += 1   ## column name defaults to 'Season'
    end

    pp seasons

    ## note: only return season keys/names (not hash with usage counter)
    seasons.keys
  end


  ##########
  # main machinery

  ## read the datafile at path and parse; see parse for the options
  def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
    txt = File.open( path, 'r:utf-8' ) {|f| f.read }   ## note: make sure to use (assume) utf-8
    parse( txt, headers:    headers,
                filters:    filters,
                converters: converters,
                sep:        sep )
  end

  ## parse the csv text; see (the instance method) parse for the options
  def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
    new( txt ).parse( headers:    headers,
                      filters:    filters,
                      converters: converters,
                      sep:        sep )
  end


  def initialize( txt )
    @txt = txt
  end

  ## Parses the csv text into an array of Sports::Match structs
  ##   - headers:    (optional) mapping of keys (:team1, :team2, :date, ...)
  ##                  to column names; if missing the mapping gets
  ##                  guessed from the column names of the first row
  ##   - filters:    (optional) hash of column name => value;
  ##                  rows NOT matching all values get skipped
  ##   - converters: (optional) proc or array of procs; called with the row
  ##                  (after filters) - the return value is the new row
  ##   - sep:        passed on to parse_csv
  ##
  ##  prints an error and exits (1) on unknown date, time or score formats
  def parse( headers: nil, filters: nil, converters: nil, sep: nil )
    rows = parse_csv( @txt, sep: sep )

    return []  if rows.empty?     ## no rows / empty?

    ## fix/todo: use logger!!!!
    headers_mapping = if headers   ## use user supplied headers if present
                        {}.merge( headers )
                      else
                        ## note: array of strings (header names) - assume all rows have the same columns/fields!!!
                        names = rows[0].keys
                        pp names
                        guess_headers( names )
                      end

    pp headers_mapping

    ## convert single proc shortcut to array with single converter
    ##   note: done once up-front (and not for every row)
    converters = [converters]  if converters.is_a?( Proc )

    matches = []

    rows.each_with_index do |row,i|
      ## filters MUST match if present e.g. row['Season'] == '2017/2018'
      next if filters && filters.any? { |header, value| row[ header ] != value }

      ## note: converters run after filters for now (why not before filters?)
      converters.each { |converter| row = converter.call( row ) }  if converters

      team1 = row[ headers_mapping[ :team1 ]]
      team2 = row[ headers_mapping[ :team2 ]]

      ## check if data present - if not skip (might be empty row)
      ##  note:  (old classic) csv reader returns nil for empty fields
      ##         new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
      if blank?( team1 ) && blank?( team2 )
        puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
        pp row
        next
      end

      ## remove possible match played counters e.g. (4) (11) etc.
      ##  note: to_s makes a single missing (nil) team an empty string
      ##         (same as with the modern csv reader) instead of a crash
      team1 = team1.to_s.sub( /\(\d+\)/, '' ).strip
      team2 = team2.to_s.sub( /\(\d+\)/, '' ).strip

      time = parse_time( row[ headers_mapping[ :time ]] )
      date = parse_date( row[ headers_mapping[ :date ]] )

      ## note: round is always a string for now!!!! e.g. "1", "2" too!!
      ## todo/fix: round might be text such as Final | Leg 1 or such!!!!
      round = optional_value( row, headers_mapping[ :round ] )

      score1  = score2  = nil
      score1i = score2i = nil

      ## check for full time scores (in two columns) ?
      if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
        score1 = parse_goals( row[ headers_mapping[ :score1 ]] )
        score2 = parse_goals( row[ headers_mapping[ :score2 ]] )
      end

      ## check for half time scores (in two columns) ?
      if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
        score1i = parse_goals( row[ headers_mapping[ :score1i ]] )
        score2i = parse_goals( row[ headers_mapping[ :score2i ]] )
      end

      ## check for all-in-one full time / half time scores?
      if headers_mapping[ :score ]
        score1, score2 = parse_score_or_exit( row, headers_mapping[ :score ], 'ft' )
      end

      if headers_mapping[ :scorei ]
        score1i, score2i = parse_score_or_exit( row, headers_mapping[ :scorei ], 'ht' )
      end

      ####
      ## try optional score - extra time (et) and penalties (p/pen)
      score1et = score2et = nil
      score1p  = score2p  = nil

      if headers_mapping[ :score_et ]
        score1et, score2et = parse_score_or_exit( row, headers_mapping[ :score_et ], 'et' )
      end

      if headers_mapping[ :score_p ]
        score1p, score2p = parse_score_or_exit( row, headers_mapping[ :score_p ], 'p' )
      end

      ## try some optional headings / columns
      ##   note: an explicit unknown stage (?) is kept as ? for now
      stage = optional_value( row, headers_mapping[ :stage ] )
      group = optional_value( row, headers_mapping[ :group ] )

      ## check for optional (match) status in notes / comments
      ##    e.g. AWARDED, CANCELLED, POSTPONED, etc.
      notes  = optional_value( row, headers_mapping[ :notes ] )
      status = notes ? StatusParser.parse( notes ) : nil   # note: returns nil if no (match) status found

      league = headers_mapping[ :league ] ? row[ headers_mapping[ :league ]] : nil

      matches << Sports::Match.new( date:     date,
                                    time:     time,
                                    team1:    team1,    team2:    team2,
                                    score1:   score1,   score2:   score2,
                                    score1i:  score1i,  score2i:  score2i,
                                    score1et: score1et, score2et: score2et,
                                    score1p:  score1p,  score2p:  score2p,
                                    round:    round,
                                    stage:    stage,
                                    group:    group,
                                    status:   status,
                                    league:   league )
    end

    matches
  end


  private

  ## returns the first candidate found in headers or nil if none found
  def find_header( headers, candidates )
    candidates.find { |candidate| headers.include?( candidate ) }
  end

  ## guess the headers mapping from the column names (array of strings)
  def guess_headers( names )
    # note: greece 2001-02 etc. use HT  -  check CVS reader  row['HomeTeam'] may not be nil but an empty string?
    #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now
    mapping = {}

    if find_header( names, ['Team 1'] ) && find_header( names, ['Team 2'] )
      ## assume our own football.csv format, see github.com/footballcsv
      mapping[:team1]  = find_header( names, ['Team 1'] )
      mapping[:team2]  = find_header( names, ['Team 2'] )
      mapping[:date]   = find_header( names, ['Date'] )
      mapping[:time]   = find_header( names, ['Time'] )

      ## check for all-in-one full time (ft) and half time (ht) scores?
      mapping[:score]  = find_header( names, ['FT'] )
      mapping[:scorei] = find_header( names, ['HT'] )

      mapping[:round]  = find_header( names, ['Round', 'Matchday'] )

      ## optional headers - note: only added if the header is found
      { stage:    ['Stage'],
        group:    ['Group'],
        score_et: ['ET', 'AET'],          ## (after) extra time
        score_p:  ['P', 'PEN'],           ## penalties
        notes:    ['Notes', 'Comments'],
        league:   ['League'],
      }.each do |key, candidates|
        name = find_header( names, candidates )
        mapping[ key ] = name   if name
      end
    else
      ## else try footballdata.uk and others
      mapping[:team1]   = find_header( names, ['HomeTeam', 'HT', 'Home'] )
      mapping[:team2]   = find_header( names, ['AwayTeam', 'AT', 'Away'] )
      mapping[:date]    = find_header( names, ['Date'] )
      mapping[:time]    = find_header( names, ['Time'] )

      ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
      mapping[:score1]  = find_header( names, ['FTHG', 'HG'] )
      mapping[:score2]  = find_header( names, ['FTAG', 'AG'] )

      ## check for half time scores ?
      ##  note: HT = Half Time
      mapping[:score1i] = find_header( names, ['HTHG'] )
      mapping[:score2i] = find_header( names, ['HTAG'] )
    end

    mapping
  end


  ########
  # more helpers
  #

  ## nil or empty string?
  def blank?( str )
    str.nil? || str.empty?
  end

  ## returns the value of the (optional) column or
  ##   nil if no header or if the value is missing (empty, - or n/a)
  def optional_value( row, header )
    return nil  unless header

    col = row[ header ]
    (col.nil? || col.empty? || col == '-' || col == 'n/a') ? nil : col
  end

  ## returns the number of goals or nil if not a one or two digit number
  ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
  def parse_goals( str )
    str =~ /^\d{1,2}$/ ? str.to_i : nil
  end

  ## returns the time as a string in the format hh:mm
  ##   or nil if missing / unknown (empty, - or ---, ? or ???)
  def parse_time( col )
    return nil  if col.nil?

    col = col.strip   # make sure not leading or trailing spaces left over

    ## note: allow missing / unknown time for match
    return nil  if col.empty? ||
                   col =~ /^-{1,}$/ ||     # e.g. - or ---
                   col =~ /^\?{1,}$/       # e.g. ? or ???

    time_fmt = if col =~ /^\d{1,2}:\d{2}$/
                 '%H:%M'   # e.g. 17:00 or 3:00
               elsif col =~ /^\d{1,2}\.\d{2}$/
                 '%H.%M'   # e.g. 17.00 or 3.00
               else
                 puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
                 ## todo/fix: add to errors/warns list - why? why not?
                 exit 1
               end

    ## todo/check: use date object (keep string?) - why? why not?
    Time.strptime( col, time_fmt ).strftime( '%H:%M' )
  end

  ## returns the date as a string in the format yyyy-mm-dd
  ##   or nil if missing / unknown (empty, - or ---, ? or ???)
  def parse_date( col )
    return nil  if col.nil?   ## e.g. no date column

    col = col.strip   # make sure not leading or trailing spaces left over

    ## note: allow missing / unknown date for match
    return nil  if col.empty? ||
                   col =~ /^-{1,}$/ ||     # e.g. - or ---
                   col =~ /^\?{1,}$/       # e.g. ? or ???

    ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
    col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
    col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
    col = col.strip   # make sure not leading or trailing spaces left over

    date_fmt = case col
               when /^\d{2}\/\d{2}\/\d{4}$/        then '%d/%m/%Y'      # e.g. 17/08/2002
               when /^\d{2}\/\d{2}\/\d{2}$/        then '%d/%m/%y'      # e.g. 17/08/02
               when /^\d{4}-\d{1,2}-\d{1,2}$/      then '%Y-%m-%d'      # e.g. 1995-08-04 ("standard" / default)
               when /^\d{1,2} \w{3} \d{4}$/        then '%d %b %Y'      # e.g. 8 Jul 2017
               when /^\w{3} \w{3} \d{1,2} \d{4}$/  then '%a %b %d %Y'   # e.g. Sat Aug 7 1993
               else
                 puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
                 ## todo/fix: add to errors/warns list - why? why not?
                 exit 1
               end

    ## todo/check: use date object (keep string?) - why? why not?
    Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
  end

  ## returns the score pair of the all-in-one score column (header)
  ##   or prints an error and exits (1) on an invalid format;
  ##   label (ft, ht, et, p) is for the error message only
  def parse_score_or_exit( row, header, label )
    col   = row[ header ]
    score = parse_score( col )
    if score.nil?
      puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
      pp row
      exit 1
    end
    score
  end

  ## returns the score pair e.g. [1,2] for 1-2 or 1:2,
  ##   [nil,nil] if missing / unknown (nil, empty, ?, - or n/a)
  ##   or nil if the format is invalid / unparseable
  def parse_score( str )
    ## todo/check: remove nil case - possible? - why? why not?
    return [nil,nil]  if str.nil?

    ## remove (optional single) note/footnote/endnote markers
    ##    e.g. (*) or (a), (b),
    ##      or [*], [A], [1], etc.
    ##   - allow (1) or maybe (*1) in the future - why? why not?
    str = str.sub( /\( [a-z*] \)
                      |
                    \[ [1-9a-z*] \]
                   /ix, '' ).strip

    if str.empty? || str == '?' || str == '-' || str == 'n/a'
      [nil,nil]
    ### todo/check: use regex with named capture groups here - why? why not?
    elsif str =~ /^\d{1,2}[:-]\d{1,2}$/   ## sanity check scores format
      score = str.split( /[:-]/ )
      [score[0].to_i, score[1].to_i]
    else
      nil  ## note: returns nil if invalid / unparseable format!!!
    end
  end # method parse_score

end # class CsvMatchParser
end # module SportDb
490
+
@@ -0,0 +1,63 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
module SportDb


### todo/fix: move Status inside Match struct - why? why not?

class Status
# note: use a class as an "enum"-like namespace for now - why? why not?
#   move class into Match  e.g. Match::Status  - why? why not?
  CANCELLED = 'CANCELLED'   # canceled (US spelling), cancelled (UK spelling) - what to use?
  AWARDED   = 'AWARDED'
  POSTPONED = 'POSTPONED'
  ABANDONED = 'ABANDONED'
  REPLAY    = 'REPLAY'
end # class Status



#
# todo/fix - move self.parse to class Status e.g.
#   use Status.parse( str )   NOT StatusParser...

class StatusParser

  ## (ordered) list of regex / status pairs; first match wins
  ##  note: matches at the start of a line (case-insensitive);
  ##        leading whitespace is allowed - the notes might not be stripped
  ##  note: english usage - cancelled (in UK), canceled (in US)
  PATTERNS = [
    [/^\s*(?:cancelled|canceled|can\.)/i,  Status::CANCELLED],
    [/^\s*(?:awarded|awd\.)/i,             Status::AWARDED],
    [/^\s*(?:postponed)/i,                 Status::POSTPONED],
    [/^\s*(?:abandoned|abd\.)/i,           Status::ABANDONED],
    [/^\s*(?:replay)/i,                    Status::REPLAY],
  ].freeze

  ## Returns the (match) status (one of the Status constants)
  ##   found at the start of str (e.g. the notes / comments)
  ##   or nil if no match found (or if str is nil)
  def self.parse( str )
    _, status = PATTERNS.find { |regex, _status| regex.match?( str ) }
    status
  end

end # class StatusParser
end # module SportDb
63
+
@@ -16,9 +16,34 @@ class LeaguePeriod
16
16
  @start_season = start_season
17
17
  @end_season = end_season
18
18
  end
19
+
20
+
21
## Pretty print (pp) support - writes a one-line summary to the printer
##   in the format  <LeaguePeriod key (start-end) - name | qname>
##   - the (start-end) seasons get skipped if both are missing
##   - a star (*) gets added instead of the qname if name and qname are the same
def pretty_print( printer )
  seasons = (@start_season || @end_season) ? " (#{@start_season}-#{@end_season})" : ''
  qname   = (@name != @qname)              ? " | #{@qname}"                      : '*'

  printer.text( "<LeaguePeriod #{@key}#{seasons} - #{@name}#{qname}>" )
end
19
37
  end # class LeaguePeriod
20
38
 
21
39
 
40
+ ################
41
+ # todo: add a type field -
42
+ # add a tier field - why? why not?
43
+ # e.g. league/cup (or national_league, national_cup, intl_cup, etc.?)
44
+ # e.g. 1st-tier, 2nd-tier, etc.
45
+
46
+
22
47
  class League
23
48
  attr_reader :key, :name, :country, :intl
24
49
  attr_accessor :alt_names
@@ -48,6 +73,86 @@ class League
48
73
  alias_method :club?, :clubs?
49
74
  alias_method :national_team?, :national_teams?
50
75
 
76
+
77
+ #############################
78
+ ### virtual helpers
79
+ ## 1) codes (returns uniq array of all codes in lowercase
80
+ ## incl. key, code and alt_codes in alt_names)
81
+ ## 2) names (returns uniq array of all names - with language tags stripped)
82
+ ##
83
+
84
+ ## note: split names into names AND codes
85
+ ## 1) key plus all lower case names are codes
86
+ ## 2) all upper case names are names AND codes
87
+ ## 3) all other names are names
88
+
89
## a name made up of uppercase letters, digits, dots and spaces only
##   is a code AND a name
##     e.g. HNL 1
##          NB I or NB II etc.
##   note: \p{Lu} matches any (unicode) uppercase letter (not just ascii A to Z),
##          that is, Ö1 or such matches too
IS_CODE_N_NAME_RE = /^[\p{Lu}0-9. ]+$/

## a name made up of lowercase letters, digits and dots only is a code
##   add space (or /) - why? why not?
IS_CODE_RE        = /^[\p{Ll}0-9.]+$/
101
+
102
+
103
## Returns the (unique) codes found in the alt(ernative) names
##   - all uppercase names (code'n'names) get "auto-magically" downcased
##   - all lowercase names (codes) get added as is
##   - all other names get skipped / ignored
##
##  note - does NOT include key as code for now!!!
##  change/rename to more_codes - why? why?
##    get reference (tier/canonicial) codes via periods!!!!
##  todo/check - auto-remove space from code - why? why not?
##    e.g. NB I, NB II, HNL 1 => NBI, NBII, HBNL1, etc -
def codes
  found = alt_names.each_with_object( [] ) do |name, acc|
    if IS_CODE_N_NAME_RE.match?( name )
      acc << name.downcase
    elsif IS_CODE_RE.match?( name )
      acc << name
    end
  end

  found.uniq
end
124
+
125
+
126
+ include SportDb::NameHelper # pulls-in strip_lang
127
+
128
## Returns all names - the name first followed by the alt(ernative) names
##   - all uppercase names (code'n'names) get added as is
##   - all lowercase names (codes) get skipped / ignored
##   - all other names get passed through strip_lang first
##
##  prints an error and exits (1) if there are any duplicate names
def names
  all = alt_names.each_with_object( [@name] ) do |name, acc|
    if IS_CODE_N_NAME_RE.match?( name )
      acc << name
    elsif !IS_CODE_RE.match?( name )     ## assume name
      acc << strip_lang( name )
    end
  end

  ## report duplicate names - why? why not
  ##   check for duplicates - simple check for now - fix/improve
  ## todo/fix: (auto)remove duplicates - why? why not?
  unique     = all.uniq
  duplicates = all.size - unique.size
  unless duplicates.zero?
    puts "** !!! ERROR !!! - #{duplicates} duplicate name(s):"
    pp all
    pp self
    exit 1
  end

  unique
end
154
+
155
+
51
156
  =begin
52
157
  @alt_names=[],
53
158
  @clubs=true,
@@ -66,7 +171,7 @@ class League
66
171
  else
67
172
  " NATIONAL TEAMS"
68
173
  end
69
- buf << ": #{@key} - #{@name}"
174
+ buf << ": #{@name}"
70
175
 
71
176
  if @alt_names && !@alt_names.empty?
72
177
  buf << "|"
@@ -83,10 +188,7 @@ class League
83
188
 
84
189
  printer.text( buf )
85
190
  end
86
-
87
-
88
-
89
-
90
191
  end # class League
91
192
 
193
+
92
194
  end # module Sports
@@ -181,12 +181,14 @@ class Match
181
181
 
182
182
 
183
183
  def as_json
184
- ##
184
+ #####
185
+ ## note - use string keys (NOT symbol for data keys)
186
+ ## for easier json compatibility
185
187
  data = {}
186
188
 
187
189
  ## check round
188
190
  if @round
189
- data[:round ] = if round.is_a?( Integer )
191
+ data['round'] = if round.is_a?( Integer )
190
192
  "Matchday #{@round}"
191
193
  else ## assume string
192
194
  @round
@@ -194,28 +196,28 @@ def as_json
194
196
  end
195
197
 
196
198
 
197
- data[:num] = @num if @num
199
+ data['num'] = @num if @num
198
200
  if @date
199
201
  ## assume 2020-09-19 date format!!
200
- data[:date] = @date.is_a?( String ) ? @date : @date.strftime('%Y-%m-%d')
202
+ data['date'] = @date.is_a?( String ) ? @date : @date.strftime('%Y-%m-%d')
201
203
 
202
- data[:time] = @time if @time
204
+ data['time'] = @time if @time
203
205
  end
204
206
 
205
- data[:team1] = @team1.is_a?( String ) ? @team1 : @team1.name
206
- data[:team2] = @team2.is_a?( String ) ? @team2 : @team2.name
207
+ data['team1'] = @team1.is_a?( String ) ? @team1 : @team1.name
208
+ data['team2'] = @team2.is_a?( String ) ? @team2 : @team2.name
207
209
 
208
- data[:score] = {}
210
+ data['score'] = {}
209
211
 
210
- data[:score][:ht] = [@score1i, @score2i] if @score1i && @score2i
211
- data[:score][:ft] = [@score1, @score2] if @score1 && @score2
212
- data[:score][:et] = [@score1et, @score2et] if @score1et && @score2et
213
- data[:score][:p] = [@score1p, @score2p] if @score1p && @score2p
212
+ data['score']['ht'] = [@score1i, @score2i] if @score1i && @score2i
213
+ data['score']['ft'] = [@score1, @score2] if @score1 && @score2
214
+ data['score']['et'] = [@score1et, @score2et] if @score1et && @score2et
215
+ data['score']['p'] = [@score1p, @score2p] if @score1p && @score2p
214
216
 
215
- data[:status] = @status if @status
217
+ data['status'] = @status if @status
216
218
 
217
- data[:group] = @group if @group
218
- data[:stage] = @stage if @stage
219
+ data['group'] = @group if @group
220
+ data['stage'] = @stage if @stage
219
221
 
220
222
  =begin
221
223
  "round": "Spieltag 1",
@@ -21,13 +21,14 @@ module Sports
21
21
  def pretty_print( printer )
22
22
  ## todo/check - how to display/format key - use () or not - why? why not?
23
23
  buf = String.new
24
- buf << "<Round: "
24
+ buf << "<Round"
25
+ buf << " AUTO" if @auto
26
+ buf << ": "
25
27
  buf << "(#{@num}) " if @num
26
28
  buf << "#{@name}, "
27
29
  buf << "#{@start_date}"
28
30
  buf << " - #{@end_date}" if @start_date != @end_date
29
31
  buf << " (knockout)" if @knockout
30
- buf << " (auto)" if @auto
31
32
  buf << ">"
32
33
 
33
34
  printer.text( buf )
@@ -64,7 +64,7 @@ class Team # shared base for clubs AND natinal_teams
64
64
 
65
65
  def initialize( **kwargs )
66
66
  @alt_names = []
67
-
67
+
68
68
  update( **kwargs ) unless kwargs.empty?
69
69
  end
70
70
 
@@ -89,14 +89,14 @@ class NationalTeam < Team
89
89
  self ## note - MUST return self for chaining
90
90
  end
91
91
 
92
- def pretty_print( printer )
92
+ def pretty_print( printer )
93
93
  buf = String.new
94
94
  buf << "<NationalTeam: #{@name}"
95
95
  ## use code from country or from team ???
96
- buf << " (#{@code})"
96
+ buf << " (#{@code})"
97
97
  buf << ">"
98
98
 
99
- printer.text( buf )
99
+ printer.text( buf )
100
100
  end
101
101
  end # class NationalTeam
102
102
 
@@ -121,8 +121,9 @@ class Club < Team
121
121
  def geos() @a == nil ? @geos : @a.geos; end
122
122
 
123
123
 
124
- def initialize( **kwargs )
125
- super
124
+ def initialize( auto: false, **kwargs )
125
+ @auto = auto
126
+ super( **kwargs ) ## todo/check - use super only or super() - must it always come first?
126
127
  end
127
128
 
128
129
  def update( **kwargs )
@@ -135,13 +136,15 @@ class Club < Team
135
136
  end
136
137
 
137
138
 
138
- def pretty_print( printer )
139
+ def pretty_print( printer )
139
140
  buf = String.new
140
- buf << "<Club: #{@name}"
141
+ buf << "<Club"
142
+ buf << " AUTO" if @auto
143
+ buf << ": #{@name}"
141
144
  buf << " (#{@country.code})" if @country
142
145
  buf << ">"
143
146
 
144
- printer.text( buf )
147
+ printer.text( buf )
145
148
  end
146
149
  end # class Club
147
150
 
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Structs
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 4
7
- PATCH = 2
6
+ MINOR = 5
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -3,6 +3,8 @@ require 'alphabets' # unaccent, downcase_i18n, variants, ...
3
3
  require 'season/formats' # Season.parse, ...
4
4
  require 'score/formats'
5
5
 
6
+ require 'cocos' # pull-in for read_csv & more
7
+
6
8
 
7
9
  ###
8
10
  # our own code
@@ -102,6 +104,38 @@ end # module SportDb
102
104
 
103
105
 
104
106
 
107
+ ###
108
+ # csv (tabular dataset) support / machinery
109
+ require_relative 'csv/match_status_parser'
110
+ require_relative 'csv/goal'
111
+ require_relative 'csv/goal_parser_csv'
112
+ require_relative 'csv/match_parser_csv'
113
+
114
+
115
+ ### add convenience shortcut helpers
116
+ module Sports
117
+ class Match
118
+ def self.read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
119
+ SportDb::CsvMatchParser.read( path,
120
+ headers: headers,
121
+ filters: filters,
122
+ converters: converters,
123
+ sep: sep )
124
+ end
125
+
126
+ def self.parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
127
+ SportDb::CsvMatchParser.parse( txt,
128
+ headers: headers,
129
+ filters: filters,
130
+ converters: converters,
131
+ sep: sep )
132
+ end
133
+ end # class Match
134
+ end # module Sports
135
+
136
+
137
+
138
+
105
139
  #####
106
140
  # note: add Sport and Football convenience alias - why? why not?
107
141
  Sport = Sports
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-structs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-17 00:00:00.000000000 Z
11
+ date: 2024-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cocos
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.4.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.4.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: alphabets
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -78,14 +92,14 @@ dependencies:
78
92
  requirements:
79
93
  - - "~>"
80
94
  - !ruby/object:Gem::Version
81
- version: '4.1'
95
+ version: '4.2'
82
96
  type: :development
83
97
  prerelease: false
84
98
  version_requirements: !ruby/object:Gem::Requirement
85
99
  requirements:
86
100
  - - "~>"
87
101
  - !ruby/object:Gem::Version
88
- version: '4.1'
102
+ version: '4.2'
89
103
  description: sportdb-structs - sport data structures for matches, scores, leagues,
90
104
  seasons, rounds, groups, teams, clubs and more
91
105
  email: gerald.bauer@gmail.com
@@ -100,6 +114,10 @@ files:
100
114
  - Manifest.txt
101
115
  - README.md
102
116
  - Rakefile
117
+ - lib/sportdb/csv/goal.rb
118
+ - lib/sportdb/csv/goal_parser_csv.rb
119
+ - lib/sportdb/csv/match_parser_csv.rb
120
+ - lib/sportdb/csv/match_status_parser.rb
103
121
  - lib/sportdb/structs.rb
104
122
  - lib/sportdb/structs/country.rb
105
123
  - lib/sportdb/structs/event_info.rb
@@ -136,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
136
154
  - !ruby/object:Gem::Version
137
155
  version: '0'
138
156
  requirements: []
139
- rubygems_version: 3.4.10
157
+ rubygems_version: 3.5.22
140
158
  signing_key:
141
159
  specification_version: 4
142
160
  summary: sportdb-structs - sport data structures for matches, scores, leagues, seasons,