sportdb-structs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6c25c7b64beba8af786dfd7140966707d859dc00
4
+ data.tar.gz: acb61fc5862d1d18aebcdec9a86d249fbfcd6fcb
5
+ SHA512:
6
+ metadata.gz: ec8c7ae81f43d71285e38ef1c371c8349f31466513ba64762b31ad8563a48a69788c29d3cceeaa1cce00fcd763dcc339f98a054e21f297a883652d0895ef07a5
7
+ data.tar.gz: 6309474ac08d69ded6f8af10d771a0faf83abcc41f98251bb8eae5b4851daa8d5a8c405defa8ee66c33a279151a93faca79d585351e8c256f84df300e9f2653c
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2020-08-24
2
+
3
+ * Everything is new. First release.
@@ -0,0 +1,29 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/sportdb/structs.rb
6
+ lib/sportdb/structs/config.rb
7
+ lib/sportdb/structs/goal_parser_csv.rb
8
+ lib/sportdb/structs/match_parser_csv.rb
9
+ lib/sportdb/structs/match_status_parser.rb
10
+ lib/sportdb/structs/name_helper.rb
11
+ lib/sportdb/structs/season.rb
12
+ lib/sportdb/structs/structs/country.rb
13
+ lib/sportdb/structs/structs/goal.rb
14
+ lib/sportdb/structs/structs/group.rb
15
+ lib/sportdb/structs/structs/league.rb
16
+ lib/sportdb/structs/structs/match.rb
17
+ lib/sportdb/structs/structs/matchlist.rb
18
+ lib/sportdb/structs/structs/round.rb
19
+ lib/sportdb/structs/structs/standings.rb
20
+ lib/sportdb/structs/structs/team.rb
21
+ lib/sportdb/structs/structs/team_usage.rb
22
+ lib/sportdb/structs/version.rb
23
+ test/helper.rb
24
+ test/test_clubs.rb
25
+ test/test_csv_reader.rb
26
+ test/test_match.rb
27
+ test/test_match_status_parser.rb
28
+ test/test_name_helper.rb
29
+ test/test_season.rb
@@ -0,0 +1,29 @@
1
+ # sportdb-structs - sport data structures for matches, scores, leagues, seasons, rounds, groups, teams, clubs and more
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/sportdb-structs](https://rubygems.org/gems/sportdb-structs)
7
+ * rdoc :: [rubydoc.info/gems/sportdb-structs](http://rubydoc.info/gems/sportdb-structs)
8
+ * forum :: [opensport](http://groups.google.com/group/opensport)
9
+
10
+
11
+
12
+ ## Usage
13
+
14
+ To be done
15
+
16
+
17
+
18
+
19
+ ## License
20
+
21
+ The `sportdb-structs` scripts are dedicated to the public domain.
22
+ Use them as you please with no restrictions whatsoever.
23
+
24
+
25
+ ## Questions? Comments?
26
+
27
+ Send them along to the
28
+ [Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
29
+ Thanks!
@@ -0,0 +1,33 @@
1
+ require 'hoe'
2
+ require './lib/sportdb/structs/version.rb'
3
+
4
+ Hoe.spec 'sportdb-structs' do
5
+
6
+ self.version = SportDb::Module::Structs::VERSION
7
+
8
+ self.summary = "sportdb-structs - sport data structures for matches, scores, leagues, seasons, rounds, groups, teams, clubs and more"
9
+ self.description = summary
10
+
11
+ self.urls = ['https://github.com/sportdb/sport.db']
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'opensport@googlegroups.com'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'CHANGELOG.md'
19
+
20
+ self.licenses = ['Public Domain']
21
+
22
+ self.extra_deps = [
23
+ ['alphabets', '>= 1.0.0'],
24
+ ['date-formats', '>= 1.0.1'],
25
+ ['score-formats', '>= 0.1.0'],
26
+ ['csvreader', '>= 1.2.4'],
27
+ ['sportdb-langs', '>= 0.1.1'],
28
+ ]
29
+
30
+ self.spec_extras = {
31
+ required_ruby_version: '>= 2.2.2'
32
+ }
33
+ end
@@ -0,0 +1,125 @@
1
+ ## 3rd party gems
2
+ require 'alphabets' # downcase_i18n, unaccent, variants, ...
3
+ require 'date/formats' # DateFormats.parse, find!, ...
4
+ require 'score/formats'
5
+ require 'csvreader'
6
+
7
+
8
## Reads the csv file at path into an array of hash records (via CsvHash.read).
##
##   sep:             optional column separator; only passed along if given
##   symbolize_names: if truthy, header names get converted to symbols
def read_csv( path, sep: nil, symbolize_names: nil )
  options = {
    sep:               sep,
    header_converters: (symbolize_names ? :symbol : nil),
  }.select { |_name, value| value }   ## drop all options NOT set (nil or false)

  CsvHash.read( path, **options )
end
16
+
17
## Parses the csv text into an array of hash records (via CsvHash.parse).
##
##   sep:             optional column separator; only passed along if given
##   symbolize_names: if truthy, header names get converted to symbols
def parse_csv( txt, sep: nil, symbolize_names: nil )
  options = {
    sep:               sep,
    header_converters: (symbolize_names ? :symbol : nil),
  }.select { |_name, value| value }   ## drop all options NOT set (nil or false)

  CsvHash.parse( txt, **options )
end
25
+
26
+
27
+
28
+ ## more sportdb libs/gems
29
+ require 'sportdb/langs'
30
+
31
+ ## todo/fix: move shortcut up to sportdb/langs!!!
32
+ module SportDb
33
+ Logging = LogUtils::Logging ## logging machinery shortcut; use LogUtils for now
34
+ end
35
+
36
+
37
+ ###
38
+ # our own code
39
+ require 'sportdb/structs/version' # let version always go first
40
+ require 'sportdb/structs/config'
41
+ require 'sportdb/structs/season'
42
+
43
+ require 'sportdb/structs/name_helper'
44
+
45
+ require 'sportdb/structs/structs/country'
46
+ require 'sportdb/structs/structs/league'
47
+ require 'sportdb/structs/structs/team'
48
+ require 'sportdb/structs/structs/round'
49
+ require 'sportdb/structs/structs/group'
50
+ require 'sportdb/structs/structs/goal'
51
+ require 'sportdb/structs/structs/match'
52
+ require 'sportdb/structs/structs/matchlist'
53
+ require 'sportdb/structs/structs/standings'
54
+ require 'sportdb/structs/structs/team_usage'
55
+
56
+
57
+ require 'sportdb/structs/match_status_parser'
58
+ require 'sportdb/structs/match_parser_csv'
59
+ require 'sportdb/structs/goal_parser_csv'
60
+
61
+
62
+
63
+
64
+ ### add convenience shortcut helpers
65
### add convenience shortcut helpers
module Sports
  class Match
    class << self
      ## read matches from the csv file at path;
      ##   all options get passed along as is to SportDb::CsvMatchParser.read
      def read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
        options = { headers: headers, filters: filters, converters: converters, sep: sep }
        SportDb::CsvMatchParser.read( path, **options )
      end

      ## parse matches from csv text;
      ##   all options get passed along as is to SportDb::CsvMatchParser.parse
      def parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
        options = { headers: headers, filters: filters, converters: converters, sep: sep }
        SportDb::CsvMatchParser.parse( txt, **options )
      end
    end # class << self
  end # class Match
end # module Sports
84
+
85
+
86
+
87
module Sports
  ## lets you use
  ##   Sports.configure do |config|
  ##      config.lang = 'it'
  ##   end

  ## note: both methods just forward to the SportDb::Import configuration
  ##   to keep the Sports module / namespace "clean",
  ##   that is, only include data structures (e.g. Match,League,etc) for now - why? why not?
  class << self
    ## yields the shared configuration to the block passed in
    def configure
      yield config
    end

    ## returns the shared configuration (that is, SportDb::Import.config)
    def config
      SportDb::Import.config
    end
  end
end # module Sports
99
+
100
+
101
+
102
+ #####
103
+ # note: add Sport and Football convenience alias - why? why not?
104
+ Sport = Sports
105
+ Football = Sports
106
+
107
+
108
+
109
+
110
+ ## let's put test configuration in its own namespace / module
111
module SportDb
  class Test    ## todo/check: works with module too? use a module - why? why not?

    ####
    #  todo/fix: find a better way to configure shared test datasets - why? why not?
    class << self
      ## change the directory holding the shared test datasets
      attr_writer :data_dir

      ## directory holding the shared test datasets;
      ##   note: use one-up (..) directory for now as default - why? why not?
      def data_dir
        @data_dir ||= '../test'
      end
    end
  end
end   # module SportDb
121
+
122
+
123
+ puts SportDb::Module::Structs.banner # say hello
124
+
125
+
@@ -0,0 +1,39 @@
1
module SportDb
module Import

class Configuration
  ##
  ##  todo: allow configure of countries_dir like clubs_dir
  ##         "fallback" and use a default built-in world/countries.txt

  ## catalog (read/write) - also available via the Import.catalog shortcut
  attr_accessor :catalog

  ## language last set via lang= (nil if never set)
  attr_reader   :lang

  ## Sets the language and passes it along to
  ##  DateFormats, ScoreFormats and SportDb.lang
  def lang=(value)
    ## check/todo: always use to_sym - why? needed?
    DateFormats.lang  = value
    ScoreFormats.lang = value
    SportDb.lang.lang = value

    ## note: remember the value too - otherwise the lang reader always returns nil
    @lang = value

    ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
    ##   use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
  end

end # class Configuration


## lets you use
##   SportDb::Import.configure do |config|
##      config.lang = 'it'
##   end

## yields the shared configuration to the block passed in
def self.configure()  yield( config ); end

## returns the shared configuration (created on first use)
def self.config()  @config ||= Configuration.new;  end

## e.g. use config.catalog  -- keep Import.catalog as a shortcut (for "read-only" access)
def self.catalog() config.catalog;  end

end   # module Import
end   # module SportDb
39
+
@@ -0,0 +1,28 @@
1
+
2
module SportDb
class CsvGoalParser

  class << self
    ## read the csv file at path and return the goal records
    def read( path )
      ## note: make sure to use (assume) utf-8
      text = File.open( path, 'r:utf-8', &:read )
      parse( text )
    end

    ## parse the csv text and return the goal records
    def parse( txt )
      new( txt ).parse
    end
  end


  def initialize( txt )
    @txt = txt
  end

  ## returns an array with one Sports::GoalEvent.build result for every csv row
  def parse
    parse_csv( @txt ).map do |row|
      Sports::GoalEvent.build( row )
    end
  end

end # class CsvGoalParser
end # module SportDb
@@ -0,0 +1,490 @@
1
+
2
+ module SportDb
3
+ class CsvMatchParser
4
+
5
+ #############
6
+ # helpers
7
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
8
+
9
+ ## check if headers incl. season if yes,has priority over col mapping
10
+ ## e.g. no need to specify twice (if using headers)
11
+ col = headers[:season] if headers && headers[:season]
12
+
13
+ seasons = Hash.new( 0 ) ## default value is 0
14
+
15
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
16
+ ## use read_csv with block to switch to foreach!!!!
17
+ rows = read_csv( path, sep: sep )
18
+
19
+ rows.each_with_index do |row,i|
20
+ puts "[#{i}] " + row.inspect if i < 2
21
+
22
+ season = row[ col ] ## column name defaults to 'Season'
23
+ seasons[ season ] += 1
24
+ end
25
+
26
+ pp seasons
27
+
28
+ ## note: only return season keys/names (not hash with usage counter)
29
+ seasons.keys
30
+ end
31
+
32
+
33
+ ##########
34
+ # main machinery
35
+
36
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
37
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
38
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
39
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
40
+ ## CsvHash.parse( text, sep: sep )
41
+ ## end
42
+
43
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
44
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
45
+ parse( txt, headers: headers,
46
+ filters: filters,
47
+ converters: converters,
48
+ sep: sep )
49
+ end
50
+
51
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
52
+ new( txt ).parse( headers: headers,
53
+ filters: filters,
54
+ converters: converters,
55
+ sep: sep )
56
+ end
57
+
58
+
59
+ def initialize( txt )
60
+ @txt = txt
61
+ end
62
+
63
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
64
+
65
+ headers_mapping = {}
66
+
67
+ rows = parse_csv( @txt, sep: sep )
68
+
69
+ return [] if rows.empty? ## no rows / empty?
70
+
71
+
72
+ ## fix/todo: use logger!!!!
73
+ ## pp csv
74
+
75
+ if headers ## use user supplied headers if present
76
+ headers_mapping = headers_mapping.merge( headers )
77
+ else
78
+
79
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
80
+ headers = rows[0].keys
81
+ pp headers
82
+
83
+ # note: greece 2001-02 etc. use HT - check CVS reader row['HomeTeam'] may not be nil but an empty string?
84
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
85
+
86
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
87
+ ## assume our own football.csv format, see github.com/footballcsv
88
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
89
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
90
+ headers_mapping[:date] = find_header( headers, ['Date'] )
91
+ headers_mapping[:time] = find_header( headers, ['Time'] )
92
+
93
+ ## check for all-in-one full time (ft) and half time (ht9 scores?
94
+ headers_mapping[:score] = find_header( headers, ['FT'] )
95
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
96
+
97
+ headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
98
+
99
+ ## optional headers - note: find_header returns nil if header NOT found
100
+ header_stage = find_header( headers, ['Stage'] )
101
+ headers_mapping[:stage] = header_stage if header_stage
102
+
103
+ header_group = find_header( headers, ['Group'] )
104
+ headers_mapping[:group] = header_group if header_group
105
+
106
+
107
+ header_et = find_header( headers, ['ET', 'AET'] ) ## (after) extra time
108
+ headers_mapping[:score_et] = header_et if header_et
109
+
110
+ header_p = find_header( headers, ['P', 'PEN'] ) ## penalties
111
+ headers_mapping[:score_p] = header_p if header_p
112
+
113
+ header_notes = find_header( headers, ['Notes', 'Comments'] )
114
+ headers_mapping[:notes] = header_notes if header_notes
115
+
116
+
117
+ header_league = find_header( headers, ['League'] )
118
+ headers_mapping[:league] = header_league if header_league
119
+ else
120
+ ## else try footballdata.uk and others
121
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
122
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
123
+ headers_mapping[:date] = find_header( headers, ['Date'] )
124
+ headers_mapping[:time] = find_header( headers, ['Time'] )
125
+
126
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
127
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
128
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
129
+
130
+ ## check for half time scores ?
131
+ ## note: HT = Half Time
132
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
133
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
134
+ end
135
+ end
136
+
137
+ pp headers_mapping
138
+
139
+ ### todo/fix: check headers - how?
140
+ ## if present HomeTeam or HT required etc.
141
+ ## issue error/warn is not present
142
+ ##
143
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
144
+ ## exit 1
145
+ ##
146
+
147
+ matches = []
148
+
149
+ rows.each_with_index do |row,i|
150
+
151
+ ## fix/todo: use logger!!!!
152
+ ## puts "[#{i}] " + row.inspect if i < 2
153
+
154
+
155
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
156
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
157
+ skip = false
158
+ filters.each do |header, value|
159
+ if row[ header ] != value ## e.g. row['Season']
160
+ skip = true
161
+ break
162
+ end
163
+ end
164
+ next if skip ## if header values NOT matching
165
+ end
166
+
167
+
168
+ ## note:
169
+ ## add converters after filters for now (why not before filters?)
170
+ if converters ## any converters defined?
171
+ ## convert single proc shortcut to array with single converter
172
+ converters = [converters] if converters.is_a?( Proc )
173
+
174
+ ## assumes array of procs
175
+ converters.each do |converter|
176
+ row = converter.call( row )
177
+ end
178
+ end
179
+
180
+
181
+
182
+ team1 = row[ headers_mapping[ :team1 ]]
183
+ team2 = row[ headers_mapping[ :team2 ]]
184
+
185
+
186
+ ## check if data present - if not skip (might be empty row)
187
+ ## note: (old classic) csv reader returns nil for empty fields
188
+ ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
189
+ if (team1.nil? || team1.empty?) &&
190
+ (team2.nil? || team2.empty?)
191
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
192
+ pp row
193
+ next
194
+ end
195
+
196
+ ## remove possible match played counters e.g. (4) (11) etc.
197
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
198
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
199
+
200
+
201
+
202
+ col = row[ headers_mapping[ :time ]]
203
+
204
+ if col.nil?
205
+ time = nil
206
+ else
207
+ col = col.strip # make sure not leading or trailing spaces left over
208
+
209
+ if col.empty?
210
+ col =~ /^-{1,}$/ || # e.g. - or ---
211
+ col =~ /^\?{1,}$/ # e.g. ? or ???
212
+ ## note: allow missing / unknown date for match
213
+ time = nil
214
+ else
215
+ if col =~ /^\d{1,2}:\d{2}$/
216
+ time_fmt = '%H:%M' # e.g. 17:00 or 3:00
217
+ elsif col =~ /^\d{1,2}.\d{2}$/
218
+ time_fmt = '%H.%M' # e.g. 17:00 or 3:00
219
+ else
220
+ puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
221
+ ## todo/fix: add to errors/warns list - why? why not?
222
+ exit 1
223
+ end
224
+
225
+ ## todo/check: use date object (keep string?) - why? why not?
226
+ ## todo/fix: yes!! use date object!!!! do NOT use string
227
+ time = Time.strptime( col, time_fmt ).strftime( '%H:%M' )
228
+ end
229
+ end
230
+
231
+
232
+
233
+ col = row[ headers_mapping[ :date ]]
234
+ col = col.strip # make sure not leading or trailing spaces left over
235
+
236
+ if col.empty? ||
237
+ col =~ /^-{1,}$/ || # e.g. - or ---
238
+ col =~ /^\?{1,}$/ # e.g. ? or ???
239
+ ## note: allow missing / unknown date for match
240
+ date = nil
241
+ else
242
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
243
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
244
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
245
+ col = col.strip # make sure not leading or trailing spaces left over
246
+
247
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
248
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
249
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
250
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
251
+ elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
252
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
253
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
254
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
255
+ elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
256
+ date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
257
+ else
258
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
259
+ ## todo/fix: add to errors/warns list - why? why not?
260
+ exit 1
261
+ end
262
+
263
+ ## todo/check: use date object (keep string?) - why? why not?
264
+ ## todo/fix: yes!! use date object!!!! do NOT use string
265
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
266
+ end
267
+
268
+
269
+ ##
270
+ ## todo/fix: round might not always be just a simple integer number!!!
271
+ ## might be text such as Final | Leg 1 or such!!!!
272
+ round = nil
273
+ ## check for (optional) round / matchday
274
+ if headers_mapping[ :round ]
275
+ col = row[ headers_mapping[ :round ]]
276
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
277
+ ## (old attic) was: round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
278
+
279
+ ## note: make round always a string for now!!!! e.g. "1", "2" too!!
280
+ round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
281
+ ## note: allow missing round for match / defaults to nil
282
+ nil
283
+ else
284
+ col
285
+ end
286
+ end
287
+
288
+
289
+ score1 = nil
290
+ score2 = nil
291
+ score1i = nil
292
+ score2i = nil
293
+
294
+ ## check for full time scores ?
295
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
296
+ ft = [ row[ headers_mapping[ :score1 ]],
297
+ row[ headers_mapping[ :score2 ]] ]
298
+
299
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
300
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
301
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
302
+ end
303
+
304
+ ## check for half time scores ?
305
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
306
+ ht = [ row[ headers_mapping[ :score1i ]],
307
+ row[ headers_mapping[ :score2i ]] ]
308
+
309
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
310
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
311
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
312
+ end
313
+
314
+
315
+ ## check for all-in-one full time scores?
316
+ if headers_mapping[ :score ]
317
+ col = row[ headers_mapping[ :score ]]
318
+ score = parse_score( col )
319
+ if score
320
+ score1 = score[0]
321
+ score2 = score[1]
322
+ else
323
+ puts "!! ERROR - invalid score (ft) format >#{col}<:"
324
+ pp row
325
+ exit 1
326
+ end
327
+ end
328
+
329
+ if headers_mapping[ :scorei ]
330
+ col = row[ headers_mapping[ :scorei ]]
331
+ score = parse_score( col )
332
+ if score
333
+ score1i = score[0]
334
+ score2i = score[1]
335
+ else
336
+ puts "!! ERROR - invalid score (ht) format >#{col}<:"
337
+ pp row
338
+ exit 1
339
+ end
340
+ end
341
+
342
+ ####
343
+ ## try optional score - extra time (et) and penalities (p/pen)
344
+ score1et = nil
345
+ score2et = nil
346
+ score1p = nil
347
+ score2p = nil
348
+
349
+ if headers_mapping[ :score_et ]
350
+ col = row[ headers_mapping[ :score_et ]]
351
+ score = parse_score( col )
352
+ if score
353
+ score1et = score[0]
354
+ score2et = score[1]
355
+ else
356
+ puts "!! ERROR - invalid score (et) format >#{col}<:"
357
+ pp row
358
+ exit 1
359
+ end
360
+ end
361
+
362
+ if headers_mapping[ :score_p ]
363
+ col = row[ headers_mapping[ :score_p ]]
364
+ score = parse_score( col )
365
+ if score
366
+ score1p = score[0]
367
+ score2p = score[1]
368
+ else
369
+ puts "!! ERROR - invalid score (p) format >#{col}<:"
370
+ pp row
371
+ exit 1
372
+ end
373
+ end
374
+
375
+
376
+ ## try some optional headings / columns
377
+ stage = nil
378
+ if headers_mapping[ :stage ]
379
+ col = row[ headers_mapping[ :stage ]]
380
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
381
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
382
+ ## note: allow missing stage for match / defaults to "regular"
383
+ nil
384
+ elsif col == '?'
385
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
386
+ '?' ## todo/check: use unkown and NOT ? - why? why not?
387
+ else
388
+ col
389
+ end
390
+ end
391
+
392
+ group = nil
393
+ if headers_mapping[ :group ]
394
+ col = row[ headers_mapping[ :group ]]
395
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
396
+ group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
397
+ ## note: allow missing stage for match / defaults to "regular"
398
+ nil
399
+ else
400
+ col
401
+ end
402
+ end
403
+
404
+ status = nil ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
405
+ if headers_mapping[ :notes ]
406
+ col = row[ headers_mapping[ :notes ]]
407
+ ## check for optional (match) status in notes / comments
408
+ status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
409
+ nil
410
+ else
411
+ StatusParser.parse( col ) # note: returns nil if no (match) status found
412
+ end
413
+ end
414
+
415
+
416
+ league = nil
417
+ league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
418
+
419
+
420
+ ## puts 'match attributes:'
421
+ attributes = {
422
+ date: date,
423
+ time: time,
424
+ team1: team1, team2: team2,
425
+ score1: score1, score2: score2,
426
+ score1i: score1i, score2i: score2i,
427
+ score1et: score1et, score2et: score2et,
428
+ score1p: score1p, score2p: score2p,
429
+ round: round,
430
+ stage: stage,
431
+ group: group,
432
+ status: status,
433
+ league: league
434
+ }
435
+ ## pp attributes
436
+
437
+ match = Sports::Match.new( **attributes )
438
+ matches << match
439
+ end
440
+
441
+ ## pp matches
442
+ matches
443
+ end
444
+
445
+
446
+ private
447
+
448
+ def find_header( headers, candidates )
449
+ ## todo/fix: use find_first from enumare of similar ?! - why? more idiomatic code?
450
+
451
+ candidates.each do |candidate|
452
+ return candidate if headers.include?( candidate ) ## bingo!!!
453
+ end
454
+ nil ## no matching header found!!!
455
+ end
456
+
457
+ ########
458
+ # more helpers
459
+ #
460
+
461
+ def parse_score( str )
462
+ if str.nil? ## todo/check: remove nil case - possible? - why? why not?
463
+ [nil,nil]
464
+ else
465
+ ## remove (optional single) note/footnote/endnote markers
466
+ ## e.g. (*) or (a), (b),
467
+ ## or [*], [A], [1], etc.
468
+ ## - allow (1) or maybe (*1) in the future - why? why not?
469
+ str = str.sub( /\( [a-z*] \)
470
+ |
471
+ \[ [1-9a-z*] \]
472
+ /ix, '' ).strip
473
+
474
+ if str.empty? || str == '?' || str == '-' || str == 'n/a'
475
+ [nil,nil]
476
+ ### todo/check: use regex with named capture groups here - why? why not?
477
+ elsif str =~ /^\d{1,2}[:-]\d{1,2}$/ ## sanity check scores format
478
+ score = str.split( /[:-]/ )
479
+ [score[0].to_i, score[1].to_i]
480
+ else
481
+ nil ## note: returns nil if invalid / unparseable format!!!
482
+ end
483
+ end
484
+ end # method parse_score
485
+
486
+
487
+
488
+ end # class CsvMatchParser
489
+ end # module SportDb
490
+