sportdb-quick 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2ce8e81bd6aece43872ba2718123dd2b34d6f52807ab8260a2e91f01a8c28ed
4
- data.tar.gz: c91175dabbac6e590198df83151ee36d9a9daa853b77f0eedfa229f9db55edd0
3
+ metadata.gz: 5ee93b658f2f6fce94a131931ea37908a2edbfa19a182ec20072447b57ffc847
4
+ data.tar.gz: f5fa5ab664d71ea9a2ab8c62e1bd30d42fa678e0b2a7564124623dc5d7cd2963
5
5
  SHA512:
6
- metadata.gz: 53f5ca14405552fe662d35e6b1d1672afce956f27cc40801265a9f9d44a4d622b320a00a5ba935a04bae0453872ecb0da27b21927b184a87ed2755973eef118b
7
- data.tar.gz: a688b66358bf16ec13ccbb90b5e29eaf483ca3e69f64ca4816f7210038383ea002660fa62a8ac971e9764903490757c9a84864bf57075794d8e0a337970a55b4
6
+ metadata.gz: b0df0b970600ee24a88d09ddffec1caa8ef94cfa75561569638da954357cec2d97ffc7ea566f03e42ec814a5998c99da79f861c54a863a439f1778a3f0426ab0
7
+ data.tar.gz: 01e1ad205c4d271a68136784a17b8f1b7987bdfa5e9b4ba5f395b42f6b488a53747525ecd7bea1e2c188012219529d7611e7d35f49ec4e5e7d499cc791e4e80a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 0.1.1
2
+
1
3
  ### 0.0.1 / 2024-08-27
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -4,10 +4,12 @@ README.md
4
4
  Rakefile
5
5
  bin/fbt
6
6
  lib/sportdb/quick.rb
7
- lib/sportdb/quick/linter.rb
7
+ lib/sportdb/quick/csv/goal.rb
8
+ lib/sportdb/quick/csv/goal_parser_csv.rb
9
+ lib/sportdb/quick/csv/match_parser_csv.rb
10
+ lib/sportdb/quick/csv/match_status_parser.rb
8
11
  lib/sportdb/quick/match_parser.rb
9
12
  lib/sportdb/quick/opts.rb
10
- lib/sportdb/quick/outline_reader.rb
11
13
  lib/sportdb/quick/quick_league_outline_reader.rb
12
14
  lib/sportdb/quick/quick_match_reader.rb
13
15
  lib/sportdb/quick/version.rb
data/README.md CHANGED
@@ -1,5 +1,24 @@
1
- # sportdb-quick - football.txt (quick) match parsers and more
1
+ # sportdb-quick - football.txt (quick) match readers and more
2
2
 
3
3
 
4
4
 
5
5
 
6
+ ## Usage
7
+
8
+
9
+ ``` ruby
10
+ require 'sportdb/quick'
11
+
12
+
13
+ # path = "./euro/2024--germany/euro.txt"
14
+ path = "./deutschland/2024-25/1-bundesliga.txt"
15
+
16
+ matches = SportDb::QuickMatchReader.read( path )
17
+ pp matches
18
+
19
+ # try json for matches
20
+ data = matches.map {|match| match.as_json }
21
+ pp data
22
+ ```
23
+
24
+
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ Hoe.spec 'sportdb-quick' do
6
6
 
7
7
  self.version = SportDb::Module::Quick::VERSION
8
8
 
9
- self.summary = "sportdb-quick - football.txt (quick) match parsers and more"
9
+ self.summary = "sportdb-quick - football.txt (quick) match readers and more"
10
10
  self.description = summary
11
11
 
12
12
  self.urls = { home: 'https://github.com/sportdb/sport.db' }
data/bin/fbt CHANGED
@@ -27,7 +27,7 @@ require 'optparse'
27
27
  args = ARGV
28
28
  opts = { debug: false,
29
29
  metal: false,
30
- quick: false }
30
+ quick: true }
31
31
 
32
32
  parser = OptionParser.new do |parser|
33
33
  parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
@@ -39,19 +39,19 @@ require 'optparse'
39
39
 
40
40
  parser.on( "--verbose", "--debug",
41
41
  "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
42
- opts[:debug] = debug
42
+ opts[:debug] = true
43
43
  end
44
44
 
45
45
  parser.on( "--metal",
46
46
  "turn off typed parse tree; show to the metal tokens"+
47
47
  " (default: #{opts[:metal]})" ) do |metal|
48
- opts[:metal] = metal
48
+ opts[:metal] = true
49
49
  end
50
50
 
51
51
  parser.on( "--quick",
52
52
  "use quick match reader; output matches in json"+
53
53
  " (default: #{opts[:quick]})" ) do |quick|
54
- opts[:quick] = quick
54
+ opts[:quick] = true
55
55
  end
56
56
  end
57
57
  parser.parse!( args )
@@ -92,9 +92,9 @@ if opts[:quick]
92
92
  puts " #{data.size} match(es)"
93
93
  end
94
94
  else
95
- SportDb::Quick::Linter.debug = true if opts[:debug]
95
+ SportDb::Parser::Linter.debug = true if opts[:debug]
96
96
 
97
- linter = SportDb::Quick::Linter.new
97
+ linter = SportDb::Parser::Linter.new
98
98
 
99
99
  errors = []
100
100
 
@@ -0,0 +1,192 @@
1
+
2
+ module Sports
3
+
4
+ ## "free-standing" goal event - for import/export in separate event / goal datafiles
5
+ ## returned by CsvGoalParser and others
6
class GoalEvent
  ## "free-standing" goal event - one record per goal; built from a (csv) row
  ##   and usable on its own (that is, without a parent match struct).

  ## minute with optional stoppage-time offset,
  ##   followed by a closing ' or . at the end of the string
  ##   e.g.  45'  or  45.  or  45 +2'  or  45+2'
  ## note: spaces before the plus are optional ([ ]*) -
  ##   if they were required, the compact form 45+2' would only match
  ##   the trailing 2' and report minute 2 (and no offset)!!!
  MINUTE_RE = %r{([0-9]+)
                 (?:[ ]*
                    \+([0-9]+)
                 )?
                 ['.]
                 $}x

  ## build a goal event from a row (any hash-like object with string keys)
  ##
  ##  reads the columns:
  ##    Match   - "<team1> - <team2> | <date or round>"
  ##                e.g. "Arsenal - Chelsea | Apr 4 2019"
  ##    Score   - score after the goal e.g. "1-0"
  ##    Minute  - e.g. "45'" or "90 +4'"
  ##    Player, Extra (e.g. "(og)" or "(pen)") and Notes
  ##
  ##  returns a new GoalEvent
  ##  note: prints an error and exits (exit 1) if the minute format is not supported
  def self.build( row )  ## rename to parse or such - why? why not?

    ## split match_id
    team_str, more_str   = row['Match'].split( '|' )
    team1_str, team2_str = team_str.split( ' - ' )

    ## note: the part after the | might be missing - treat as empty
    more_str  = more_str.to_s.strip
    team1_str = team1_str.strip
    team2_str = team2_str.strip

    # check if more_str is a date otherwise assume round
    date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i   ## Apr 4
                 '%b %d'
               elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i  ## Apr 4 2019
                 '%b %d %Y'
               else
                 nil
               end

    if date_fmt
      date  = Date.strptime( more_str, date_fmt )
      round = nil
    else
      date  = nil
      round = more_str.empty? ? nil : more_str   ## no round text => no round
    end


    values = row['Score'].split('-').map { |value| value.strip }
    score1 = values[0].to_i
    score2 = values[1].to_i

    minute = nil
    offset = nil
    if m=MINUTE_RE.match( row['Minute'] )
      minute = m[1].to_i
      offset = m[2] ? m[2].to_i : nil
    else
      puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
      exit 1
    end

    attributes = {
      team1:   team1_str,
      team2:   team2_str,
      date:    date,
      round:   round,
      score1:  score1,
      score2:  score2,
      minute:  minute,
      offset:  offset,
      player:  row['Player'],
      owngoal: ['(og)', '(o.g.)'].include?( row['Extra']),
      penalty: ['(pen)', '(pen.)'].include?( row['Extra']),
      notes:   (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes']
    }

    new( **attributes )
  end


  ## match id
  attr_reader  :team1,
               :team2,
               :round,   ## optional
               :date     ## optional

  ## main attributes
  attr_reader  :score1,
               :score2,
               :player,
               :minute,
               :offset,
               :owngoal,
               :penalty,
               :notes


  ## text id for the match this goal belongs to;
  ##   uses the round if present, otherwise the date
  ## todo/check: or just use match.hash or such if match mapping known - why? why not?
  def match_id
    if round
      "#{@team1} - #{@team2} | #{@round}"
    else
      "#{@team1} - #{@team2} | #{@date}"
    end
  end


  def owngoal?() @owngoal==true; end
  def penalty?() @penalty==true; end

  def initialize( team1:,
                  team2:,
                  round:   nil,
                  date:    nil,
                  score1:,
                  score2:,
                  player:,
                  minute:,
                  offset:  nil,
                  owngoal: false,
                  penalty: false,
                  notes:   nil
                )
    @team1   = team1
    @team2   = team2
    @round   = round
    @date    = date

    @score1  = score1
    @score2  = score2
    @player  = player
    @minute  = minute
    @offset  = offset
    @owngoal = owngoal
    @penalty = penalty
    @notes   = notes
  end


  ## note: lets you use normalize teams or such acts like a Match struct
  ##   only team1 and team2 get updated; all other keywords get ignored
  def update( **kwargs )
    ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
    @team1 = kwargs[:team1]    if kwargs.has_key? :team1
    @team2 = kwargs[:team2]    if kwargs.has_key? :team2
  end
end  # class GoalEvent
141
+
142
+
143
+ ### extend "basic" goal struct with goal event build
144
class Goal  ### nested (non-freestanding) inside match (match is parent)

  ## turn a list of goal events (in match order) into goal structs
  ##
  ##  the scoring team (1 or 2) gets derived from the running score, that is,
  ##    every event must advance the score of exactly one team by one;
  ##    if not, an error gets printed and the program exits (exit 1)
  ##
  ##  returns an array with one goal per event
  def self.build( events )  ## check/todo - rename to build_from_event/row or such - why? why not?
    last_score1 = 0
    last_score2 = 0

    events.map do |event|
      team = if last_score1+1 == event.score1 && last_score2 == event.score2
               1
             elsif last_score2+1 == event.score2 && last_score1 == event.score1
               2
             else
               puts "!! ERROR - unexpected score advance (one goal at a time expected):"
               puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
               exit 1
             end

      ## remember the score for checking the next event
      last_score1 = event.score1
      last_score2 = event.score2

      new( score1:  event.score1,
           score2:  event.score2,
           team:    team,
           minute:  event.minute,
           offset:  event.offset,
           player:  event.player,
           owngoal: event.owngoal,
           penalty: event.penalty,
           notes:   event.notes )
    end
  end
end  # class Goal
187
+
188
+
189
+ end # module Sports
190
+
191
+
192
+
@@ -0,0 +1,28 @@
1
+
2
module SportDb
class CsvGoalParser

  ## read the (csv) datafile and parse; returns an array of goal events
  def self.read( path )
    ## note: make sure to use (assume) utf-8
    parse( File.read( path, encoding: 'utf-8' ) )
  end

  ## parse the (csv) text; returns an array of goal events
  def self.parse( txt )
    new( txt ).parse
  end


  def initialize( txt )
    @txt = txt
  end

  ## note: parse_csv is NOT defined in this class -
  ##   presumably a shared (csv) helper defined elsewhere; verify
  def parse
    parse_csv( @txt ).map { |row| Sports::GoalEvent.build( row ) }
  end

end # class CsvGoalParser
end # module SportDb
@@ -0,0 +1,490 @@
1
+
2
module SportDb
class CsvMatchParser

  #############
  # helpers

  ## find all seasons (column values) used in the (csv) datafile
  ##   col     - name of the season column (defaults to 'Season')
  ##   headers - optional headers mapping; headers[:season] has priority over col
  ##               e.g. no need to specify twice (if using headers)
  ##  returns the season keys/names (not the hash with the usage counters)
  ##
  ## note: read_csv is NOT defined in this class -
  ##   presumably a shared (csv) helper defined elsewhere; verify
  def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
    col = headers[:season]   if headers && headers[:season]

    seasons = Hash.new( 0 )   ## default value is 0

    ## todo/fix: yes, use CsvHash.foreach - why? why not?
    ##   use read_csv with block to switch to foreach!!!!
    rows = read_csv( path, sep: sep )

    rows.each_with_index do |row,i|
      puts "[#{i}] " + row.inspect    if i < 2

      season = row[ col ]    ## column name defaults to 'Season'
      seasons[ season ] += 1
    end

    pp seasons

    seasons.keys
  end


  ##########
  # main machinery

  ## read the (csv) datafile and parse; see parse for the options
  def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
    txt = File.open( path, 'r:utf-8' ) {|f| f.read }   ## note: make sure to use (assume) utf-8
    parse( txt, headers:    headers,
                filters:    filters,
                converters: converters,
                sep:        sep )
  end

  ## parse the (csv) text; see the parse instance method for the options
  def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
    new( txt ).parse( headers:    headers,
                      filters:    filters,
                      converters: converters,
                      sep:        sep )
  end


  def initialize( txt )
    @txt = txt
  end

  ## parse all rows into match structs (Sports::Match)
  ##
  ##   headers    - optional mapping of attributes to column names
  ##                   e.g. { team1: 'Home', team2: 'Away', ... };
  ##                if missing the mapping gets auto-detected from the first row
  ##   filters    - optional hash of column name / value pairs;
  ##                   rows get skipped unless ALL values match
  ##                   e.g. { 'Season' => '2017/2018' }
  ##   converters - optional proc (or array of procs) called with the row
  ##                   (after the filters); the return value replaces the row
  ##   sep        - passed on to parse_csv
  ##
  ##  returns an array of matches (empty if there are no rows)
  ##
  ## note: prints an error and exits (exit 1) on unknown date, time or score formats
  ## note: parse_csv is NOT defined in this class -
  ##   presumably a shared (csv) helper defined elsewhere; verify
  def parse( headers: nil, filters: nil, converters: nil, sep: nil )
    rows = parse_csv( @txt, sep: sep )

    return []   if rows.empty?     ## no rows / empty?

    ## fix/todo: use logger!!!!
    headers_mapping = if headers   ## use user supplied headers if present
                        {}.merge( headers )
                      else
                        ## note: assume all rows have the same columns/fields!!!
                        detect_headers( rows[0].keys )
                      end

    pp headers_mapping

    ### todo/fix: check headers - how?
    ##   if present HomeTeam or HT required etc.
    ##   issue error/warn if not present

    ## convert single proc shortcut to array with single converter
    converters = [converters]   if converters.is_a?( Proc )

    matches = []

    rows.each_with_index do |row,i|

      ## filter MUST match if present e.g. row['Season'] == '2017/2018'
      next  if filters && filters.any? { |header, value| row[ header ] != value }

      ## note: add converters after filters for now (why not before filters?)
      if converters
        converters.each { |converter| row = converter.call( row ) }
      end


      team1 = row[ headers_mapping[ :team1 ]]
      team2 = row[ headers_mapping[ :team2 ]]

      ## check if data present - if not skip (might be empty row)
      ##  note:  (old classic) csv reader returns nil for empty fields
      ##         new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
      if (team1.nil? || team1.empty?) &&
         (team2.nil? || team2.empty?)
        puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
        pp row
        next
      end

      ## remove possible match played counters e.g. (4) (11) etc.
      team1 = team1.sub( /\(\d+\)/, '' ).strip
      team2 = team2.sub( /\(\d+\)/, '' ).strip

      time = parse_time( row[ headers_mapping[ :time ]] )
      date = parse_date( row[ headers_mapping[ :date ]] )

      ## check for (optional) round / matchday
      ##  note: make round always a string for now!!!! e.g. "1", "2" too!!
      ## todo/fix: round might be text such as  Final | Leg 1 or such!!!!
      round = nil
      round = optional_text( row[ headers_mapping[ :round ]] )   if headers_mapping[ :round ]


      score1  = nil
      score2  = nil
      score1i = nil
      score2i = nil

      ## check for full time scores (in two columns)?
      if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
        score1 = parse_goals( row[ headers_mapping[ :score1 ]] )
        score2 = parse_goals( row[ headers_mapping[ :score2 ]] )
      end

      ## check for half time scores (in two columns)?
      if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
        score1i = parse_goals( row[ headers_mapping[ :score1i ]] )
        score2i = parse_goals( row[ headers_mapping[ :score2i ]] )
      end

      ## check for all-in-one full time / half time scores?
      if headers_mapping[ :score ]
        score1, score2 = fetch_score( row, headers_mapping[ :score ], 'ft' )
      end

      if headers_mapping[ :scorei ]
        score1i, score2i = fetch_score( row, headers_mapping[ :scorei ], 'ht' )
      end

      ####
      ## try optional score - extra time (et) and penalties (p/pen)
      score1et = nil
      score2et = nil
      score1p  = nil
      score2p  = nil

      if headers_mapping[ :score_et ]
        score1et, score2et = fetch_score( row, headers_mapping[ :score_et ], 'et' )
      end

      if headers_mapping[ :score_p ]
        score1p, score2p = fetch_score( row, headers_mapping[ :score_p ], 'p' )
      end


      ## try some optional headings / columns
      ##   note: an explicit unknown stage (?) gets passed along as is
      stage = nil
      stage = optional_text( row[ headers_mapping[ :stage ]] )   if headers_mapping[ :stage ]

      group = nil
      group = optional_text( row[ headers_mapping[ :group ]] )   if headers_mapping[ :group ]

      status = nil    ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
      if headers_mapping[ :notes ]
        ## check for optional (match) status in notes / comments
        notes  = optional_text( row[ headers_mapping[ :notes ]] )
        status = StatusParser.parse( notes )   if notes  # note: returns nil if no (match) status found
      end

      league = nil
      league = row[ headers_mapping[ :league ]]   if headers_mapping[ :league ]


      attributes = {
        date:     date,
        time:     time,
        team1:    team1,    team2:    team2,
        score1:   score1,   score2:   score2,
        score1i:  score1i,  score2i:  score2i,
        score1et: score1et, score2et: score2et,
        score1p:  score1p,  score2p:  score2p,
        round:    round,
        stage:    stage,
        group:    group,
        status:   status,
        league:   league
      }

      matches << Sports::Match.new( **attributes )
    end

    matches
  end


  private

  ## return the first candidate found in headers (or nil if none found)
  def find_header( headers, candidates )
    ## todo/fix: use find_first from enumerable or similar ?! - why? more idiomatic code?

    candidates.each do |candidate|
       return candidate  if headers.include?( candidate )  ## bingo!!!
    end
    nil  ## no matching header found!!!
  end

  ## auto-detect the headers mapping from the column names (array of strings)
  ##   note: find_header returns nil if header NOT found
  def detect_headers( headers )
    pp headers

    mapping = {}

    # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
    #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

    if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
      ## assume our own football.csv format, see github.com/footballcsv
      mapping[:team1] = find_header( headers, ['Team 1'] )
      mapping[:team2] = find_header( headers, ['Team 2'] )
      mapping[:date]  = find_header( headers, ['Date'] )
      mapping[:time]  = find_header( headers, ['Time'] )

      ## check for all-in-one full time (ft) and half time (ht) scores?
      mapping[:score]  = find_header( headers, ['FT'] )
      mapping[:scorei] = find_header( headers, ['HT'] )

      mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

      ## optional headers - only added if found
      { stage:    ['Stage'],
        group:    ['Group'],
        score_et: ['ET', 'AET'],         ## (after) extra time
        score_p:  ['P', 'PEN'],          ## penalties
        notes:    ['Notes', 'Comments'],
        league:   ['League'],
      }.each do |key, candidates|
        header = find_header( headers, candidates )
        mapping[ key ] = header   if header
      end
    else
      ## else try footballdata.uk and others
      mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
      mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
      mapping[:date]  = find_header( headers, ['Date'] )
      mapping[:time]  = find_header( headers, ['Time'] )

      ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
      mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
      mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )

      ## check for half time scores ?
      ##  note: HT = Half Time
      mapping[:score1i] = find_header( headers, ['HTHG'] )
      mapping[:score2i] = find_header( headers, ['HTAG'] )
    end

    mapping
  end


  ########
  # more helpers
  #

  ## check for missing / unknown date or time (col must be a stripped string)
  def no_value?( col )
    col.empty? ||
    col =~ /^-{1,}$/ ||      # e.g. - or ---
    col =~ /^\?{1,}$/        # e.g. ? or ???
  end

  ## return nil for missing text (nil, empty, - or n/a); otherwise the text as is
  def optional_text( col )
    if col.nil? || col.empty? || col == '-' || col == 'n/a'
      nil
    else
      col
    end
  end

  ## parse the time column; returns the time as a string (HH:MM)
  ##   or nil if missing / unknown (nil, empty, - or ?)
  ## todo/check: use time object (keep string?) - why? why not?
  def parse_time( col )
    return nil  if col.nil?

    col = col.strip   # make sure not leading or trailing spaces left over
    return nil  if no_value?( col )    ## note: allow missing / unknown time for match

    time_fmt = if col =~ /^\d{1,2}:\d{2}$/
                 '%H:%M'      # e.g. 17:00 or 3:00
               elsif col =~ /^\d{1,2}\.\d{2}$/   ## note: dot MUST be escaped; otherwise matches 1700 too
                 '%H.%M'      # e.g. 17.00 or 3.00
               else
                 puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
                 ## todo/fix: add to errors/warns list - why? why not?
                 exit 1
               end

    Time.strptime( col, time_fmt ).strftime( '%H:%M' )
  end

  ## parse the date column; returns the date as a string (YYYY-MM-DD)
  ##   or nil if missing / unknown (nil, empty, - or ?)
  ## todo/fix: yes!! use date object!!!! do NOT use string
  def parse_date( col )
    return nil  if col.nil?    ## note: column might be missing (same as for time)

    col = col.strip   # make sure not leading or trailing spaces left over
    return nil  if no_value?( col )    ## note: allow missing / unknown date for match

    ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
    col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
    col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
    col = col.strip   # make sure not leading or trailing spaces left over

    date_fmt = if col =~ /^\d{2}\/\d{2}\/\d{4}$/
                 '%d/%m/%Y'      # e.g. 17/08/2002
               elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
                 '%d/%m/%y'      # e.g. 17/08/02
               elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/   ## "standard" / default date format
                 '%Y-%m-%d'      # e.g. 1995-08-04
               elsif col =~ /^\d{1,2} \w{3} \d{4}$/
                 '%d %b %Y'      # e.g. 8 Jul 2017
               elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
                 '%a %b %d %Y'   # e.g. Sat Aug 7 1993
               else
                 puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
                 ## todo/fix: add to errors/warns list - why? why not?
                 exit 1
               end

    Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
  end

  ## parse a single (one or two digit) goals column e.g. FTHG;
  ##   returns nil if not a number (e.g. nil, empty, ? or -)
  ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
  def parse_goals( col )
    col.to_i   if col =~ /^\d{1,2}$/
  end

  ## read and parse an all-in-one score column;
  ##   label (e.g. ft, ht, et, p) is used for the error message only
  ##  returns a pair e.g. [2,1] or [nil,nil];
  ##   prints an error and exits (exit 1) if the format is invalid
  def fetch_score( row, header, label )
    col   = row[ header ]
    score = parse_score( col )
    unless score
      puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
      pp row
      exit 1
    end
    score
  end

  ## parse an all-in-one score e.g. 2-1 or 2:1
  ##  returns a pair e.g. [2,1],
  ##    [nil,nil] if missing / unknown (nil, empty, ?, - or n/a) or
  ##    nil if invalid / unparseable format!!!
  def parse_score( str )
    if str.nil?  ## todo/check: remove nil case - possible? - why? why not?
      [nil,nil]
    else
      ## remove (optional single) note/footnote/endnote markers
      ##    e.g. (*) or (a), (b),
      ##      or [*], [A], [1], etc.
      ##   - allow (1) or maybe (*1) in the future - why? why not?
      str = str.sub( /\( [a-z*] \)
                         |
                      \[ [1-9a-z*] \]
                     /ix, '' ).strip

      if str.empty? || str == '?' || str == '-' || str == 'n/a'
        [nil,nil]
      ### todo/check: use regex with named capture groups here - why? why not?
      elsif str =~ /^\d{1,2}[:-]\d{1,2}$/  ## sanity check scores format
        score = str.split( /[:-]/ )
        [score[0].to_i, score[1].to_i]
      else
        nil  ## note: returns nil if invalid / unparseable format!!!
      end
    end
  end # method parse_score

end # class CsvMatchParser
end # module SportDb
490
+
@@ -0,0 +1,90 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
module SportDb


  ### todo/fix: move Status inside Match struct - why? why not?

  class Status
    # note: use a class as an "enum"-like namespace for now - why? why not?
    #   move class into Match e.g. Match::Status - why? why not?
    CANCELLED = 'CANCELLED'   # canceled (US spelling), cancelled (UK spelling) - what to use?
    AWARDED   = 'AWARDED'
    POSTPONED = 'POSTPONED'
    ABANDONED = 'ABANDONED'
    REPLAY    = 'REPLAY'
  end # class Status



  class StatusParser

    ## (ordered) lookup table - text pattern and matching status
    ##   note: english usage - cancelled (in UK), canceled (in US)
    STATUS_PATTERNS = [
      [ /^(cancelled|
           canceled|
           can\.
          )/xi,  Status::CANCELLED ],
      [ /^(awarded|
           awd\.
          )/xi,  Status::AWARDED ],
      [ /^(postponed
          )/xi,  Status::POSTPONED ],
      [ /^(abandoned|
           abd\.
          )/xi,  Status::ABANDONED ],
      [ /^(replay
          )/xi,  Status::REPLAY ],
    ]

    ## check if the text starts with a (known) match status
    ##   note: returns nil if no match found
    def self.parse( str )
      STATUS_PATTERNS.each do |pattern, status|
        return status   if str =~ pattern
      end
      nil   # no match
    end


    RUN_RE = /\[
                (?<text>[^\]]+)
              \]
             /x

    ## find the first "protected" text run block e.g. [] with a match status
    ##   note: changes the line in-place (!) - the text run gets
    ##           replaced with [STATUS.<status>]
    ##   returns the status or nil if no status found
    def self.find!( line )
      pos = 0
      while m = RUN_RE.match( line, pos )
        pos = m.end( 0 )   ## keep on processing rest of line after this text run

        status = parse( m[:text].strip )
        next  unless status

        line.sub!( m[0], "[STATUS.#{status}]" )
        return status
      end

      nil
    end # method find!
  end # class StatusParser

end # module SportDb
90
+
@@ -41,6 +41,15 @@ class QuickLeagueOutlineReader
41
41
  pp heading
42
42
  exit 1
43
43
  end
44
+ elsif node[0] == :h2
45
+ ## todo/check - make sure parsed h1 first
46
+ heading = node[1]
47
+ ## reuse league, season from h1
48
+ secs << { league: secs[-1][:league],
49
+ season: secs[-1][:season],
50
+ stage: heading,
51
+ lines: []
52
+ }
44
53
  elsif node[0] == :p ## paragraph with (text) lines
45
54
  lines = node[1]
46
55
  ## note: skip lines if no heading seen
@@ -3,7 +3,7 @@ module SportDb
3
3
  module Module
4
4
  module Quick
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 0
6
+ MINOR = 1
7
7
  PATCH = 1
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
data/lib/sportdb/quick.rb CHANGED
@@ -17,8 +17,10 @@ end
17
17
  ## our own code
18
18
  require_relative 'quick/version'
19
19
  require_relative 'quick/opts'
20
- require_relative 'quick/linter'
21
- require_relative 'quick/outline_reader'
20
+
21
+ # require_relative 'quick/linter'
22
+ # require_relative 'quick/outline_reader'
23
+
22
24
 
23
25
  require_relative 'quick/match_parser'
24
26
 
@@ -26,6 +28,39 @@ require_relative 'quick/quick_league_outline_reader'
26
28
  require_relative 'quick/quick_match_reader'
27
29
 
28
30
 
31
+
32
+
33
+ ###
34
+ # csv (tabular dataset) support / machinery
35
+ require_relative 'quick/csv/match_status_parser'
36
+ require_relative 'quick/csv/goal'
37
+ require_relative 'quick/csv/goal_parser_csv'
38
+ require_relative 'quick/csv/match_parser_csv'
39
+
40
+
41
+ ### add convenience shortcut helpers
42
module Sports
  class Match
    class << self
      ## read matches from a (csv) datafile;
      ##   shortcut for SportDb::CsvMatchParser.read
      def read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
        opts = { headers:    headers,
                 filters:    filters,
                 converters: converters,
                 sep:        sep }
        SportDb::CsvMatchParser.read( path, **opts )
      end

      ## parse matches from (csv) text;
      ##   shortcut for SportDb::CsvMatchParser.parse
      def parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
        opts = { headers:    headers,
                 filters:    filters,
                 converters: converters,
                 sep:        sep }
        SportDb::CsvMatchParser.parse( txt, **opts )
      end
    end
  end # class Match
end # module Sports
+
62
+
63
+
29
64
  puts SportDb::Module::Quick.banner # say hello
30
65
 
31
66
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-quick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-27 00:00:00.000000000 Z
11
+ date: 2024-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-parser
@@ -86,7 +86,7 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '4.1'
89
- description: sportdb-quick - football.txt (quick) match parsers and more
89
+ description: sportdb-quick - football.txt (quick) match readers and more
90
90
  email: gerald.bauer@gmail.com
91
91
  executables:
92
92
  - fbt
@@ -102,10 +102,12 @@ files:
102
102
  - Rakefile
103
103
  - bin/fbt
104
104
  - lib/sportdb/quick.rb
105
- - lib/sportdb/quick/linter.rb
105
+ - lib/sportdb/quick/csv/goal.rb
106
+ - lib/sportdb/quick/csv/goal_parser_csv.rb
107
+ - lib/sportdb/quick/csv/match_parser_csv.rb
108
+ - lib/sportdb/quick/csv/match_status_parser.rb
106
109
  - lib/sportdb/quick/match_parser.rb
107
110
  - lib/sportdb/quick/opts.rb
108
- - lib/sportdb/quick/outline_reader.rb
109
111
  - lib/sportdb/quick/quick_league_outline_reader.rb
110
112
  - lib/sportdb/quick/quick_match_reader.rb
111
113
  - lib/sportdb/quick/version.rb
@@ -133,5 +135,5 @@ requirements: []
133
135
  rubygems_version: 3.4.10
134
136
  signing_key:
135
137
  specification_version: 4
136
- summary: sportdb-quick - football.txt (quick) match parsers and more
138
+ summary: sportdb-quick - football.txt (quick) match readers and more
137
139
  test_files: []
@@ -1,149 +0,0 @@
1
-
2
- module SportDb
3
- module Quick
4
-
5
- ###
6
- ## note - Linter for now nested inside Parser - keep? why? why not?
7
- class Linter
8
-
9
- def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
13
-
14
-
15
- attr_reader :errors
16
-
17
- def initialize
18
- @errors = []
19
- @parser = Parser.new ## use own parser instance (not shared) - why? why not?
20
- end
21
-
22
-
23
- def errors?() @errors.size > 0; end
24
-
25
-
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
- #########
52
- ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
54
- def read( path, parse: false )
55
- ## note: every (new) read call - resets errors list to empty
56
- @errors = []
57
-
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- orphans = 0 ## track paragraphs's with no heading
63
-
64
- attrib_found = false
65
-
66
-
67
- nodes.each do |node|
68
- type = node[0]
69
-
70
- if type == :h1
71
- h1 = node[1] ## get heading text
72
- puts
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :p
75
-
76
- if h1.nil?
77
- orphans += 1 ## only warn once
78
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
- next
80
- end
81
-
82
- lines = node[1]
83
-
84
- tree = []
85
- lines.each_with_index do |line,i|
86
-
87
- if debug?
88
- puts
89
- puts "line >#{line}<"
90
- end
91
-
92
-
93
- ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
95
- ATTRIB_RE.match?( line )
96
- ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
98
- ## Group B: etc.
99
- ## todo/fix - change Group A: to Group A etc.
100
- ## Group B: to Group B
101
- attrib_found = true
102
- ## logger.debug "skipping key/value line - >#{line}<"
103
- next
104
- end
105
-
106
- if attrib_found
107
- ## check if line ends with dot
108
- ## if not slurp up lines to the next do!!!
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
111
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
- next
113
- end
114
-
115
- t, error_messages = if parse
116
- @parser.parse_with_errors( line )
117
- else
118
- @parser.tokenize_with_errors( line )
119
- end
120
-
121
-
122
- if error_messages.size > 0
123
- ## add to "global" error list
124
- ## make a triplet tuple (file / msg / line text)
125
- error_messages.each do |msg|
126
- @errors << [ path,
127
- msg,
128
- line
129
- ]
130
- end
131
- end
132
-
133
- pp t if debug?
134
-
135
- tree << t
136
- end
137
-
138
- ## pp tree
139
- else
140
- pp node
141
- raise ArgumentError, "unsupported (node) type >#{type}<"
142
- end
143
- end # each node
144
- end # read
145
- end # class Linter
146
-
147
-
148
- end # module Quick
149
- end # module SportDb
@@ -1,97 +0,0 @@
1
-
2
-
3
- module SportDb
4
-
5
- class OutlineReader
6
-
7
- def self.debug=(value) @@debug = value; end
8
- def self.debug?() @@debug ||= false; end
9
- def debug?() self.class.debug?; end
10
-
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) {|f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
- def initialize( txt )
23
- @txt = txt
24
- end
25
-
26
- ## note: skip "decorative" only heading e.g. ========
27
- ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
28
- HEADING_BLANK_RE = %r{\A
29
- ={1,}
30
- \z}x
31
-
32
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- HEADING_RE = %r{\A
34
- (?<marker>={1,}) ## 1. leading ======
35
- [ ]*
36
- (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
37
- [ ]*
38
- =* ## 3. (optional) trailing ====
39
- \z}x
40
-
41
- def parse
42
- outline=[] ## outline structure
43
- start_para = true ## start new para(graph) on new text line?
44
-
45
- @txt.each_line do |line|
46
- line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
47
-
48
- if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
49
- start_para = true
50
- next
51
- end
52
-
53
- break if line == '__END__'
54
-
55
- next if line.start_with?( '#' ) ## skip comments too
56
- ## strip inline (until end-of-line) comments too
57
- ## e.g Eupen | KAS Eupen ## [de]
58
- ## => Eupen | KAS Eupen
59
- ## e.g bq Bonaire, BOE # CONCACAF
60
- ## => bq Bonaire, BOE
61
- line = line.sub( /#.*/, '' ).strip
62
- pp line if debug?
63
-
64
- ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
65
- next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
66
-
67
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
68
- if m=HEADING_RE.match( line )
69
- start_para = true
70
-
71
- heading_marker = m[:marker]
72
- heading_level = heading_marker.length ## count number of = for heading level
73
- heading = m[:text].strip
74
-
75
- puts "heading #{heading_level} >#{heading}<" if debug?
76
- outline << [:"h#{heading_level}", heading]
77
- else ## assume it's a (plain/regular) text line
78
- if start_para
79
- outline << [:p, [line]]
80
- start_para = false
81
- else
82
- node = outline[-1] ## get last entry
83
- if node[0] == :p ## assert it's a p(aragraph) node!!!
84
- node[1] << line ## add line to p(aragraph)
85
- else
86
- puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
87
- pp node
88
- exit 1
89
- end
90
- end
91
- end
92
- end
93
- outline
94
- end # method read
95
- end # class OutlineReader
96
-
97
- end # module SportDb