sportdb-quick 0.0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2ce8e81bd6aece43872ba2718123dd2b34d6f52807ab8260a2e91f01a8c28ed
4
- data.tar.gz: c91175dabbac6e590198df83151ee36d9a9daa853b77f0eedfa229f9db55edd0
3
+ metadata.gz: 5ee93b658f2f6fce94a131931ea37908a2edbfa19a182ec20072447b57ffc847
4
+ data.tar.gz: f5fa5ab664d71ea9a2ab8c62e1bd30d42fa678e0b2a7564124623dc5d7cd2963
5
5
  SHA512:
6
- metadata.gz: 53f5ca14405552fe662d35e6b1d1672afce956f27cc40801265a9f9d44a4d622b320a00a5ba935a04bae0453872ecb0da27b21927b184a87ed2755973eef118b
7
- data.tar.gz: a688b66358bf16ec13ccbb90b5e29eaf483ca3e69f64ca4816f7210038383ea002660fa62a8ac971e9764903490757c9a84864bf57075794d8e0a337970a55b4
6
+ metadata.gz: b0df0b970600ee24a88d09ddffec1caa8ef94cfa75561569638da954357cec2d97ffc7ea566f03e42ec814a5998c99da79f861c54a863a439f1778a3f0426ab0
7
+ data.tar.gz: 01e1ad205c4d271a68136784a17b8f1b7987bdfa5e9b4ba5f395b42f6b488a53747525ecd7bea1e2c188012219529d7611e7d35f49ec4e5e7d499cc791e4e80a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 0.1.1
2
+
1
3
  ### 0.0.1 / 2024-08-27
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -4,10 +4,12 @@ README.md
4
4
  Rakefile
5
5
  bin/fbt
6
6
  lib/sportdb/quick.rb
7
- lib/sportdb/quick/linter.rb
7
+ lib/sportdb/quick/csv/goal.rb
8
+ lib/sportdb/quick/csv/goal_parser_csv.rb
9
+ lib/sportdb/quick/csv/match_parser_csv.rb
10
+ lib/sportdb/quick/csv/match_status_parser.rb
8
11
  lib/sportdb/quick/match_parser.rb
9
12
  lib/sportdb/quick/opts.rb
10
- lib/sportdb/quick/outline_reader.rb
11
13
  lib/sportdb/quick/quick_league_outline_reader.rb
12
14
  lib/sportdb/quick/quick_match_reader.rb
13
15
  lib/sportdb/quick/version.rb
data/README.md CHANGED
@@ -1,5 +1,24 @@
1
- # sportdb-quick - football.txt (quick) match parsers and more
1
+ # sportdb-quick - football.txt (quick) match readers and more
2
2
 
3
3
 
4
4
 
5
5
 
6
+ ## Usage
7
+
8
+
9
+ ``` ruby
10
+ require 'sportdb/quick'
11
+
12
+
13
+ # path = "./euro/2024--germany/euro.txt"
14
+ path = "./deutschland/2024-25/1-bundesliga.txt"
15
+
16
+ matches = SportDb::QuickMatchReader.read( path )
17
+ pp matches
18
+
19
+ # try json for matches
20
+ data = matches.map {|match| match.as_json }
21
+ pp data
22
+ ```
23
+
24
+
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ Hoe.spec 'sportdb-quick' do
6
6
 
7
7
  self.version = SportDb::Module::Quick::VERSION
8
8
 
9
- self.summary = "sportdb-quick - football.txt (quick) match parsers and more"
9
+ self.summary = "sportdb-quick - football.txt (quick) match readers and more"
10
10
  self.description = summary
11
11
 
12
12
  self.urls = { home: 'https://github.com/sportdb/sport.db' }
data/bin/fbt CHANGED
@@ -27,7 +27,7 @@ require 'optparse'
27
27
  args = ARGV
28
28
  opts = { debug: false,
29
29
  metal: false,
30
- quick: false }
30
+ quick: true }
31
31
 
32
32
  parser = OptionParser.new do |parser|
33
33
  parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
@@ -39,19 +39,19 @@ require 'optparse'
39
39
 
40
40
  parser.on( "--verbose", "--debug",
41
41
  "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
42
- opts[:debug] = debug
42
+ opts[:debug] = true
43
43
  end
44
44
 
45
45
  parser.on( "--metal",
46
46
  "turn off typed parse tree; show to the metal tokens"+
47
47
  " (default: #{opts[:metal]})" ) do |metal|
48
- opts[:metal] = metal
48
+ opts[:metal] = true
49
49
  end
50
50
 
51
51
  parser.on( "--quick",
52
52
  "use quick match reader; output matches in json"+
53
53
  " (default: #{opts[:quick]})" ) do |quick|
54
- opts[:quick] = quick
54
+ opts[:quick] = true
55
55
  end
56
56
  end
57
57
  parser.parse!( args )
@@ -92,9 +92,9 @@ if opts[:quick]
92
92
  puts " #{data.size} match(es)"
93
93
  end
94
94
  else
95
- SportDb::Quick::Linter.debug = true if opts[:debug]
95
+ SportDb::Parser::Linter.debug = true if opts[:debug]
96
96
 
97
- linter = SportDb::Quick::Linter.new
97
+ linter = SportDb::Parser::Linter.new
98
98
 
99
99
  errors = []
100
100
 
@@ -0,0 +1,192 @@
1
+
2
+ module Sports
3
+
4
+ ## "free-standing" goal event - for import/export in separate event / goal datafiles
5
+ ## returned by CsvGoalParser and others
6
+ class GoalEvent
7
+
8
+ def self.build( row ) ## rename to parse or such - why? why not?
9
+
10
+ ## split match_id
11
+ team_str, more_str = row['Match'].split( '|' )
12
+ team1_str, team2_str = team_str.split( ' - ' )
13
+
14
+ more_str = more_str.strip
15
+ team1_str = team1_str.strip
16
+ team2_str = team2_str.strip
17
+
18
+ # check if more_str is a date otherwise assume round
19
+ date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i ## Apr 4
20
+ '%b %d'
21
+ elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i ## Apr 4 2019
22
+ '%b %d %Y'
23
+ else
24
+ nil
25
+ end
26
+
27
+ if date_fmt
28
+ date = Date.strptime( more_str, date_fmt )
29
+ round = nil
30
+ else
31
+ date = nil
32
+ round = more_str
33
+ end
34
+
35
+
36
+ values = row['Score'].split('-')
37
+ values = values.map { |value| value.strip }
38
+ score1 = values[0].to_i
39
+ score2 = values[1].to_i
40
+
41
+ minute = nil
42
+ offset = nil
43
+ if m=%r{([0-9]+)
44
+ (?:[ ]+
45
+ \+([0-9]+)
46
+ )?
47
+ ['.]
48
+ $}x.match( row['Minute'])
49
+ minute = m[1].to_i
50
+ offset = m[2] ? m[2].to_i : nil
51
+ else
52
+ puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
53
+ exit 1
54
+ end
55
+
56
+ attributes = {
57
+ team1: team1_str,
58
+ team2: team2_str,
59
+ date: date,
60
+ round: round,
61
+ score1: score1,
62
+ score2: score2,
63
+ minute: minute,
64
+ offset: offset,
65
+ player: row['Player'],
66
+ owngoal: ['(og)', '(o.g.)'].include?( row['Extra']),
67
+ penalty: ['(pen)', '(pen.)'].include?( row['Extra']),
68
+ notes: (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes']
69
+ }
70
+
71
+ new( **attributes )
72
+ end
73
+
74
+
75
+ ## match id
76
+ attr_reader :team1,
77
+ :team2,
78
+ :round, ## optional
79
+ :date ## optional
80
+
81
+ ## main attributes
82
+ attr_reader :score1,
83
+ :score2,
84
+ :player,
85
+ :minute,
86
+ :offset,
87
+ :owngoal,
88
+ :penalty,
89
+ :notes
90
+
91
+
92
+ ## todo/check: or just use match.hash or such if match mapping known - why? why not?
93
+ def match_id
94
+ if round
95
+ "#{@team1} - #{@team2} | #{@round}"
96
+ else
97
+ "#{@team1} - #{@team2} | #{@date}"
98
+ end
99
+ end
100
+
101
+
102
+ def owngoal?() @owngoal==true; end
103
+ def penalty?() @penalty==true; end
104
+
105
+ def initialize( team1:,
106
+ team2:,
107
+ round: nil,
108
+ date: nil,
109
+ score1:,
110
+ score2:,
111
+ player:,
112
+ minute:,
113
+ offset: nil,
114
+ owngoal: false,
115
+ penalty: false,
116
+ notes: nil
117
+ )
118
+ @team1 = team1
119
+ @team2 = team2
120
+ @round = round
121
+ @date = date
122
+
123
+ @score1 = score1
124
+ @score2 = score2
125
+ @player = player
126
+ @minute = minute
127
+ @offset = offset
128
+ @owngoal = owngoal
129
+ @penalty = penalty
130
+ @notes = notes
131
+ end
132
+
133
+
134
+ ## note: lets you use normalize teams or such acts like a Match struct
135
+ def update( **kwargs )
136
+ ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
137
+ @team1 = kwargs[:team1] if kwargs.has_key? :team1
138
+ @team2 = kwargs[:team2] if kwargs.has_key? :team2
139
+ end
140
+ end # class GoalEvent
141
+
142
+
143
+ ### extend "basic" goal struct with goal event build
144
+ class Goal ### nested (non-freestanding) inside match (match is parent)
145
+
146
+ def self.build( events ) ## check/todo - rename to build_from_event/row or such - why? why not?
147
+ ## build an array of goal structs from (csv) recs
148
+ recs = []
149
+
150
+ last_score1 = 0
151
+ last_score2 = 0
152
+
153
+ events.each do |event|
154
+
155
+ if last_score1+1 == event.score1 && last_score2 == event.score2
156
+ team = 1
157
+ elsif last_score2+1 == event.score2 && last_score1 == event.score1
158
+ team = 2
159
+ else
160
+ puts "!! ERROR - unexpected score advance (one goal at a time expected):"
161
+ puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
162
+ exit 1
163
+ end
164
+
165
+ last_score1 = event.score1
166
+ last_score2 = event.score2
167
+
168
+
169
+ attributes = {
170
+ score1: event.score1,
171
+ score2: event.score2,
172
+ team: team,
173
+ minute: event.minute,
174
+ offset: event.offset,
175
+ player: event.player,
176
+ owngoal: event.owngoal,
177
+ penalty: event.penalty,
178
+ notes: event.notes
179
+ }
180
+
181
+ recs << new( **attributes )
182
+ end
183
+
184
+ recs
185
+ end
186
+ end # class Goal
187
+
188
+
189
+ end # module Sports
190
+
191
+
192
+
@@ -0,0 +1,28 @@
1
+
2
+ module SportDb
3
+ class CsvGoalParser
4
+
5
+
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
+ parse( txt )
9
+ end
10
+
11
+ def self.parse( txt )
12
+ new( txt ).parse
13
+ end
14
+
15
+
16
+ def initialize( txt )
17
+ @txt = txt
18
+ end
19
+
20
+ def parse
21
+ rows = parse_csv( @txt )
22
+ recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
+ ## pp recs[0]
24
+ recs
25
+ end
26
+
27
+ end # class CsvGoalParser
28
+ end # module SportDb
@@ -0,0 +1,490 @@
1
+
2
+ module SportDb
3
+ class CsvMatchParser
4
+
5
+ #############
6
+ # helpers
7
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
8
+
9
+ ## check if headers incl. season if yes,has priority over col mapping
10
+ ## e.g. no need to specify twice (if using headers)
11
+ col = headers[:season] if headers && headers[:season]
12
+
13
+ seasons = Hash.new( 0 ) ## default value is 0
14
+
15
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
16
+ ## use read_csv with block to switch to foreach!!!!
17
+ rows = read_csv( path, sep: sep )
18
+
19
+ rows.each_with_index do |row,i|
20
+ puts "[#{i}] " + row.inspect if i < 2
21
+
22
+ season = row[ col ] ## column name defaults to 'Season'
23
+ seasons[ season ] += 1
24
+ end
25
+
26
+ pp seasons
27
+
28
+ ## note: only return season keys/names (not hash with usage counter)
29
+ seasons.keys
30
+ end
31
+
32
+
33
+ ##########
34
+ # main machinery
35
+
36
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
37
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
38
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
39
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
40
+ ## CsvHash.parse( text, sep: sep )
41
+ ## end
42
+
43
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
44
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
45
+ parse( txt, headers: headers,
46
+ filters: filters,
47
+ converters: converters,
48
+ sep: sep )
49
+ end
50
+
51
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
52
+ new( txt ).parse( headers: headers,
53
+ filters: filters,
54
+ converters: converters,
55
+ sep: sep )
56
+ end
57
+
58
+
59
+ def initialize( txt )
60
+ @txt = txt
61
+ end
62
+
63
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
64
+
65
+ headers_mapping = {}
66
+
67
+ rows = parse_csv( @txt, sep: sep )
68
+
69
+ return [] if rows.empty? ## no rows / empty?
70
+
71
+
72
+ ## fix/todo: use logger!!!!
73
+ ## pp csv
74
+
75
+ if headers ## use user supplied headers if present
76
+ headers_mapping = headers_mapping.merge( headers )
77
+ else
78
+
79
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
80
+ headers = rows[0].keys
81
+ pp headers
82
+
83
+ # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
84
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
85
+
86
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
87
+ ## assume our own football.csv format, see github.com/footballcsv
88
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
89
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
90
+ headers_mapping[:date] = find_header( headers, ['Date'] )
91
+ headers_mapping[:time] = find_header( headers, ['Time'] )
92
+
93
+ ## check for all-in-one full time (ft) and half time (ht) scores?
94
+ headers_mapping[:score] = find_header( headers, ['FT'] )
95
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
96
+
97
+ headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
98
+
99
+ ## optional headers - note: find_header returns nil if header NOT found
100
+ header_stage = find_header( headers, ['Stage'] )
101
+ headers_mapping[:stage] = header_stage if header_stage
102
+
103
+ header_group = find_header( headers, ['Group'] )
104
+ headers_mapping[:group] = header_group if header_group
105
+
106
+
107
+ header_et = find_header( headers, ['ET', 'AET'] ) ## (after) extra time
108
+ headers_mapping[:score_et] = header_et if header_et
109
+
110
+ header_p = find_header( headers, ['P', 'PEN'] ) ## penalties
111
+ headers_mapping[:score_p] = header_p if header_p
112
+
113
+ header_notes = find_header( headers, ['Notes', 'Comments'] )
114
+ headers_mapping[:notes] = header_notes if header_notes
115
+
116
+
117
+ header_league = find_header( headers, ['League'] )
118
+ headers_mapping[:league] = header_league if header_league
119
+ else
120
+ ## else try footballdata.uk and others
121
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
122
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
123
+ headers_mapping[:date] = find_header( headers, ['Date'] )
124
+ headers_mapping[:time] = find_header( headers, ['Time'] )
125
+
126
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
127
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
128
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
129
+
130
+ ## check for half time scores ?
131
+ ## note: HT = Half Time
132
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
133
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
134
+ end
135
+ end
136
+
137
+ pp headers_mapping
138
+
139
+ ### todo/fix: check headers - how?
140
+ ## if present HomeTeam or HT required etc.
141
+ ## issue error/warn is not present
142
+ ##
143
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
144
+ ## exit 1
145
+ ##
146
+
147
+ matches = []
148
+
149
+ rows.each_with_index do |row,i|
150
+
151
+ ## fix/todo: use logger!!!!
152
+ ## puts "[#{i}] " + row.inspect if i < 2
153
+
154
+
155
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
156
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
157
+ skip = false
158
+ filters.each do |header, value|
159
+ if row[ header ] != value ## e.g. row['Season']
160
+ skip = true
161
+ break
162
+ end
163
+ end
164
+ next if skip ## if header values NOT matching
165
+ end
166
+
167
+
168
+ ## note:
169
+ ## add converters after filters for now (why not before filters?)
170
+ if converters ## any converters defined?
171
+ ## convert single proc shortcut to array with single converter
172
+ converters = [converters] if converters.is_a?( Proc )
173
+
174
+ ## assumes array of procs
175
+ converters.each do |converter|
176
+ row = converter.call( row )
177
+ end
178
+ end
179
+
180
+
181
+
182
+ team1 = row[ headers_mapping[ :team1 ]]
183
+ team2 = row[ headers_mapping[ :team2 ]]
184
+
185
+
186
+ ## check if data present - if not skip (might be empty row)
187
+ ## note: (old classic) csv reader returns nil for empty fields
188
+ ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
189
+ if (team1.nil? || team1.empty?) &&
190
+ (team2.nil? || team2.empty?)
191
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
192
+ pp row
193
+ next
194
+ end
195
+
196
+ ## remove possible match played counters e.g. (4) (11) etc.
197
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
198
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
199
+
200
+
201
+
202
+ col = row[ headers_mapping[ :time ]]
203
+
204
+ if col.nil?
205
+ time = nil
206
+ else
207
+ col = col.strip # make sure not leading or trailing spaces left over
208
+
209
+ if col.empty?
210
+ col =~ /^-{1,}$/ || # e.g. - or ---
211
+ col =~ /^\?{1,}$/ # e.g. ? or ???
212
+ ## note: allow missing / unknown time for match
213
+ time = nil
214
+ else
215
+ if col =~ /^\d{1,2}:\d{2}$/
216
+ time_fmt = '%H:%M' # e.g. 17:00 or 3:00
217
+ elsif col =~ /^\d{1,2}.\d{2}$/
218
+ time_fmt = '%H.%M' # e.g. 17.00 or 3.00
219
+ else
220
+ puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
221
+ ## todo/fix: add to errors/warns list - why? why not?
222
+ exit 1
223
+ end
224
+
225
+ ## todo/check: use date object (keep string?) - why? why not?
226
+ ## todo/fix: yes!! use date object!!!! do NOT use string
227
+ time = Time.strptime( col, time_fmt ).strftime( '%H:%M' )
228
+ end
229
+ end
230
+
231
+
232
+
233
+ col = row[ headers_mapping[ :date ]]
234
+ col = col.strip # make sure not leading or trailing spaces left over
235
+
236
+ if col.empty? ||
237
+ col =~ /^-{1,}$/ || # e.g. - or ---
238
+ col =~ /^\?{1,}$/ # e.g. ? or ???
239
+ ## note: allow missing / unknown date for match
240
+ date = nil
241
+ else
242
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
243
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
244
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
245
+ col = col.strip # make sure not leading or trailing spaces left over
246
+
247
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
248
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
249
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
250
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
251
+ elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/ ## "standard" / default date format
252
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
253
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
254
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
255
+ elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
256
+ date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
257
+ else
258
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
259
+ ## todo/fix: add to errors/warns list - why? why not?
260
+ exit 1
261
+ end
262
+
263
+ ## todo/check: use date object (keep string?) - why? why not?
264
+ ## todo/fix: yes!! use date object!!!! do NOT use string
265
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
266
+ end
267
+
268
+
269
+ ##
270
+ ## todo/fix: round might not always be just a simple integer number!!!
271
+ ## might be text such as Final | Leg 1 or such!!!!
272
+ round = nil
273
+ ## check for (optional) round / matchday
274
+ if headers_mapping[ :round ]
275
+ col = row[ headers_mapping[ :round ]]
276
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
277
+ ## (old attic) was: round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
278
+
279
+ ## note: make round always a string for now!!!! e.g. "1", "2" too!!
280
+ round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
281
+ ## note: allow missing round for match / defaults to nil
282
+ nil
283
+ else
284
+ col
285
+ end
286
+ end
287
+
288
+
289
+ score1 = nil
290
+ score2 = nil
291
+ score1i = nil
292
+ score2i = nil
293
+
294
+ ## check for full time scores ?
295
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
296
+ ft = [ row[ headers_mapping[ :score1 ]],
297
+ row[ headers_mapping[ :score2 ]] ]
298
+
299
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
300
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
301
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
302
+ end
303
+
304
+ ## check for half time scores ?
305
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
306
+ ht = [ row[ headers_mapping[ :score1i ]],
307
+ row[ headers_mapping[ :score2i ]] ]
308
+
309
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
310
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
311
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
312
+ end
313
+
314
+
315
+ ## check for all-in-one full time scores?
316
+ if headers_mapping[ :score ]
317
+ col = row[ headers_mapping[ :score ]]
318
+ score = parse_score( col )
319
+ if score
320
+ score1 = score[0]
321
+ score2 = score[1]
322
+ else
323
+ puts "!! ERROR - invalid score (ft) format >#{col}<:"
324
+ pp row
325
+ exit 1
326
+ end
327
+ end
328
+
329
+ if headers_mapping[ :scorei ]
330
+ col = row[ headers_mapping[ :scorei ]]
331
+ score = parse_score( col )
332
+ if score
333
+ score1i = score[0]
334
+ score2i = score[1]
335
+ else
336
+ puts "!! ERROR - invalid score (ht) format >#{col}<:"
337
+ pp row
338
+ exit 1
339
+ end
340
+ end
341
+
342
+ ####
343
+ ## try optional score - extra time (et) and penalties (p/pen)
344
+ score1et = nil
345
+ score2et = nil
346
+ score1p = nil
347
+ score2p = nil
348
+
349
+ if headers_mapping[ :score_et ]
350
+ col = row[ headers_mapping[ :score_et ]]
351
+ score = parse_score( col )
352
+ if score
353
+ score1et = score[0]
354
+ score2et = score[1]
355
+ else
356
+ puts "!! ERROR - invalid score (et) format >#{col}<:"
357
+ pp row
358
+ exit 1
359
+ end
360
+ end
361
+
362
+ if headers_mapping[ :score_p ]
363
+ col = row[ headers_mapping[ :score_p ]]
364
+ score = parse_score( col )
365
+ if score
366
+ score1p = score[0]
367
+ score2p = score[1]
368
+ else
369
+ puts "!! ERROR - invalid score (p) format >#{col}<:"
370
+ pp row
371
+ exit 1
372
+ end
373
+ end
374
+
375
+
376
+ ## try some optional headings / columns
377
+ stage = nil
378
+ if headers_mapping[ :stage ]
379
+ col = row[ headers_mapping[ :stage ]]
380
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
381
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
382
+ ## note: allow missing stage for match / defaults to "regular"
383
+ nil
384
+ elsif col == '?'
385
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
386
+ '?' ## todo/check: use unknown and NOT ? - why? why not?
387
+ else
388
+ col
389
+ end
390
+ end
391
+
392
+ group = nil
393
+ if headers_mapping[ :group ]
394
+ col = row[ headers_mapping[ :group ]]
395
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
396
+ group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
397
+ ## note: allow missing group for match / defaults to nil
398
+ nil
399
+ else
400
+ col
401
+ end
402
+ end
403
+
404
+ status = nil ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
405
+ if headers_mapping[ :notes ]
406
+ col = row[ headers_mapping[ :notes ]]
407
+ ## check for optional (match) status in notes / comments
408
+ status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
409
+ nil
410
+ else
411
+ StatusParser.parse( col ) # note: returns nil if no (match) status found
412
+ end
413
+ end
414
+
415
+
416
+ league = nil
417
+ league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
418
+
419
+
420
+ ## puts 'match attributes:'
421
+ attributes = {
422
+ date: date,
423
+ time: time,
424
+ team1: team1, team2: team2,
425
+ score1: score1, score2: score2,
426
+ score1i: score1i, score2i: score2i,
427
+ score1et: score1et, score2et: score2et,
428
+ score1p: score1p, score2p: score2p,
429
+ round: round,
430
+ stage: stage,
431
+ group: group,
432
+ status: status,
433
+ league: league
434
+ }
435
+ ## pp attributes
436
+
437
+ match = Sports::Match.new( **attributes )
438
+ matches << match
439
+ end
440
+
441
+ ## pp matches
442
+ matches
443
+ end
444
+
445
+
446
+ private
447
+
448
+ def find_header( headers, candidates )
449
+ ## todo/fix: use find (first match) from Enumerable or similar ?! - why? more idiomatic code?
450
+
451
+ candidates.each do |candidate|
452
+ return candidate if headers.include?( candidate ) ## bingo!!!
453
+ end
454
+ nil ## no matching header found!!!
455
+ end
456
+
457
+ ########
458
+ # more helpers
459
+ #
460
+
461
+ def parse_score( str )
462
+ if str.nil? ## todo/check: remove nil case - possible? - why? why not?
463
+ [nil,nil]
464
+ else
465
+ ## remove (optional single) note/footnote/endnote markers
466
+ ## e.g. (*) or (a), (b),
467
+ ## or [*], [A], [1], etc.
468
+ ## - allow (1) or maybe (*1) in the future - why? why not?
469
+ str = str.sub( /\( [a-z*] \)
470
+ |
471
+ \[ [1-9a-z*] \]
472
+ /ix, '' ).strip
473
+
474
+ if str.empty? || str == '?' || str == '-' || str == 'n/a'
475
+ [nil,nil]
476
+ ### todo/check: use regex with named capture groups here - why? why not?
477
+ elsif str =~ /^\d{1,2}[:-]\d{1,2}$/ ## sanity check scores format
478
+ score = str.split( /[:-]/ )
479
+ [score[0].to_i, score[1].to_i]
480
+ else
481
+ nil ## note: returns nil if invalid / unparseable format!!!
482
+ end
483
+ end
484
+ end # method parse_score
485
+
486
+
487
+
488
+ end # class CsvMatchParser
489
+ end # module SportDb
490
+
@@ -0,0 +1,90 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
+
13
+ ### todo/fix: move Status inside Match struct - why? why not?
14
+
15
+ class Status
16
+ # note: use a class as an "enum"-like namespace for now - why? why not?
17
+ # move class into Match e.g. Match::Status - why? why not?
18
+ CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
+ AWARDED = 'AWARDED'
20
+ POSTPONED = 'POSTPONED'
21
+ ABANDONED = 'ABANDONED'
22
+ REPLAY = 'REPLAY'
23
+ end # class Status
24
+
25
+
26
+
27
+ class StatusParser
28
+
29
+ def self.parse( str )
30
+ ## note: returns nil if no match found
31
+ ## note: english usage - cancelled (in UK), canceled (in US)
32
+ if str =~ /^(cancelled|
33
+ canceled|
34
+ can\.
35
+ )/xi
36
+ Status::CANCELLED
37
+ elsif str =~ /^(awarded|
38
+ awd\.
39
+ )/xi
40
+ Status::AWARDED
41
+ elsif str =~ /^(postponed
42
+ )/xi
43
+ Status::POSTPONED
44
+ elsif str =~ /^(abandoned|
45
+ abd\.
46
+ )/xi
47
+ Status::ABANDONED
48
+ elsif str =~ /^(replay
49
+ )/xi
50
+ Status::REPLAY
51
+ else
52
+ # no match
53
+ nil
54
+ end
55
+ end
56
+
57
+
58
+ RUN_RE = /\[
59
+ (?<text>[^\]]+)
60
+ \]
61
+ /x
62
+ def self.find!( line )
63
+ ## for now check all "protected" text run blocks e.g. []
64
+ ## puts "line: >#{line}<"
65
+
66
+ status = nil
67
+
68
+ str = line
69
+ while m = str.match( RUN_RE )
70
+ str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
71
+
72
+ ## check for status match
73
+ match_str = m[0] ## keep a copy of the match string (for later sub)
74
+ text = m[:text].strip
75
+ ## puts " text: >#{text}<"
76
+
77
+ status = parse( text )
78
+
79
+ if status
80
+ line.sub!( match_str, "[STATUS.#{status}]" )
81
+ break
82
+ end
83
+ end # while match
84
+
85
+ status
86
+ end # method find!
87
+ end # class StatusParser
88
+
89
+ end # module SportDb
90
+
@@ -41,6 +41,15 @@ class QuickLeagueOutlineReader
41
41
  pp heading
42
42
  exit 1
43
43
  end
44
+ elsif node[0] == :h2
45
+ ## todo/check - make sure parsed h1 first
46
+ heading = node[1]
47
+ ## reuse league, season from h1
48
+ secs << { league: secs[-1][:league],
49
+ season: secs[-1][:season],
50
+ stage: heading,
51
+ lines: []
52
+ }
44
53
  elsif node[0] == :p ## paragraph with (text) lines
45
54
  lines = node[1]
46
55
  ## note: skip lines if no heading seen
@@ -3,7 +3,7 @@ module SportDb
3
3
  module Module
4
4
  module Quick
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 0
6
+ MINOR = 1
7
7
  PATCH = 1
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
data/lib/sportdb/quick.rb CHANGED
@@ -17,8 +17,10 @@ end
17
17
  ## our own code
18
18
  require_relative 'quick/version'
19
19
  require_relative 'quick/opts'
20
- require_relative 'quick/linter'
21
- require_relative 'quick/outline_reader'
20
+
21
+ # require_relative 'quick/linter'
22
+ # require_relative 'quick/outline_reader'
23
+
22
24
 
23
25
  require_relative 'quick/match_parser'
24
26
 
@@ -26,6 +28,39 @@ require_relative 'quick/quick_league_outline_reader'
26
28
  require_relative 'quick/quick_match_reader'
27
29
 
28
30
 
31
+
32
+
33
+ ###
34
+ # csv (tabular dataset) support / machinery
35
+ require_relative 'quick/csv/match_status_parser'
36
+ require_relative 'quick/csv/goal'
37
+ require_relative 'quick/csv/goal_parser_csv'
38
+ require_relative 'quick/csv/match_parser_csv'
39
+
40
+
41
+ ### add convenience shortcut helpers
42
+ module Sports
43
+ class Match
44
+ def self.read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
45
+ SportDb::CsvMatchParser.read( path,
46
+ headers: headers,
47
+ filters: filters,
48
+ converters: converters,
49
+ sep: sep )
50
+ end
51
+
52
+ def self.parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
53
+ SportDb::CsvMatchParser.parse( txt,
54
+ headers: headers,
55
+ filters: filters,
56
+ converters: converters,
57
+ sep: sep )
58
+ end
59
+ end # class Match
60
+ end # module Sports
61
+
62
+
63
+
29
64
  puts SportDb::Module::Quick.banner # say hello
30
65
 
31
66
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-quick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-27 00:00:00.000000000 Z
11
+ date: 2024-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-parser
@@ -86,7 +86,7 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '4.1'
89
- description: sportdb-quick - football.txt (quick) match parsers and more
89
+ description: sportdb-quick - football.txt (quick) match readers and more
90
90
  email: gerald.bauer@gmail.com
91
91
  executables:
92
92
  - fbt
@@ -102,10 +102,12 @@ files:
102
102
  - Rakefile
103
103
  - bin/fbt
104
104
  - lib/sportdb/quick.rb
105
- - lib/sportdb/quick/linter.rb
105
+ - lib/sportdb/quick/csv/goal.rb
106
+ - lib/sportdb/quick/csv/goal_parser_csv.rb
107
+ - lib/sportdb/quick/csv/match_parser_csv.rb
108
+ - lib/sportdb/quick/csv/match_status_parser.rb
106
109
  - lib/sportdb/quick/match_parser.rb
107
110
  - lib/sportdb/quick/opts.rb
108
- - lib/sportdb/quick/outline_reader.rb
109
111
  - lib/sportdb/quick/quick_league_outline_reader.rb
110
112
  - lib/sportdb/quick/quick_match_reader.rb
111
113
  - lib/sportdb/quick/version.rb
@@ -133,5 +135,5 @@ requirements: []
133
135
  rubygems_version: 3.4.10
134
136
  signing_key:
135
137
  specification_version: 4
136
- summary: sportdb-quick - football.txt (quick) match parsers and more
138
+ summary: sportdb-quick - football.txt (quick) match readers and more
137
139
  test_files: []
@@ -1,149 +0,0 @@
1
-
2
- module SportDb
3
- module Quick
4
-
5
- ###
6
- ## note - Linter for now nested inside Parser - keep? why? why not?
7
- class Linter
8
-
9
- def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
13
-
14
-
15
- attr_reader :errors
16
-
17
- def initialize
18
- @errors = []
19
- @parser = Parser.new ## use own parser instance (not shared) - why? why not?
20
- end
21
-
22
-
23
- def errors?() @errors.size > 0; end
24
-
25
-
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
- #########
52
- ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
54
- def read( path, parse: false )
55
- ## note: every (new) read call - resets errors list to empty
56
- @errors = []
57
-
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- orphans = 0 ## track paragraphs's with no heading
63
-
64
- attrib_found = false
65
-
66
-
67
- nodes.each do |node|
68
- type = node[0]
69
-
70
- if type == :h1
71
- h1 = node[1] ## get heading text
72
- puts
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :p
75
-
76
- if h1.nil?
77
- orphans += 1 ## only warn once
78
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
- next
80
- end
81
-
82
- lines = node[1]
83
-
84
- tree = []
85
- lines.each_with_index do |line,i|
86
-
87
- if debug?
88
- puts
89
- puts "line >#{line}<"
90
- end
91
-
92
-
93
- ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
95
- ATTRIB_RE.match?( line )
96
- ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
98
- ## Group B: etc.
99
- ## todo/fix - change Group A: to Group A etc.
100
- ## Group B: to Group B
101
- attrib_found = true
102
- ## logger.debug "skipping key/value line - >#{line}<"
103
- next
104
- end
105
-
106
- if attrib_found
107
- ## check if line ends with dot
108
- ## if not slurp up lines to the next do!!!
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
111
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
- next
113
- end
114
-
115
- t, error_messages = if parse
116
- @parser.parse_with_errors( line )
117
- else
118
- @parser.tokenize_with_errors( line )
119
- end
120
-
121
-
122
- if error_messages.size > 0
123
- ## add to "global" error list
124
- ## make a triplet tuple (file / msg / line text)
125
- error_messages.each do |msg|
126
- @errors << [ path,
127
- msg,
128
- line
129
- ]
130
- end
131
- end
132
-
133
- pp t if debug?
134
-
135
- tree << t
136
- end
137
-
138
- ## pp tree
139
- else
140
- pp node
141
- raise ArgumentError, "unsupported (node) type >#{type}<"
142
- end
143
- end # each node
144
- end # read
145
- end # class Linter
146
-
147
-
148
- end # module Quick
149
- end # module SportDb
@@ -1,97 +0,0 @@
1
-
2
-
3
- module SportDb
4
-
5
- class OutlineReader
6
-
7
- def self.debug=(value) @@debug = value; end
8
- def self.debug?() @@debug ||= false; end
9
- def debug?() self.class.debug?; end
10
-
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) {|f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
- def initialize( txt )
23
- @txt = txt
24
- end
25
-
26
- ## note: skip "decorative" only heading e.g. ========
27
- ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
28
- HEADING_BLANK_RE = %r{\A
29
- ={1,}
30
- \z}x
31
-
32
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- HEADING_RE = %r{\A
34
- (?<marker>={1,}) ## 1. leading ======
35
- [ ]*
36
- (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
37
- [ ]*
38
- =* ## 3. (optional) trailing ====
39
- \z}x
40
-
41
- def parse
42
- outline=[] ## outline structure
43
- start_para = true ## start new para(graph) on new text line?
44
-
45
- @txt.each_line do |line|
46
- line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
47
-
48
- if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
49
- start_para = true
50
- next
51
- end
52
-
53
- break if line == '__END__'
54
-
55
- next if line.start_with?( '#' ) ## skip comments too
56
- ## strip inline (until end-of-line) comments too
57
- ## e.g Eupen | KAS Eupen ## [de]
58
- ## => Eupen | KAS Eupen
59
- ## e.g bq Bonaire, BOE # CONCACAF
60
- ## => bq Bonaire, BOE
61
- line = line.sub( /#.*/, '' ).strip
62
- pp line if debug?
63
-
64
- ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
65
- next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
66
-
67
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
68
- if m=HEADING_RE.match( line )
69
- start_para = true
70
-
71
- heading_marker = m[:marker]
72
- heading_level = heading_marker.length ## count number of = for heading level
73
- heading = m[:text].strip
74
-
75
- puts "heading #{heading_level} >#{heading}<" if debug?
76
- outline << [:"h#{heading_level}", heading]
77
- else ## assume it's a (plain/regular) text line
78
- if start_para
79
- outline << [:p, [line]]
80
- start_para = false
81
- else
82
- node = outline[-1] ## get last entry
83
- if node[0] == :p ## assert it's a p(aragraph) node!!!
84
- node[1] << line ## add line to p(aragraph)
85
- else
86
- puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
87
- pp node
88
- exit 1
89
- end
90
- end
91
- end
92
- end
93
- outline
94
- end # method read
95
- end # class OutlineReader
96
-
97
- end # module SportDb