sportdb-parser 0.3.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,7 @@ TEXT_RE = %r{
54
54
  )
55
55
 
56
56
  (?:(?: (?:[ ]
57
- (?!vs?\.?[ ]) ## note - exclude (v[ ]/vs[ ]/v.[ ]/vs.[ ])
57
+ (?!vs?[ ]) ## note - exclude (v[ ]/vs[ ])
58
58
  )
59
59
  | # only single spaces allowed inline!!!
60
60
  [-]
@@ -68,24 +68,41 @@ BASICS_RE = %r{
68
68
  (?<vs>
69
69
  (?<=[ ]) # Positive lookbehind for space
70
70
  (?:
71
- vs\.?| ## allow optional dot (eg. vs. v.)
72
- v\.?|
73
- -
74
- ) # not bigger match first e.g. vs than v etc.
71
+ vs|v
72
+ )
73
+ # note - bigger match first e.g. vs before v etc.
74
+ # todo/fix - make vs|v case sensitive!!! only match v/vs - why? why not?
75
75
  (?=[ ]) # positive lookahead for space
76
76
  )
77
77
  |
78
+ (?<spaces> [ ]{2,}) |
79
+ (?<space> [ ])
80
+ |
81
+ (?<sym>[;,@|\[\]-])
82
+ }ix
83
+
84
+
85
+ ## removed from basics
86
+ =begin
78
87
  (?<none>
79
88
  (?<=[ \[]|^) # Positive lookbehind for space or [
80
89
  -
81
90
  (?=[ ]*;) # positive lookahead for space
82
91
  )
83
92
  |
84
- (?<spaces> [ ]{2,}) |
85
- (?<space> [ ])
86
- |
87
- (?<sym>[;,@|\[\]])
88
- }ix
93
+ (?<vs>
94
+ (?<=[ ]) # Positive lookbehind for space
95
+ (?:
96
+ vs\.?| ## allow optional dot (eg. vs. v.)
97
+ v\.?|
98
+ -
99
+ ) # not bigger match first e.g. vs than v etc.
100
+ (?=[ ]) # positive lookahead for space
101
+ )
102
+ |
103
+
104
+ make - into a simple symbol !!!
105
+ =end
89
106
 
90
107
 
91
108
  MINUTE_RE = %r{
@@ -141,8 +158,7 @@ end
141
158
 
142
159
 
143
160
 
144
- def tokenize_with_errors( line, typed: false,
145
- debug: false )
161
+ def tokenize_with_errors( line, debug: false )
146
162
  tokens = []
147
163
  errors = [] ## keep a list of errors - why? why not?
148
164
 
@@ -180,6 +196,10 @@ def tokenize_with_errors( line, typed: false,
180
196
 
181
197
  pp offsets if debug
182
198
 
199
+ ##
200
+ ## note: racc requires pairs e.g. [:TOKEN, VAL]
201
+ ## for VAL use "text" or ["text", { opts }] array
202
+
183
203
  t = if m[:space]
184
204
  ## skip space
185
205
  nil
@@ -187,15 +207,17 @@ def tokenize_with_errors( line, typed: false,
187
207
  ## skip spaces
188
208
  nil
189
209
  elsif m[:text]
190
- [:text, m[:text]] ## keep pos - why? why not?
210
+ [:TEXT, m[:text]] ## keep pos - why? why not?
191
211
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
212
+ ## todo/check - add text (or status)
213
+ # to opts hash {} by default (for value)
192
214
  if m[:status_note] ## includes note? e.g. awarded; originally 2-0
193
- [:status, m[:status], {note:m[:status_note]}]
215
+ [:STATUS, [m[:status], {status: m[:status],
216
+ note: m[:status_note]} ]]
194
217
  else
195
- [:status, m[:status]]
218
+ [:STATUS, [m[:status], {status: m[:status] } ]]
196
219
  end
197
220
  elsif m[:time]
198
- if typed
199
221
  ## unify to iso-format
200
222
  ### 12.40 => 12:40
201
223
  ## 12h40 => 12:40 etc.
@@ -208,15 +230,11 @@ def tokenize_with_errors( line, typed: false,
208
230
  (minute >=0 && minute <= 59)
209
231
  ## note - for debugging keep (pass along) "literal" time
210
232
  ## might use/add support for am/pm later
211
- [:time, m[:time], {h:hour,m:minute}]
233
+ [:TIME, [m[:time], {h:hour,m:minute}]]
212
234
  else
213
235
  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
214
236
  end
215
- else
216
- [:time, m[:time]]
217
- end
218
237
  elsif m[:date]
219
- if typed
220
238
  date = {}
221
239
  =begin
222
240
  ((?<day_name>#{DAY_NAMES})
@@ -237,14 +255,11 @@ def tokenize_with_errors( line, typed: false,
237
255
  date[:d] = m[:day].to_i(10) if m[:day]
238
256
  date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
239
257
  ## note - for debugging keep (pass along) "literal" date
240
- [:date, m[:date], date]
241
- else
242
- [:date, m[:date]]
243
- end
258
+ [:DATE, [m[:date], date]]
244
259
  elsif m[:timezone]
245
- [:timezone, m[:timezone]]
260
+ [:TIMEZONE, m[:timezone]]
246
261
  elsif m[:duration]
247
- if typed
262
+ ## todo/check/fix - if end: works for kwargs!!!!!
248
263
  duration = { start: {}, end: {}}
249
264
  duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
250
265
  duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
@@ -255,19 +270,11 @@ def tokenize_with_errors( line, typed: false,
255
270
  duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
256
271
  duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
257
272
  ## note - for debugging keep (pass along) "literal" duration
258
- [:duration, m[:duration], duration]
259
- else
260
- [:duration, m[:duration]]
261
- end
262
- elsif m[:num]
263
- if typed
273
+ [:DURATION, [m[:duration], duration]]
274
+ elsif m[:num] ## fix - change to ord (for ordinal number!!!)
264
275
  ## note - strip enclosing () and convert to integer
265
- [:num, m[:value].to_i(10)]
266
- else
267
- [:num, m[:num]]
268
- end
276
+ [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
269
277
  elsif m[:score]
270
- if typed
271
278
  score = {}
272
279
  ## check for pen
273
280
  score[:p] = [m[:p1].to_i(10),
@@ -280,42 +287,39 @@ def tokenize_with_errors( line, typed: false,
280
287
  m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
281
288
 
282
289
  ## note - for debugging keep (pass along) "literal" score
283
- [:score, m[:score], score]
284
- else
285
- [:score, m[:score]]
286
- end
290
+ [:SCORE, [m[:score], score]]
287
291
  elsif m[:minute]
288
- if typed
289
292
  minute = {}
290
293
  minute[:m] = m[:value].to_i(10)
291
294
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
292
295
  ## note - for debugging keep (pass along) "literal" minute
293
- [:minute, m[:minute], minute]
294
- else
295
- [:minute, m[:minute]]
296
- end
296
+ [:MINUTE, [m[:minute], minute]]
297
297
  elsif m[:og]
298
- typed ? [:og] : [:og, m[:og]] ## for typed drop - string version/variants
298
+ [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
299
299
  elsif m[:pen]
300
- typed ? [:pen] : [:pen, m[:pen]]
300
+ [:PEN, m[:pen]]
301
301
  elsif m[:vs]
302
- typed ? [:vs] : [:vs, m[:vs]]
303
- elsif m[:none]
304
- typed ? [:none] : [:none, m[:none]]
302
+ [:VS, m[:vs]]
305
303
  elsif m[:sym]
306
304
  sym = m[:sym]
307
305
  ## return symbols "inline" as is - why? why not?
306
+ ## (?<sym>[;,@|\[\]-])
307
+
308
308
  case sym
309
309
  when ',' then [:',']
310
310
  when ';' then [:';']
311
311
  when '@' then [:'@']
312
312
  when '|' then [:'|']
313
+ when '[' then [:'[']
314
+ when ']' then [:']']
315
+ when '-' then [:'-']
313
316
  else
314
317
  nil ## ignore others (not expected - every char in the sym class is handled above)
315
318
  end
316
319
  else
317
320
  ## report error
318
- nil
321
+ puts "!!! TOKENIZE ERROR - no match found"
322
+ nil
319
323
  end
320
324
 
321
325
  tokens << t if t
@@ -342,10 +346,8 @@ end
342
346
 
343
347
 
344
348
  ### convenience helper - ignore errors by default
345
- def tokenize( line, typed: false,
346
- debug: false )
347
- tokens, _ = tokenize_with_errors( line, typed: typed,
348
- debug: debug )
349
+ def tokenize( line, debug: false )
350
+ tokens, _ = tokenize_with_errors( line, debug: debug )
349
351
  tokens
350
352
  end
351
353
 
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 3
7
- PATCH = 9
6
+ MINOR = 5
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -3,9 +3,6 @@ require 'cocos'
3
3
  require 'season/formats' # e.g. Season() support machinery
4
4
 
5
5
 
6
- ## more stdlibs
7
- require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
8
-
9
6
 
10
7
 
11
8
  ####
@@ -35,10 +32,6 @@ require_relative 'parser/parser'
35
32
  require_relative 'parser/outline_reader'
36
33
 
37
34
 
38
- require_relative 'parser/opts'
39
- require_relative 'parser/linter'
40
- require_relative 'parser/fbtok/main'
41
-
42
35
 
43
36
  ###
44
37
  # make parser api (easily) available - why? why not?
@@ -54,5 +47,243 @@ end # module SportDb
54
47
  =end
55
48
 
56
49
 
50
+
51
+ module SportDb
52
+ class Tokenizer
53
+
54
+ attr_reader :tokens
55
+
56
+ def initialize( txt )
57
+ parser = Parser.new
58
+
59
+ tree = []
60
+
61
+ lines = txt.split( "\n" )
62
+ lines.each_with_index do |line,i|
63
+ next if line.strip.empty? || line.strip.start_with?( '#' )
64
+
65
+ puts "line >#{line}<"
66
+ tokens = parser.tokenize( line )
67
+ pp tokens
68
+
69
+ tree << tokens
70
+ end
71
+
72
+
73
+ =begin
74
+ ## quick hack
75
+ ## turn all text tokens followed by minute token
76
+ ## into player tokens!!!
77
+ ##
78
+ ## also auto-convert text tokens into team tokens - why? why not?
79
+ tree.each do |tokens|
80
+ tokens.each_with_index do |t0,idx|
81
+ t1 = tokens[idx+1]
82
+ if t1 && t1[0] == :minute && t0[0] == :text
83
+ t0[0] = :player
84
+ end
85
+ end
86
+ end
87
+ =end
88
+
89
+ =begin
90
+ ## auto-add/insert start tokens for known line patterns
91
+ ## START_GOALS for goals_line
92
+ ## why? why not?
93
+ =end
94
+
95
+ ## flatten
96
+ @tokens = []
97
+ tree.each do |tokens|
98
+ @tokens += tokens
99
+ @tokens << [:NEWLINE, "\n"] ## auto-add newlines
100
+ end
101
+
102
+
103
+ ## convert to racc format
104
+ @tokens = @tokens.map do |tok|
105
+ if tok.size == 1
106
+ [tok[0].to_s, tok[0].to_s]
107
+ elsif tok.size == 2
108
+ #############
109
+ ## pass 1
110
+ ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
111
+ if tok[0] == :TEXT
112
+ text = tok[1]
113
+ tok = if parser.is_group?( text )
114
+ [:GROUP, text]
115
+ elsif parser.is_round?( text ) || parser.is_leg?( text )
116
+ [:ROUND, text]
117
+ else
118
+ tok ## pass through as-is (1:1)
119
+ end
120
+ end
121
+ ## pass 2
122
+ tok
123
+ else
124
+ raise ArgumentError, "tokens of size 1|2 expected; got #{tok.pretty_inspect}"
125
+ end
126
+ end
127
+ end
128
+
129
+
130
+
131
+ def next_token
132
+ @tokens.shift
133
+ end
134
+ end # class Tokenizer
135
+ end # module SportDb
136
+
137
+
138
+
139
+ ####
140
+ # RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
141
+
142
+ class RaccMatchParser
143
+
144
+ GroupDef = Struct.new( :name, :teams ) do
145
+ def pretty_print( printer )
146
+ printer.text( "<GroupDef " )
147
+ printer.text( self.name )
148
+ printer.text( " teams=" + self.teams.pretty_inspect )
149
+ printer.text( ">" )
150
+ end
151
+ end
152
+
153
+
154
+ RoundDef = Struct.new( :name, :date, :duration ) do
155
+ def pretty_print( printer )
156
+ printer.text( "<RoundDef " )
157
+ printer.text( self.name )
158
+ printer.text( " date=" + self.date.pretty_inspect ) if date
159
+ printer.text( " durattion=" + self.duration.pretty_inspect ) if duration
160
+ printer.text( ">" )
161
+ end
162
+ end
163
+
164
+ DateHeader = Struct.new( :date ) do
165
+ def pretty_print( printer )
166
+ printer.text( "<DateHeader " )
167
+ printer.text( "#{self.date.pretty_inspect}>" )
168
+ end
169
+ end
170
+
171
+ GroupHeader = Struct.new( :name ) do
172
+ def pretty_print( printer )
173
+ printer.text( "<GroupHeader " )
174
+ printer.text( "#{self.name}>" )
175
+ end
176
+ end
177
+
178
+ RoundHeader = Struct.new( :names ) do
179
+ def pretty_print( printer )
180
+ printer.text( "<RoundHeader " )
181
+ printer.text( "#{self.names.join(', ')}>" )
182
+ end
183
+ end
184
+
185
+ MatchLine = Struct.new( :ord, :date, :time,
186
+ :team1, :team2, :score,
187
+ :geo ) do ## change to geos - why? why not?
188
+
189
+ def pretty_print( printer )
190
+ printer.text( "<MatchLine " )
191
+ printer.text( "#{self.team1} v #{self.team2}")
192
+ printer.breakable
193
+
194
+ members.zip(values) do |name, value|
195
+ next if [:team1, :team2].include?( name )
196
+ next if value.nil?
197
+
198
+ printer.text( "#{name}=#{value.pretty_inspect}" )
199
+ end
200
+
201
+ printer.text( ">" )
202
+ end
203
+
204
+ end
205
+
206
+ GoalLine = Struct.new( :goals1, :goals2 ) do
207
+ def pretty_print( printer )
208
+ printer.text( "<GoalLine " )
209
+ printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
210
+ printer.breakable
211
+ printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
212
+ end
213
+ end
214
+
215
+ Goal = Struct.new( :player, :minutes ) do
216
+ def to_s
217
+ buf = String.new
218
+ buf << "#{self.player}"
219
+ buf << " "
220
+ buf << minutes.map { |min| min.to_s }.join(' ')
221
+ buf
222
+ end
223
+
224
+ def pretty_print( printer )
225
+ printer.text( to_s )
226
+ end
227
+
228
+ end
229
+
230
+ Minute = Struct.new( :m, :offset, :og, :pen ) do
231
+ def to_s
232
+ buf = String.new
233
+ buf << "#{self.m}"
234
+ buf << "+#{self.offset}" if self.offset
235
+ buf << "'"
236
+ buf << "(og)" if self.og
237
+ buf << "(pen)" if self.pen
238
+ buf
239
+ end
240
+
241
+ def pretty_print( printer )
242
+ printer.text( to_s )
243
+ end
244
+ end
245
+
246
+
247
+
248
+
249
+ def initialize(input)
250
+ puts "==> input:"
251
+ puts input
252
+ @tokenizer = SportDb::Tokenizer.new(input)
253
+ end
254
+
255
+
256
+ def next_token
257
+ tok = @tokenizer.next_token
258
+ puts "next_token => #{tok.pretty_inspect}"
259
+ tok
260
+ end
261
+
262
+ # on_error do |error_token_id, error_value, value_stack|
263
+ # puts "Parse error on token: #{error_token_id}, value: #{error_value}"
264
+ # end
265
+
266
+ def parse
267
+ puts "parse:"
268
+ @tree = []
269
+ do_parse
270
+ @tree
271
+ end
272
+
273
+
274
+ def on_error(*args)
275
+ puts "!! on error:"
276
+ puts "args=#{args.pretty_inspect}"
277
+ end
278
+
279
+ =begin
280
+ on_error do |error_token_id, error_value, value_stack|
281
+ puts "Parse error on token: #{error_token_id}, value: #{error_value}"
282
+ end
283
+ =end
284
+
285
+ end
286
+
287
+
57
288
  puts SportDb::Module::Parser.banner # say hello
58
289
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-13 00:00:00.000000000 Z
11
+ date: 2025-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: racc
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rdoc
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -64,18 +78,17 @@ dependencies:
64
78
  requirements:
65
79
  - - "~>"
66
80
  - !ruby/object:Gem::Version
67
- version: '4.1'
81
+ version: '4.2'
68
82
  type: :development
69
83
  prerelease: false
70
84
  version_requirements: !ruby/object:Gem::Requirement
71
85
  requirements:
72
86
  - - "~>"
73
87
  - !ruby/object:Gem::Version
74
- version: '4.1'
88
+ version: '4.2'
75
89
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
90
  email: gerald.bauer@gmail.com
77
- executables:
78
- - fbtok
91
+ executables: []
79
92
  extensions: []
80
93
  extra_rdoc_files:
81
94
  - CHANGELOG.md
@@ -91,17 +104,13 @@ files:
91
104
  - Manifest.txt
92
105
  - README.md
93
106
  - Rakefile
94
- - bin/fbtok
95
107
  - config/rounds_de.txt
96
108
  - config/rounds_en.txt
97
109
  - config/rounds_es.txt
98
110
  - config/rounds_misc.txt
99
111
  - config/rounds_pt.txt
100
112
  - lib/sportdb/parser.rb
101
- - lib/sportdb/parser/fbtok/main.rb
102
113
  - lib/sportdb/parser/lang.rb
103
- - lib/sportdb/parser/linter.rb
104
- - lib/sportdb/parser/opts.rb
105
114
  - lib/sportdb/parser/outline_reader.rb
106
115
  - lib/sportdb/parser/parser.rb
107
116
  - lib/sportdb/parser/token-date.rb
@@ -131,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
140
  - !ruby/object:Gem::Version
132
141
  version: '0'
133
142
  requirements: []
134
- rubygems_version: 3.4.10
143
+ rubygems_version: 3.5.22
135
144
  signing_key:
136
145
  specification_version: 4
137
146
  summary: sportdb-parser - football.txt match parser (& tokenizer)
data/bin/fbtok DELETED
@@ -1,13 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ## tip: to test run:
4
- ## ruby -I ./lib bin/fbtok
5
-
6
- require 'sportdb/parser'
7
-
8
-
9
- Fbtok.main( ARGV )
10
-
11
-
12
- puts "bye"
13
-