sportdb-parser 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -54,7 +54,7 @@ TEXT_RE = %r{
54
54
  )
55
55
 
56
56
  (?:(?: (?:[ ]
57
- (?!vs?\.?[ ]) ## note - exclude (v[ ]/vs[ ]/v.[ ]/vs.[ ])
57
+ (?!vs?[ ]) ## note - exclude (v[ ]/vs[ ])
58
58
  )
59
59
  | # only single spaces allowed inline!!!
60
60
  [-]
@@ -68,24 +68,41 @@ BASICS_RE = %r{
68
68
  (?<vs>
69
69
  (?<=[ ]) # Positive lookbehind for space
70
70
  (?:
71
- vs\.?| ## allow optional dot (eg. vs. v.)
72
- v\.?|
73
- -
74
- ) # not bigger match first e.g. vs than v etc.
71
+ vs|v
72
+ )
73
+ # not bigger match first e.g. vs than v etc.
74
+ # todo/fix - make vs|v case sensitive!!! only match v/vs - why? why not?
75
75
  (?=[ ]) # positive lookahead for space
76
76
  )
77
77
  |
78
+ (?<spaces> [ ]{2,}) |
79
+ (?<space> [ ])
80
+ |
81
+ (?<sym>[;,@|\[\]-])
82
+ }ix
83
+
84
+
85
+ ## removed from basics
86
+ =begin
78
87
  (?<none>
79
88
  (?<=[ \[]|^) # Positive lookbehind for space or [
80
89
  -
81
90
  (?=[ ]*;) # positive lookahead for space
82
91
  )
83
92
  |
84
- (?<spaces> [ ]{2,}) |
85
- (?<space> [ ])
86
- |
87
- (?<sym>[;,@|\[\]])
88
- }ix
93
+ (?<vs>
94
+ (?<=[ ]) # Positive lookbehind for space
95
+ (?:
96
+ vs\.?| ## allow optional dot (eg. vs. v.)
97
+ v\.?|
98
+ -
99
+ ) # not bigger match first e.g. vs than v etc.
100
+ (?=[ ]) # positive lookahead for space
101
+ )
102
+ |
103
+
104
+ make - into a simple symbol !!!
105
+ =end
89
106
 
90
107
 
91
108
  MINUTE_RE = %r{
@@ -141,8 +158,7 @@ end
141
158
 
142
159
 
143
160
 
144
- def tokenize_with_errors( line, typed: false,
145
- debug: false )
161
+ def tokenize_with_errors( line, debug: false )
146
162
  tokens = []
147
163
  errors = [] ## keep a list of errors - why? why not?
148
164
 
@@ -180,6 +196,10 @@ def tokenize_with_errors( line, typed: false,
180
196
 
181
197
  pp offsets if debug
182
198
 
199
+ ##
200
+ ## note: racc requires pairs e.g. [:TOKEN, VAL]
201
+ ## for VAL use "text" or ["text", { opts }] array
202
+
183
203
  t = if m[:space]
184
204
  ## skip space
185
205
  nil
@@ -187,15 +207,17 @@ def tokenize_with_errors( line, typed: false,
187
207
  ## skip spaces
188
208
  nil
189
209
  elsif m[:text]
190
- [:text, m[:text]] ## keep pos - why? why not?
210
+ [:TEXT, m[:text]] ## keep pos - why? why not?
191
211
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
212
+ ## todo/check - add text (or status)
213
+ # to opts hash {} by default (for value)
192
214
  if m[:status_note] ## includes note? e.g. awarded; originally 2-0
193
- [:status, m[:status], {note:m[:status_note]}]
215
+ [:STATUS, [m[:status], {status: m[:status],
216
+ note: m[:status_note]} ]]
194
217
  else
195
- [:status, m[:status]]
218
+ [:STATUS, [m[:status], {status: m[:status] } ]]
196
219
  end
197
220
  elsif m[:time]
198
- if typed
199
221
  ## unify to iso-format
200
222
  ### 12.40 => 12:40
201
223
  ## 12h40 => 12:40 etc.
@@ -208,15 +230,11 @@ def tokenize_with_errors( line, typed: false,
208
230
  (minute >=0 && minute <= 59)
209
231
  ## note - for debugging keep (pass along) "literal" time
210
232
  ## might use/add support for am/pm later
211
- [:time, m[:time], {h:hour,m:minute}]
233
+ [:TIME, [m[:time], {h:hour,m:minute}]]
212
234
  else
213
235
  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
214
236
  end
215
- else
216
- [:time, m[:time]]
217
- end
218
237
  elsif m[:date]
219
- if typed
220
238
  date = {}
221
239
  =begin
222
240
  ((?<day_name>#{DAY_NAMES})
@@ -237,14 +255,11 @@ def tokenize_with_errors( line, typed: false,
237
255
  date[:d] = m[:day].to_i(10) if m[:day]
238
256
  date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
239
257
  ## note - for debugging keep (pass along) "literal" date
240
- [:date, m[:date], date]
241
- else
242
- [:date, m[:date]]
243
- end
258
+ [:DATE, [m[:date], date]]
244
259
  elsif m[:timezone]
245
- [:timezone, m[:timezone]]
260
+ [:TIMEZONE, m[:timezone]]
246
261
  elsif m[:duration]
247
- if typed
262
+ ## todo/check/fix - if end: works for kwargs!!!!!
248
263
  duration = { start: {}, end: {}}
249
264
  duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
250
265
  duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
@@ -255,19 +270,11 @@ def tokenize_with_errors( line, typed: false,
255
270
  duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
256
271
  duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
257
272
  ## note - for debugging keep (pass along) "literal" duration
258
- [:duration, m[:duration], duration]
259
- else
260
- [:duration, m[:duration]]
261
- end
262
- elsif m[:num]
263
- if typed
273
+ [:DURATION, [m[:duration], duration]]
274
+ elsif m[:num] ## fix - change to ord (for ordinal number!!!)
264
275
  ## note - strip enclosing () and convert to integer
265
- [:num, m[:value].to_i(10)]
266
- else
267
- [:num, m[:num]]
268
- end
276
+ [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
269
277
  elsif m[:score]
270
- if typed
271
278
  score = {}
272
279
  ## check for pen
273
280
  score[:p] = [m[:p1].to_i(10),
@@ -280,42 +287,39 @@ def tokenize_with_errors( line, typed: false,
280
287
  m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
281
288
 
282
289
  ## note - for debugging keep (pass along) "literal" score
283
- [:score, m[:score], score]
284
- else
285
- [:score, m[:score]]
286
- end
290
+ [:SCORE, [m[:score], score]]
287
291
  elsif m[:minute]
288
- if typed
289
292
  minute = {}
290
293
  minute[:m] = m[:value].to_i(10)
291
294
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
292
295
  ## note - for debugging keep (pass along) "literal" minute
293
- [:minute, m[:minute], minute]
294
- else
295
- [:minute, m[:minute]]
296
- end
296
+ [:MINUTE, [m[:minute], minute]]
297
297
  elsif m[:og]
298
- typed ? [:og] : [:og, m[:og]] ## for typed drop - string version/variants
298
+ [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
299
299
  elsif m[:pen]
300
- typed ? [:pen] : [:pen, m[:pen]]
300
+ [:PEN, m[:pen]]
301
301
  elsif m[:vs]
302
- typed ? [:vs] : [:vs, m[:vs]]
303
- elsif m[:none]
304
- typed ? [:none] : [:none, m[:none]]
302
+ [:VS, m[:vs]]
305
303
  elsif m[:sym]
306
304
  sym = m[:sym]
307
305
  ## return symbols "inline" as is - why? why not?
306
+ ## (?<sym>[;,@|\[\]-])
307
+
308
308
  case sym
309
309
  when ',' then [:',']
310
310
  when ';' then [:';']
311
311
  when '@' then [:'@']
312
312
  when '|' then [:'|']
313
+ when '[' then [:'[']
314
+ when ']' then [:']']
315
+ when '-' then [:'-']
313
316
  else
314
317
  nil ## ignore others (e.g. brackets [])
315
318
  end
316
319
  else
317
320
  ## report error
318
- nil
321
+ puts "!!! TOKENIZE ERROR - no match found"
322
+ nil
319
323
  end
320
324
 
321
325
  tokens << t if t
@@ -342,10 +346,8 @@ end
342
346
 
343
347
 
344
348
  ### convenience helper - ignore errors by default
345
- def tokenize( line, typed: false,
346
- debug: false )
347
- tokens, _ = tokenize_with_errors( line, typed: typed,
348
- debug: debug )
349
+ def tokenize( line, debug: false )
350
+ tokens, _ = tokenize_with_errors( line, debug: debug )
349
351
  tokens
350
352
  end
351
353
 
@@ -3,7 +3,7 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 4
6
+ MINOR = 5
7
7
  PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
@@ -47,5 +47,243 @@ end # module SportDb
47
47
  =end
48
48
 
49
49
 
50
+
51
module SportDb
  ##
  #  Tokenizer - builds the token stream handed to the racc match parser.
  #
  #  The text gets split into lines; every non-blank, non-comment line is
  #  run through Parser#tokenize and the per-line tokens are flattened into
  #  one stream of [TOKEN, VALUE] pairs. Every tokenized line is terminated
  #  by a [:NEWLINE, "\n"] token.
  class Tokenizer

    attr_reader :tokens

    ## txt - text to tokenize (lines separated by "\n")
    ##
    ##  raises ArgumentError if a token is not an array of size 1 or 2
    def initialize( txt )
      parser = Parser.new

      ## skip blank lines and comment lines (starting with #)
      lines = txt.split( "\n" ).reject do |line|
                stripped = line.strip
                stripped.empty? || stripped.start_with?( '#' )
              end

      tree = lines.map do |line|
               puts "line >#{line}<"
               line_tokens = parser.tokenize( line )
               pp line_tokens
               line_tokens
             end

      ## todo/check - quick hack (disabled for now)
      ##   turn all text tokens followed by minute token
      ##   into player tokens!!!
      ##   also auto-convert text tokens into team tokens - why? why not?
      ##
      ## todo/check - auto-add/insert start tokens for known line patterns
      ##   e.g. START_GOALS for goals_line - why? why not?

      ## flatten - note: auto-add a newline token after every line
      flat = tree.flat_map { |line_tokens| line_tokens + [[:NEWLINE, "\n"]] }

      ## convert to racc format
      @tokens = flat.map { |tok| racc_token( parser, tok ) }
    end


    ## returns (and removes) the next [TOKEN, VALUE] pair
    ##   or nil if no tokens are left
    def next_token
      @tokens.shift
    end


    private

    ## convert a token into a racc pair e.g. [TOKEN, VALUE]
    ##   - size 1 tokens (symbols only e.g. [:',']) get converted to
    ##       strings and doubled e.g. [",", ","]
    ##   - size 2 tokens pass through 1:1 except for text tokens
    ##       matching a keyword (e.g. group, round, leg, etc.)
    def racc_token( parser, tok )
      case tok.size
      when 1
        [tok[0].to_s, tok[0].to_s]
      when 2
        return tok unless tok[0] == :TEXT

        #############
        ## pass 1
        ##   replace all texts with keyword matches (e.g. group, round, leg, etc.)
        text = tok[1]
        if parser.is_group?( text )
          [:GROUP, text]
        elsif parser.is_round?( text ) || parser.is_leg?( text )
          [:ROUND, text]
        else
          tok  ## pass through as-is (1:1)
        end
      else
        raise ArgumentError, "tokens of size 1|2 expected; got #{tok.pretty_inspect}"
      end
    end
  end  # class Tokenizer
end   # module SportDb
136
+
137
+
138
+
139
+ ####
140
+ # RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
141
+
142
##
#  Support machinery for the match parser - the node classes (structs)
#  of the abstract syntax tree, each with a compact pretty_print, plus
#  the glue methods (next_token, parse, on_error).
#
#  note: do_parse is NOT defined here - presumably it comes from the
#        racc-generated part of this class; todo/check
class RaccMatchParser

  ## group definition - name plus teams
  GroupDef = Struct.new( :name, :teams ) do
    def pretty_print( printer )
      printer.text( "<GroupDef " )
      printer.text( self.name )
      printer.text( " teams=" + self.teams.pretty_inspect )
      printer.text( ">" )
    end
  end


  ## round definition - name plus optional date or duration
  RoundDef = Struct.new( :name, :date, :duration ) do
    def pretty_print( printer )
      printer.text( "<RoundDef " )
      printer.text( self.name )
      printer.text( " date=" + self.date.pretty_inspect )  if date
      ## note: label was misspelt (durattion) - fixed
      printer.text( " duration=" + self.duration.pretty_inspect ) if duration
      printer.text( ">" )
    end
  end

  DateHeader = Struct.new( :date ) do
    def pretty_print( printer )
      printer.text( "<DateHeader " )
      printer.text( "#{self.date.pretty_inspect}>" )
    end
  end

  GroupHeader = Struct.new( :name ) do
    def pretty_print( printer )
      printer.text( "<GroupHeader " )
      printer.text( "#{self.name}>" )
    end
  end

  ## note: names is expected to respond to join (e.g. an array)
  RoundHeader = Struct.new( :names ) do
    def pretty_print( printer )
      printer.text( "<RoundHeader " )
      printer.text( "#{self.names.join(', ')}>" )
    end
  end

  MatchLine = Struct.new( :ord,   :date,  :time,
                          :team1, :team2, :score,
                          :geo ) do   ## change to geos - why? why not?

    ## prints the teams first, followed by all other members that are set
    def pretty_print( printer )
      printer.text( "<MatchLine " )
      printer.text( "#{self.team1} v #{self.team2}")
      printer.breakable

      each_pair do |name, value|
        next if [:team1, :team2].include?( name )   ## already printed
        next if value.nil?

        printer.text( "#{name}=#{value.pretty_inspect}" )
      end

      printer.text( ">" )
    end

  end

  GoalLine = Struct.new( :goals1, :goals2 ) do
    def pretty_print( printer )
      printer.text( "<GoalLine " )
      printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
      printer.breakable
      printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
    end
  end

  ## goal - player plus list of minutes
  Goal = Struct.new( :player, :minutes ) do
    ## e.g. player followed by all minutes (space-separated)
    def to_s
      buf = String.new
      buf << "#{self.player}"
      buf << " "
      buf << minutes.map { |min| min.to_s }.join(' ')
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end

  end

  ## minute - with optional offset and og / pen flags
  Minute = Struct.new( :m, :offset, :og, :pen ) do
    ## e.g. 90+2'(pen)
    def to_s
      buf = String.new
      buf << "#{self.m}"
      buf << "+#{self.offset}" if self.offset
      buf << "'"
      buf << "(og)"  if self.og
      buf << "(pen)" if self.pen
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end



  ## input - text to parse; gets printed and handed to the tokenizer
  def initialize(input)
    puts "==> input:"
    puts input
    @tokenizer = SportDb::Tokenizer.new(input)
  end


  ## returns the next token from the tokenizer (and prints it)
  def next_token
    tok = @tokenizer.next_token
    puts "next_token => #{tok.pretty_inspect}"
    tok
  end


  ## runs do_parse and returns the tree (reset to empty before every run)
  def parse
    puts "parse:"
    @tree = []
    do_parse
    @tree
  end


  ## prints the error args only - note: does NOT raise
  def on_error(*args)
    puts "!! on error:"
    puts "args=#{args.pretty_inspect}"
  end

  ## alternate (block-style) error handler - kept for reference
  #   on_error do |error_token_id, error_value, value_stack|
  #      puts "Parse error on token: #{error_token_id}, value: #{error_value}"
  #   end

end
286
+
287
+
50
288
  puts SportDb::Module::Parser.banner # say hello
51
289
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-02 00:00:00.000000000 Z
11
+ date: 2025-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: racc
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rdoc
43
57
  requirement: !ruby/object:Gem::Requirement