sportdb-parser 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -68,12 +68,14 @@ def initialize( txt, debug: false )
68
68
 
69
69
 
70
70
  def on_error(error_token_id, error_value, value_stack)
71
- args = [error_token_id, error_value, value_stack]
71
+ ## auto-add error_token (as string)
72
+ error_token = Racc_token_to_s_table[error_token_id]
73
+ args = [error_token, error_token_id, error_value, value_stack]
72
74
  puts
73
75
  puts "!! on parse error:"
74
76
  puts "args=#{args.pretty_inspect}"
75
77
 
76
- @errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
78
+ @errors << "parse error on token: #{error_token} (#{error_token_id}) with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
77
79
  ## exit 1 ## exit for now - get and print more info about context etc.!!
78
80
  end
79
81
 
@@ -147,15 +147,15 @@ DATE_II_RE = %r{
147
147
 
148
148
 
149
149
  # e.g. iso-date - 2011-08-25
150
- ## todo/check - allow 2011-8-25 or 2011-8-3 / 2011-08-03 etc. - why? why not?
150
+ ## note - allow/support ("shortcuts") e.g. 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
151
151
  DATE_III_RE = %r{
152
152
  (?<date>
153
153
  \b
154
154
  (?<year>\d{4})
155
155
  -
156
- (?<month>\d{2})
156
+ (?<month>\d{1,2})
157
157
  -
158
- (?<day>\d{2})
158
+ (?<day>\d{1,2})
159
159
  \b
160
160
  )}ix
161
161
 
@@ -214,29 +214,36 @@ end
214
214
  #
215
215
  # Sun Jun/23 - Wed Jun/26 -- YES
216
216
  # Jun/23 - Jun/26 -- YES
217
- # Tue Jun/25 + Wed Jun/26 -- YES
218
- # Jun/25 + Jun/26 -- YES
219
- #
220
- # Jun/25 - 26 - why? why not???
217
+ # Jun/25 - 26 - why? why not??? - YES - see below variant iii!!!
218
+
219
+ # Tue Jun/25 + Wed Jun/26 -- NO
220
+ # Jun/25 + Jun/26 -- NO
221
221
  # Jun/25 .. 26 - why? why not???
222
222
  # Jun/25 to 26 - why? why not???
223
223
  # Jun/25 + 26 - add - why? why not???
224
224
  # Sun-Wed Jun/23-26 - add - why? why not???
225
225
  # Wed+Thu Jun/26+27 2024 - add - why? why not???
226
226
  #
227
- # maybe use comman and plus for list of dates
227
+ # maybe use comma and plus for list of dates
228
228
  # Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
229
229
  # Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
230
230
  #
231
231
  # add back optional comma (before) year - why? why not?
232
+ #
232
233
 
233
234
 
234
235
  ##
235
236
  # todo add plus later on - why? why not?
237
+ ### todo/fix add optional comma (,) before year
238
+
239
+ ### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
240
+ ## because alternation (|) has lowest priority/binding
241
+
236
242
 
237
243
  DURATION_I_RE = %r{
238
244
  (?<duration>
239
245
  \b
246
+ (?:
240
247
  ## optional day name
241
248
  ((?<day_name1>#{DAY_NAMES})
242
249
  [ ]
@@ -245,12 +252,13 @@ DURATION_I_RE = %r{
245
252
  (?: \/|[ ] )
246
253
  (?<day1>\d{1,2})
247
254
  ## optional year
248
- ( [ ]
255
+ ( ,? # optional comma
256
+ [ ]
249
257
  (?<year1>\d{4})
250
258
  )?
251
259
 
252
260
  ## support + and - (add .. or such - why??)
253
- [ ]*[-][ ]*
261
+ [ ]* - [ ]*
254
262
 
255
263
  ## optional day name
256
264
  ((?<day_name2>#{DAY_NAMES})
@@ -260,20 +268,28 @@ DURATION_I_RE = %r{
260
268
  (?: \/|[ ] )
261
269
  (?<day2>\d{1,2})
262
270
  ## optional year
263
- ( [ ]
271
+ ( ,? # optional comma
272
+ [ ]
264
273
  (?<year2>\d{4})
265
274
  )?
275
+ )
266
276
  \b
267
277
  )}ix
268
278
 
269
279
 
280
+
281
+ # FIX - remove this variant
282
+ # "standardize on month day [year]" !!!!
283
+
284
+ =begin
270
285
  ###
271
286
  # variant ii
272
287
  # e.g. 26 July - 27 July
273
-
274
- DURATION_II_RE = %r{
288
+ # 26 July,
289
+ XXX_DURATION_II_RE = %r{
275
290
  (?<duration>
276
291
  \b
292
+ (?
277
293
  ## optional day name
278
294
  ((?<day_name1>#{DAY_NAMES})
279
295
  [ ]
@@ -282,7 +298,8 @@ DURATION_II_RE = %r{
282
298
  [ ]
283
299
  (?<month_name1>#{MONTH_NAMES})
284
300
  ## optional year
285
- ( [ ]
301
+ (
302
+ [ ]
286
303
  (?<year1>\d{4})
287
304
  )?
288
305
 
@@ -300,16 +317,50 @@ DURATION_II_RE = %r{
300
317
  ( [ ]
301
318
  (?<year2>\d{4})
302
319
  )?
320
+ )
321
+ \b
322
+ )}ix
323
+ =end
324
+
325
+
326
+ # variant ii
327
+ # add support for shorthand
328
+ # August 16-18, 2011
329
+ # September 13-15, 2011
330
+ # October 18-20, 2011
331
+ # March/6-8, 2012
332
+ # March 6-8 2012
333
+ # March 6-8
334
+ #
335
+ # - add support for August 16+17 or such (and check 16+18)
336
+ # use <op> to check if day2 is a plus or range or such - why? why not?
337
+
338
+ DURATION_II_RE = %r{
339
+ (?<duration>
340
+ \b
341
+ (?:
342
+ (?<month_name1>#{MONTH_NAMES})
343
+ [ /]
344
+ (?<day1>\d{1,2})
345
+ -
346
+ (?<day2>\d{1,2})
347
+ (?:
348
+ ,? ## optional comma
349
+ [ ]
350
+ (?<year1>\d{4})
351
+ )? ## optional year
352
+ )
303
353
  \b
304
354
  )}ix
305
355
 
306
356
 
357
+
307
358
  #############################################
308
359
  # map tables
309
360
  # note: order matters; first come-first matched/served
310
361
  DURATION_RE = Regexp.union(
311
362
  DURATION_I_RE,
312
- DURATION_II_RE
363
+ DURATION_II_RE,
313
364
  )
314
365
 
315
366
 
@@ -17,7 +17,7 @@ class Lexer
17
17
  ## 3-4 pen. 2-2 a.e.t.
18
18
  ## 2-2 a.e.t.
19
19
  SCORE__P_ET__RE = %r{
20
- (?<score>
20
+ (?<score_more>
21
21
  \b
22
22
  (?:
23
23
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
@@ -34,7 +34,7 @@ class Lexer
34
34
  ## note: allow SPECIAL with penalty only
35
35
  ## 3-4 pen.
36
36
  SCORE__P__RE = %r{
37
- (?<score>
37
+ (?<score_more>
38
38
  \b
39
39
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
40
40
  [ ]* #{P_EN}
@@ -52,7 +52,7 @@ class Lexer
52
52
  ## 2-2 a.e.t. (1-1)
53
53
 
54
54
  SCORE__P_ET_FT_HT__RE = %r{
55
- (?<score>
55
+ (?<score_more>
56
56
  \b
57
57
  (?:
58
58
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
@@ -79,7 +79,7 @@ class Lexer
79
79
  ## special case for case WITHOUT extra time!!
80
80
  ## same as above (but WITHOUT extra time and pen required)
81
81
  SCORE__P_FT_HT__RE = %r{
82
- (?<score>
82
+ (?<score_more>
83
83
  \b
84
84
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
85
85
  [ ]* #{P_EN} [ ]+
@@ -99,36 +99,47 @@ class Lexer
99
99
  ## note: \b works only after non-alphanum e.g. )
100
100
 
101
101
 
102
-
103
- ## e.g. 2-1 (1-1) or
104
- ## 2-1
105
-
102
+ ##########
103
+ ## e.g. 2-1 (1-1)
106
104
  SCORE__FT_HT__RE = %r{
107
- (?<score>
105
+ (?<score_more>
108
106
  \b
109
107
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
110
- (?:
111
108
  [ ]+ \( [ ]*
112
109
  (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
113
110
  [ ]* \)
114
- )? # note: make half time (HT) score optional for now
115
111
  (?=[ ,\]]|$)
116
112
  )}ix ## todo/check: remove lookahead assertion here - why require space?
117
113
  ## note: \b works only after non-alphanum e.g. )
118
114
 
119
-
115
+ #####
116
+ ## 2-1
117
+ SCORE__FT__RE = %r{
118
+ (?<score>
119
+ \b
120
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
121
+ \b
122
+ )}ix
120
123
 
121
124
  #############################################
122
125
  # map tables
123
126
  # note: order matters; first come-first matched/served
127
+ #
128
+ ## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
124
129
 
125
- SCORE_RE = Regexp.union(
130
+ SCORE_MORE_RE = Regexp.union(
126
131
  SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
127
132
  SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
128
133
  SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
129
134
  SCORE__P__RE, # e.g. 5-1 pen.
130
- SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
135
+ SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
136
+ ## note - keep basic score as its own token!!!!
137
+ ## that is, SCORE & SCORE_MORE
138
+ ### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
131
139
  )
132
140
 
141
+ SCORE_RE = SCORE__FT__RE
142
+
143
+
133
144
  end # class Lexer
134
145
  end # module SportDb
@@ -111,7 +111,15 @@ BASICS_RE = %r{
111
111
  (?<spaces> [ ]{2,}) |
112
112
  (?<space> [ ])
113
113
  |
114
- (?<sym>[;,/@|\[\]-])
114
+ (?<sym> (?<=^|[ ]) ## positive lookbehind
115
+ (?: ----|
116
+ ---|
117
+ --
118
+ )
119
+ (?=[ ]) ## positive lookahead
120
+ )
121
+ |
122
+ (?<sym> [;,/@|\[\]-] )
115
123
  }ix
116
124
 
117
125
 
@@ -124,7 +132,8 @@ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should swit
124
132
  DURATION_RE, # note - duration MUST match before date
125
133
  DATE_RE,
126
134
  WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
127
- SCORE_RE,
135
+ SCORE_MORE_RE,
136
+ SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
128
137
  BASICS_RE,
129
138
  MINUTE_RE,
130
139
  GOAL_OG_RE, GOAL_PEN_RE,
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 6
7
- PATCH = 0
7
+ PATCH = 1
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-30 00:00:00.000000000 Z
11
+ date: 2025-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos