sportdb-parser 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,12 +68,14 @@ def initialize( txt, debug: false )
68
68
 
69
69
 
70
70
  def on_error(error_token_id, error_value, value_stack)
71
- args = [error_token_id, error_value, value_stack]
71
+ ## auto-add error_token (as string)
72
+ error_token = Racc_token_to_s_table[error_token_id]
73
+ args = [error_token, error_token_id, error_value, value_stack]
72
74
  puts
73
75
  puts "!! on parse error:"
74
76
  puts "args=#{args.pretty_inspect}"
75
77
 
76
- @errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
78
+ @errors << "parse error on token: #{error_token} (#{error_token_id}) with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
77
79
  ## exit 1 ## exit for now - get and print more info about context etc.!!
78
80
  end
79
81
 
@@ -147,15 +147,15 @@ DATE_II_RE = %r{
147
147
 
148
148
 
149
149
  # e.g. iso-date - 2011-08-25
150
- ## todo/check - allow 2011-8-25 or 2011-8-3 / 2011-08-03 etc. - why? why not?
150
+ ## note - allow/support ("shortcuts") e.g 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
151
151
  DATE_III_RE = %r{
152
152
  (?<date>
153
153
  \b
154
154
  (?<year>\d{4})
155
155
  -
156
- (?<month>\d{2})
156
+ (?<month>\d{1,2})
157
157
  -
158
- (?<day>\d{2})
158
+ (?<day>\d{1,2})
159
159
  \b
160
160
  )}ix
161
161
 
@@ -214,29 +214,36 @@ end
214
214
  #
215
215
  # Sun Jun/23 - Wed Jun/26 -- YES
216
216
  # Jun/23 - Jun/26 -- YES
217
- # Tue Jun/25 + Wed Jun/26 -- YES
218
- # Jun/25 + Jun/26 -- YES
219
- #
220
- # Jun/25 - 26 - why? why not???
217
+ # Jun/25 - 26 - why? why not??? - YES - see below variant iii!!!
218
+
219
+ # Tue Jun/25 + Wed Jun/26 -- NO
220
+ # Jun/25 + Jun/26 -- NO
221
221
  # Jun/25 .. 26 - why? why not???
222
222
  # Jun/25 to 26 - why? why not???
223
223
  # Jun/25 + 26 - add - why? why not???
224
224
  # Sun-Wed Jun/23-26 - add - why? why not???
225
225
  # Wed+Thu Jun/26+27 2024 - add - why? why not???
226
226
  #
227
- # maybe use comman and plus for list of dates
227
+ # maybe use comma and plus for list of dates
228
228
  # Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
229
229
  # Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
230
230
  #
231
231
  # add back optional comma (before) year - why? why not?
232
+ #
232
233
 
233
234
 
234
235
  ##
235
236
  # todo add plus later on - why? why not?
237
+ ### todo/fix add optional comma (,) before year
238
+
239
+ ### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
240
+ ## because alternation (|) has lowest priority/binding
241
+
236
242
 
237
243
  DURATION_I_RE = %r{
238
244
  (?<duration>
239
245
  \b
246
+ (?:
240
247
  ## optional day name
241
248
  ((?<day_name1>#{DAY_NAMES})
242
249
  [ ]
@@ -245,12 +252,13 @@ DURATION_I_RE = %r{
245
252
  (?: \/|[ ] )
246
253
  (?<day1>\d{1,2})
247
254
  ## optional year
248
- ( [ ]
255
+ ( ,? # optional comma
256
+ [ ]
249
257
  (?<year1>\d{4})
250
258
  )?
251
259
 
252
260
  ## support + and - (add .. or such - why??)
253
- [ ]*[-][ ]*
261
+ [ ]* - [ ]*
254
262
 
255
263
  ## optional day name
256
264
  ((?<day_name2>#{DAY_NAMES})
@@ -260,20 +268,28 @@ DURATION_I_RE = %r{
260
268
  (?: \/|[ ] )
261
269
  (?<day2>\d{1,2})
262
270
  ## optional year
263
- ( [ ]
271
+ ( ,? # optional comma
272
+ [ ]
264
273
  (?<year2>\d{4})
265
274
  )?
275
+ )
266
276
  \b
267
277
  )}ix
268
278
 
269
279
 
280
+
281
+ # FIX - remove this variant
282
+ # "standardize on month day [year]" !!!!
283
+
284
+ =begin
270
285
  ###
271
286
  # variant ii
272
287
  # e.g. 26 July - 27 July
273
-
274
- DURATION_II_RE = %r{
288
+ # 26 July,
289
+ XXX_DURATION_II_RE = %r{
275
290
  (?<duration>
276
291
  \b
292
+ (?
277
293
  ## optional day name
278
294
  ((?<day_name1>#{DAY_NAMES})
279
295
  [ ]
@@ -282,7 +298,8 @@ DURATION_II_RE = %r{
282
298
  [ ]
283
299
  (?<month_name1>#{MONTH_NAMES})
284
300
  ## optional year
285
- ( [ ]
301
+ (
302
+ [ ]
286
303
  (?<year1>\d{4})
287
304
  )?
288
305
 
@@ -300,16 +317,50 @@ DURATION_II_RE = %r{
300
317
  ( [ ]
301
318
  (?<year2>\d{4})
302
319
  )?
320
+ )
321
+ \b
322
+ )}ix
323
+ =end
324
+
325
+
326
+ # variant ii
327
+ # add support for shorthand
328
+ # August 16-18, 2011
329
+ # September 13-15, 2011
330
+ # October 18-20, 2011
331
+ # March/6-8, 2012
332
+ # March 6-8 2012
333
+ # March 6-8
334
+ #
335
+ # - add support for August 16+17 or such (and check 16+18)
336
+ # use <op> to check if day2 is a plus or range or such - why? why not?
337
+
338
+ DURATION_II_RE = %r{
339
+ (?<duration>
340
+ \b
341
+ (?:
342
+ (?<month_name1>#{MONTH_NAMES})
343
+ [ /]
344
+ (?<day1>\d{1,2})
345
+ -
346
+ (?<day2>\d{1,2})
347
+ (?:
348
+ ,? ## optional comma
349
+ [ ]
350
+ (?<year1>\d{4})
351
+ )? ## optional year
352
+ )
303
353
  \b
304
354
  )}ix
305
355
 
306
356
 
357
+
307
358
  #############################################
308
359
  # map tables
309
360
  # note: order matters; first come-first matched/served
310
361
  DURATION_RE = Regexp.union(
311
362
  DURATION_I_RE,
312
- DURATION_II_RE
363
+ DURATION_II_RE,
313
364
  )
314
365
 
315
366
 
@@ -17,7 +17,7 @@ class Lexer
17
17
  ## 3-4 pen. 2-2 a.e.t.
18
18
  ## 2-2 a.e.t.
19
19
  SCORE__P_ET__RE = %r{
20
- (?<score>
20
+ (?<score_more>
21
21
  \b
22
22
  (?:
23
23
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
@@ -34,7 +34,7 @@ class Lexer
34
34
  ## note: allow SPECIAL with penalty only
35
35
  ## 3-4 pen.
36
36
  SCORE__P__RE = %r{
37
- (?<score>
37
+ (?<score_more>
38
38
  \b
39
39
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
40
40
  [ ]* #{P_EN}
@@ -52,7 +52,7 @@ class Lexer
52
52
  ## 2-2 a.e.t. (1-1)
53
53
 
54
54
  SCORE__P_ET_FT_HT__RE = %r{
55
- (?<score>
55
+ (?<score_more>
56
56
  \b
57
57
  (?:
58
58
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
@@ -79,7 +79,7 @@ class Lexer
79
79
  ## special case for case WITHOUT extra time!!
80
80
  ## same as above (but WITHOUT extra time and pen required)
81
81
  SCORE__P_FT_HT__RE = %r{
82
- (?<score>
82
+ (?<score_more>
83
83
  \b
84
84
  (?<p1>\d{1,2}) - (?<p2>\d{1,2})
85
85
  [ ]* #{P_EN} [ ]+
@@ -99,36 +99,47 @@ class Lexer
99
99
  ## note: \b works only after non-alphanum e.g. )
100
100
 
101
101
 
102
-
103
- ## e.g. 2-1 (1-1) or
104
- ## 2-1
105
-
102
+ ##########
103
+ ## e.g. 2-1 (1-1)
106
104
  SCORE__FT_HT__RE = %r{
107
- (?<score>
105
+ (?<score_more>
108
106
  \b
109
107
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
110
- (?:
111
108
  [ ]+ \( [ ]*
112
109
  (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
113
110
  [ ]* \)
114
- )? # note: make half time (HT) score optional for now
115
111
  (?=[ ,\]]|$)
116
112
  )}ix ## todo/check: remove lookahead assertion here - why require space?
117
113
  ## note: \b works only after non-alphanum e.g. )
118
114
 
119
-
115
+ #####
116
+ ## 2-1
117
+ SCORE__FT__RE = %r{
118
+ (?<score>
119
+ \b
120
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
121
+ \b
122
+ )}ix
120
123
 
121
124
  #############################################
122
125
  # map tables
123
126
  # note: order matters; first come-first matched/served
127
+ #
128
+ ## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
124
129
 
125
- SCORE_RE = Regexp.union(
130
+ SCORE_MORE_RE = Regexp.union(
126
131
  SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
127
132
  SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
128
133
  SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
129
134
  SCORE__P__RE, # e.g. 5-1 pen.
130
- SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
135
+ SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
136
+ ## note - keep basic score as its own token!!!!
137
+ ## that is, SCORE & SCORE_MORE
138
+ ### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
131
139
  )
132
140
 
141
+ SCORE_RE = SCORE__FT__RE
142
+
143
+
133
144
  end # class Lexer
134
145
  end # module SportDb
@@ -111,7 +111,15 @@ BASICS_RE = %r{
111
111
  (?<spaces> [ ]{2,}) |
112
112
  (?<space> [ ])
113
113
  |
114
- (?<sym>[;,/@|\[\]-])
114
+ (?<sym> (?<=^|[ ]) ## positive lookbehind
115
+ (?: ----|
116
+ ---|
117
+ --
118
+ )
119
+ (?=[ ]) ## positive lookahead
120
+ )
121
+ |
122
+ (?<sym> [;,/@|\[\]-] )
115
123
  }ix
116
124
 
117
125
 
@@ -124,7 +132,8 @@ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should swit
124
132
  DURATION_RE, # note - duration MUST match before date
125
133
  DATE_RE,
126
134
  WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
127
- SCORE_RE,
135
+ SCORE_MORE_RE,
136
+ SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
128
137
  BASICS_RE,
129
138
  MINUTE_RE,
130
139
  GOAL_OG_RE, GOAL_PEN_RE,
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 6
7
- PATCH = 0
7
+ PATCH = 1
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-30 00:00:00.000000000 Z
11
+ date: 2025-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos