sportdb-parser 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +55 -9
- data/lib/sportdb/parser/parser.rb +493 -396
- data/lib/sportdb/parser/racc_parser.rb +4 -2
- data/lib/sportdb/parser/token-date.rb +66 -15
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token.rb +11 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
@@ -68,12 +68,14 @@ def initialize( txt, debug: false )
|
|
68
68
|
|
69
69
|
|
70
70
|
def on_error(error_token_id, error_value, value_stack)
|
71
|
-
|
71
|
+
## auto-add error_token (as string)
|
72
|
+
error_token = Racc_token_to_s_table[error_token_id]
|
73
|
+
args = [error_token, error_token_id, error_value, value_stack]
|
72
74
|
puts
|
73
75
|
puts "!! on parse error:"
|
74
76
|
puts "args=#{args.pretty_inspect}"
|
75
77
|
|
76
|
-
@errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
78
|
+
@errors << "parse error on token: #{error_token} (#{error_token_id}) with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
77
79
|
## exit 1 ## exit for now - get and print more info about context etc.!!
|
78
80
|
end
|
79
81
|
|
@@ -147,15 +147,15 @@ DATE_II_RE = %r{
|
|
147
147
|
|
148
148
|
|
149
149
|
# e.g. iso-date - 2011-08-25
|
150
|
-
##
|
150
|
+
## note - allow/support ("shortcuts") e.g. 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
|
151
151
|
DATE_III_RE = %r{
|
152
152
|
(?<date>
|
153
153
|
\b
|
154
154
|
(?<year>\d{4})
|
155
155
|
-
|
156
|
-
(?<month>\d{2})
|
156
|
+
(?<month>\d{1,2})
|
157
157
|
-
|
158
|
-
(?<day>\d{2})
|
158
|
+
(?<day>\d{1,2})
|
159
159
|
\b
|
160
160
|
)}ix
|
161
161
|
|
@@ -214,29 +214,36 @@ end
|
|
214
214
|
#
|
215
215
|
# Sun Jun/23 - Wed Jun/26 -- YES
|
216
216
|
# Jun/23 - Jun/26 -- YES
|
217
|
-
#
|
218
|
-
|
219
|
-
#
|
220
|
-
# Jun/25
|
217
|
+
# Jun/25 - 26 - why? why not??? - YES - see below variant iii!!!
|
218
|
+
|
219
|
+
# Tue Jun/25 + Wed Jun/26 -- NO
|
220
|
+
# Jun/25 + Jun/26 -- NO
|
221
221
|
# Jun/25 .. 26 - why? why not???
|
222
222
|
# Jun/25 to 26 - why? why not???
|
223
223
|
# Jun/25 + 26 - add - why? why not???
|
224
224
|
# Sun-Wed Jun/23-26 - add - why? why not???
|
225
225
|
# Wed+Thu Jun/26+27 2024 - add - why? why not???
|
226
226
|
#
|
227
|
-
# maybe use
|
227
|
+
# maybe use comma and plus for list of dates
|
228
228
|
# Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
|
229
229
|
# Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
|
230
230
|
#
|
231
231
|
# add back optional comma (before) year - why? why not?
|
232
|
+
#
|
232
233
|
|
233
234
|
|
234
235
|
##
|
235
236
|
# todo add plus later on - why? why not?
|
237
|
+
### todo/fix add optional comma (,) before year
|
238
|
+
|
239
|
+
### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
|
240
|
+
## because alternation (|) has lowest priority/binding
|
241
|
+
|
236
242
|
|
237
243
|
DURATION_I_RE = %r{
|
238
244
|
(?<duration>
|
239
245
|
\b
|
246
|
+
(?:
|
240
247
|
## optional day name
|
241
248
|
((?<day_name1>#{DAY_NAMES})
|
242
249
|
[ ]
|
@@ -245,12 +252,13 @@ DURATION_I_RE = %r{
|
|
245
252
|
(?: \/|[ ] )
|
246
253
|
(?<day1>\d{1,2})
|
247
254
|
## optional year
|
248
|
-
(
|
255
|
+
( ,? # optional comma
|
256
|
+
[ ]
|
249
257
|
(?<year1>\d{4})
|
250
258
|
)?
|
251
259
|
|
252
260
|
## support + and - (add .. or such - why??)
|
253
|
-
[ ]*
|
261
|
+
[ ]* - [ ]*
|
254
262
|
|
255
263
|
## optional day name
|
256
264
|
((?<day_name2>#{DAY_NAMES})
|
@@ -260,20 +268,28 @@ DURATION_I_RE = %r{
|
|
260
268
|
(?: \/|[ ] )
|
261
269
|
(?<day2>\d{1,2})
|
262
270
|
## optional year
|
263
|
-
(
|
271
|
+
( ,? # optional comma
|
272
|
+
[ ]
|
264
273
|
(?<year2>\d{4})
|
265
274
|
)?
|
275
|
+
)
|
266
276
|
\b
|
267
277
|
)}ix
|
268
278
|
|
269
279
|
|
280
|
+
|
281
|
+
# FIX - remove this variant
|
282
|
+
# "standardize on month day [year]" !!!!
|
283
|
+
|
284
|
+
=begin
|
270
285
|
###
|
271
286
|
# variant ii
|
272
287
|
# e.g. 26 July - 27 July
|
273
|
-
|
274
|
-
|
288
|
+
# 26 July,
|
289
|
+
XXX_DURATION_II_RE = %r{
|
275
290
|
(?<duration>
|
276
291
|
\b
|
292
|
+
(?
|
277
293
|
## optional day name
|
278
294
|
((?<day_name1>#{DAY_NAMES})
|
279
295
|
[ ]
|
@@ -282,7 +298,8 @@ DURATION_II_RE = %r{
|
|
282
298
|
[ ]
|
283
299
|
(?<month_name1>#{MONTH_NAMES})
|
284
300
|
## optional year
|
285
|
-
(
|
301
|
+
(
|
302
|
+
[ ]
|
286
303
|
(?<year1>\d{4})
|
287
304
|
)?
|
288
305
|
|
@@ -300,16 +317,50 @@ DURATION_II_RE = %r{
|
|
300
317
|
( [ ]
|
301
318
|
(?<year2>\d{4})
|
302
319
|
)?
|
320
|
+
)
|
321
|
+
\b
|
322
|
+
)}ix
|
323
|
+
=end
|
324
|
+
|
325
|
+
|
326
|
+
# variant ii
|
327
|
+
# add support for shorthand
|
328
|
+
# August 16-18, 2011
|
329
|
+
# September 13-15, 2011
|
330
|
+
# October 18-20, 2011
|
331
|
+
# March/6-8, 2012
|
332
|
+
# March 6-8 2012
|
333
|
+
# March 6-8
|
334
|
+
#
|
335
|
+
# - add support for August 16+17 or such (and check 16+18)
|
336
|
+
# use <op> to check if day2 is a plus or range or such - why? why not?
|
337
|
+
|
338
|
+
DURATION_II_RE = %r{
|
339
|
+
(?<duration>
|
340
|
+
\b
|
341
|
+
(?:
|
342
|
+
(?<month_name1>#{MONTH_NAMES})
|
343
|
+
[ /]
|
344
|
+
(?<day1>\d{1,2})
|
345
|
+
-
|
346
|
+
(?<day2>\d{1,2})
|
347
|
+
(?:
|
348
|
+
,? ## optional comma
|
349
|
+
[ ]
|
350
|
+
(?<year1>\d{4})
|
351
|
+
)? ## optional year
|
352
|
+
)
|
303
353
|
\b
|
304
354
|
)}ix
|
305
355
|
|
306
356
|
|
357
|
+
|
307
358
|
#############################################
|
308
359
|
# map tables
|
309
360
|
# note: order matters; first come-first matched/served
|
310
361
|
DURATION_RE = Regexp.union(
|
311
362
|
DURATION_I_RE,
|
312
|
-
DURATION_II_RE
|
363
|
+
DURATION_II_RE,
|
313
364
|
)
|
314
365
|
|
315
366
|
|
@@ -17,7 +17,7 @@ class Lexer
|
|
17
17
|
## 3-4 pen. 2-2 a.e.t.
|
18
18
|
## 2-2 a.e.t.
|
19
19
|
SCORE__P_ET__RE = %r{
|
20
|
-
(?<
|
20
|
+
(?<score_more>
|
21
21
|
\b
|
22
22
|
(?:
|
23
23
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
@@ -34,7 +34,7 @@ class Lexer
|
|
34
34
|
## note: allow SPECIAL with penalty only
|
35
35
|
## 3-4 pen.
|
36
36
|
SCORE__P__RE = %r{
|
37
|
-
(?<
|
37
|
+
(?<score_more>
|
38
38
|
\b
|
39
39
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
40
|
[ ]* #{P_EN}
|
@@ -52,7 +52,7 @@ class Lexer
|
|
52
52
|
## 2-2 a.e.t. (1-1)
|
53
53
|
|
54
54
|
SCORE__P_ET_FT_HT__RE = %r{
|
55
|
-
(?<
|
55
|
+
(?<score_more>
|
56
56
|
\b
|
57
57
|
(?:
|
58
58
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
@@ -79,7 +79,7 @@ class Lexer
|
|
79
79
|
## special case for case WITHOUT extra time!!
|
80
80
|
## same as above (but WITHOUT extra time and pen required)
|
81
81
|
SCORE__P_FT_HT__RE = %r{
|
82
|
-
(?<
|
82
|
+
(?<score_more>
|
83
83
|
\b
|
84
84
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
85
85
|
[ ]* #{P_EN} [ ]+
|
@@ -99,36 +99,47 @@ class Lexer
|
|
99
99
|
## note: \b works only after non-alphanum e.g. )
|
100
100
|
|
101
101
|
|
102
|
-
|
103
|
-
## e.g. 2-1 (1-1)
|
104
|
-
## 2-1
|
105
|
-
|
102
|
+
##########
|
103
|
+
## e.g. 2-1 (1-1)
|
106
104
|
SCORE__FT_HT__RE = %r{
|
107
|
-
(?<
|
105
|
+
(?<score_more>
|
108
106
|
\b
|
109
107
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
110
|
-
(?:
|
111
108
|
[ ]+ \( [ ]*
|
112
109
|
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
113
110
|
[ ]* \)
|
114
|
-
)? # note: make half time (HT) score optional for now
|
115
111
|
(?=[ ,\]]|$)
|
116
112
|
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
117
113
|
## note: \b works only after non-alphanum e.g. )
|
118
114
|
|
119
|
-
|
115
|
+
#####
|
116
|
+
## 2-1
|
117
|
+
SCORE__FT__RE = %r{
|
118
|
+
(?<score>
|
119
|
+
\b
|
120
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
121
|
+
\b
|
122
|
+
)}ix
|
120
123
|
|
121
124
|
#############################################
|
122
125
|
# map tables
|
123
126
|
# note: order matters; first come-first matched/served
|
127
|
+
#
|
128
|
+
## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
|
124
129
|
|
125
|
-
|
130
|
+
SCORE_MORE_RE = Regexp.union(
|
126
131
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
127
132
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
128
133
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
129
134
|
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
-
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
135
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
136
|
+
## note - keep basic score as its own token!!!!
|
137
|
+
## that is, SCORE & SCORE_MORE
|
138
|
+
### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
|
131
139
|
)
|
132
140
|
|
141
|
+
SCORE_RE = SCORE__FT__RE
|
142
|
+
|
143
|
+
|
133
144
|
end # class Lexer
|
134
145
|
end # module SportDb
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -111,7 +111,15 @@ BASICS_RE = %r{
|
|
111
111
|
(?<spaces> [ ]{2,}) |
|
112
112
|
(?<space> [ ])
|
113
113
|
|
|
114
|
-
(?<sym>[
|
114
|
+
(?<sym> (?<=^|[ ]) ## positive lookbehind
|
115
|
+
(?: ----|
|
116
|
+
---|
|
117
|
+
--
|
118
|
+
)
|
119
|
+
(?=[ ]) ## positive lookahead
|
120
|
+
)
|
121
|
+
|
|
122
|
+
(?<sym> [;,/@|\[\]-] )
|
115
123
|
}ix
|
116
124
|
|
117
125
|
|
@@ -124,7 +132,8 @@ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should swit
|
|
124
132
|
DURATION_RE, # note - duration MUST match before date
|
125
133
|
DATE_RE,
|
126
134
|
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
127
|
-
|
135
|
+
SCORE_MORE_RE,
|
136
|
+
SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
|
128
137
|
BASICS_RE,
|
129
138
|
MINUTE_RE,
|
130
139
|
GOAL_OG_RE, GOAL_PEN_RE,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01
|
11
|
+
date: 2025-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|