sportdb-parser 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +55 -9
- data/lib/sportdb/parser/parser.rb +493 -396
- data/lib/sportdb/parser/racc_parser.rb +4 -2
- data/lib/sportdb/parser/token-date.rb +66 -15
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token.rb +11 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
@@ -68,12 +68,14 @@ def initialize( txt, debug: false )
|
|
68
68
|
|
69
69
|
|
70
70
|
def on_error(error_token_id, error_value, value_stack)
|
71
|
-
|
71
|
+
## auto-add error_token (as string)
|
72
|
+
error_token = Racc_token_to_s_table[error_token_id]
|
73
|
+
args = [error_token, error_token_id, error_value, value_stack]
|
72
74
|
puts
|
73
75
|
puts "!! on parse error:"
|
74
76
|
puts "args=#{args.pretty_inspect}"
|
75
77
|
|
76
|
-
@errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
78
|
+
@errors << "parse error on token: #{error_token} (#{error_token_id}) with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
77
79
|
## exit 1 ## exit for now - get and print more info about context etc.!!
|
78
80
|
end
|
79
81
|
|
@@ -147,15 +147,15 @@ DATE_II_RE = %r{
|
|
147
147
|
|
148
148
|
|
149
149
|
# e.g. iso-date - 2011-08-25
|
150
|
-
##
|
150
|
+
## note - allow/support ("shortcuts") e.g. 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
|
151
151
|
DATE_III_RE = %r{
|
152
152
|
(?<date>
|
153
153
|
\b
|
154
154
|
(?<year>\d{4})
|
155
155
|
-
|
156
|
-
(?<month>\d{2})
|
156
|
+
(?<month>\d{1,2})
|
157
157
|
-
|
158
|
-
(?<day>\d{2})
|
158
|
+
(?<day>\d{1,2})
|
159
159
|
\b
|
160
160
|
)}ix
|
161
161
|
|
@@ -214,29 +214,36 @@ end
|
|
214
214
|
#
|
215
215
|
# Sun Jun/23 - Wed Jun/26 -- YES
|
216
216
|
# Jun/23 - Jun/26 -- YES
|
217
|
-
#
|
218
|
-
|
219
|
-
#
|
220
|
-
# Jun/25
|
217
|
+
# Jun/25 - 26 - why? why not??? - YES - see below variant iii!!!
|
218
|
+
|
219
|
+
# Tue Jun/25 + Wed Jun/26 -- NO
|
220
|
+
# Jun/25 + Jun/26 -- NO
|
221
221
|
# Jun/25 .. 26 - why? why not???
|
222
222
|
# Jun/25 to 26 - why? why not???
|
223
223
|
# Jun/25 + 26 - add - why? why not???
|
224
224
|
# Sun-Wed Jun/23-26 - add - why? why not???
|
225
225
|
# Wed+Thu Jun/26+27 2024 - add - why? why not???
|
226
226
|
#
|
227
|
-
# maybe use
|
227
|
+
# maybe use comma and plus for list of dates
|
228
228
|
# Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
|
229
229
|
# Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
|
230
230
|
#
|
231
231
|
# add back optional comma (before) year - why? why not?
|
232
|
+
#
|
232
233
|
|
233
234
|
|
234
235
|
##
|
235
236
|
# todo add plus later on - why? why not?
|
237
|
+
### todo/fix add optional comma (,) before year
|
238
|
+
|
239
|
+
### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
|
240
|
+
## because alternation (|) has lowest priority/binding
|
241
|
+
|
236
242
|
|
237
243
|
DURATION_I_RE = %r{
|
238
244
|
(?<duration>
|
239
245
|
\b
|
246
|
+
(?:
|
240
247
|
## optional day name
|
241
248
|
((?<day_name1>#{DAY_NAMES})
|
242
249
|
[ ]
|
@@ -245,12 +252,13 @@ DURATION_I_RE = %r{
|
|
245
252
|
(?: \/|[ ] )
|
246
253
|
(?<day1>\d{1,2})
|
247
254
|
## optional year
|
248
|
-
(
|
255
|
+
( ,? # optional comma
|
256
|
+
[ ]
|
249
257
|
(?<year1>\d{4})
|
250
258
|
)?
|
251
259
|
|
252
260
|
## support + and - (add .. or such - why??)
|
253
|
-
[ ]*
|
261
|
+
[ ]* - [ ]*
|
254
262
|
|
255
263
|
## optional day name
|
256
264
|
((?<day_name2>#{DAY_NAMES})
|
@@ -260,20 +268,28 @@ DURATION_I_RE = %r{
|
|
260
268
|
(?: \/|[ ] )
|
261
269
|
(?<day2>\d{1,2})
|
262
270
|
## optional year
|
263
|
-
(
|
271
|
+
( ,? # optional comma
|
272
|
+
[ ]
|
264
273
|
(?<year2>\d{4})
|
265
274
|
)?
|
275
|
+
)
|
266
276
|
\b
|
267
277
|
)}ix
|
268
278
|
|
269
279
|
|
280
|
+
|
281
|
+
# FIX - remove this variant
|
282
|
+
# "standardize on month day [year]" !!!!
|
283
|
+
|
284
|
+
=begin
|
270
285
|
###
|
271
286
|
# variant ii
|
272
287
|
# e.g. 26 July - 27 July
|
273
|
-
|
274
|
-
|
288
|
+
# 26 July,
|
289
|
+
XXX_DURATION_II_RE = %r{
|
275
290
|
(?<duration>
|
276
291
|
\b
|
292
|
+
(?
|
277
293
|
## optional day name
|
278
294
|
((?<day_name1>#{DAY_NAMES})
|
279
295
|
[ ]
|
@@ -282,7 +298,8 @@ DURATION_II_RE = %r{
|
|
282
298
|
[ ]
|
283
299
|
(?<month_name1>#{MONTH_NAMES})
|
284
300
|
## optional year
|
285
|
-
(
|
301
|
+
(
|
302
|
+
[ ]
|
286
303
|
(?<year1>\d{4})
|
287
304
|
)?
|
288
305
|
|
@@ -300,16 +317,50 @@ DURATION_II_RE = %r{
|
|
300
317
|
( [ ]
|
301
318
|
(?<year2>\d{4})
|
302
319
|
)?
|
320
|
+
)
|
321
|
+
\b
|
322
|
+
)}ix
|
323
|
+
=end
|
324
|
+
|
325
|
+
|
326
|
+
# variant ii
|
327
|
+
# add support for shorthand
|
328
|
+
# August 16-18, 2011
|
329
|
+
# September 13-15, 2011
|
330
|
+
# October 18-20, 2011
|
331
|
+
# March/6-8, 2012
|
332
|
+
# March 6-8 2012
|
333
|
+
# March 6-8
|
334
|
+
#
|
335
|
+
# - add support for August 16+17 or such (and check 16+18)
|
336
|
+
# use <op> to check if day2 is a plus or range or such - why? why not?
|
337
|
+
|
338
|
+
DURATION_II_RE = %r{
|
339
|
+
(?<duration>
|
340
|
+
\b
|
341
|
+
(?:
|
342
|
+
(?<month_name1>#{MONTH_NAMES})
|
343
|
+
[ /]
|
344
|
+
(?<day1>\d{1,2})
|
345
|
+
-
|
346
|
+
(?<day2>\d{1,2})
|
347
|
+
(?:
|
348
|
+
,? ## optional comma
|
349
|
+
[ ]
|
350
|
+
(?<year1>\d{4})
|
351
|
+
)? ## optional year
|
352
|
+
)
|
303
353
|
\b
|
304
354
|
)}ix
|
305
355
|
|
306
356
|
|
357
|
+
|
307
358
|
#############################################
|
308
359
|
# map tables
|
309
360
|
# note: order matters; first come-first matched/served
|
310
361
|
DURATION_RE = Regexp.union(
|
311
362
|
DURATION_I_RE,
|
312
|
-
DURATION_II_RE
|
363
|
+
DURATION_II_RE,
|
313
364
|
)
|
314
365
|
|
315
366
|
|
@@ -17,7 +17,7 @@ class Lexer
|
|
17
17
|
## 3-4 pen. 2-2 a.e.t.
|
18
18
|
## 2-2 a.e.t.
|
19
19
|
SCORE__P_ET__RE = %r{
|
20
|
-
(?<
|
20
|
+
(?<score_more>
|
21
21
|
\b
|
22
22
|
(?:
|
23
23
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
@@ -34,7 +34,7 @@ class Lexer
|
|
34
34
|
## note: allow SPECIAL with penalty only
|
35
35
|
## 3-4 pen.
|
36
36
|
SCORE__P__RE = %r{
|
37
|
-
(?<
|
37
|
+
(?<score_more>
|
38
38
|
\b
|
39
39
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
40
|
[ ]* #{P_EN}
|
@@ -52,7 +52,7 @@ class Lexer
|
|
52
52
|
## 2-2 a.e.t. (1-1)
|
53
53
|
|
54
54
|
SCORE__P_ET_FT_HT__RE = %r{
|
55
|
-
(?<
|
55
|
+
(?<score_more>
|
56
56
|
\b
|
57
57
|
(?:
|
58
58
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
@@ -79,7 +79,7 @@ class Lexer
|
|
79
79
|
## special case for case WITHOUT extra time!!
|
80
80
|
## same as above (but WITHOUT extra time and pen required)
|
81
81
|
SCORE__P_FT_HT__RE = %r{
|
82
|
-
(?<
|
82
|
+
(?<score_more>
|
83
83
|
\b
|
84
84
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
85
85
|
[ ]* #{P_EN} [ ]+
|
@@ -99,36 +99,47 @@ class Lexer
|
|
99
99
|
## note: \b works only after non-alphanum e.g. )
|
100
100
|
|
101
101
|
|
102
|
-
|
103
|
-
## e.g. 2-1 (1-1)
|
104
|
-
## 2-1
|
105
|
-
|
102
|
+
##########
|
103
|
+
## e.g. 2-1 (1-1)
|
106
104
|
SCORE__FT_HT__RE = %r{
|
107
|
-
(?<
|
105
|
+
(?<score_more>
|
108
106
|
\b
|
109
107
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
110
|
-
(?:
|
111
108
|
[ ]+ \( [ ]*
|
112
109
|
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
113
110
|
[ ]* \)
|
114
|
-
)? # note: make half time (HT) score optional for now
|
115
111
|
(?=[ ,\]]|$)
|
116
112
|
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
117
113
|
## note: \b works only after non-alphanum e.g. )
|
118
114
|
|
119
|
-
|
115
|
+
#####
|
116
|
+
## 2-1
|
117
|
+
SCORE__FT__RE = %r{
|
118
|
+
(?<score>
|
119
|
+
\b
|
120
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
121
|
+
\b
|
122
|
+
)}ix
|
120
123
|
|
121
124
|
#############################################
|
122
125
|
# map tables
|
123
126
|
# note: order matters; first come-first matched/served
|
127
|
+
#
|
128
|
+
## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
|
124
129
|
|
125
|
-
|
130
|
+
SCORE_MORE_RE = Regexp.union(
|
126
131
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
127
132
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
128
133
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
129
134
|
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
-
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
135
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
136
|
+
## note - keep basic score as its own token!!!!
|
137
|
+
## that is, SCORE & SCORE_MORE
|
138
|
+
### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
|
131
139
|
)
|
132
140
|
|
141
|
+
SCORE_RE = SCORE__FT__RE
|
142
|
+
|
143
|
+
|
133
144
|
end # class Lexer
|
134
145
|
end # module SportDb
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -111,7 +111,15 @@ BASICS_RE = %r{
|
|
111
111
|
(?<spaces> [ ]{2,}) |
|
112
112
|
(?<space> [ ])
|
113
113
|
|
|
114
|
-
(?<sym>[
|
114
|
+
(?<sym> (?<=^|[ ]) ## positive lookbehind
|
115
|
+
(?: ----|
|
116
|
+
---|
|
117
|
+
--
|
118
|
+
)
|
119
|
+
(?=[ ]) ## positive lookahead
|
120
|
+
)
|
121
|
+
|
|
122
|
+
(?<sym> [;,/@|\[\]-] )
|
115
123
|
}ix
|
116
124
|
|
117
125
|
|
@@ -124,7 +132,8 @@ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should swit
|
|
124
132
|
DURATION_RE, # note - duration MUST match before date
|
125
133
|
DATE_RE,
|
126
134
|
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
127
|
-
|
135
|
+
SCORE_MORE_RE,
|
136
|
+
SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
|
128
137
|
BASICS_RE,
|
129
138
|
MINUTE_RE,
|
130
139
|
GOAL_OG_RE, GOAL_PEN_RE,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01
|
11
|
+
date: 2025-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|