sportdb-parser 0.5.9 → 0.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/lexer.rb +47 -28
- data/lib/sportdb/parser/parser.rb +421 -344
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +12 -5
- data/lib/sportdb/parser/token-date.rb +18 -1
- data/lib/sportdb/parser/token-minute.rb +45 -0
- data/lib/sportdb/parser/token-prop.rb +133 -0
- data/lib/sportdb/parser/token-text.rb +9 -2
- data/lib/sportdb/parser/token.rb +43 -177
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +2 -0
- metadata +4 -2
checksums.yaml CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9af6317b144478400067502e60de2f8d6232ebf6e036b0f99b78f9c29922dba2
+  data.tar.gz: 7066483378693f6376f9c30ec71e5d4172c817c11025dd4e669da6d581b6ad54
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '039fdc82039d05ae8f51847a79dd77c0a657e316e8b0705a28bdf2f8e594f37531ea07a230c9e1a9133f96293975190dba070f50515d58bc9926e4ef3e8e152f'
+  data.tar.gz: e0f6483cd26ba7ef4800ecf76efd6f05e01e3a2458dbc6b65fe6582654c8d28627facbfc8228655e361df84c43418c9583826908cdcf3d61bf06d186288c56fa
```
data/CHANGELOG.md CHANGED
data/Manifest.txt CHANGED
```diff
@@ -14,6 +14,8 @@ lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
 lib/sportdb/parser/token-date.rb
+lib/sportdb/parser/token-minute.rb
+lib/sportdb/parser/token-prop.rb
 lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-status.rb
 lib/sportdb/parser/token-text.rb
```
data/lib/sportdb/parser/lexer.rb CHANGED
```diff
@@ -247,6 +247,11 @@ def tokenize_with_errors
     ## flatten tokens
     tokens = []
     tokens_by_line.each do |tok|
+
+      if debug?
+        pp tok
+      end
+
       tokens += tok
       tokens << [:NEWLINE, "\n"]   ## auto-add newlines
     end
@@ -260,7 +265,7 @@ def _tokenize_line( line )
   tokens = []
   errors = []   ## keep a list of errors - why? why not?

-  puts ">#{line}<"   if debug?
+  puts "line: >#{line}<"   if debug?

   pos = 0
   ## track last offsets - to report error on no match
@@ -275,10 +280,10 @@ def _tokenize_line( line )


   while m = @re.match( line, pos )
-    if debug?
-      pp m
-      puts "pos: #{pos}"
-    end
+    # if debug?
+    #   pp m
+    #   puts "pos: #{pos}"
+    # end
     offsets = [m.begin(0), m.end(0)]

     if offsets[0] != pos
@@ -298,7 +303,7 @@ def _tokenize_line( line )

     pos = offsets[1]

-    pp offsets   if debug?
+    # pp offsets   if debug?

     ##
     ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -306,12 +311,8 @@ def _tokenize_line( line )


     t = if @re == PROP_RE
-           if m[:space]
-             ## skip space
-             nil
-           elsif m[:spaces]
-             ## skip spaces
-             nil
+           if m[:space] || m[:spaces]
+             nil   ## skip space(s)
           elsif m[:prop_name]
             if m[:name] == 'Y'
               [:YELLOW_CARD, m[:name]]
@@ -339,11 +340,11 @@ def _tokenize_line( line )
            when '(' then [:'(']
            when ')' then [:')']
            when '-' then [:'-']
-
-
-
-
-
+            # when '.' then
+            #     ## switch back to top-level mode!!
+            #     puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
+            #     @re = RE
+            #     [:'.']
            else
              nil   ## ignore others (e.g. brackets [])
            end
```
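Note that the commented-out `'.'` branch above is superseded rather than simply deleted: the end-of-line check added at the bottom of `_tokenize_line` (last hunk below) now handles leaving prop mode, switching back to the top-level machinery and auto-emitting a `<PROP_END>` token.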
```diff
@@ -353,12 +354,8 @@ def _tokenize_line( line )
             nil
           end
         else   ## assume TOP_LEVEL (a.k.a. RE) machinery
-          if m[:space]
-            ## skip space
-            nil
-          elsif m[:spaces]
-            ## skip spaces
-            nil
+          if m[:space] || m[:spaces]
+            nil   ## skip space(s)
          elsif m[:prop_key]
            ## switch context to PROP_RE
            @re = PROP_RE
@@ -397,6 +394,7 @@ def _tokenize_line( line )
    ## map month names
    ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
    date[:y] = m[:year].to_i(10)    if m[:year]
+   date[:m] = m[:month].to_i(10)   if m[:month]
    date[:m] = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
    date[:d] = m[:day].to_i(10)     if m[:day]
    date[:wday] = DAY_MAP[ m[:day_name].downcase ]    if m[:day_name]
```
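For context, a minimal standalone sketch (not package code; the exact contents of `MONTH_MAP` are an assumption inferred from the lookup in the diff) of what the new numeric-month branch adds: month digits ("8") and month names ("Aug"/"AUG") now both resolve to the same `date[:m]` value.

```ruby
## assumed MONTH_MAP shape - the diff only shows MONTH_MAP[ m[:month_name].downcase ]
MONTH_MAP = {
  'jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4,  'may' => 5,  'jun' => 6,
  'jul' => 7, 'aug' => 8, 'sep' => 9, 'oct' => 10, 'nov' => 11, 'dec' => 12 }

def month_to_i( month: nil, month_name: nil )
  return month.to_i(10)                   if month        ## e.g. "8"   => 8  (new in 0.6.0)
  return MONTH_MAP[ month_name.downcase ] if month_name   ## e.g. "AUG" => 8
  nil
end

p month_to_i( month: '8' )          #=> 8
p month_to_i( month_name: 'AUG' )   #=> 8
```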
```diff
@@ -417,6 +415,8 @@ def _tokenize_line( line )
        duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ]   if m[:day_name2]
        ## note - for debugging keep (pass along) "literal" duration
        [:DURATION, [m[:duration], duration]]
+      elsif m[:wday]   ## standalone weekday e.g. Mo/Tu/We/etc.
+        [:WDAY, [m[:wday], { wday: DAY_MAP[ m[:day_name].downcase ] }]]
      elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
        ## note - strip enclosing () and convert to integer
        [:ORD, [m[:num], { value: m[:value].to_i(10) }]]
```
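A hedged sketch of the racc token pair the new branch emits for a standalone weekday such as `Fr`; the `DAY_MAP` contents are an assumption (the diff only shows the `DAY_MAP[ ... ]` lookup):

```ruby
## assumed DAY_MAP shape - keys downcased, values 1..7 (Monday-first)
DAY_MAP = { 'mo' => 1, 'tu' => 2, 'we' => 3, 'th' => 4,
            'fr' => 5, 'sa' => 6, 'su' => 7 }

## a standalone weekday tokenizes to a pair of the form
##   [:WDAY, [literal, { wday: 1..7 }]]
wday  = 'Fr'
token = [:WDAY, [wday, { wday: DAY_MAP[ wday.downcase ] }]]
p token   #=> [:WDAY, ["Fr", {:wday=>5}]]
```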
```diff
@@ -454,6 +454,7 @@ def _tokenize_line( line )
        case sym
        when ',' then [:',']
        when ';' then [:';']
+       when '/' then [:'/']
        when '@' then [:'@']
        when '|' then [:'|']
        when '[' then [:'[']
@@ -472,11 +473,11 @@ def _tokenize_line( line )

     tokens << t   if t

-    if debug?
-      print ">"
-      print "*" * pos
-      puts "#{line[pos..-1]}<"
-    end
+    # if debug?
+    #   print ">"
+    #   print "*" * pos
+    #   puts "#{line[pos..-1]}<"
+    # end
   end

   ## check if no match in end of string
```
```diff
@@ -489,6 +490,24 @@ def _tokenize_line( line )
   end


+  ##
+  ##  if in prop mode continue if last token is [,-]
+  ##    otherwise change back to "standard" mode
+  if @re == PROP_RE
+    if [:',', :'-'].include?( tokens[-1][0] )
+      ## continue/stay in PROP_RE mode
+      ## todo/check - auto-add PROP_CONT token or such
+      ##    to help parser with possible NEWLINE
+      ##    conflicts - why? why not?
+    else
+      ## switch back to top-level mode!!
+      puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
+      @re = RE
+      ## note - auto-add PROP_END (<PROP_END>)
+      tokens << [:PROP_END, "<PROP_END>"]
+    end
+  end
+

   [tokens,errors]
 end
```
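To make the new end-of-line rule concrete, a minimal sketch (hypothetical helper name and illustrative token names, not package code): a prop line ending "open" with `,` or `-` keeps the lexer in `PROP_RE` mode for the next line; anything else flips back to the top-level machinery and appends a synthetic `PROP_END` token.

```ruby
## hypothetical helper - mirrors the check on tokens[-1][0] in the diff
def prop_continues?( tokens )
  [:',', :'-'].include?( tokens[-1][0] )
end

## line ends with a "closing" token => emit PROP_END, back to top-level mode
closed = [[:YELLOW_CARD, 'Y'], [:MINUTE, '45']]   ## token names illustrative
closed << [:PROP_END, '<PROP_END>']  unless prop_continues?( closed )
p closed[-1]   #=> [:PROP_END, "<PROP_END>"]

## line ends "open" with a comma => lexer stays in PROP_RE mode
open = [[:YELLOW_CARD, 'Y'], [:',']]
p prop_continues?( open )   #=> true
```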