sportdb-parser 0.6.9 → 0.6.11
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/config/rounds_en.txt +1 -0
- data/lib/sportdb/parser/lexer.rb +57 -7
- data/lib/sportdb/parser/parser.rb +381 -358
- data/lib/sportdb/parser/racc_tree.rb +5 -3
- data/lib/sportdb/parser/token-status.rb +81 -8
- data/lib/sportdb/parser/token.rb +16 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '0902e4f811d2584b7c7360f00557d979579d5df8b48e3370ace0d3e7d211c45d'
+  data.tar.gz: e03327cfb9f33e39c3cc44063f9915ccc48d1c78b8076ee9fd7ba511d7b7bdc5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 14e1f06f0fd5137208cce8a26f9a534317f0538a59f1701ae0140914afae9bed2963979df71d1032771798983dcf5ee5197c0105c6fb040c0e72052b86d282b5
+  data.tar.gz: e8b973f129cf11f951ba5629b81c53896bec02b949b4eb8d7c175b5fae54e33e31788cdfc18985f0d2759dfc473758512523adddf122e4c5293ce2c45d28543f
```
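As a quick aside, the new checksums can be re-computed locally: a `.gem` file is a plain tar archive whose `metadata.gz` and `data.tar.gz` entries are exactly the files hashed in `checksums.yaml`. A minimal sketch, assuming the release has been downloaded first (e.g. with `gem fetch sportdb-parser --version 0.6.11`; the filename below is illustrative):

```ruby
require "digest"
require "rubygems/package"

## read metadata.gz and data.tar.gz out of the downloaded .gem (a tar archive)
## and print their SHA256 digests for comparison with checksums.yaml above
File.open( "sportdb-parser-0.6.11.gem", "rb" ) do |io|
  Gem::Package::TarReader.new( io ).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?( entry.full_name )
    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest( entry.read )}"
  end
end
```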
data/CHANGELOG.md
CHANGED
data/config/rounds_en.txt
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
```diff
@@ -413,6 +413,7 @@ def _tokenize_line( line )
     end
 
 
+    old_pos = -1  ## allows to backtrack to old pos (used in geo)
 
     while m = @re.match( line, pos )
       # if debug?
@@ -431,12 +432,14 @@
         log( msg )
       end
 
+
       ##
       ## todo/fix - also check if possible
       ## if no match but not yet end off string!!!!
       ## report skipped text run too!!!
 
-
+      old_pos = pos
+      pos = offsets[1]
 
       # pp offsets if debug?
 
@@ -444,7 +447,46 @@
       ## note: racc requires pairs e.g. [:TOKEN, VAL]
       ## for VAL use "text" or ["text", { opts }] array
 
-      t = if @re ==
+      t = if @re == GEO_RE
+            ### note - possibly end inline geo on [ (and others?? in the future
+            if m[:space] || m[:spaces]
+              nil ## skip space(s)
+            elsif m[:text]
+              [:GEO, m[:text]] ## keep pos - why? why not?
+            elsif m[:timezone]
+              [:TIMEZONE, m[:timezone]]
+            elsif m[:sym]
+              sym = m[:sym]
+              ## return symbols "inline" as is - why? why not?
+              ## (?<sym>[;,@|\[\]-])
+
+              case sym
+              when ',' then [:',']
+              when '[' then
+                ## get out-off geo mode and backtrack (w/ next)
+                puts " LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
+                @re = RE
+                pos = old_pos
+                next ## backtrack (resume new loop step)
+              else
+                puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
+                nil ## ignore others (e.g. brackets [])
+              end
+            elsif m[:any]
+              ## todo/check log error
+              msg = "parse error (tokenize geo) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+              puts "!! WARN - #{msg}"
+
+              errors << msg
+              log( "!! WARN - #{msg}" )
+
+              nil
+            else
+              ## report error/raise expection
+              puts "!!! TOKENIZE ERROR - no match found"
+              nil
+            end
+          elsif @re == PROP_CARDS_RE
             if m[:space] || m[:spaces]
               nil ## skip space(s)
             elsif m[:prop_name]
@@ -669,8 +711,10 @@
           elsif m[:note]
             ### todo/check:
             ## use value hash - why? why not? or simplify to:
-            ##
-            [:NOTE,
+            ## [:NOTE, [m[:note], {note: m[:note] } ]]
+            [:NOTE, m[:note]]
+          elsif m[:score_note]
+            [:SCORE_NOTE, m[:score_note]]
           elsif m[:time]
             ## unify to iso-format
             ### 12.40 => 12:40
@@ -701,8 +745,6 @@
             date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
             ## note - for debugging keep (pass along) "literal" date
             [:DATE, [m[:date], date]]
-          elsif m[:timezone]
-            [:TIMEZONE, m[:timezone]]
           elsif m[:duration]
             ## todo/check/fix - if end: works for kwargs!!!!!
             duration = { start: {}, end: {}}
@@ -758,10 +800,13 @@
             ## (?<sym>[;,@|\[\]-])
 
             case sym
+            when '@' ## enter geo mode
+              puts " ENTER GEO_RE MODE" if debug?
+              @re = GEO_RE
+              [:'@']
             when ',' then [:',']
             when ';' then [:';']
             when '/' then [:'/']
-            when '@' then [:'@']
             when '|' then [:'|']
             when '[' then [:'[']
             when ']' then [:']']
@@ -814,6 +859,11 @@
       @re = RE
     end
 
+    if @re == GEO_RE ### ALWAYS switch back to top level mode
+      puts " LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
+      @re = RE
+    end
+
     ##
     ## if in prop mode continue if last token is [,-]
     ## otherwise change back to "standard" mode
```
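Taken together, the geo-related lexer changes add a second scanning mode: seeing `@` at the top level switches `@re` to `GEO_RE` and emits `[:'@']`; inside geo mode, runs of text become `:GEO` tokens, `,` stays a separator, `TIMEZONE` is now only matched there, and a `[` (or the end of the line) switches back to the top-level `RE`, backtracking to `old_pos` so the bracket is re-scanned in normal mode. Below is a standalone sketch of that mode-switching pattern only; the regexes and names (`TOP_RE`, `tokenize_geo_line`, `:TEXT`) are simplified stand-ins, not sportdb-parser's own code.

```ruby
## Standalone illustration of the "@ ... geo mode" idea - NOT the library's API.
TOP_RE = /(?<text>[^@\[\],;\s][^@\[\],;]*)|(?<at>@)|(?<sym>[,;\[\]])|(?<space>\s+)/
GEO_RE = /(?<text>[^\[,\s][^\[,]*)|(?<comma>,)|(?<open>\[)|(?<space>\s+)/

def tokenize_geo_line( line )
  tokens = []
  re, pos = TOP_RE, 0

  while (m = re.match( line, pos ))
    pos = m.end( 0 )   ## advance past the current match

    if re == GEO_RE    ## geo mode - text up to , or [ becomes a GEO token
      if    m[:text]  then tokens << [:GEO, m[:text].strip]
      elsif m[:comma] then tokens << [:',']
      elsif m[:open]  then re = TOP_RE; tokens << [:'[']   ## leave geo mode on [
      end                                                  ## (spaces are skipped)
    else               ## top-level mode
      if    m[:text] then tokens << [:TEXT, m[:text].strip]
      elsif m[:at]   then re = GEO_RE; tokens << [:'@']    ## enter geo mode on @
      elsif m[:sym]  then tokens << [m[:sym].to_sym]
      end
    end
  end
  tokens
end

p tokenize_geo_line( "Arsenal 1-1 Chelsea @ Emirates Stadium, London" )
## => [[:TEXT, "Arsenal 1-1 Chelsea"], [:"@"], [:GEO, "Emirates Stadium"], [:","], [:GEO, "London"]]
```

In the real lexer the top-level regex of course tokenizes much more (scores, dates, durations, notes, and the new `SCORE_NOTE` token added above), but the enter-on-`@` / leave-on-`[`-or-end-of-line switching follows the same shape.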