sportdb-parser 0.6.8 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/config/rounds_en.txt +1 -0
- data/lib/sportdb/parser/lexer.rb +125 -9
- data/lib/sportdb/parser/parser.rb +614 -574
- data/lib/sportdb/parser/racc_tree.rb +7 -0
- data/lib/sportdb/parser/token-prop.rb +31 -0
- data/lib/sportdb/parser/token.rb +15 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d0b047ceeb4dc8b4bf446fd47770aa89bd381f35a6375e630dd823e0e8a9f960
|
4
|
+
data.tar.gz: a035121961c238510506d0f58862220cf8f7c767845585f3e0d26b4e28367471
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 19a354aa23e88a776573e3d481f6754c670b7195e40b3f7063fdeee1d8c6155bb2dfe67129e07e8dd665221205e586df0c0bed6735d227659773ff8bfbd126c2
|
7
|
+
data.tar.gz: d89aaedee183b9a8ccc2de58cd7b35e0aa6216463d2873534008bdb02c7e8b8fc9d6cbf555dfd6da67e8c9323d9fb113098ef23ddfa6eb8331b48b74e43477d9
|
data/CHANGELOG.md
CHANGED
data/config/rounds_en.txt
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -339,8 +339,11 @@ def _tokenize_line( line )
|
|
339
339
|
@re = PROP_CARDS_RE
|
340
340
|
tokens << [:PROP_YELLOWCARDS, m[:key]]
|
341
341
|
elsif ['ref', 'referee'].include?( key.downcase )
|
342
|
-
@re =
|
342
|
+
@re = PROP_REFEREE_RE
|
343
343
|
tokens << [:PROP_REFEREE, m[:key]]
|
344
|
+
elsif ['att', 'attn', 'attendance'].include?( key.downcase )
|
345
|
+
@re = PROP_ATTENDANCE_RE
|
346
|
+
tokens << [:PROP_ATTENDANCE, m[:key]]
|
344
347
|
elsif ['goals'].include?( key.downcase )
|
345
348
|
@re = PROP_GOAL_RE
|
346
349
|
tokens << [:PROP_GOALS, m[:key]]
|
@@ -410,6 +413,7 @@ def _tokenize_line( line )
|
|
410
413
|
end
|
411
414
|
|
412
415
|
|
416
|
+
old_pos = -1 ## allows to backtrack to old pos (used in geo)
|
413
417
|
|
414
418
|
while m = @re.match( line, pos )
|
415
419
|
# if debug?
|
@@ -428,12 +432,14 @@ def _tokenize_line( line )
|
|
428
432
|
log( msg )
|
429
433
|
end
|
430
434
|
|
435
|
+
|
431
436
|
##
|
432
437
|
## todo/fix - also check if possible
|
433
438
|
## if no match but not yet end off string!!!!
|
434
439
|
## report skipped text run too!!!
|
435
440
|
|
436
|
-
|
441
|
+
old_pos = pos
|
442
|
+
pos = offsets[1]
|
437
443
|
|
438
444
|
# pp offsets if debug?
|
439
445
|
|
@@ -441,7 +447,46 @@ def _tokenize_line( line )
|
|
441
447
|
## note: racc requires pairs e.g. [:TOKEN, VAL]
|
442
448
|
## for VAL use "text" or ["text", { opts }] array
|
443
449
|
|
444
|
-
t = if @re ==
|
450
|
+
t = if @re == GEO_RE
|
451
|
+
### note - possibly end inline geo on [ (and others?? in the future
|
452
|
+
if m[:space] || m[:spaces]
|
453
|
+
nil ## skip space(s)
|
454
|
+
elsif m[:text]
|
455
|
+
[:GEO, m[:text]] ## keep pos - why? why not?
|
456
|
+
elsif m[:timezone]
|
457
|
+
[:TIMEZONE, m[:timezone]]
|
458
|
+
elsif m[:sym]
|
459
|
+
sym = m[:sym]
|
460
|
+
## return symbols "inline" as is - why? why not?
|
461
|
+
## (?<sym>[;,@|\[\]-])
|
462
|
+
|
463
|
+
case sym
|
464
|
+
when ',' then [:',']
|
465
|
+
when '[' then
|
466
|
+
## get out-off geo mode and backtrack (w/ next)
|
467
|
+
puts " LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
|
468
|
+
@re = RE
|
469
|
+
pos = old_pos
|
470
|
+
next ## backtrack (resume new loop step)
|
471
|
+
else
|
472
|
+
puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
|
473
|
+
nil ## ignore others (e.g. brackets [])
|
474
|
+
end
|
475
|
+
elsif m[:any]
|
476
|
+
## todo/check log error
|
477
|
+
msg = "parse error (tokenize geo) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
478
|
+
puts "!! WARN - #{msg}"
|
479
|
+
|
480
|
+
errors << msg
|
481
|
+
log( "!! WARN - #{msg}" )
|
482
|
+
|
483
|
+
nil
|
484
|
+
else
|
485
|
+
## report error/raise expection
|
486
|
+
puts "!!! TOKENIZE ERROR - no match found"
|
487
|
+
nil
|
488
|
+
end
|
489
|
+
elsif @re == PROP_CARDS_RE
|
445
490
|
if m[:space] || m[:spaces]
|
446
491
|
nil ## skip space(s)
|
447
492
|
elsif m[:prop_name]
|
@@ -514,6 +559,65 @@ def _tokenize_line( line )
|
|
514
559
|
puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
|
515
560
|
nil
|
516
561
|
end
|
562
|
+
elsif @re == PROP_ATTENDANCE_RE
|
563
|
+
if m[:space] || m[:spaces]
|
564
|
+
nil ## skip space(s)
|
565
|
+
elsif m[:enclosed_name]
|
566
|
+
## reserverd for use for sold out or such (in the future) - why? why not?
|
567
|
+
[:ENCLOSED_NAME, m[:name]]
|
568
|
+
elsif m[:num]
|
569
|
+
[:PROP_NUM, [m[:num], { value: m[:value].to_i(10) } ]]
|
570
|
+
=begin
|
571
|
+
elsif m[:sym]
|
572
|
+
sym = m[:sym]
|
573
|
+
case sym
|
574
|
+
when ',' then [:',']
|
575
|
+
when ';' then [:';']
|
576
|
+
# when '[' then [:'[']
|
577
|
+
# when ']' then [:']']
|
578
|
+
else
|
579
|
+
nil ## ignore others (e.g. brackets [])
|
580
|
+
end
|
581
|
+
=end
|
582
|
+
else
|
583
|
+
## report error
|
584
|
+
puts "!!! TOKENIZE ERROR (PROP_ATTENDANCE_RE) - no match found"
|
585
|
+
nil
|
586
|
+
end
|
587
|
+
elsif @re == PROP_REFEREE_RE
|
588
|
+
if m[:space] || m[:spaces]
|
589
|
+
nil ## skip space(s)
|
590
|
+
elsif m[:prop_key] ## check for inline prop keys
|
591
|
+
key = m[:key]
|
592
|
+
## supported for now coach/trainer (add manager?)
|
593
|
+
if ['att', 'attn', 'attendance' ].include?( key.downcase )
|
594
|
+
[:ATTENDANCE, m[:key]] ## use COACH_KEY or such - why? why not?
|
595
|
+
else
|
596
|
+
## report error - for unknown (inline) prop key in lineup
|
597
|
+
nil
|
598
|
+
end
|
599
|
+
elsif m[:prop_name] ## note - change prop_name to player
|
600
|
+
[:PROP_NAME, m[:name]] ### use PLAYER for token - why? why not?
|
601
|
+
elsif m[:num]
|
602
|
+
[:PROP_NUM, [m[:num], { value: m[:value].to_i(10) } ]]
|
603
|
+
elsif m[:enclosed_name]
|
604
|
+
## use HOLD,SAVE,POST or such keys - why? why not?
|
605
|
+
[:ENCLOSED_NAME, m[:name]]
|
606
|
+
elsif m[:sym]
|
607
|
+
sym = m[:sym]
|
608
|
+
case sym
|
609
|
+
when ',' then [:',']
|
610
|
+
when ';' then [:';']
|
611
|
+
# when '[' then [:'[']
|
612
|
+
# when ']' then [:']']
|
613
|
+
else
|
614
|
+
nil ## ignore others (e.g. brackets [])
|
615
|
+
end
|
616
|
+
else
|
617
|
+
## report error
|
618
|
+
puts "!!! TOKENIZE ERROR (PROP_REFEREE_RE) - no match found"
|
619
|
+
nil
|
620
|
+
end
|
517
621
|
elsif @re == PROP_PENALTIES_RE
|
518
622
|
if m[:space] || m[:spaces]
|
519
623
|
nil ## skip space(s)
|
@@ -639,8 +743,6 @@ def _tokenize_line( line )
|
|
639
743
|
date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
|
640
744
|
## note - for debugging keep (pass along) "literal" date
|
641
745
|
[:DATE, [m[:date], date]]
|
642
|
-
elsif m[:timezone]
|
643
|
-
[:TIMEZONE, m[:timezone]]
|
644
746
|
elsif m[:duration]
|
645
747
|
## todo/check/fix - if end: works for kwargs!!!!!
|
646
748
|
duration = { start: {}, end: {}}
|
@@ -696,10 +798,13 @@ def _tokenize_line( line )
|
|
696
798
|
## (?<sym>[;,@|\[\]-])
|
697
799
|
|
698
800
|
case sym
|
801
|
+
when '@' ## enter geo mode
|
802
|
+
puts " ENTER GEO_RE MODE" if debug?
|
803
|
+
@re = GEO_RE
|
804
|
+
[:'@']
|
699
805
|
when ',' then [:',']
|
700
806
|
when ';' then [:';']
|
701
807
|
when '/' then [:'/']
|
702
|
-
when '@' then [:'@']
|
703
808
|
when '|' then [:'|']
|
704
809
|
when '[' then [:'[']
|
705
810
|
when ']' then [:']']
|
@@ -713,7 +818,12 @@ def _tokenize_line( line )
|
|
713
818
|
end
|
714
819
|
elsif m[:any]
|
715
820
|
## todo/check log error
|
716
|
-
|
821
|
+
msg = "parse error (tokenize) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
822
|
+
puts "!! WARN - #{msg}"
|
823
|
+
|
824
|
+
errors << msg
|
825
|
+
log( "!! WARN - #{msg}" )
|
826
|
+
|
717
827
|
nil
|
718
828
|
else
|
719
829
|
## report error
|
@@ -747,11 +857,17 @@ def _tokenize_line( line )
|
|
747
857
|
@re = RE
|
748
858
|
end
|
749
859
|
|
860
|
+
if @re == GEO_RE ### ALWAYS switch back to top level mode
|
861
|
+
puts " LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
|
862
|
+
@re = RE
|
863
|
+
end
|
864
|
+
|
750
865
|
##
|
751
866
|
## if in prop mode continue if last token is [,-]
|
752
867
|
## otherwise change back to "standard" mode
|
753
|
-
if @re == PROP_RE
|
754
|
-
@re == PROP_GOAL_RE
|
868
|
+
if @re == PROP_RE || @re == PROP_CARDS_RE ||
|
869
|
+
@re == PROP_GOAL_RE || @re == PROP_PENALTIES_RE ||
|
870
|
+
@re == PROP_ATTENDANCE_RE || @re == PROP_REFEREE_RE
|
755
871
|
if [:',', :'-', :';'].include?( tokens[-1][0] )
|
756
872
|
## continue/stay in PROP_RE mode
|
757
873
|
## todo/check - auto-add PROP_CONT token or such
|