sportdb-parser 0.6.7 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/config/rounds_en.txt +4 -0
- data/lib/sportdb/parser/lang.rb +12 -1
- data/lib/sportdb/parser/lexer.rb +107 -3
- data/lib/sportdb/parser/parser.rb +665 -529
- data/lib/sportdb/parser/racc_tree.rb +33 -0
- data/lib/sportdb/parser/token-prop.rb +76 -18
- data/lib/sportdb/parser/token-text.rb +5 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d2e6a1f243cdd9e255d659ffd616e875d3baf978edada8e663eb54e9bff35df
|
4
|
+
data.tar.gz: 76ddc2f54d6ac9e117963e15ae5fa3cadfbaebce8d942021efcd849aaed7811d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 750b13d813a7fddf80b991c4fdb244856b4bd7d07410caa75cacabcd8cc724631b87eb15b3f61de0e84c378475a1dbc42529933997389102378527b623c39f90
|
7
|
+
data.tar.gz: a88f83b33728a761cbb44c134456aeb3b49f003b422a42b30642c43a95514f8a2825929b56d7d54d788f4c197f680e7830d4dffcb7728ab284ea6e23bae64877
|
data/CHANGELOG.md
CHANGED
data/config/rounds_en.txt
CHANGED
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -15,6 +15,7 @@ module Lang
|
|
15
15
|
## Group 1-99
|
16
16
|
## Group HEX # used in concacaf world cup quali
|
17
17
|
## Group 1A or A1, B1 - used anywhere
|
18
|
+
## yes - A1, A2, B1, C1, etc. used in UEFA Nations League for example!!
|
18
19
|
##
|
19
20
|
## use "key" of group - why? why not?
|
20
21
|
|
@@ -197,7 +198,17 @@ end
|
|
197
198
|
|
198
199
|
|
199
200
|
def self.is_round?( text )
|
200
|
-
|
201
|
+
### note - use check for case-insensitive
|
202
|
+
## was:
|
203
|
+
## more_round_names.include?( text )
|
204
|
+
## change to:
|
205
|
+
## more_round_names.any?{ |str| str.casecmp( text )==0 }
|
206
|
+
##
|
207
|
+
## todo/fix:
|
208
|
+
## maybe in the future use our own unaccent and downcase - why? why not?
|
209
|
+
## note - for now ROUND_RE is also case-insensitive!!
|
210
|
+
|
211
|
+
ROUND_RE.match?( text ) || more_round_names.any?{ |str| str.casecmp( text )==0 }
|
201
212
|
end
|
202
213
|
|
203
214
|
##
|
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -339,11 +339,17 @@ def _tokenize_line( line )
|
|
339
339
|
@re = PROP_CARDS_RE
|
340
340
|
tokens << [:PROP_YELLOWCARDS, m[:key]]
|
341
341
|
elsif ['ref', 'referee'].include?( key.downcase )
|
342
|
-
@re =
|
342
|
+
@re = PROP_REFEREE_RE
|
343
343
|
tokens << [:PROP_REFEREE, m[:key]]
|
344
|
+
elsif ['att', 'attn', 'attendance'].include?( key.downcase )
|
345
|
+
@re = PROP_ATTENDANCE_RE
|
346
|
+
tokens << [:PROP_ATTENDANCE, m[:key]]
|
344
347
|
elsif ['goals'].include?( key.downcase )
|
345
348
|
@re = PROP_GOAL_RE
|
346
349
|
tokens << [:PROP_GOALS, m[:key]]
|
350
|
+
elsif ['penalties', 'penalty shootout'].include?( key.downcase )
|
351
|
+
@re = PROP_PENALTIES_RE
|
352
|
+
tokens << [:PROP_PENALTIES, m[:key]]
|
347
353
|
else ## assume (team) line-up
|
348
354
|
@re = PROP_RE ## use LINEUP_RE ???
|
349
355
|
tokens << [:PROP, m[:key]]
|
@@ -511,6 +517,97 @@ def _tokenize_line( line )
|
|
511
517
|
puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
|
512
518
|
nil
|
513
519
|
end
|
520
|
+
elsif @re == PROP_ATTENDANCE_RE
|
521
|
+
if m[:space] || m[:spaces]
|
522
|
+
nil ## skip space(s)
|
523
|
+
elsif m[:enclosed_name]
|
524
|
+
## reserved for use for sold out or such (in the future) - why? why not?
|
525
|
+
[:ENCLOSED_NAME, m[:name]]
|
526
|
+
elsif m[:num]
|
527
|
+
[:PROP_NUM, [m[:num], { value: m[:value].to_i(10) } ]]
|
528
|
+
=begin
|
529
|
+
elsif m[:sym]
|
530
|
+
sym = m[:sym]
|
531
|
+
case sym
|
532
|
+
when ',' then [:',']
|
533
|
+
when ';' then [:';']
|
534
|
+
# when '[' then [:'[']
|
535
|
+
# when ']' then [:']']
|
536
|
+
else
|
537
|
+
nil ## ignore others (e.g. brackets [])
|
538
|
+
end
|
539
|
+
=end
|
540
|
+
else
|
541
|
+
## report error
|
542
|
+
puts "!!! TOKENIZE ERROR (PROP_ATTENDANCE_RE) - no match found"
|
543
|
+
nil
|
544
|
+
end
|
545
|
+
elsif @re == PROP_REFEREE_RE
|
546
|
+
if m[:space] || m[:spaces]
|
547
|
+
nil ## skip space(s)
|
548
|
+
elsif m[:prop_key] ## check for inline prop keys
|
549
|
+
key = m[:key]
|
550
|
+
## supported for now att/attn/attendance (add more inline keys?)
|
551
|
+
if ['att', 'attn', 'attendance' ].include?( key.downcase )
|
552
|
+
[:ATTENDANCE, m[:key]] ## use COACH_KEY or such - why? why not?
|
553
|
+
else
|
554
|
+
## report error - for unknown (inline) prop key in lineup
|
555
|
+
nil
|
556
|
+
end
|
557
|
+
elsif m[:prop_name] ## note - change prop_name to player
|
558
|
+
[:PROP_NAME, m[:name]] ### use PLAYER for token - why? why not?
|
559
|
+
elsif m[:num]
|
560
|
+
[:PROP_NUM, [m[:num], { value: m[:value].to_i(10) } ]]
|
561
|
+
elsif m[:enclosed_name]
|
562
|
+
## use HOLD,SAVE,POST or such keys - why? why not?
|
563
|
+
[:ENCLOSED_NAME, m[:name]]
|
564
|
+
elsif m[:sym]
|
565
|
+
sym = m[:sym]
|
566
|
+
case sym
|
567
|
+
when ',' then [:',']
|
568
|
+
when ';' then [:';']
|
569
|
+
# when '[' then [:'[']
|
570
|
+
# when ']' then [:']']
|
571
|
+
else
|
572
|
+
nil ## ignore others (e.g. brackets [])
|
573
|
+
end
|
574
|
+
else
|
575
|
+
## report error
|
576
|
+
puts "!!! TOKENIZE ERROR (PROP_REFEREE_RE) - no match found"
|
577
|
+
nil
|
578
|
+
end
|
579
|
+
elsif @re == PROP_PENALTIES_RE
|
580
|
+
if m[:space] || m[:spaces]
|
581
|
+
nil ## skip space(s)
|
582
|
+
elsif m[:prop_name] ## note - change prop_name to player
|
583
|
+
[:PROP_NAME, m[:name]] ### use PLAYER for token - why? why not?
|
584
|
+
elsif m[:enclosed_name]
|
585
|
+
## use HOLD,SAVE,POST or such keys - why? why not?
|
586
|
+
[:ENCLOSED_NAME, m[:name]]
|
587
|
+
elsif m[:score]
|
588
|
+
score = {}
|
589
|
+
## must always have ft for now e.g. 1-1 or such
|
590
|
+
### change to (generic) score from ft -
|
591
|
+
## might be score a.e.t. or such - why? why not?
|
592
|
+
score[:ft] = [m[:ft1].to_i(10),
|
593
|
+
m[:ft2].to_i(10)]
|
594
|
+
## note - for debugging keep (pass along) "literal" score
|
595
|
+
[:SCORE, [m[:score], score]]
|
596
|
+
elsif m[:sym]
|
597
|
+
sym = m[:sym]
|
598
|
+
case sym
|
599
|
+
when ',' then [:',']
|
600
|
+
when ';' then [:';']
|
601
|
+
when '[' then [:'[']
|
602
|
+
when ']' then [:']']
|
603
|
+
else
|
604
|
+
nil ## ignore others (e.g. brackets [])
|
605
|
+
end
|
606
|
+
else
|
607
|
+
## report error
|
608
|
+
puts "!!! TOKENIZE ERROR (PROP_PENALTIES_RE) - no match found"
|
609
|
+
nil
|
610
|
+
end
|
514
611
|
elsif @re == GOAL_RE || @re == PROP_GOAL_RE
|
515
612
|
if m[:space] || m[:spaces]
|
516
613
|
nil ## skip space(s)
|
@@ -678,7 +775,12 @@ def _tokenize_line( line )
|
|
678
775
|
end
|
679
776
|
elsif m[:any]
|
680
777
|
## todo/check log error
|
681
|
-
|
778
|
+
msg = "parse error (tokenize) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
779
|
+
puts "!! WARN - #{msg}"
|
780
|
+
|
781
|
+
errors << msg
|
782
|
+
log( "!! WARN - #{msg}" )
|
783
|
+
|
682
784
|
nil
|
683
785
|
else
|
684
786
|
## report error
|
@@ -715,7 +817,9 @@ def _tokenize_line( line )
|
|
715
817
|
##
|
716
818
|
## if in prop mode continue if last token is [,-]
|
717
819
|
## otherwise change back to "standard" mode
|
718
|
-
if @re == PROP_RE
|
820
|
+
if @re == PROP_RE || @re == PROP_CARDS_RE ||
|
821
|
+
@re == PROP_GOAL_RE || @re == PROP_PENALTIES_RE ||
|
822
|
+
@re == PROP_ATTENDANCE_RE || @re == PROP_REFEREE_RE
|
719
823
|
if [:',', :'-', :';'].include?( tokens[-1][0] )
|
720
824
|
## continue/stay in PROP_RE mode
|
721
825
|
## todo/check - auto-add PROP_CONT token or such
|