sportdb-parser 0.6.13 → 0.6.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/config/rounds_es.txt +3 -0
- data/lib/sportdb/parser/lang.rb +6 -1
- data/lib/sportdb/parser/lexer.rb +19 -14
- data/lib/sportdb/parser/parser.rb +578 -561
- data/lib/sportdb/parser/racc_tree.rb +7 -0
- data/lib/sportdb/parser/token.rb +18 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d907a1b09e04c9c17884afe7881961b597b547ca9ab742ecb6e06f89f8bfe151
+  data.tar.gz: da287bc13de7217bf3c6a2ffcfdd8d7f3d376baff413b79a58dd544bb2bfcc3a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3be1466a2d4ef5a5d1129f6b0fa9f67c6258c5a1cc60aab831ac5c4c7121e691fa64a9f33e71508d91b9809bcdc1b77b152fc2f7c1580172937662865e8ee33e
+  data.tar.gz: fe14835fc3195b5e441bdd3f9763bfe1303aa471e041a908cafc027becfbfd5ffd2e9696948e2dd88647ef7de88ebabf39674e42ba6656144e478150822e699e
data/CHANGELOG.md
CHANGED
data/config/rounds_es.txt
CHANGED
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -17,12 +17,17 @@ module Lang
   ## Group 1A or A1, B1 - used anywhere
   ## yes - A1, A2, B1, C1, etc. used in UEFA Nations League for example!!
   ##
+  ## exlcude
   ## use "key" of group - why? why not?
+  ##
+  ## note - will include group stage too
+  ## make sure is_round gets called before is_group for now!!!
 
   GROUP_RE = %r{^
       Group [ ]
-      (?<key>[a-z0-9]+)
+      (?<key> [a-z0-9]+ )
    $}ix
+
   def self.is_group?( text )
     ## use regex for match
     GROUP_RE.match?( text )
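Note (editor's sketch, not part of the published diff): the GROUP_RE edit above only adds spacing inside the named capture, but the new comments flag a real gotcha — the pattern also matches "Group Stage", which is why is_round? must run before is_group?. A minimal irb-style illustration, assuming only the regex shown in the hunk:

```ruby
GROUP_RE = %r{^
    Group [ ]
    (?<key> [a-z0-9]+ )
 $}ix

GROUP_RE.match?( 'Group A' )      #=> true  (key "A")
GROUP_RE.match?( 'Group 1A' )     #=> true  (key "1A")
GROUP_RE.match?( 'Group Stage' )  #=> true  (key "Stage") - hence the is_round?-first ordering
GROUP_RE.match?( 'Round of 16' )  #=> false
```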
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -182,13 +182,16 @@ def tokenize_with_errors
     ## pass 1
     ## replace all texts with keyword matches
     ##   (e.g. group, round, leg, etc.)
+    ##
+    ## note - let is_round? get first (before is_group?)
+    ##        will match group stage as round (NOT group)
     tokens = tokens.map do |t|
                 if t[0] == :TEXT
                   text = t[1]
-                  t =  if is_group?( text )
+                  t =  if is_round?( text ) || is_leg?( text ) || is_zone?( text )
+                          [:ROUND, text]
+                       elsif is_group?( text )
                           [:GROUP, text]
-                       elsif is_round?( text ) || is_leg?( text ) || is_zone?( text )
-                          [:ROUND, text]
                        else
                           t   ## pass through as-is (1:1)
                        end
@@ -323,8 +326,7 @@ def _tokenize_line( line )
 
     ## start with prop key (match will switch into prop mode!!!)
     ##  - fix - remove leading spaces in regex (upstream) - why? why not?
-    m = PROP_KEY_RE.match( line )
-    if m
+    if (m = PROP_KEY_RE.match( line ))
       ### switch into new mode
       ## switch context to PROP_RE
       puts "  ENTER PROP_RE MODE"   if debug?
@@ -357,10 +359,15 @@ def _tokenize_line( line )
 
       offsets = [m.begin(0), m.end(0)]
       pos = offsets[1]   ## update pos
-
+    elsif (m = ROUND_OUTLINE_RE.match( line ))
+      puts "  ROUND_OUTLINE"   if debug?
+
+      tokens << [:ROUND_OUTLINE, m[:round_outline]]
 
-
-
+      ## note - eats-up line for now (change later to only eat-up marker e.g. »|>>)
+      offsets = [m.begin(0), m.end(0)]
+      pos = offsets[1]   ## update pos
+    elsif (m = PLAYER_WITH_SCORE_RE.match( line ))
       ## switch context to GOAL_RE (goalline(s)
       ## split token (automagically) into two!! - player AND minute!!!
       @re = GOAL_RE
@@ -380,10 +387,7 @@ def _tokenize_line( line )
 
       offsets = [m.begin(0), m.end(0)]
       pos = offsets[1]   ## update pos
-
-
-    m = PLAYER_WITH_MINUTE_RE.match( line )
-    if m
+    elsif (m = PLAYER_WITH_MINUTE_RE.match( line ))
       ## switch context to GOAL_RE (goalline(s)
       ## split token (automagically) into two!! - player AND minute!!!
       @re = GOAL_RE
@@ -396,8 +400,6 @@ def _tokenize_line( line )
       ## todo - find a better way? how possible?
       tokens << [:NONE, "<|NONE|>"]   if m[:none]
 
-
-
       ## auto-add player token first
       tokens << [:PLAYER, m[:name]]
       ## minute props
@@ -412,6 +414,7 @@ def _tokenize_line( line )
       end
     end
 
+
 
    old_pos = -1   ## allows to backtrack to old pos (used in geo)
 
@@ -462,6 +465,8 @@ def _tokenize_line( line )
 
        case sym
        when ',' then [:',']
+       when '›' then [:',']   ## note - treat geo sep › (unicode) like comma for now!!!
+       when '>' then [:',']   ## note - treat geo sep > (ascii) like comma for now!!!
        when '[' then
            ## get out-off geo mode and backtrack (w/ next)
            puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
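Note (editor's sketch, not part of the published diff): the main behavioral change in lexer.rb is the reordered pass-1 classification — the round/leg/zone checks now run before the group check, so a text like "Group Stage" is tokenized as :ROUND rather than :GROUP. Below is a self-contained sketch of that ordering; the is_round?/is_group?/is_leg?/is_zone? predicates are simplified stand-ins, not the gem's real helpers.

```ruby
## simplified stand-in predicates (assumptions for illustration only)
def is_round?( text )
  text.match?( /\b(stage|round|final)\b/i )
end

def is_group?( text )
  text.match?( /^Group [ ] [a-z0-9]+ $/ix )
end

def is_leg?( text )
  false   ## legs not modeled in this sketch
end

def is_zone?( text )
  false   ## zones not modeled in this sketch
end

tokens = [[:TEXT, 'Group Stage'],
          [:TEXT, 'Group A'],
          [:TEXT, 'FC Bayern']]

## pass 1 - same shape as the new code: round/leg/zone win before group
tokens = tokens.map do |t|
  if t[0] == :TEXT
    text = t[1]
    t =  if is_round?( text ) || is_leg?( text ) || is_zone?( text )
            [:ROUND, text]    ## "Group Stage" lands here, not in :GROUP
         elsif is_group?( text )
            [:GROUP, text]
         else
            t   ## pass through as-is (1:1)
         end
  end
  t
end

p tokens
#=> [[:ROUND, "Group Stage"], [:GROUP, "Group A"], [:TEXT, "FC Bayern"]]
```

The other lexer changes follow the same spirit: the geo separators `›` (unicode) and `>` (ascii) are normalized to the `,` token, a new ROUND_OUTLINE branch eats the whole outline line for now, and the `m = RE.match( line ); if m` pairs are collapsed into `elsif (m = RE.match( line ))` chains.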