sportdb-parser 0.6.12 → 0.6.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/config/rounds_es.txt +3 -0
- data/config/zones_en.txt +2 -0
- data/lib/sportdb/parser/lang.rb +35 -26
- data/lib/sportdb/parser/lexer.rb +19 -3
- data/lib/sportdb/parser/parser.rb +578 -561
- data/lib/sportdb/parser/racc_tree.rb +7 -0
- data/lib/sportdb/parser/token.rb +18 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51b568bfcb315049faf125718d09615346959cdd7301934a5131cb0fac1b6f9b
|
4
|
+
data.tar.gz: c8bd6486e70d28d4b121a3fb1ebec206b46880571a2404de64418fd76fe8b039
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1236c350c6b2cef3a7b9caff9b943ef09d69a02e5e6f2fe3ed55c5677d4a20ed17bba6423c77d035a4ea4c772bdaf598a81ee0738f25185ad2bbf5b63ab4e901
|
7
|
+
data.tar.gz: a16da2520f22b6392330bfbec90e2c8c2ca0f784bb31556ddb0140e25d53d7a293d6ca615b4a2a08d426e0069b0f6dcab8ed06ba35e688a03befa6429d9c732d
|
data/CHANGELOG.md
CHANGED
data/config/rounds_es.txt
CHANGED
data/config/zones_en.txt
CHANGED
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -17,12 +17,17 @@ module Lang
|
|
17
17
|
## Group 1A or A1, B1 - used anywhere
|
18
18
|
## yes - A1, A2, B1, C1, etc. used in UEFA Nations League for example!!
|
19
19
|
##
|
20
|
+
## exclude
|
20
21
|
## use "key" of group - why? why not?
|
22
|
+
##
|
23
|
+
## note - will include group stage too
|
24
|
+
## make sure is_round gets called before is_group for now!!!
|
21
25
|
|
22
26
|
GROUP_RE = %r{^
|
23
27
|
Group [ ]
|
24
|
-
(?<key>[a-z0-9]+)
|
28
|
+
(?<key> [a-z0-9]+ )
|
25
29
|
$}ix
|
30
|
+
|
26
31
|
def self.is_group?( text )
|
27
32
|
## use regex for match
|
28
33
|
GROUP_RE.match?( text )
|
@@ -39,8 +44,8 @@ ROUND_RE = %r{^
|
|
39
44
|
##
|
40
45
|
### note - allow Group ("stand-alone") as "generic" round for now
|
41
46
|
## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
|
42
|
-
(?: Group [ ] [
|
43
|
-
Group (?: [ ] phase|stage)? |
|
47
|
+
(?: Group [ ] [a-z0-9]+ [ ] Play-?offs? |
|
48
|
+
Group (?: [ ] (?: phase|stage))? |
|
44
49
|
League (?: [ ] phase)?
|
45
50
|
)
|
46
51
|
|
|
@@ -54,10 +59,13 @@ ROUND_RE = %r{^
|
|
54
59
|
)
|
55
60
|
[ ] [1-9][0-9]*
|
56
61
|
(?: ## note - add optional Matchday 1 of 2 or such
|
57
|
-
[ ] of [1-9][0-9]*
|
62
|
+
[ ] of [ ] [1-9][0-9]*
|
58
63
|
)?
|
59
64
|
)
|
60
65
|
|
|
66
|
+
(?: Round [ ] One
|
67
|
+
)
|
68
|
+
|
|
61
69
|
## starting with qual(ification)
|
62
70
|
## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
|
63
71
|
## or
|
@@ -116,16 +124,28 @@ ROUND_RE = %r{^
|
|
116
124
|
|
|
117
125
|
# round32
|
118
126
|
(?: Round[ ]of[ ]32 |
|
119
|
-
Last[ ]32
|
127
|
+
Last[ ]32 |
|
128
|
+
16th[ ]finals |
|
129
|
+
1/16[ ]finals )
|
120
130
|
|
|
121
131
|
# round16
|
122
132
|
(?: Round[ ]of[ ]16 |
|
123
|
-
Last[ ]16
|
133
|
+
Last[ ]16 |
|
134
|
+
8th[ ]finals |
|
135
|
+
1/8[ ]finals )
|
124
136
|
|
|
137
|
+
# round8 aka quarterfinals
|
138
|
+
# note - allow quarter-finals/quarter finals/quarterfinals
|
139
|
+
(?: Round[ ]of[ ]8 |
|
140
|
+
Last[ ]8 |
|
141
|
+
1/4[ ]finals |
|
142
|
+
Quarter[ -]?finals? |
|
143
|
+
Quarters )
|
144
|
+
|
|
125
145
|
# fifthplace
|
126
146
|
(?:
|
127
147
|
(?: (Fifth|5th)[ -]place
|
128
|
-
(?: [ ] (?: match|play[ -]?off
|
148
|
+
(?: [ ] (?: match|final|play[ -]?off ))?
|
129
149
|
) |
|
130
150
|
(?: Match[ ]for[ ](?: fifth|5th )[ -]place )
|
131
151
|
)
|
@@ -133,33 +153,21 @@ ROUND_RE = %r{^
|
|
133
153
|
# thirdplace
|
134
154
|
(?:
|
135
155
|
(?: (Third|3rd)[ -]place
|
136
|
-
(?: [ ] (?: match|play[ -]?off
|
156
|
+
(?: [ ] (?: match|final|play[ -]?off ))?
|
137
157
|
) |
|
138
158
|
(?: Match[ ]for[ ](?: third|3rd )[ -]place )
|
139
159
|
)
|
140
160
|
|
|
141
|
-
#
|
142
|
-
(?:
|
143
|
-
## note - allow quarter-finals/quarter finals/quarterfinals
|
144
|
-
Quarter[ -]?finals? |
|
145
|
-
Quarters |
|
146
|
-
Last[ ]8 |
|
147
|
-
8th[ ]finals |
|
148
|
-
1/8[ ]finals ## check 1/8 finals is same as querter-finals?
|
149
|
-
)
|
150
|
-
|
|
151
|
-
# semifinals
|
161
|
+
# round4 aka semifinals
|
152
162
|
(?:
|
163
|
+
Round[ ]of[ ]4 |
|
164
|
+
Last[ ]4 |
|
153
165
|
Semi[ -]?finals? |
|
154
|
-
Semis
|
155
|
-
Last[ ]4 |
|
156
|
-
1/4[ ]finals ## check 1/4 finals is same as semi-finals?
|
157
|
-
)
|
166
|
+
Semis )
|
158
167
|
|
|
159
|
-
# final
|
168
|
+
# round2 aka final
|
160
169
|
Finals?
|
161
|
-
|
162
|
-
|
|
170
|
+
|
|
163
171
|
## add replays
|
164
172
|
## e.g. Final Replay
|
165
173
|
## Quarter-finals replays
|
@@ -271,6 +279,7 @@ LEG_RE = %r{^
|
|
271
279
|
# leg 1 of 2 / leg 2 of 2
|
272
280
|
# note - leg limited to ALWAYS 1/2 of 2 for now - why? why not?
|
273
281
|
# for more use match 1/2/3 etc.
|
282
|
+
## allow leg of three (e.g. leg 1 of 3) - why? why not?
|
274
283
|
(?: leg [ ] [12]
|
275
284
|
(?: [ ] of [ ] 2)? )
|
276
285
|
|
|
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -182,13 +182,16 @@ def tokenize_with_errors
|
|
182
182
|
## pass 1
|
183
183
|
## replace all texts with keyword matches
|
184
184
|
## (e.g. group, round, leg, etc.)
|
185
|
+
##
|
186
|
+
## note - let is_round? go first (before is_group?)
|
187
|
+
## will match group stage as round (NOT group)
|
185
188
|
tokens = tokens.map do |t|
|
186
189
|
if t[0] == :TEXT
|
187
190
|
text = t[1]
|
188
|
-
t =
|
191
|
+
t = if is_round?( text ) || is_leg?( text ) || is_zone?( text )
|
192
|
+
[:ROUND, text]
|
193
|
+
elsif is_group?( text )
|
189
194
|
[:GROUP, text]
|
190
|
-
elsif is_round?( text ) || is_leg?( text ) || is_zone?( text )
|
191
|
-
[:ROUND, text]
|
192
195
|
else
|
193
196
|
t ## pass through as-is (1:1)
|
194
197
|
end
|
@@ -359,6 +362,17 @@ def _tokenize_line( line )
|
|
359
362
|
pos = offsets[1] ## update pos
|
360
363
|
end
|
361
364
|
|
365
|
+
m = ROUND_OUTLINE_RE.match( line )
|
366
|
+
if m
|
367
|
+
puts " ROUND_OUTLINE" if debug?
|
368
|
+
|
369
|
+
tokens << [:ROUND_OUTLINE, m[:round_outline]]
|
370
|
+
|
371
|
+
## note - eats-up line for now (change later to only eat-up marker e.g. »|>>)
|
372
|
+
offsets = [m.begin(0), m.end(0)]
|
373
|
+
pos = offsets[1] ## update pos
|
374
|
+
end
|
375
|
+
|
362
376
|
m = PLAYER_WITH_SCORE_RE.match( line )
|
363
377
|
if m
|
364
378
|
## switch context to GOAL_RE (goalline(s))
|
@@ -462,6 +476,8 @@ def _tokenize_line( line )
|
|
462
476
|
|
463
477
|
case sym
|
464
478
|
when ',' then [:',']
|
479
|
+
when '›' then [:','] ## note - treat geo sep › (unicode) like comma for now!!!
|
480
|
+
when '>' then [:','] ## note - treat geo sep > (ascii) like comma for now!!!
|
465
481
|
when '[' then
|
466
482
|
## get out of geo mode and backtrack (w/ next)
|
467
483
|
puts " LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
|