sportdb-parser 0.6.10 → 0.6.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -0
- data/config/rounds_en.txt +0 -3
- data/config/zones_en.txt +18 -0
- data/lib/sportdb/parser/lang.rb +91 -34
- data/lib/sportdb/parser/lexer.rb +6 -4
- data/lib/sportdb/parser/parser.rb +370 -349
- data/lib/sportdb/parser/racc_tree.rb +5 -3
- data/lib/sportdb/parser/token-status.rb +81 -40
- data/lib/sportdb/parser/token.rb +1 -0
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45ae1f806a3734a32c34fd96a32784568443875cd82c9ab1e8ab867bd7358977
|
4
|
+
data.tar.gz: 3bc7ed4c867722b5d950ff759afdf02191405949fedffa4609d81c74daf882be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6d43efebff7d7558d084fea36d83bc6ec9fdb4aed83aa23d8226f79b10018ddbaf6b607539ac6d664b1038d7772062fcf4cf6f22791444e1c87353889e77d91
|
7
|
+
data.tar.gz: ae2151c60bb90bc4dd61d3fadd3fed6e44b865739ebfb7b28bd8c4a9e3b373cfb1827a2202ebc740ec8a69103d1073c6fe1caa5241a3580495aef1e503d9f55c
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/config/rounds_en.txt
CHANGED
data/config/zones_en.txt
ADDED
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -31,17 +31,20 @@ end
|
|
31
31
|
|
32
32
|
|
33
33
|
|
34
|
-
ROUND_RE = %r{^
|
35
|
-
|
34
|
+
ROUND_RE = %r{^
|
35
|
+
(?:
|
36
|
+
|
37
|
+
## add special case for group play-off rounds!
|
36
38
|
## group 2 play-off (e.g. worldcup 1954, 1958)
|
37
39
|
##
|
38
40
|
### note - allow Group ("stand-alone") as "generic" round for now
|
39
41
|
## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
|
40
42
|
(?: Group [ ] [A-Z0-9]+ [ ] Play-?offs? |
|
41
|
-
Group (?: [ ] phase)? |
|
43
|
+
Group (?: [ ] phase|stage)? |
|
42
44
|
League (?: [ ] phase)?
|
43
45
|
)
|
44
46
|
|
|
47
|
+
|
45
48
|
# round - note - requires number e.g. round 1,2, etc.
|
46
49
|
# note - use 1-9 regex (cannot start with 0) - why? why not?
|
47
50
|
# make week 01 or round 01 or matchday 01 possible?
|
@@ -50,6 +53,9 @@ ROUND_RE = %r{^(
|
|
50
53
|
Week
|
51
54
|
)
|
52
55
|
[ ] [1-9][0-9]*
|
56
|
+
(?: ## note - add optional Matchday 1 of 2 or such
|
57
|
+
[ ] of [1-9][0-9]*
|
58
|
+
)?
|
53
59
|
)
|
54
60
|
|
|
55
61
|
## starting with qual(ification)
|
@@ -58,8 +64,8 @@ ROUND_RE = %r{^(
|
|
58
64
|
## Playoff Round 1
|
59
65
|
## Play-in Round 1
|
60
66
|
(?: (?: Qual \. |
|
61
|
-
Play
|
62
|
-
Play
|
67
|
+
Play[ -]?off |
|
68
|
+
Play[ -]?in
|
63
69
|
)
|
64
70
|
[ ] Round [ ] [1-9][0-9]* )
|
65
71
|
|
|
@@ -67,29 +73,45 @@ ROUND_RE = %r{^(
|
|
67
73
|
## First Round
|
68
74
|
## Play-off Round
|
69
75
|
## Final Round (e.g. Worldcup 1950)
|
70
|
-
(?:
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
76
|
+
(?: (?:
|
77
|
+
Play[ -]?off |
|
78
|
+
Final |
|
79
|
+
Wildcard |
|
80
|
+
Qualifying |
|
81
|
+
(?:
|
82
|
+
(?:
|
83
|
+
[1-9][0-9]* \. |
|
84
|
+
1st | First |
|
85
|
+
2nd | Second |
|
86
|
+
3rd | Third |
|
87
|
+
4th | Fourth |
|
88
|
+
5th | Fifth
|
89
|
+
)
|
90
|
+
(?: ## with optionals
|
91
|
+
[ ] Qualifying
|
92
|
+
)?
|
93
|
+
)
|
94
|
+
)
|
95
|
+
[ ] Round
|
78
96
|
)
|
79
97
|
|
|
80
98
|
## starting with preliminary
|
81
99
|
# e.g. Preliminary round
|
82
100
|
(?: Preliminary [ ]
|
83
101
|
(?: Round |
|
84
|
-
Semi
|
85
|
-
Final
|
102
|
+
Semi[ -]?finals |
|
103
|
+
Final |
|
104
|
+
Qualifier
|
86
105
|
)
|
87
106
|
)
|
88
107
|
|
|
89
108
|
# more (knockout) rounds
|
90
|
-
# playoffs - playoff, play-off, play-offs
|
91
|
-
|
92
|
-
|
109
|
+
# playoffs - playoff, play-off, play-offs &
|
110
|
+
# playins
|
111
|
+
(?:
|
112
|
+
Play[ -]?offs? (?: [ ]for[ ]quarter-?finals )?
|
113
|
+
|
|
114
|
+
Play[ -]?ins?
|
93
115
|
)
|
94
116
|
|
|
95
117
|
# round32
|
@@ -98,13 +120,12 @@ ROUND_RE = %r{^(
|
|
98
120
|
|
|
99
121
|
# round16
|
100
122
|
(?: Round[ ]of[ ]16 |
|
101
|
-
Last[ ]16
|
102
|
-
8th[ ]finals )
|
123
|
+
Last[ ]16 )
|
103
124
|
|
|
104
125
|
# fifthplace
|
105
126
|
(?:
|
106
127
|
(?: (Fifth|5th)[ -]place
|
107
|
-
(?: [ ] (?: match|play
|
128
|
+
(?: [ ] (?: match|play[ -]?off|final ))?
|
108
129
|
) |
|
109
130
|
(?: Match[ ]for[ ](?: fifth|5th )[ -]place )
|
110
131
|
)
|
@@ -112,38 +133,44 @@ ROUND_RE = %r{^(
|
|
112
133
|
# thirdplace
|
113
134
|
(?:
|
114
135
|
(?: (Third|3rd)[ -]place
|
115
|
-
(?: [ ] (?: match|play
|
136
|
+
(?: [ ] (?: match|play[ -]?off|final ))?
|
116
137
|
) |
|
117
138
|
(?: Match[ ]for[ ](?: third|3rd )[ -]place )
|
118
139
|
)
|
119
140
|
|
|
120
141
|
# quarterfinals
|
121
142
|
(?:
|
122
|
-
|
143
|
+
## note - allow quarter-finals/quarter finals/quarterfinals
|
144
|
+
Quarter[ -]?finals? |
|
123
145
|
Quarters |
|
124
|
-
Last[ ]8
|
146
|
+
Last[ ]8 |
|
147
|
+
8th[ ]finals |
|
148
|
+
1/8[ ]finals ## check 1/8 finals is same as querter-finals?
|
125
149
|
)
|
126
150
|
|
|
127
151
|
# semifinals
|
128
152
|
(?:
|
129
|
-
Semi
|
153
|
+
Semi[ -]?finals? |
|
130
154
|
Semis |
|
131
|
-
Last[ ]4
|
155
|
+
Last[ ]4 |
|
156
|
+
1/4[ ]finals ## check 1/4 finals is same as semi-finals?
|
132
157
|
)
|
133
158
|
|
|
134
159
|
# final
|
135
160
|
Finals?
|
136
161
|
|
|
137
|
-
# decider e.g. Entscheidungsspiel
|
138
|
-
Decider
|
139
162
|
|
|
140
163
|
## add replays
|
141
164
|
## e.g. Final Replay
|
142
165
|
## Quarter-finals replays
|
143
166
|
## First round replays
|
144
167
|
(?:
|
145
|
-
(?:
|
146
|
-
|
168
|
+
(?: (?: 1st | First |
|
169
|
+
2nd | Second |
|
170
|
+
3rd | Third |
|
171
|
+
4th | Fourth |
|
172
|
+
5th | Fifth ) [ ] Round |
|
173
|
+
Quarter[ -]?finals? |
|
147
174
|
Finals?
|
148
175
|
)
|
149
176
|
[ ] Replays?
|
@@ -151,7 +178,8 @@ ROUND_RE = %r{^(
|
|
151
178
|
|
|
152
179
|
## more
|
153
180
|
(?:
|
154
|
-
|
181
|
+
Decider | # decider e.g. Entscheidungsspiel
|
182
|
+
Reclassification
|
155
183
|
)
|
156
184
|
)$}ix
|
157
185
|
|
@@ -196,6 +224,19 @@ def self.more_round_names
|
|
196
224
|
end
|
197
225
|
end
|
198
226
|
|
227
|
+
## Returns the list of zone names, read from the
##   config/zones_<lang>.txt file of every configured language
##   (for now 'en' only); the list is built on first use and cached.
def self.zone_names
  @zone_name ||= ['en'].reduce( [] ) do |collected, lang|
    ## todo/check - sort names by length??
    collected + read_names( "#{SportDb::Module::Parser.root}/config/zones_#{lang}.txt" )
  end
end
|
239
|
+
|
199
240
|
|
200
241
|
def self.is_round?( text )
|
201
242
|
### note - use check for case-insensitive
|
@@ -208,20 +249,36 @@ def self.is_round?( text )
|
|
208
249
|
## maybe in the future use our own unaccent and downcase - why? why not?
|
209
250
|
## note - for now ROUND_RE is also case-insensitive!!
|
210
251
|
|
211
|
-
ROUND_RE.match?( text ) ||
|
252
|
+
ROUND_RE.match?( text ) ||
|
253
|
+
more_round_names.any?{ |str| str.casecmp( text )==0 }
|
212
254
|
end
|
213
255
|
|
256
|
+
## Checks if text is one of the known zone names
##   (compared case-insensitively via casecmp).
def self.is_zone?( text )
  zone_names.any? do |zone_name|
    zone_name.casecmp( text ) == 0
  end
end
|
259
|
+
|
260
|
+
|
214
261
|
##
|
215
262
|
## keep leg separate (from round) - why? why not?
|
216
263
|
##
|
217
264
|
## Matches a leg (or numbered match) marker and nothing else, e.g.
##   1st leg / First leg, 2nd leg / Second leg,
##   leg 1 / leg 2 (optionally followed by "of 2"), match 1 / match 2 / etc.
## note - all alternatives are wrapped in ONE group so that the
##   start and end anchors apply to every alternative;
##   without the group ^ binds to the first alternative only and
##   $ to the last one only (and e.g. "xx 2nd leg yy" would match).
LEG_RE = %r{^
   (?:
       # leg1
       (?: 1st|First) [ ] leg
     |
       # leg2
       (?: 2nd|Second) [ ] leg
     |
       # leg 1 of 2 / leg 2 of 2
       # note - leg limited to ALWAYS 1/2 of 2 for now - why? why not?
       #   for more use match 1/2/3 etc.
       (?: leg [ ] [12]
             (?: [ ] of [ ] 2)? )
     |
       (?: match [ ] [1-9][0-9]* )
   )
$}ix
|
224
279
|
|
280
|
+
|
281
|
+
|
225
282
|
### Pair matches/games if marked with leg1 n leg2
|
226
283
|
def self.is_leg?( text )
|
227
284
|
LEG_RE.match?( text )
|
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -26,7 +26,7 @@ end
|
|
26
26
|
## delegates the group check to Lang
def is_group?( text )
  Lang.is_group?( text )
end
|
27
27
|
## delegates the round check to Lang
def is_round?( text )
  Lang.is_round?( text )
end
|
28
28
|
## delegates the leg check to Lang
def is_leg?( text )
  Lang.is_leg?( text )
end
|
29
|
-
|
29
|
+
## delegates the zone check to Lang
def is_zone?( text )
  Lang.is_zone?( text )
end
|
30
30
|
|
31
31
|
## transforms
|
32
32
|
##
|
@@ -187,7 +187,7 @@ def tokenize_with_errors
|
|
187
187
|
text = t[1]
|
188
188
|
t = if is_group?( text )
|
189
189
|
[:GROUP, text]
|
190
|
-
elsif is_round?( text ) || is_leg?( text )
|
190
|
+
elsif is_round?( text ) || is_leg?( text ) || is_zone?( text )
|
191
191
|
[:ROUND, text]
|
192
192
|
else
|
193
193
|
t ## pass through as-is (1:1)
|
@@ -711,8 +711,10 @@ def _tokenize_line( line )
|
|
711
711
|
elsif m[:note]
|
712
712
|
### todo/check:
|
713
713
|
## use value hash - why? why not? or simplify to:
|
714
|
-
##
|
715
|
-
[:NOTE,
|
714
|
+
## [:NOTE, [m[:note], {note: m[:note] } ]]
|
715
|
+
[:NOTE, m[:note]]
|
716
|
+
elsif m[:score_note]
|
717
|
+
[:SCORE_NOTE, m[:score_note]]
|
716
718
|
elsif m[:time]
|
717
719
|
## unify to iso-format
|
718
720
|
### 12.40 => 12:40
|