sportdb-parser 0.6.11 → 0.6.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -0
- data/config/rounds_en.txt +0 -3
- data/config/zones_en.txt +18 -0
- data/lib/sportdb/parser/lang.rb +91 -34
- data/lib/sportdb/parser/lexer.rb +2 -2
- data/lib/sportdb/parser/token-status.rb +8 -40
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45ae1f806a3734a32c34fd96a32784568443875cd82c9ab1e8ab867bd7358977
|
4
|
+
data.tar.gz: 3bc7ed4c867722b5d950ff759afdf02191405949fedffa4609d81c74daf882be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6d43efebff7d7558d084fea36d83bc6ec9fdb4aed83aa23d8226f79b10018ddbaf6b607539ac6d664b1038d7772062fcf4cf6f22791444e1c87353889e77d91
|
7
|
+
data.tar.gz: ae2151c60bb90bc4dd61d3fadd3fed6e44b865739ebfb7b28bd8c4a9e3b373cfb1827a2202ebc740ec8a69103d1073c6fe1caa5241a3580495aef1e503d9f55c
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/config/rounds_en.txt
CHANGED
data/config/zones_en.txt
ADDED
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -31,17 +31,20 @@ end
|
|
31
31
|
|
32
32
|
|
33
33
|
|
34
|
-
ROUND_RE = %r{^
|
35
|
-
|
34
|
+
ROUND_RE = %r{^
|
35
|
+
(?:
|
36
|
+
|
37
|
+
## add special case for group play-off rounds!
|
36
38
|
## group 2 play-off (e.g. worldcup 1954, 1958)
|
37
39
|
##
|
38
40
|
### note - allow Group ("stand-alone") as "generic" round for now
|
39
41
|
## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
|
40
42
|
(?: Group [ ] [A-Z0-9]+ [ ] Play-?offs? |
|
41
|
-
Group (?: [ ] phase)? |
|
43
|
+
Group (?: [ ] phase|stage)? |
|
42
44
|
League (?: [ ] phase)?
|
43
45
|
)
|
44
46
|
|
|
47
|
+
|
45
48
|
# round - note - requires number e.g. round 1,2, etc.
|
46
49
|
# note - use 1-9 regex (cannot start with 0) - why? why not?
|
47
50
|
# make week 01 or round 01 or matchday 01 possible?
|
@@ -50,6 +53,9 @@ ROUND_RE = %r{^(
|
|
50
53
|
Week
|
51
54
|
)
|
52
55
|
[ ] [1-9][0-9]*
|
56
|
+
(?: ## note - add optional Matchday 1 of 2 or such
|
57
|
+
[ ] of [1-9][0-9]*
|
58
|
+
)?
|
53
59
|
)
|
54
60
|
|
|
55
61
|
## starting with qual(ification)
|
@@ -58,8 +64,8 @@ ROUND_RE = %r{^(
|
|
58
64
|
## Playoff Round 1
|
59
65
|
## Play-in Round 1
|
60
66
|
(?: (?: Qual \. |
|
61
|
-
Play
|
62
|
-
Play
|
67
|
+
Play[ -]?off |
|
68
|
+
Play[ -]?in
|
63
69
|
)
|
64
70
|
[ ] Round [ ] [1-9][0-9]* )
|
65
71
|
|
|
@@ -67,29 +73,45 @@ ROUND_RE = %r{^(
|
|
67
73
|
## First Round
|
68
74
|
## Play-off Round
|
69
75
|
## Final Round (e.g. Worldcup 1950)
|
70
|
-
(?:
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
76
|
+
(?: (?:
|
77
|
+
Play[ -]?off |
|
78
|
+
Final |
|
79
|
+
Wildcard |
|
80
|
+
Qualifying |
|
81
|
+
(?:
|
82
|
+
(?:
|
83
|
+
[1-9][0-9]* \. |
|
84
|
+
1st | First |
|
85
|
+
2nd | Second |
|
86
|
+
3rd | Third |
|
87
|
+
4th | Fourth |
|
88
|
+
5th | Fifth
|
89
|
+
)
|
90
|
+
(?: ## with optionals
|
91
|
+
[ ] Qualifying
|
92
|
+
)?
|
93
|
+
)
|
94
|
+
)
|
95
|
+
[ ] Round
|
78
96
|
)
|
79
97
|
|
|
80
98
|
## starting with preliminary
|
81
99
|
# e.g. Preliminary round
|
82
100
|
(?: Preliminary [ ]
|
83
101
|
(?: Round |
|
84
|
-
Semi
|
85
|
-
Final
|
102
|
+
Semi[ -]?finals |
|
103
|
+
Final |
|
104
|
+
Qualifier
|
86
105
|
)
|
87
106
|
)
|
88
107
|
|
|
89
108
|
# more (knockout) rounds
|
90
|
-
# playoffs - playoff, play-off, play-offs
|
91
|
-
|
92
|
-
|
109
|
+
# playoffs - playoff, play-off, play-offs &
|
110
|
+
# playins
|
111
|
+
(?:
|
112
|
+
Play[ -]?offs? (?: [ ]for[ ]quarter-?finals )?
|
113
|
+
|
|
114
|
+
Play[ -]?ins?
|
93
115
|
)
|
94
116
|
|
|
95
117
|
# round32
|
@@ -98,13 +120,12 @@ ROUND_RE = %r{^(
|
|
98
120
|
|
|
99
121
|
# round16
|
100
122
|
(?: Round[ ]of[ ]16 |
|
101
|
-
Last[ ]16
|
102
|
-
8th[ ]finals )
|
123
|
+
Last[ ]16 )
|
103
124
|
|
|
104
125
|
# fifthplace
|
105
126
|
(?:
|
106
127
|
(?: (Fifth|5th)[ -]place
|
107
|
-
(?: [ ] (?: match|play
|
128
|
+
(?: [ ] (?: match|play[ -]?off|final ))?
|
108
129
|
) |
|
109
130
|
(?: Match[ ]for[ ](?: fifth|5th )[ -]place )
|
110
131
|
)
|
@@ -112,38 +133,44 @@ ROUND_RE = %r{^(
|
|
112
133
|
# thirdplace
|
113
134
|
(?:
|
114
135
|
(?: (Third|3rd)[ -]place
|
115
|
-
(?: [ ] (?: match|play
|
136
|
+
(?: [ ] (?: match|play[ -]?off|final ))?
|
116
137
|
) |
|
117
138
|
(?: Match[ ]for[ ](?: third|3rd )[ -]place )
|
118
139
|
)
|
119
140
|
|
|
120
141
|
# quarterfinals
|
121
142
|
(?:
|
122
|
-
|
143
|
+
## note - allow quarter-finals/quarter finals/quarterfinals
|
144
|
+
Quarter[ -]?finals? |
|
123
145
|
Quarters |
|
124
|
-
Last[ ]8
|
146
|
+
Last[ ]8 |
|
147
|
+
8th[ ]finals |
|
148
|
+
1/8[ ]finals ## check 1/8 finals is same as quarter-finals?
|
125
149
|
)
|
126
150
|
|
|
127
151
|
# semifinals
|
128
152
|
(?:
|
129
|
-
Semi
|
153
|
+
Semi[ -]?finals? |
|
130
154
|
Semis |
|
131
|
-
Last[ ]4
|
155
|
+
Last[ ]4 |
|
156
|
+
1/4[ ]finals ## check 1/4 finals is same as semi-finals?
|
132
157
|
)
|
133
158
|
|
|
134
159
|
# final
|
135
160
|
Finals?
|
136
161
|
|
|
137
|
-
# decider e.g. Entscheidungsspiel
|
138
|
-
Decider
|
139
162
|
|
|
140
163
|
## add replays
|
141
164
|
## e.g. Final Replay
|
142
165
|
## Quarter-finals replays
|
143
166
|
## First round replays
|
144
167
|
(?:
|
145
|
-
(?:
|
146
|
-
|
168
|
+
(?: (?: 1st | First |
|
169
|
+
2nd | Second |
|
170
|
+
3rd | Third |
|
171
|
+
4th | Fourth |
|
172
|
+
5th | Fifth ) [ ] Round |
|
173
|
+
Quarter[ -]?finals? |
|
147
174
|
Finals?
|
148
175
|
)
|
149
176
|
[ ] Replays?
|
@@ -151,7 +178,8 @@ ROUND_RE = %r{^(
|
|
151
178
|
|
|
152
179
|
## more
|
153
180
|
(?:
|
154
|
-
|
181
|
+
Decider | # decider e.g. Entscheidungsspiel
|
182
|
+
Reclassification
|
155
183
|
)
|
156
184
|
)$}ix
|
157
185
|
|
@@ -196,6 +224,19 @@ def self.more_round_names
|
|
196
224
|
end
|
197
225
|
end
|
198
226
|
|
227
|
+
def self.zone_names
|
228
|
+
@zone_name ||= begin
|
229
|
+
names = []
|
230
|
+
langs = ['en']
|
231
|
+
## sort names by length??
|
232
|
+
langs.each do |lang|
|
233
|
+
path = "#{SportDb::Module::Parser.root}/config/zones_#{lang}.txt"
|
234
|
+
names += read_names( path )
|
235
|
+
end
|
236
|
+
names
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
199
240
|
|
200
241
|
def self.is_round?( text )
|
201
242
|
### note - use check for case-insensitive
|
@@ -208,20 +249,36 @@ def self.is_round?( text )
|
|
208
249
|
## maybe in the future use our own unaccent and downcase - why? why not?
|
209
250
|
## note - for now ROUND_RE is also case-insensitive!!
|
210
251
|
|
211
|
-
ROUND_RE.match?( text ) ||
|
252
|
+
ROUND_RE.match?( text ) ||
|
253
|
+
more_round_names.any?{ |str| str.casecmp( text )==0 }
|
212
254
|
end
|
213
255
|
|
256
|
+
def self.is_zone?( text )
|
257
|
+
zone_names.any?{ |str| str.casecmp( text )==0 }
|
258
|
+
end
|
259
|
+
|
260
|
+
|
214
261
|
##
|
215
262
|
## keep leg separate (from round) - why? why not?
|
216
263
|
##
|
217
264
|
LEG_RE = %r{^
|
218
265
|
# leg1
|
219
|
-
(?: 1st|First)[ ]leg
|
266
|
+
(?: 1st|First) [ ] leg
|
220
267
|
|
|
221
268
|
# leg2
|
222
|
-
(?: 2nd|Second)[ ]leg
|
269
|
+
(?: 2nd|Second) [ ] leg
|
270
|
+
|
|
271
|
+
# leg 1 of 2 / leg 2 of 2
|
272
|
+
# note - leg limited to ALWAYS 1/2 of 2 for now - why? why not?
|
273
|
+
# for more use match 1/2/3 etc.
|
274
|
+
(?: leg [ ] [12]
|
275
|
+
(?: [ ] of [ ] 2)? )
|
276
|
+
|
|
277
|
+
(?: match [ ] [1-9][0-9]* )
|
223
278
|
$}ix
|
224
279
|
|
280
|
+
|
281
|
+
|
225
282
|
### Pair matches/games if marked with leg1 n leg2
|
226
283
|
def self.is_leg?( text )
|
227
284
|
LEG_RE.match?( text )
|
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -26,7 +26,7 @@ end
|
|
26
26
|
def is_group?( text ) Lang.is_group?( text ); end
|
27
27
|
def is_round?( text ) Lang.is_round?( text ); end
|
28
28
|
def is_leg?( text ) Lang.is_leg?( text ); end
|
29
|
-
|
29
|
+
def is_zone?( text ) Lang.is_zone?( text ); end
|
30
30
|
|
31
31
|
## transforms
|
32
32
|
##
|
@@ -187,7 +187,7 @@ def tokenize_with_errors
|
|
187
187
|
text = t[1]
|
188
188
|
t = if is_group?( text )
|
189
189
|
[:GROUP, text]
|
190
|
-
elsif is_round?( text ) || is_leg?( text )
|
190
|
+
elsif is_round?( text ) || is_leg?( text ) || is_zone?( text )
|
191
191
|
[:ROUND, text]
|
192
192
|
else
|
193
193
|
t ## pass through as-is (1:1)
|
data/lib/sportdb/parser/token-status.rb
CHANGED
@@ -90,53 +90,21 @@ NOTE_RE = %r{
|
|
90
90
|
# add "top-level" NB: version
|
91
91
|
## with full (end-of) line note - why? why not?
|
92
92
|
|
|
93
|
-
(?: originally[ ])? scheduled
|
94
|
-
## e.g. [originally scheduled to play in Mexico City]
|
95
|
-
|
|
96
93
|
rescheduled
|
97
|
-
## e.g. [
|
94
|
+
## e.g. [rescheduled due to earthquake occurred in Mexico on September 19]
|
95
|
+
|
|
96
|
+
declared
|
97
|
+
## e.g. [declared void]
|
98
98
|
|
|
99
99
|
remaining
|
100
100
|
## e.g. [remaining 79']
|
101
101
|
## [remaining 84']
|
102
102
|
## [remaining 59']
|
103
103
|
## [remaining 5']
|
104
|
-
|
|
105
|
-
played
|
106
|
-
## e.g. [played in Macaé-RJ]
|
107
|
-
## [played in Caxias do Sul-RS]
|
108
|
-
## [played in Sete Lagoas-MG]
|
109
|
-
## [played in Uberlândia-MG]
|
110
|
-
## [played in Brasília-DF]
|
111
|
-
## [played in Vöcklabruck]
|
112
|
-
## [played in Pasching]
|
113
|
-
|
|
114
|
-
declared
|
115
|
-
## e.g. [declared void]
|
116
|
-
|
|
117
|
-
inter-group
|
118
|
-
## e.g. [inter-group A-B]
|
119
|
-
## [inter-group C-D]
|
120
104
|
)
|
121
105
|
[ ]
|
122
106
|
[^\]]+? ## slurp all to next ] - (use non-greedy)
|
123
107
|
)
|
124
|
-
|
|
125
|
-
(?:
|
126
|
-
## starting with in - do NOT allow digits
|
127
|
-
## name starting with in possible - why? why not?
|
128
|
-
in[ ]
|
129
|
-
[^0-9\]]+?
|
130
|
-
## e.g. [In Estadio La Corregidora]
|
131
|
-
## [in Unidad Deportiva Centenario]
|
132
|
-
## [in Estadio Olímpico Universitario]
|
133
|
-
## [in Estadio Victoria]
|
134
|
-
## [in UD José Brindis]
|
135
|
-
## [in Colomos Alfredo "Pistache" Torres stadium]
|
136
|
-
##
|
137
|
-
## TODO/FIX
|
138
|
-
## remove in ?? - is same as @ Estadio Victoria and such - why? why not?
|
139
|
-
)
|
140
108
|
)
|
141
109
|
\]
|
142
110
|
}ix
|
@@ -148,7 +116,7 @@ SCORE_NOTE_RE = %r{
|
|
148
116
|
(?<score_note>
|
149
117
|
(?: # plain aet e.g. [aet]
|
150
118
|
aet | a\.e\.t\. |
|
151
|
-
after [ ] extra [ ] time
|
119
|
+
after [ ] extra [ -] time
|
152
120
|
)
|
153
121
|
|
|
154
122
|
(?: # plain penalties e.g. [3-2 pen]
|
@@ -176,7 +144,7 @@ SCORE_NOTE_RE = %r{
|
|
176
144
|
|
177
145
|
(?:
|
178
146
|
(?: # opt 1 - no team listed/named - requires score
|
179
|
-
|
147
|
+
(?: won|wins? ) [ ] ## note - allow won,win or wins
|
180
148
|
(?: ## score
|
181
149
|
\d{1,2}-\d{1,2}
|
182
150
|
[ ]
|
@@ -190,7 +158,7 @@ SCORE_NOTE_RE = %r{
|
|
190
158
|
[1-9\p{L}][0-9\p{L} .-]+?
|
191
159
|
[ ]
|
192
160
|
)
|
193
|
-
wins [ ]
|
161
|
+
(?: won|wins? ) [ ] ## won/win/wins
|
194
162
|
(?: ## score optional
|
195
163
|
\d{1,2}-\d{1,2}
|
196
164
|
[ ]
|
@@ -214,7 +182,7 @@ SCORE_NOTE_RE = %r{
|
|
214
182
|
[1-9\p{L}][0-9\p{L} .-]+?
|
215
183
|
[ ]
|
216
184
|
)
|
217
|
-
wins? [ ]
|
185
|
+
(?: won|wins? ) [ ] # won/win/wins
|
218
186
|
on [ ] away [ ] goals
|
219
187
|
)
|
220
188
|
) # score_note ref
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.11
|
4
|
+
version: 0.6.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -85,6 +85,7 @@ extra_rdoc_files:
|
|
85
85
|
- config/rounds_es.txt
|
86
86
|
- config/rounds_misc.txt
|
87
87
|
- config/rounds_pt.txt
|
88
|
+
- config/zones_en.txt
|
88
89
|
files:
|
89
90
|
- CHANGELOG.md
|
90
91
|
- Manifest.txt
|
@@ -95,6 +96,7 @@ files:
|
|
95
96
|
- config/rounds_es.txt
|
96
97
|
- config/rounds_misc.txt
|
97
98
|
- config/rounds_pt.txt
|
99
|
+
- config/zones_en.txt
|
98
100
|
- lib/sportdb/parser.rb
|
99
101
|
- lib/sportdb/parser/lang.rb
|
100
102
|
- lib/sportdb/parser/lexer.rb
|