sportdb-parser 0.6.11 → 0.6.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -0
- data/config/rounds_en.txt +0 -3
- data/config/zones_en.txt +20 -0
- data/lib/sportdb/parser/lang.rb +107 -46
- data/lib/sportdb/parser/lexer.rb +2 -2
- data/lib/sportdb/parser/token-status.rb +8 -40
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a2e9867bfc8e1dec97214c0d3c335fe0d3cdb0cf5835184081af1447d509e4a
|
4
|
+
data.tar.gz: 1b038a316ad0de74bc559972e59832bfe3af5530b6c0547a9b2eaca0ecbfefbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11b6cb434a959e39b8dd8a935b9bb0d973e9e47e647c1f19dbc1c536a013b982dc948ca62c2d7a9a661a7aeb276444cfbfb81c4975df30ece993076124fce7bd
|
7
|
+
data.tar.gz: b0487df7f949c28c0d4df23d040aeff4eb3247c01b088fe45b65aa6a29a3891781d24bd56d85e99cd1e0d31d82efe953c13f6c689301a9623a01ffa2bdbb45f0
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/config/rounds_en.txt
CHANGED
data/config/zones_en.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#####
|
2
|
+
# zone names in english
|
3
|
+
|
4
|
+
|
5
|
+
Western Region
|
6
|
+
West Region
|
7
|
+
Eastern Region
|
8
|
+
East Region
|
9
|
+
|
10
|
+
Western Conference
|
11
|
+
Eastern Conference
|
12
|
+
|
13
|
+
Northern Zone
|
14
|
+
Western Zone A
|
15
|
+
Western Zone B
|
16
|
+
Central Zone
|
17
|
+
Central & Eastern Zone
|
18
|
+
Southern Zone
|
19
|
+
|
20
|
+
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -31,17 +31,20 @@ end
|
|
31
31
|
|
32
32
|
|
33
33
|
|
34
|
-
ROUND_RE = %r{^
|
35
|
-
|
34
|
+
ROUND_RE = %r{^
|
35
|
+
(?:
|
36
|
+
|
37
|
+
## add special case for group play-off rounds!
|
36
38
|
## group 2 play-off (e.g. worldcup 1954, 1958)
|
37
39
|
##
|
38
40
|
### note - allow Group ("stand-alone") as "generic" round for now
|
39
41
|
## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
|
40
|
-
(?: Group [ ] [
|
41
|
-
Group (?: [ ] phase)? |
|
42
|
+
(?: Group [ ] [a-z0-9]+ [ ] Play-?offs? |
|
43
|
+
Group (?: [ ] (?: phase|stage))? |
|
42
44
|
League (?: [ ] phase)?
|
43
45
|
)
|
44
46
|
|
|
47
|
+
|
45
48
|
# round - note - requires number e.g. round 1,2, etc.
|
46
49
|
# note - use 1-9 regex (cannot start with 0) - why? why not?
|
47
50
|
# make week 01 or round 01 or matchday 01 possible?
|
@@ -50,16 +53,22 @@ ROUND_RE = %r{^(
|
|
50
53
|
Week
|
51
54
|
)
|
52
55
|
[ ] [1-9][0-9]*
|
56
|
+
(?: ## note - add optional Matchday 1 of 2 or such
|
57
|
+
[ ] of [ ] [1-9][0-9]*
|
58
|
+
)?
|
53
59
|
)
|
54
60
|
|
|
61
|
+
(?: Round [ ] One
|
62
|
+
)
|
63
|
+
|
|
55
64
|
## starting with qual(ification)
|
56
65
|
## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
|
57
66
|
## or
|
58
67
|
## Playoff Round 1
|
59
68
|
## Play-in Round 1
|
60
69
|
(?: (?: Qual \. |
|
61
|
-
Play
|
62
|
-
Play
|
70
|
+
Play[ -]?off |
|
71
|
+
Play[ -]?in
|
63
72
|
)
|
64
73
|
[ ] Round [ ] [1-9][0-9]* )
|
65
74
|
|
|
@@ -67,44 +76,71 @@ ROUND_RE = %r{^(
|
|
67
76
|
## First Round
|
68
77
|
## Play-off Round
|
69
78
|
## Final Round (e.g. Worldcup 1950)
|
70
|
-
(?:
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
79
|
+
(?: (?:
|
80
|
+
Play[ -]?off |
|
81
|
+
Final |
|
82
|
+
Wildcard |
|
83
|
+
Qualifying |
|
84
|
+
(?:
|
85
|
+
(?:
|
86
|
+
[1-9][0-9]* \. |
|
87
|
+
1st | First |
|
88
|
+
2nd | Second |
|
89
|
+
3rd | Third |
|
90
|
+
4th | Fourth |
|
91
|
+
5th | Fifth
|
92
|
+
)
|
93
|
+
(?: ## with optionals
|
94
|
+
[ ] Qualifying
|
95
|
+
)?
|
96
|
+
)
|
97
|
+
)
|
98
|
+
[ ] Round
|
78
99
|
)
|
79
100
|
|
|
80
101
|
## starting with preliminary
|
81
102
|
# e.g. Preliminary round
|
82
103
|
(?: Preliminary [ ]
|
83
104
|
(?: Round |
|
84
|
-
Semi
|
85
|
-
Final
|
105
|
+
Semi[ -]?finals |
|
106
|
+
Final |
|
107
|
+
Qualifier
|
86
108
|
)
|
87
109
|
)
|
88
110
|
|
|
89
111
|
# more (knockout) rounds
|
90
|
-
# playoffs - playoff, play-off, play-offs
|
91
|
-
|
92
|
-
|
112
|
+
# playoffs - playoff, play-off, play-offs &
|
113
|
+
# playins
|
114
|
+
(?:
|
115
|
+
Play[ -]?offs? (?: [ ]for[ ]quarter-?finals )?
|
116
|
+
|
|
117
|
+
Play[ -]?ins?
|
93
118
|
)
|
94
119
|
|
|
95
120
|
# round32
|
96
121
|
(?: Round[ ]of[ ]32 |
|
97
|
-
Last[ ]32
|
122
|
+
Last[ ]32 |
|
123
|
+
16th[ ]finals |
|
124
|
+
1/16[ ]finals )
|
98
125
|
|
|
99
126
|
# round16
|
100
127
|
(?: Round[ ]of[ ]16 |
|
101
128
|
Last[ ]16 |
|
102
|
-
8th[ ]finals
|
129
|
+
8th[ ]finals |
|
130
|
+
1/8[ ]finals )
|
103
131
|
|
|
132
|
+
# round8 aka quarterfinals
|
133
|
+
# note - allow quarter-finals/quarter finals/quarterfinals
|
134
|
+
(?: Round[ ]of[ ]8 |
|
135
|
+
Last[ ]8 |
|
136
|
+
1/4[ ]finals |
|
137
|
+
Quarter[ -]?finals? |
|
138
|
+
Quarters )
|
139
|
+
|
|
104
140
|
# fifthplace
|
105
141
|
(?:
|
106
142
|
(?: (Fifth|5th)[ -]place
|
107
|
-
(?: [ ] (?: match|play
|
143
|
+
(?: [ ] (?: match|final|play[ -]?off ))?
|
108
144
|
) |
|
109
145
|
(?: Match[ ]for[ ](?: fifth|5th )[ -]place )
|
110
146
|
)
|
@@ -112,38 +148,32 @@ ROUND_RE = %r{^(
|
|
112
148
|
# thirdplace
|
113
149
|
(?:
|
114
150
|
(?: (Third|3rd)[ -]place
|
115
|
-
(?: [ ] (?: match|play
|
151
|
+
(?: [ ] (?: match|final|play[ -]?off ))?
|
116
152
|
) |
|
117
153
|
(?: Match[ ]for[ ](?: third|3rd )[ -]place )
|
118
154
|
)
|
119
155
|
|
|
120
|
-
#
|
121
|
-
(?:
|
122
|
-
Quarter-?finals? |
|
123
|
-
Quarters |
|
124
|
-
Last[ ]8
|
125
|
-
)
|
126
|
-
|
|
127
|
-
# semifinals
|
156
|
+
# round4 aka semifinals
|
128
157
|
(?:
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
158
|
+
Round[ ]of[ ]4 |
|
159
|
+
Last[ ]4 |
|
160
|
+
Semi[ -]?finals? |
|
161
|
+
Semis )
|
133
162
|
|
|
134
|
-
# final
|
163
|
+
# round2 aka final
|
135
164
|
Finals?
|
136
|
-
|
137
|
-
# decider e.g. Entscheidungsspiel
|
138
|
-
Decider
|
139
|
-
|
|
165
|
+
|
|
140
166
|
## add replays
|
141
167
|
## e.g. Final Replay
|
142
168
|
## Quarter-finals replays
|
143
169
|
## First round replays
|
144
170
|
(?:
|
145
|
-
(?:
|
146
|
-
|
171
|
+
(?: (?: 1st | First |
|
172
|
+
2nd | Second |
|
173
|
+
3rd | Third |
|
174
|
+
4th | Fourth |
|
175
|
+
5th | Fifth ) [ ] Round |
|
176
|
+
Quarter[ -]?finals? |
|
147
177
|
Finals?
|
148
178
|
)
|
149
179
|
[ ] Replays?
|
@@ -151,7 +181,8 @@ ROUND_RE = %r{^(
|
|
151
181
|
|
|
152
182
|
## more
|
153
183
|
(?:
|
154
|
-
|
184
|
+
Decider | # decider e.g. Entscheidungsspiel
|
185
|
+
Reclassification
|
155
186
|
)
|
156
187
|
)$}ix
|
157
188
|
|
@@ -196,6 +227,19 @@ def self.more_round_names
|
|
196
227
|
end
|
197
228
|
end
|
198
229
|
|
230
|
+
def self.zone_names
|
231
|
+
@zone_name ||= begin
|
232
|
+
names = []
|
233
|
+
langs = ['en']
|
234
|
+
## sort names by length??
|
235
|
+
langs.each do |lang|
|
236
|
+
path = "#{SportDb::Module::Parser.root}/config/zones_#{lang}.txt"
|
237
|
+
names += read_names( path )
|
238
|
+
end
|
239
|
+
names
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
199
243
|
|
200
244
|
def self.is_round?( text )
|
201
245
|
### note - use check for case-insensitive
|
@@ -208,20 +252,37 @@ def self.is_round?( text )
|
|
208
252
|
## maybe in the future use our own unaccent and downcase - why? why not?
|
209
253
|
## note - for now ROUND_RE is also case-insensitive!!
|
210
254
|
|
211
|
-
ROUND_RE.match?( text ) ||
|
255
|
+
ROUND_RE.match?( text ) ||
|
256
|
+
more_round_names.any?{ |str| str.casecmp( text )==0 }
|
212
257
|
end
|
213
258
|
|
259
|
+
def self.is_zone?( text )
|
260
|
+
zone_names.any?{ |str| str.casecmp( text )==0 }
|
261
|
+
end
|
262
|
+
|
263
|
+
|
214
264
|
##
|
215
265
|
## keep leg separate (from round) - why? why not?
|
216
266
|
##
|
217
267
|
LEG_RE = %r{^
|
218
268
|
# leg1
|
219
|
-
(?: 1st|First)[ ]leg
|
269
|
+
(?: 1st|First) [ ] leg
|
220
270
|
|
|
221
271
|
# leg2
|
222
|
-
(?: 2nd|Second)[ ]leg
|
272
|
+
(?: 2nd|Second) [ ] leg
|
273
|
+
|
|
274
|
+
# leg 1 of 2 / leg 2 of 2
|
275
|
+
# note - leg limited to ALWAYS 1/2 of 2 for now - why? why not?
|
276
|
+
# for more use match 1/2/3 etc.
|
277
|
+
## allow leg of three (e.g. leg 1 of 3) - why? why not?
|
278
|
+
(?: leg [ ] [12]
|
279
|
+
(?: [ ] of [ ] 2)? )
|
280
|
+
|
|
281
|
+
(?: match [ ] [1-9][0-9]* )
|
223
282
|
$}ix
|
224
283
|
|
284
|
+
|
285
|
+
|
225
286
|
### Pair matches/games if marked with leg1 n leg2
|
226
287
|
def self.is_leg?( text )
|
227
288
|
LEG_RE.match?( text )
|
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -26,7 +26,7 @@ end
|
|
26
26
|
def is_group?( text ) Lang.is_group?( text ); end
|
27
27
|
def is_round?( text ) Lang.is_round?( text ); end
|
28
28
|
def is_leg?( text ) Lang.is_leg?( text ); end
|
29
|
-
|
29
|
+
def is_zone?( text ) Lang.is_zone?( text ); end
|
30
30
|
|
31
31
|
## transforms
|
32
32
|
##
|
@@ -187,7 +187,7 @@ def tokenize_with_errors
|
|
187
187
|
text = t[1]
|
188
188
|
t = if is_group?( text )
|
189
189
|
[:GROUP, text]
|
190
|
-
elsif is_round?( text ) || is_leg?( text )
|
190
|
+
elsif is_round?( text ) || is_leg?( text ) || is_zone?( text )
|
191
191
|
[:ROUND, text]
|
192
192
|
else
|
193
193
|
t ## pass through as-is (1:1)
|
@@ -90,53 +90,21 @@ NOTE_RE = %r{
|
|
90
90
|
# add "top-level" NB: version
|
91
91
|
## with full (end-of) line note - why? why not?
|
92
92
|
|
|
93
|
-
(?: originally[ ])? scheduled
|
94
|
-
## e.g. [originally scheduled to play in Mexico City]
|
95
|
-
|
|
96
93
|
rescheduled
|
97
|
-
## e.g. [
|
94
|
+
## e.g. [rescheduled due to earthquake occurred in Mexico on September 19]
|
95
|
+
|
|
96
|
+
declared
|
97
|
+
## e.g. [declared void]
|
98
98
|
|
|
99
99
|
remaining
|
100
100
|
## e.g. [remaining 79']
|
101
101
|
## [remaining 84']
|
102
102
|
## [remaining 59']
|
103
103
|
## [remaining 5']
|
104
|
-
|
|
105
|
-
played
|
106
|
-
## e.g. [played in Macaé-RJ]
|
107
|
-
## [played in Caxias do Sul-RS]
|
108
|
-
## [played in Sete Lagoas-MG]
|
109
|
-
## [played in Uberlândia-MG]
|
110
|
-
## [played in Brasília-DF]
|
111
|
-
## [played in Vöcklabruck]
|
112
|
-
## [played in Pasching]
|
113
|
-
|
|
114
|
-
declared
|
115
|
-
## e.g. [declared void]
|
116
|
-
|
|
117
|
-
inter-group
|
118
|
-
## e.g. [inter-group A-B]
|
119
|
-
## [inter-group C-D]
|
120
104
|
)
|
121
105
|
[ ]
|
122
106
|
[^\]]+? ## slurp all to next ] - (use non-greedy)
|
123
107
|
)
|
124
|
-
|
|
125
|
-
(?:
|
126
|
-
## starting with in - do NOT allow digits
|
127
|
-
## name starting with in possible - why? why not?
|
128
|
-
in[ ]
|
129
|
-
[^0-9\]]+?
|
130
|
-
## e.g. [In Estadio La Corregidora]
|
131
|
-
## [in Unidad Deportiva Centenario]
|
132
|
-
## [in Estadio Olímpico Universitario]
|
133
|
-
## [in Estadio Victoria]
|
134
|
-
## [in UD José Brindis]
|
135
|
-
## [in Colomos Alfredo "Pistache" Torres stadium]
|
136
|
-
##
|
137
|
-
## TODO/FIX
|
138
|
-
## remove in ?? - is same as @ Estadio Victoria and such - why? why not=
|
139
|
-
)
|
140
108
|
)
|
141
109
|
\]
|
142
110
|
}ix
|
@@ -148,7 +116,7 @@ SCORE_NOTE_RE = %r{
|
|
148
116
|
(?<score_note>
|
149
117
|
(?: # plain aet e.g. [aet]
|
150
118
|
aet | a\.e\.t\. |
|
151
|
-
after [ ] extra [ ] time
|
119
|
+
after [ ] extra [ -] time
|
152
120
|
)
|
153
121
|
|
|
154
122
|
(?: # plain penalties e.g. [3-2 pen]
|
@@ -176,7 +144,7 @@ SCORE_NOTE_RE = %r{
|
|
176
144
|
|
177
145
|
(?:
|
178
146
|
(?: # opt 1 - no team listed/named - requires score
|
179
|
-
|
147
|
+
(?: won|wins? ) [ ] ## note - allow won,win or wins
|
180
148
|
(?: ## score
|
181
149
|
\d{1,2}-\d{1,2}
|
182
150
|
[ ]
|
@@ -190,7 +158,7 @@ SCORE_NOTE_RE = %r{
|
|
190
158
|
[1-9\p{L}][0-9\p{L} .-]+?
|
191
159
|
[ ]
|
192
160
|
)
|
193
|
-
wins [ ]
|
161
|
+
(?: won|wins? ) [ ] ## won/win/wins
|
194
162
|
(?: ## score optional
|
195
163
|
\d{1,2}-\d{1,2}
|
196
164
|
[ ]
|
@@ -214,7 +182,7 @@ SCORE_NOTE_RE = %r{
|
|
214
182
|
[1-9\p{L}][0-9\p{L} .-]+?
|
215
183
|
[ ]
|
216
184
|
)
|
217
|
-
wins? [ ]
|
185
|
+
(?: won|wins? ) [ ] # won/win/wins
|
218
186
|
on [ ] away [ ] goals
|
219
187
|
)
|
220
188
|
) # score_note ref
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -85,6 +85,7 @@ extra_rdoc_files:
|
|
85
85
|
- config/rounds_es.txt
|
86
86
|
- config/rounds_misc.txt
|
87
87
|
- config/rounds_pt.txt
|
88
|
+
- config/zones_en.txt
|
88
89
|
files:
|
89
90
|
- CHANGELOG.md
|
90
91
|
- Manifest.txt
|
@@ -95,6 +96,7 @@ files:
|
|
95
96
|
- config/rounds_es.txt
|
96
97
|
- config/rounds_misc.txt
|
97
98
|
- config/rounds_pt.txt
|
99
|
+
- config/zones_en.txt
|
98
100
|
- lib/sportdb/parser.rb
|
99
101
|
- lib/sportdb/parser/lang.rb
|
100
102
|
- lib/sportdb/parser/lexer.rb
|