sportdb-parser 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +17 -4
- data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
- data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
- data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
- data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
- data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
- data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
- data/lib/sportdb/parser/lexer-on_top.rb +125 -0
- data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
- data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
- data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
- data/lib/sportdb/parser/lexer.rb +133 -1363
- data/lib/sportdb/parser/lexer_buffer.rb +8 -37
- data/lib/sportdb/parser/lexer_token.rb +126 -0
- data/lib/sportdb/parser/parser.rb +1104 -1403
- data/lib/sportdb/parser/racc_parser.rb +36 -32
- data/lib/sportdb/parser/racc_tree.rb +65 -98
- data/lib/sportdb/parser/token-date--helpers.rb +130 -0
- data/lib/sportdb/parser/token-date--names.rb +108 -0
- data/lib/sportdb/parser/token-date.rb +20 -192
- data/lib/sportdb/parser/token-date_duration.rb +8 -27
- data/lib/sportdb/parser/token-geo.rb +16 -16
- data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
- data/lib/sportdb/parser/token-goals.rb +103 -249
- data/lib/sportdb/parser/token-group.rb +8 -22
- data/lib/sportdb/parser/token-prop.rb +138 -124
- data/lib/sportdb/parser/token-prop_name.rb +48 -39
- data/lib/sportdb/parser/token-round.rb +21 -35
- data/lib/sportdb/parser/token-score--helpers.rb +189 -0
- data/lib/sportdb/parser/token-score.rb +9 -393
- data/lib/sportdb/parser/token-score_full.rb +331 -0
- data/lib/sportdb/parser/token-status.rb +44 -46
- data/lib/sportdb/parser/token-status_inline.rb +112 -0
- data/lib/sportdb/parser/token-text.rb +41 -31
- data/lib/sportdb/parser/token-time.rb +29 -26
- data/lib/sportdb/parser/token.rb +58 -159
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +45 -17
- metadata +19 -6
- data/lib/sportdb/parser/blocktxt.rb +0 -99
- data/lib/sportdb/parser/lexer_tty.rb +0 -111
- data/lib/sportdb/parser/token-table.rb +0 -149
- data/lib/sportdb/parser/token_helpers.rb +0 -92
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
## todo/fix
|
|
6
|
+
## for internal helper constants
|
|
7
|
+
## use trailing underline
|
|
8
|
+
## e.g. P_EN_, AGG_EN_ etc!!!
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
|
|
12
|
+
|
|
13
|
+
#####
|
|
14
|
+
# english helpers (penalty, extra time, ...)
|
|
15
|
+
## note - p must go last (shortest match)
|
|
16
|
+
# pso = penalty shootout
|
|
17
|
+
### - note - remove PSO for now (may add later back) - why? why not?
|
|
18
|
+
#
|
|
19
|
+
# todo/fix/clean-up - keep it simple - remove optional trailing dot (.)
|
|
20
|
+
# from pen., p., agg. etc. - why? why not?
|
|
21
|
+
# always use (simply) pen, p, agg
|
|
22
|
+
# (also) remove a.e.t. / a.e.t option - why? why not?
|
|
23
|
+
#
|
|
24
|
+
## UPDATE mar/2026: add pens too - keep - why? why not?
|
|
25
|
+
## (4-3 pens)
|
|
26
|
+
## (4-3 Pens) -- keep mixed Pens/Pen. too - why? why not?
|
|
27
|
+
## (4-3 Pen.)
|
|
28
|
+
P_EN = '(?-i: PEN | P |' +
|
|
29
|
+
'[Pp]ens | [Pp]en\.? | p\.? )' # e.g. p., p, pen, pen., etc.
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
## fix - change ET_EN to AET_EN!!! - why? why not?
|
|
33
|
+
## check - allow Aet too - why? why not?
|
|
34
|
+
## or A.e.t ??
|
|
35
|
+
ET_EN = '(?-i: AET | ' +
|
|
36
|
+
'aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
|
|
37
|
+
# AET_EN = ET_EN
|
|
38
|
+
|
|
39
|
+
####
|
|
40
|
+
## after (golden goal/sudden death) extra time - add more options/styles - why? why not?
|
|
41
|
+
AETGG_EN = '(?-i: AET/GG | AGGET | ASDET | ' +
|
|
42
|
+
'aet/gg | a\.e\.t\.?/g\.g\.? | agget | asdet )'
|
|
43
|
+
## after (silver goal) extra time
|
|
44
|
+
AETSG_EN = '(?-i: AET/SG | ASGET | ' +
|
|
45
|
+
'aet/sg | a\.e\.t\.?/s\.g\.? | asget )'
|
|
46
|
+
|
|
47
|
+
## agg/agg. or AGG
|
|
48
|
+
AGG_EN = '(?-i: AGG | agg\.? )' ## aggregate e.g. 4-4 agg etc.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
## regex score helpers
|
|
53
|
+
## note - MUST double escape \d (e.g. \\d)!!! in interpolated strings (e.g. %Q<>); NOT needed in "simple" single-quoted strings (e.g. '')
|
|
54
|
+
|
|
55
|
+
##
|
|
56
|
+
## fix - change SCORE_P to SCORE_FULL_P
|
|
57
|
+
## SCORE_ET to SCORE_FULL_ET
|
|
58
|
+
##
|
|
59
|
+
## (re)use SCORE_P, SCORE_ET for score only part!!!
|
|
60
|
+
##
|
|
61
|
+
## fix/fix/fix - rename to SCORE_P_ SCORE_ET_
|
|
62
|
+
## mark internals with TRAILING underscore (leading NOT possible!)
|
|
63
|
+
|
|
64
|
+
SCORE_P = %Q< (?<p1>\\d{1,2}) - (?<p2>\\d{1,2})
|
|
65
|
+
[ ]? #{P_EN}
|
|
66
|
+
>
|
|
67
|
+
SCORE_ET = %Q< (?<et1>\\d{1,2}) - (?<et2>\\d{1,2})
|
|
68
|
+
[ ]? #{ET_EN}
|
|
69
|
+
>
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
SCORE_LOOKAHEAD = '(?= [ ,\]] | $)'
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
####
|
|
77
|
+
## after extra-time with golden goal/sudden death & silver goal rule
|
|
78
|
+
## note - golden goal & silver goal EXCLUDE penalties!!!
|
|
79
|
+
##
|
|
80
|
+
## 4-3 a.e.t/g.g.
|
|
81
|
+
## 4-3 aet/gg
|
|
82
|
+
## 4-3agget -or- 4-3 asdet
|
|
83
|
+
## 2-1 aet/sg
|
|
84
|
+
## -or-
|
|
85
|
+
## 4-3 aet/gg (3-3, 2-1)
|
|
86
|
+
SCORE__ET_GG_SG__RE = %r{
|
|
87
|
+
(?<score_full>
|
|
88
|
+
\b
|
|
89
|
+
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
|
90
|
+
[ ]? (?:
|
|
91
|
+
(?<aetgg> #{AETGG_EN})
|
|
92
|
+
|
|
|
93
|
+
(?<aetsg> #{AETSG_EN})
|
|
94
|
+
)
|
|
95
|
+
### note:
|
|
96
|
+
## add optional full-time, half-time score
|
|
97
|
+
(?:
|
|
98
|
+
[ ]+
|
|
99
|
+
\(
|
|
100
|
+
[ ]*
|
|
101
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
102
|
+
[ ]*
|
|
103
|
+
(?:
|
|
104
|
+
, [ ]*
|
|
105
|
+
(?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
106
|
+
[ ]*
|
|
107
|
+
)?
|
|
108
|
+
)? # note: make half time (HT) score optional for now
|
|
109
|
+
\)
|
|
110
|
+
)?
|
|
111
|
+
#{SCORE_LOOKAHEAD}
|
|
112
|
+
)}ix
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
## note: allow SPECIAL cases WITHOUT full time scores (just a.e.t or pen. + a.e.t.)
|
|
116
|
+
## 3-4 pen. 2-2 a.e.t.
|
|
117
|
+
## 3-4 pen. 2-2 a.e.t.
|
|
118
|
+
## 2-2 a.e.t.
|
|
119
|
+
SCORE__P_ET__RE = %r{
|
|
120
|
+
(?<score_full>
|
|
121
|
+
\b
|
|
122
|
+
(?: #{SCORE_P} [ ]+
|
|
123
|
+
)? ## note: make penalty (P) score optional for now
|
|
124
|
+
#{SCORE_ET}
|
|
125
|
+
#{SCORE_LOOKAHEAD}
|
|
126
|
+
)}ix
|
|
127
|
+
## todo/check: remove lookahead assertion here - why require space?
|
|
128
|
+
## note: \b works only after non-alphanum e.g. )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
## note: allow SPECIAL cases WITHOUT full time scores
|
|
132
|
+
## AND with pen in last position!
|
|
133
|
+
## 2-2 a.e.t., 3-4 pen.
|
|
134
|
+
## 2-2 a.e.t. 3-4 pen. ## or without comma separator - why? why not?
|
|
135
|
+
SCORE__ET_P__RE = %r{
|
|
136
|
+
(?<score_full>
|
|
137
|
+
\b
|
|
138
|
+
#{SCORE_ET}
|
|
139
|
+
(?: [ ]*,[ ]* | [ ]+ )
|
|
140
|
+
#{SCORE_P}
|
|
141
|
+
#{SCORE_LOOKAHEAD}
|
|
142
|
+
)}ix
|
|
143
|
+
## todo/check: remove lookahead assertion here - why require space?
|
|
144
|
+
## note: \b works only after non-alphanum e.g. )
|
|
145
|
+
|
|
146
|
+
### special case (i) - full time with penalties
|
|
147
|
+
## 2-2, 3-4 pen.
|
|
148
|
+
SCORE__FT_P__RE = %r{
|
|
149
|
+
(?<score_full>
|
|
150
|
+
\b
|
|
151
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
152
|
+
[ ]*,[ ]* ## note - comma required!!!
|
|
153
|
+
#{SCORE_P}
|
|
154
|
+
#{SCORE_LOOKAHEAD}
|
|
155
|
+
)}ix
|
|
156
|
+
|
|
157
|
+
### special case (ii) - full time & half-time with penalties
|
|
158
|
+
## 2-2 (1-1), 3-4 pen.
|
|
159
|
+
SCORE__FT_HT_P__RE = %r{
|
|
160
|
+
(?<score_full>
|
|
161
|
+
\b
|
|
162
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
163
|
+
[ ]*
|
|
164
|
+
\(
|
|
165
|
+
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
166
|
+
\)
|
|
167
|
+
[ ]*,[ ]* ## note - comma required!!!
|
|
168
|
+
#{SCORE_P}
|
|
169
|
+
#{SCORE_LOOKAHEAD}
|
|
170
|
+
)}ix
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
## note: allow SPECIAL with penalty only
|
|
176
|
+
## 3-4 pen. or 3-4p etc.
|
|
177
|
+
SCORE__P__RE = %r{
|
|
178
|
+
(?<score_full>
|
|
179
|
+
\b
|
|
180
|
+
#{SCORE_P}
|
|
181
|
+
#{SCORE_LOOKAHEAD}
|
|
182
|
+
)}ix
|
|
183
|
+
## todo/check: remove lookahead assertion here - why require space?
|
|
184
|
+
## note: \b works only after non-alphanum e.g. )
|
|
185
|
+
|
|
186
|
+
####
|
|
187
|
+
## support short all-in-one e.g.
|
|
188
|
+
## e.g. 3-4 pen. 2-2 a.e.t. ( 1-1, 1-1 ) becomes
|
|
189
|
+
## 3-4 pen. (2-2, 1-1, 1-1)
|
|
190
|
+
|
|
191
|
+
SCORE__P_ET_FT_HT_V2__RE = %r{
|
|
192
|
+
(?<score_full>
|
|
193
|
+
\b
|
|
194
|
+
#{SCORE_P} [ ]+
|
|
195
|
+
\(
|
|
196
|
+
[ ]*
|
|
197
|
+
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
|
198
|
+
[ ]*, [ ]*
|
|
199
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
200
|
+
[ ]*, [ ]*
|
|
201
|
+
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
202
|
+
[ ]*
|
|
203
|
+
\)
|
|
204
|
+
#{SCORE_LOOKAHEAD}
|
|
205
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
|
206
|
+
## note: \b works only after non-alphanum e.g. )
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# e.g. 2-2 a.e.t. (1-1, 1-0), 5-1 pen.
|
|
210
|
+
SCORE__ET_FT_HT_P__RE = %r{
|
|
211
|
+
(?<score_full>
|
|
212
|
+
\b
|
|
213
|
+
#{SCORE_ET} [ ]+
|
|
214
|
+
\(
|
|
215
|
+
[ ]*
|
|
216
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
217
|
+
[ ]*
|
|
218
|
+
(?:
|
|
219
|
+
, [ ]*
|
|
220
|
+
(?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
221
|
+
[ ]*
|
|
222
|
+
)?
|
|
223
|
+
)? # note: make half time (HT) score optional for now
|
|
224
|
+
\)
|
|
225
|
+
(?: [ ]*,[ ]* | [ ]+)
|
|
226
|
+
#{SCORE_P}
|
|
227
|
+
#{SCORE_LOOKAHEAD}
|
|
228
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
|
229
|
+
## note: \b works only after non-alphanum e.g. )
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
|
|
234
|
+
## 3-4p 2-2aet (1-1, ) or
|
|
235
|
+
## 3-4 pen. 2-2 a.e.t. (1-1) or
|
|
236
|
+
## 2-2 a.e.t. (1-1, 1-1) or
|
|
237
|
+
## 2-2 a.e.t. (1-1, ) or
|
|
238
|
+
## 2-2 a.e.t. (1-1)
|
|
239
|
+
|
|
240
|
+
SCORE__P_ET_FT_HT__RE = %r{
|
|
241
|
+
(?<score_full>
|
|
242
|
+
\b
|
|
243
|
+
(?:
|
|
244
|
+
#{SCORE_P} [ ]+
|
|
245
|
+
)? ## note - make penalty (P) score optional for now
|
|
246
|
+
#{SCORE_ET} [ ]+
|
|
247
|
+
\(
|
|
248
|
+
[ ]*
|
|
249
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
250
|
+
[ ]*
|
|
251
|
+
(?:
|
|
252
|
+
, [ ]*
|
|
253
|
+
(?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
254
|
+
[ ]*
|
|
255
|
+
)?
|
|
256
|
+
)? # note: make half time (HT) score optional for now
|
|
257
|
+
\)
|
|
258
|
+
#{SCORE_LOOKAHEAD}
|
|
259
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
|
260
|
+
## note: \b works only after non-alphanum e.g. )
|
|
261
|
+
|
|
262
|
+
###
|
|
263
|
+
## special case WITHOUT extra time!!
|
|
264
|
+
## same as above (but WITHOUT extra time and pen required)
|
|
265
|
+
SCORE__P_FT_HT__RE = %r{
|
|
266
|
+
(?<score_full>
|
|
267
|
+
\b
|
|
268
|
+
#{SCORE_P} [ ]+
|
|
269
|
+
\(
|
|
270
|
+
[ ]*
|
|
271
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
272
|
+
[ ]*
|
|
273
|
+
(?:
|
|
274
|
+
, [ ]*
|
|
275
|
+
(?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
276
|
+
[ ]*
|
|
277
|
+
)?
|
|
278
|
+
)? # note: make half time (HT) score optional for now
|
|
279
|
+
\)
|
|
280
|
+
#{SCORE_LOOKAHEAD}
|
|
281
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
|
282
|
+
## note: \b works only after non-alphanum e.g. )
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
##########
|
|
286
|
+
## e.g. 2-1 (1-1)
|
|
287
|
+
SCORE__FT_HT__RE = %r{
|
|
288
|
+
(?<score_full>
|
|
289
|
+
\b
|
|
290
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
291
|
+
[ ]+ \( [ ]*
|
|
292
|
+
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
|
293
|
+
[ ]* \)
|
|
294
|
+
#{SCORE_LOOKAHEAD}
|
|
295
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
|
296
|
+
## note: \b works only after non-alphanum e.g. )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
#############################################
|
|
304
|
+
# map tables
|
|
305
|
+
# note: order matters; first come-first matched/served
|
|
306
|
+
|
|
307
|
+
## note 2-2, 5-1 pen. must get priority (get before SCORE_LEGS!!!)
|
|
308
|
+
## break out
|
|
309
|
+
## note - no need for Regexp.union for now (only single regex!)
|
|
310
|
+
|
|
311
|
+
SCORE_FULL_1ST_RE = SCORE__FT_P__RE # e.g. 2-2, 5-1 pen.
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
SCORE_FULL_RE = Regexp.union(
|
|
315
|
+
SCORE__ET_GG_SG__RE, # e.g. 3-1 aet/gg
|
|
316
|
+
SCORE__P_ET_FT_HT_V2__RE, # e.g. 5-1 pen. (2-2, 1-1, 1-0)
|
|
317
|
+
SCORE__ET_FT_HT_P__RE, # e.g. 2-2 a.e.t. (1-1, 1-0), 5-1 pen.
|
|
318
|
+
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
|
319
|
+
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
|
320
|
+
SCORE__ET_P__RE, # e.g. 2-2 a.e.t., 5-1 pen.
|
|
321
|
+
SCORE__FT_HT_P__RE, # e.g. 2-2 (1-1), 5-1 pen.
|
|
322
|
+
SCORE__P_ET__RE, # e.g. 5-1 pen. 2-2 a.e.t. or 2-2 a.e.t. (w/o pen)
|
|
323
|
+
SCORE__P__RE, # e.g. 5-1 pen.
|
|
324
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
|
325
|
+
## note - keep basic score as its own token!!!!
|
|
326
|
+
## that is, SCORE & SCORE_MORE
|
|
327
|
+
### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
end # class Lexer
|
|
331
|
+
end # module SportDb
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module SportDb
|
|
2
2
|
class Lexer
|
|
3
|
-
|
|
3
|
+
|
|
4
4
|
## (match) status
|
|
5
5
|
## note: english usage - cancelled (in UK), canceled (in US)
|
|
6
6
|
##
|
|
@@ -9,24 +9,24 @@ class Lexer
|
|
|
9
9
|
|
|
10
10
|
POSTPONED = %Q{ (?<postponed> postponed | pst\\.? | po?stp\\.? | ppd\\.? ) }
|
|
11
11
|
CANCELED = %Q{ (?<canceled> cancell?ed | canc\\.? ) } ## add can/can. - why? why not?
|
|
12
|
-
WALKOVER = %Q{ (?<walkover> walkover | w/o | wo ) } ## add o/w too - why? why not?
|
|
13
|
-
AWARDED = %Q{ (?<awarded> awarded | awd\\.? ) }
|
|
14
|
-
SUSPENDED = %Q{ (?<suspended> suspended | susp\\.? ) }
|
|
12
|
+
WALKOVER = %Q{ (?<walkover> walkover | w/o | wo ) } ## add o/w too - why? why not?
|
|
13
|
+
AWARDED = %Q{ (?<awarded> awarded | awd\\.? ) }
|
|
14
|
+
SUSPENDED = %Q{ (?<suspended> suspended | susp\\.? ) }
|
|
15
15
|
ABANDONED = %Q{ (?<abandoned> abandoned | aban\\.? | abd\\.? ) }
|
|
16
|
-
ANNULLED = %Q{ (?<annulled> annulled ) }
|
|
17
|
-
VOIDED = %Q{ (?<voided> voided | void ) } ### note - alternative (name) to annulled
|
|
16
|
+
ANNULLED = %Q{ (?<annulled> annulled ) }
|
|
17
|
+
VOIDED = %Q{ (?<voided> voided | void ) } ### note - alternative (name) to annulled
|
|
18
18
|
|
|
19
|
-
REPLAY = %Q{ (?<replay> replay | repl\\.? ) }
|
|
19
|
+
REPLAY = %Q{ (?<replay> replay | repl\\.? ) }
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
##
|
|
23
|
-
## note - status_note incl. complete text incl. <status> (not normalized)
|
|
23
|
+
## note - status_note incl. complete text incl. <status> (not normalized)
|
|
24
24
|
## <status> gets normalized e.g. ppd => postponed etc.
|
|
25
25
|
|
|
26
26
|
STATUS_RE = %r{
|
|
27
27
|
\[
|
|
28
|
-
(?:
|
|
29
|
-
#############################################
|
|
28
|
+
(?:
|
|
29
|
+
#############################################
|
|
30
30
|
### opt 1 - allow long forms with note/comment for some stati
|
|
31
31
|
## e.g. [postponed due to tropical storm "Hanna"]
|
|
32
32
|
## [suspended at 84' by storm; result stood]
|
|
@@ -37,33 +37,33 @@ STATUS_RE = %r{
|
|
|
37
37
|
## pre-match (not played)
|
|
38
38
|
#{POSTPONED}
|
|
39
39
|
|
|
|
40
|
-
#{CANCELED}
|
|
40
|
+
#{CANCELED}
|
|
41
41
|
|
|
|
42
|
-
#{WALKOVER}
|
|
42
|
+
#{WALKOVER}
|
|
43
43
|
|
|
|
44
|
-
######################
|
|
44
|
+
######################
|
|
45
45
|
## pre/post match
|
|
46
46
|
#{AWARDED}
|
|
47
47
|
|
|
|
48
48
|
########################
|
|
49
49
|
## post match - (partially) played
|
|
50
|
-
#{SUSPENDED}
|
|
51
|
-
|
|
|
50
|
+
#{SUSPENDED}
|
|
51
|
+
|
|
|
52
52
|
#{ABANDONED}
|
|
53
53
|
|
|
|
54
54
|
#{ANNULLED}
|
|
55
55
|
|
|
|
56
56
|
#{VOIDED} ### note - alternative to annulled
|
|
57
57
|
) ## end-of-<status>
|
|
58
|
-
[ :;,-]+ ## leading spaces (or separators)
|
|
59
|
-
[^\]]+? ## note - add non-greedy match
|
|
60
|
-
) ## end-of-<status-note>
|
|
58
|
+
[ :;,-]+ ## leading spaces (or separators)
|
|
59
|
+
[^\]]+? ## note - add non-greedy match
|
|
60
|
+
) ## end-of-<status-note>
|
|
61
61
|
[ ]* ## eat-up optional trailing spaces
|
|
62
62
|
)
|
|
63
|
-
|
|
|
63
|
+
|
|
|
64
64
|
########################################
|
|
65
65
|
## opt 2 - short form only (no note/comments) e.g. [postponed], [Canceled], etc.
|
|
66
|
-
####################################
|
|
66
|
+
####################################
|
|
67
67
|
(?<status>
|
|
68
68
|
####################
|
|
69
69
|
## pre-match (not played)
|
|
@@ -71,15 +71,15 @@ STATUS_RE = %r{
|
|
|
71
71
|
|
|
|
72
72
|
#{CANCELED}
|
|
73
73
|
|
|
|
74
|
-
#{WALKOVER}
|
|
74
|
+
#{WALKOVER}
|
|
75
75
|
|
|
|
76
|
-
######################
|
|
76
|
+
######################
|
|
77
77
|
## pre/post match
|
|
78
78
|
#{AWARDED}
|
|
79
79
|
|
|
|
80
80
|
########################
|
|
81
81
|
## post match - (partially) played
|
|
82
|
-
#{SUSPENDED}
|
|
82
|
+
#{SUSPENDED}
|
|
83
83
|
|
|
|
84
84
|
#{ABANDONED}
|
|
85
85
|
|
|
|
@@ -88,7 +88,7 @@ STATUS_RE = %r{
|
|
|
88
88
|
#{VOIDED} ### note - alternative to annulled
|
|
89
89
|
|
|
|
90
90
|
#{REPLAY} ### todo/fix - keep replay - why? why not?
|
|
91
|
-
### prefer replay in round e.g.
|
|
91
|
+
### prefer replay in round e.g.
|
|
92
92
|
## ▪ Round 17, Replay
|
|
93
93
|
## ▪ Semi-finals, Replays
|
|
94
94
|
)
|
|
@@ -108,16 +108,16 @@ def self._build_status( m )
|
|
|
108
108
|
elsif m[:abandoned] then 'abandoned'
|
|
109
109
|
elsif m[:annulled] ||
|
|
110
110
|
m[:voided] then 'annulled'
|
|
111
|
-
elsif m[:replay] then 'replay'
|
|
111
|
+
elsif m[:replay] then 'replay'
|
|
112
112
|
else ## fallback on "generic" status (shouldn't happen)
|
|
113
113
|
m[:status]
|
|
114
114
|
end
|
|
115
115
|
|
|
116
116
|
## includes note? e.g. awarded; originally 2-0
|
|
117
|
-
status[:status_note] = m[:status_note] if m[:status_note]
|
|
118
|
-
|
|
117
|
+
status[:status_note] = m[:status_note] if m[:status_note]
|
|
118
|
+
|
|
119
119
|
status
|
|
120
|
-
end
|
|
120
|
+
end
|
|
121
121
|
def _build_status( m ) self.class._build_status( m ); end
|
|
122
122
|
|
|
123
123
|
|
|
@@ -138,57 +138,55 @@ postponed|postp\.|ppd\.
|
|
|
138
138
|
## [postponed due to tropical storm "Hanna"]
|
|
139
139
|
## [postponed from Sep 10-12 due to death Queen Elizabeth II]
|
|
140
140
|
|
|
141
|
-
cancell?ed|canc.\
|
|
142
|
-
|
|
143
|
-
walkover|w/o|wo
|
|
144
|
-
## A victory awarded to one team because the opponent was unable
|
|
141
|
+
cancell?ed|canc\.
|
|
142
|
+
|
|
143
|
+
walkover|w/o|wo
|
|
144
|
+
## A victory awarded to one team because the opponent was unable
|
|
145
145
|
## or unwilling to compete (e.g., failing to show up or being disqualified).
|
|
146
146
|
## -or-
|
|
147
147
|
## A walkover or "win over" reveals when a team has won a game
|
|
148
|
-
## without it being played.
|
|
148
|
+
## without it being played.
|
|
149
149
|
## -or-
|
|
150
150
|
## see <https://en.wikipedia.org/wiki/Walkover>
|
|
151
151
|
|
|
152
152
|
|
|
153
153
|
|
|
154
|
-
######################
|
|
154
|
+
######################
|
|
155
155
|
## pre/post match
|
|
156
156
|
awarded|awd\.
|
|
157
157
|
|
|
158
158
|
## e.g. [awarded match to Leones Negros by undue alignment; original result 1-2]
|
|
159
159
|
## [awarded 3-0 to Cafetaleros by undue alignment; originally ended 2-0]
|
|
160
160
|
## [awarded 3-0; originally 0-2, América used ineligible player (Federico Viñas)]
|
|
161
|
-
|
|
162
|
-
## A result that is decided by a governing body
|
|
161
|
+
|
|
162
|
+
## A result that is decided by a governing body
|
|
163
163
|
## (like FIFA or a domestic league) rather than by the play on the pitch.
|
|
164
|
-
## Usually follows a Forfeit or Walkover.
|
|
165
|
-
## If a team refuses to play, abandons a match, or fields an ineligible player,
|
|
164
|
+
## Usually follows a Forfeit or Walkover.
|
|
165
|
+
## If a team refuses to play, abandons a match, or fields an ineligible player,
|
|
166
166
|
## the opponent is typically awarded a 3-0 victory.
|
|
167
167
|
|
|
168
168
|
########################
|
|
169
169
|
## post match - (partially) played
|
|
170
|
-
suspended|susp\.
|
|
170
|
+
suspended|susp\.
|
|
171
171
|
|
|
172
|
-
## e.g. [suspended at 0-0 in 12' due to storm]
|
|
172
|
+
## e.g. [suspended at 0-0 in 12' due to storm]
|
|
173
173
|
## [suspended at 84' by storm; result stood]
|
|
174
|
-
|
|
174
|
+
|
|
175
175
|
## The match is temporarily halted but intended to be resumed or restarted later.
|
|
176
176
|
|
|
177
177
|
abandoned|aban\.|abd\.
|
|
178
178
|
|
|
179
|
-
## e.g. [abandoned at 1-1 in 65' due to cardiac arrest
|
|
179
|
+
## e.g. [abandoned at 1-1 in 65' due to cardiac arrest
|
|
180
180
|
## Luton player Tom Lockyer]
|
|
181
181
|
## [abandoned at 0-0 in 6' due to waterlogged pitch]
|
|
182
|
-
## [abandoned at 5-0 in 80' due to attack
|
|
182
|
+
## [abandoned at 5-0 in 80' due to attack
|
|
183
183
|
## on assistant referee by Cerro; result stood]
|
|
184
184
|
## [abandoned at 1-0 in 31']
|
|
185
185
|
## [abandoned at 0-1 in 85' due to crowd trouble]
|
|
186
186
|
|
|
187
|
-
## The match started but was stopped by the referee before the final whistle
|
|
187
|
+
## The match started but was stopped by the referee before the final whistle
|
|
188
188
|
## (e.g., due to a waterlogged pitch or player injury) and did not resume
|
|
189
189
|
|
|
190
190
|
annulled OR voided|void
|
|
191
191
|
## The match result is struck from the record entirely,
|
|
192
192
|
## usually due to a team's withdrawal from the league or a severe rule violation.
|
|
193
|
-
|
|
194
|
-
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
## (match) status inline versions
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## "inline" match status e.g.
|
|
10
|
+
## Clapham Rovers w/o Hitchin
|
|
11
|
+
## Queen's Park bye
|
|
12
|
+
|
|
13
|
+
## add support for WO or W-0 too - why? why not?
|
|
14
|
+
INLINE_WO_RE = %r{
|
|
15
|
+
(?<inline_wo>
|
|
16
|
+
\b (?: w/o | W/O ) \b
|
|
17
|
+
)}x ## note - NOT case insensitive
|
|
18
|
+
|
|
19
|
+
INLINE_BYE_RE = %r{
|
|
20
|
+
(?<inline_bye>
|
|
21
|
+
\b (?: bye | BYE ) \b
|
|
22
|
+
)}x ## note - NOT case insensitive
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
###
|
|
26
|
+
# A n/p B (note - basically an inline short form of A v B [cancelled] )
|
|
27
|
+
# N/P
|
|
28
|
+
INLINE_NP_RE = %r{
|
|
29
|
+
(?<inline_np>
|
|
30
|
+
\b (?: n/p | N/P ) \b
|
|
31
|
+
)}x ## note - NOT case insensitive
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
###
|
|
35
|
+
# abd/abd. or aban/aban. [abandoned]
|
|
36
|
+
# ABD/ABAN
|
|
37
|
+
INLINE_ABD_RE = %r{
|
|
38
|
+
(?<inline_abd>
|
|
39
|
+
\b (?: abd\.? |
|
|
40
|
+
aban\.? |
|
|
41
|
+
ABD | ABAN
|
|
42
|
+
)
|
|
43
|
+
## POSITIVE lookahead - requires space
|
|
44
|
+
(?= [ ])
|
|
45
|
+
)}x ## note - NOT case insensitive
|
|
46
|
+
|
|
47
|
+
####
|
|
48
|
+
# susp/susp. [suspended]
|
|
49
|
+
# SUSP
|
|
50
|
+
INLINE_SUSP_RE = %r{
|
|
51
|
+
(?<inline_susp>
|
|
52
|
+
\b (?: susp\.? |
|
|
53
|
+
SUSP )
|
|
54
|
+
## POSITIVE lookahead - requires space
|
|
55
|
+
(?= [ ])
|
|
56
|
+
)}x ## note - NOT case insensitive
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
####
|
|
60
|
+
# ppd/ppd. or pst/pst. or pstp/pstp. or postp/postp. [postponed]
|
|
61
|
+
# PPD/PSTP/POSTP/P-P
|
|
62
|
+
# todo/check - add/allow p-p too - why? why not?
|
|
63
|
+
INLINE_PPD_RE = %r{
|
|
64
|
+
(?<inline_ppd>
|
|
65
|
+
\b (?: ppd\.? |
|
|
66
|
+
pst\.? |
|
|
67
|
+
po?stp\.? |
|
|
68
|
+
PPD | PST | PO?STP | P-P
|
|
69
|
+
)
|
|
70
|
+
## POSITIVE lookahead - requires space
|
|
71
|
+
(?= [ ])
|
|
72
|
+
)}x ## note - NOT case insensitive
|
|
73
|
+
|
|
74
|
+
####
|
|
75
|
+
# void via x-x X-X
|
|
76
|
+
# todo/check - only allow X-X - why? why not?
|
|
77
|
+
INLINE_VOID_RE = %r{
|
|
78
|
+
(?<inline_void>
|
|
79
|
+
\b (?: x-x |
|
|
80
|
+
X-X
|
|
81
|
+
)
|
|
82
|
+
## POSITIVE lookahead - requires space
|
|
83
|
+
(?= [ ])
|
|
84
|
+
)}x ## note - NOT case insensitive
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
####
|
|
88
|
+
# awd/awd. [awarded]
|
|
89
|
+
# AWD
|
|
90
|
+
# note - recommendation is to always include score
|
|
91
|
+
# thus, use/prefer SCORE_AWD e.g. 0-3 awd
|
|
92
|
+
INLINE_AWD_RE = %r{
|
|
93
|
+
(?<inline_awd>
|
|
94
|
+
\b (?: awd\.? | AWD )
|
|
95
|
+
## POSITIVE lookahead - requires space
|
|
96
|
+
(?= [ ])
|
|
97
|
+
)}x ## note - NOT case insensitive
|
|
98
|
+
|
|
99
|
+
###
|
|
100
|
+
# canc/canc. [cancelled]
|
|
101
|
+
# CANC
|
|
102
|
+
INLINE_CANC_RE = %r{
|
|
103
|
+
(?<inline_canc>
|
|
104
|
+
\b (?: canc\.? | CANC )
|
|
105
|
+
## POSITIVE lookahead - requires space
|
|
106
|
+
(?= [ ])
|
|
107
|
+
)}x ## note - NOT case insensitive
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
end # class Lexer
|
|
112
|
+
end # module SportDb
|