sportdb-parser 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +17 -4
  4. data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
  5. data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
  6. data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
  7. data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
  8. data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
  9. data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
  10. data/lib/sportdb/parser/lexer-on_top.rb +125 -0
  11. data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
  12. data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
  13. data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
  14. data/lib/sportdb/parser/lexer.rb +133 -1363
  15. data/lib/sportdb/parser/lexer_buffer.rb +8 -37
  16. data/lib/sportdb/parser/lexer_token.rb +126 -0
  17. data/lib/sportdb/parser/parser.rb +1104 -1403
  18. data/lib/sportdb/parser/racc_parser.rb +36 -32
  19. data/lib/sportdb/parser/racc_tree.rb +65 -98
  20. data/lib/sportdb/parser/token-date--helpers.rb +130 -0
  21. data/lib/sportdb/parser/token-date--names.rb +108 -0
  22. data/lib/sportdb/parser/token-date.rb +20 -192
  23. data/lib/sportdb/parser/token-date_duration.rb +8 -27
  24. data/lib/sportdb/parser/token-geo.rb +16 -16
  25. data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
  26. data/lib/sportdb/parser/token-goals.rb +103 -249
  27. data/lib/sportdb/parser/token-group.rb +8 -22
  28. data/lib/sportdb/parser/token-prop.rb +138 -124
  29. data/lib/sportdb/parser/token-prop_name.rb +48 -39
  30. data/lib/sportdb/parser/token-round.rb +21 -35
  31. data/lib/sportdb/parser/token-score--helpers.rb +189 -0
  32. data/lib/sportdb/parser/token-score.rb +9 -393
  33. data/lib/sportdb/parser/token-score_full.rb +331 -0
  34. data/lib/sportdb/parser/token-status.rb +44 -46
  35. data/lib/sportdb/parser/token-status_inline.rb +112 -0
  36. data/lib/sportdb/parser/token-text.rb +41 -31
  37. data/lib/sportdb/parser/token-time.rb +29 -26
  38. data/lib/sportdb/parser/token.rb +58 -159
  39. data/lib/sportdb/parser/version.rb +1 -1
  40. data/lib/sportdb/parser.rb +45 -17
  41. metadata +19 -6
  42. data/lib/sportdb/parser/blocktxt.rb +0 -99
  43. data/lib/sportdb/parser/lexer_tty.rb +0 -111
  44. data/lib/sportdb/parser/token-table.rb +0 -149
  45. data/lib/sportdb/parser/token_helpers.rb +0 -92
@@ -0,0 +1,331 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ## todo/fix
6
+ ## for internal helper constants
7
+ ## use trailing underline
8
+ ## e.g. P_EN_, AGG_EN_ etc!!!
9
+
10
+
11
+ ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
12
+
13
+ #####
14
+ # english helpers (penalty, extra time, ...)
15
+ ## note - p must go last (shortest match)
16
+ # pso = penalty shootout
17
+ ### - note - remove PSO for now (may add later back) - why? why not?
18
+ #
19
+ # todo/fix/clean-up - keep it simple - remove optional trailing dot (.)
20
+ # from pen., p., agg. etc. - why? why not?
21
+ # always use (simply) pen, p, agg
22
+ # (also) remove a.e.t. / a.e.t option - why? why not?
23
+ #
24
+ ## UPDATE mar/2026: added pens too - keep - why? why not?
25
+ ## (4-3 pens)
26
+ ## (4-3 Pens) -- keep mixed Pens/Pen. too - why? why not?
27
+ ## (4-3 Pen.)
28
+ P_EN = '(?-i: PEN | P |' +
29
+ '[Pp]ens | [Pp]en\.? | p\.? )' # e.g. p., p, pen, pen., etc.
30
+
31
+
32
+ ## fix - change ET_EN to AET_EN!!! - why? why not?
33
+ ## check - allow Aet too - why? why not?
34
+ ## or A.e.t ??
35
+ ET_EN = '(?-i: AET | ' +
36
+ 'aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
37
+ # AET_EN = ET_EN
38
+
39
+ ####
40
+ ## after (golden goal/sudden death) extra time - add more options/styles - why? why not?
41
+ AETGG_EN = '(?-i: AET/GG | AGGET | ASDET | ' +
42
+ 'aet/gg | a\.e\.t\.?/g\.g\.? | agget | asdet )'
43
+ ## after (silver goal) extra time
44
+ AETSG_EN = '(?-i: AET/SG | ASGET | ' +
45
+ 'aet/sg | a\.e\.t\.?/s\.g\.? | asget )'
46
+
47
+ ## agg/agg. or AGG
48
+ AGG_EN = '(?-i: AGG | agg\.? )' ## aggregate e..g 4-4 agg etc.
49
+
50
+
51
+
52
+ ## regex score helpers
53
+ ## note - MUST double escape \d e.g. \\d!!! if not "simple" string (e.g. '' but %Q<>)
54
+
55
+ ##
56
+ ## fix - change SCORE_P to SCORE_FULL_P
57
+ ## SCORE_ET to SCORE_FULL_ET
58
+ ##
59
+ ## (re)use SCORE_P, SCORE_ET for score only part!!!
60
+ ##
61
+ ## fix/fix/fix - rename to SCORE_P_ SCORE_ET_
62
+ ## mark internals with TRAILING underscore (leading NOT possible!)
63
+
64
+ SCORE_P = %Q< (?<p1>\\d{1,2}) - (?<p2>\\d{1,2})
65
+ [ ]? #{P_EN}
66
+ >
67
+ SCORE_ET = %Q< (?<et1>\\d{1,2}) - (?<et2>\\d{1,2})
68
+ [ ]? #{ET_EN}
69
+ >
70
+
71
+
72
+ SCORE_LOOKAHEAD = '(?= [ ,\]] | $)'
73
+
74
+
75
+
76
+ ####
77
+ ## after extra-time with golden goal/sudden death & silver goal rule
78
+ ## note - golden goal & silver goal EXCLUDE penalties!!!
79
+ ##
80
+ ## 4-3 a.e.t/g.g.
81
+ ## 4-3 aet/gg
82
+ ## 4-3agget -or- 4-3 asdet
83
+ ## 2-1 aet/sg
84
+ ## -or-
85
+ ## 4-3 aet/gg (3-3, 2-1)
86
+ SCORE__ET_GG_SG__RE = %r{
87
+ (?<score_full>
88
+ \b
89
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
90
+ [ ]? (?:
91
+ (?<aetgg> #{AETGG_EN})
92
+ |
93
+ (?<aetsg> #{AETSG_EN})
94
+ )
95
+ ### note:
96
+ ## add optional full-time, half-time score
97
+ (?:
98
+ [ ]+
99
+ \(
100
+ [ ]*
101
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
102
+ [ ]*
103
+ (?:
104
+ , [ ]*
105
+ (?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
106
+ [ ]*
107
+ )?
108
+ )? # note: make half time (HT) score optional for now
109
+ \)
110
+ )?
111
+ #{SCORE_LOOKAHEAD}
112
+ )}ix
113
+
114
+
115
+ ## note: allow SPECIAL cases WITHOUT full time scores (just a.e.t or pen. + a.e.t.)
116
+ ## 3-4 pen. 2-2 a.e.t.
117
+ ## 3-4 pen. 2-2 a.e.t.
118
+ ## 2-2 a.e.t.
119
+ SCORE__P_ET__RE = %r{
120
+ (?<score_full>
121
+ \b
122
+ (?: #{SCORE_P} [ ]+
123
+ )? ## note: make penalty (P) score optional for now
124
+ #{SCORE_ET}
125
+ #{SCORE_LOOKAHEAD}
126
+ )}ix
127
+ ## todo/check: remove lookahead assertion here - why require space?
128
+ ## note: \b works only after non-alphanum e.g. )
129
+
130
+
131
+ ## note: allow SPECIAL cases WITHOUT full time scores
132
+ ## AND with pen in last position!
133
+ ## 2-2 a.e.t., 3-4 pen.
134
+ ## 2-2 a.e.t. 3-4 pen. ## or without comma separator - why? why not?
135
+ SCORE__ET_P__RE = %r{
136
+ (?<score_full>
137
+ \b
138
+ #{SCORE_ET}
139
+ (?: [ ]*,[ ]* | [ ]+ )
140
+ #{SCORE_P}
141
+ #{SCORE_LOOKAHEAD}
142
+ )}ix
143
+ ## todo/check: remove lookahead assertion here - why require space?
144
+ ## note: \b works only after non-alphanum e.g. )
145
+
146
+ ### special case (i) - full time with penalties
147
+ ## 2-2, 3-4 pen.
148
+ SCORE__FT_P__RE = %r{
149
+ (?<score_full>
150
+ \b
151
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
152
+ [ ]*,[ ]* ## note - comma required!!!
153
+ #{SCORE_P}
154
+ #{SCORE_LOOKAHEAD}
155
+ )}ix
156
+
157
+ ### special case (ii) - full time & half-time with penalties
158
+ ## 2-2 (1-1), 3-4 pen.
159
+ SCORE__FT_HT_P__RE = %r{
160
+ (?<score_full>
161
+ \b
162
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
163
+ [ ]*
164
+ \(
165
+ (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
166
+ \)
167
+ [ ]*,[ ]* ## note - comma required!!!
168
+ #{SCORE_P}
169
+ #{SCORE_LOOKAHEAD}
170
+ )}ix
171
+
172
+
173
+
174
+
175
+ ## note: allow SPECIAL with penalty only
176
+ ## 3-4 pen. or 3-4p etc.
177
+ SCORE__P__RE = %r{
178
+ (?<score_full>
179
+ \b
180
+ #{SCORE_P}
181
+ #{SCORE_LOOKAHEAD}
182
+ )}ix
183
+ ## todo/check: remove lookahead assertion here - why require space?
184
+ ## note: \b works only after non-alphanum e.g. )
185
+
186
+ ####
187
+ ## support short all-in-one e.g.
188
+ ## e.g. 3-4 pen. 2-2 a.e.t. ( 1-1, 1-1 ) becomes
189
+ ## 3-4 pen. (2-2, 1-1, 1-1)
190
+
191
+ SCORE__P_ET_FT_HT_V2__RE = %r{
192
+ (?<score_full>
193
+ \b
194
+ #{SCORE_P} [ ]+
195
+ \(
196
+ [ ]*
197
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
198
+ [ ]*, [ ]*
199
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
200
+ [ ]*, [ ]*
201
+ (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
202
+ [ ]*
203
+ \)
204
+ #{SCORE_LOOKAHEAD}
205
+ )}ix ## todo/check: remove loakahead assertion here - why require space?
206
+ ## note: \b works only after non-alphanum e.g. )
207
+
208
+
209
+ # e.g. 2-2 a.e.t. (1-1, 1-0), 5-1 pen.
210
+ SCORE__ET_FT_HT_P__RE = %r{
211
+ (?<score_full>
212
+ \b
213
+ #{SCORE_ET} [ ]+
214
+ \(
215
+ [ ]*
216
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
217
+ [ ]*
218
+ (?:
219
+ , [ ]*
220
+ (?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
221
+ [ ]*
222
+ )?
223
+ )? # note: make half time (HT) score optional for now
224
+ \)
225
+ (?: [ ]*,[ ]* | [ ]+)
226
+ #{SCORE_P}
227
+ #{SCORE_LOOKAHEAD}
228
+ )}ix ## todo/check: remove loakahead assertion here - why require space?
229
+ ## note: \b works only after non-alphanum e.g. )
230
+
231
+
232
+
233
+ ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
234
+ ## 3-4p 2-2aet (1-1, ) or
235
+ ## 3-4 pen. 2-2 a.e.t. (1-1) or
236
+ ## 2-2 a.e.t. (1-1, 1-1) or
237
+ ## 2-2 a.e.t. (1-1, ) or
238
+ ## 2-2 a.e.t. (1-1)
239
+
240
+ SCORE__P_ET_FT_HT__RE = %r{
241
+ (?<score_full>
242
+ \b
243
+ (?:
244
+ #{SCORE_P} [ ]+
245
+ )? ## note - make penalty (P) score optional for now
246
+ #{SCORE_ET} [ ]+
247
+ \(
248
+ [ ]*
249
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
250
+ [ ]*
251
+ (?:
252
+ , [ ]*
253
+ (?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
254
+ [ ]*
255
+ )?
256
+ )? # note: make half time (HT) score optional for now
257
+ \)
258
+ #{SCORE_LOOKAHEAD}
259
+ )}ix ## todo/check: remove loakahead assertion here - why require space?
260
+ ## note: \b works only after non-alphanum e.g. )
261
+
262
+ ###
263
+ ## special case for case WITHOUT extra time!!
264
+ ## same as above (but WITHOUT extra time and pen required)
265
+ SCORE__P_FT_HT__RE = %r{
266
+ (?<score_full>
267
+ \b
268
+ #{SCORE_P} [ ]+
269
+ \(
270
+ [ ]*
271
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
272
+ [ ]*
273
+ (?:
274
+ , [ ]*
275
+ (?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
276
+ [ ]*
277
+ )?
278
+ )? # note: make half time (HT) score optional for now
279
+ \)
280
+ #{SCORE_LOOKAHEAD}
281
+ )}ix ## todo/check: remove loakahead assertion here - why require space?
282
+ ## note: \b works only after non-alphanum e.g. )
283
+
284
+
285
+ ##########
286
+ ## e.g. 2-1 (1-1)
287
+ SCORE__FT_HT__RE = %r{
288
+ (?<score_full>
289
+ \b
290
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
291
+ [ ]+ \( [ ]*
292
+ (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
293
+ [ ]* \)
294
+ #{SCORE_LOOKAHEAD}
295
+ )}ix ## todo/check: remove loakahead assertion here - why require space?
296
+ ## note: \b works only after non-alphanum e.g. )
297
+
298
+
299
+
300
+
301
+
302
+
303
+ #############################################
304
+ # map tables
305
+ # note: order matters; first come-first matched/served
306
+
307
+ ## note 2-2, 5-1 pen. must get priority (get before SCORE_LEGS!!!)
308
+ ## break out
309
+ ## note - no need for Regexp.union for now (only single regex!)
310
+
311
+ SCORE_FULL_1ST_RE = SCORE__FT_P__RE # e.g. 2-2, 5-1 pen.
312
+
313
+
314
+ SCORE_FULL_RE = Regexp.union(
315
+ SCORE__ET_GG_SG__RE, # e.g. 3-1 aet/gg
316
+ SCORE__P_ET_FT_HT_V2__RE, # e.g. 5-1 pen. (2-2, 1-1, 1-0)
317
+ SCORE__ET_FT_HT_P__RE, # e.g. 2-2 a.e.t. (1-1, 1-0), 5-1 pen.
318
+ SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
319
+ SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
320
+ SCORE__ET_P__RE, # e.g. 2-2 a.e.t., 5-1 pen.
321
+ SCORE__FT_HT_P__RE, # e.g. 2-2 (1-1), 5-1 pen.
322
+ SCORE__P_ET__RE, # e.g. 5-1 pen. 2-2 a.e.t. or 2-2 a.e.t. (w/o pen)
323
+ SCORE__P__RE, # e.g. 5-1 pen.
324
+ SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
325
+ ## note - keep basic score as its own token!!!!
326
+ ## that is, SCORE & SCORE_MORE
327
+ ### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
328
+ )
329
+
330
+ end # class Lexer
331
+ end # module SportDb
@@ -1,6 +1,6 @@
1
1
  module SportDb
2
2
  class Lexer
3
-
3
+
4
4
  ## (match) status
5
5
  ## note: english usage - cancelled (in UK), canceled (in US)
6
6
  ##
@@ -9,24 +9,24 @@ class Lexer
9
9
 
10
10
  POSTPONED = %Q{ (?<postponed> postponed | pst\\.? | po?stp\\.? | ppd\\.? ) }
11
11
  CANCELED = %Q{ (?<canceled> cancell?ed | canc\\.? ) } ## add can/can. - why? why not?
12
- WALKOVER = %Q{ (?<walkover> walkover | w/o | wo ) } ## add o/w too - why? why not?
13
- AWARDED = %Q{ (?<awarded> awarded | awd\\.? ) }
14
- SUSPENDED = %Q{ (?<suspended> suspended | susp\\.? ) }
12
+ WALKOVER = %Q{ (?<walkover> walkover | w/o | wo ) } ## add o/w too - why? why not?
13
+ AWARDED = %Q{ (?<awarded> awarded | awd\\.? ) }
14
+ SUSPENDED = %Q{ (?<suspended> suspended | susp\\.? ) }
15
15
  ABANDONED = %Q{ (?<abandoned> abandoned | aban\\.? | abd\\.? ) }
16
- ANNULLED = %Q{ (?<annulled> annulled ) }
17
- VOIDED = %Q{ (?<voided> voided | void ) } ### note - alternative (name) to annulled
16
+ ANNULLED = %Q{ (?<annulled> annulled ) }
17
+ VOIDED = %Q{ (?<voided> voided | void ) } ### note - alternative (name) to annulled
18
18
 
19
- REPLAY = %Q{ (?<replay> replay | repl\\.? ) }
19
+ REPLAY = %Q{ (?<replay> replay | repl\\.? ) }
20
20
 
21
21
 
22
22
  ##
23
- ## note - status_note incl. complete text incl. <status> (not normalized)
23
+ ## note - status_note incl. complete text incl. <status> (not normalized)
24
24
  ## <status> gets normalized e.g. ppd => postponed etc.
25
25
 
26
26
  STATUS_RE = %r{
27
27
  \[
28
- (?:
29
- #############################################
28
+ (?:
29
+ #############################################
30
30
  ### opt 1 - allow long forms with note/comment for some stati
31
31
  ## e.g. [postponed due to tropical storm "Hanna"]
32
32
  ## [suspended at 84' by storm; result stood]
@@ -37,33 +37,33 @@ STATUS_RE = %r{
37
37
  ## pre-match (not played)
38
38
  #{POSTPONED}
39
39
  |
40
- #{CANCELED}
40
+ #{CANCELED}
41
41
  |
42
- #{WALKOVER}
42
+ #{WALKOVER}
43
43
  |
44
- ######################
44
+ ######################
45
45
  ## pre/post match
46
46
  #{AWARDED}
47
47
  |
48
48
  ########################
49
49
  ## post match - (partially) played
50
- #{SUSPENDED}
51
- |
50
+ #{SUSPENDED}
51
+ |
52
52
  #{ABANDONED}
53
53
  |
54
54
  #{ANNULLED}
55
55
  |
56
56
  #{VOIDED} ### note - alternative to annulled
57
57
  ) ## end-of-<status>
58
- [ :;,-]+ ## leading spaces (or separators)
59
- [^\]]+? ## note - add non-greedy match
60
- ) ## end-of-<status-note>
58
+ [ :;,-]+ ## leading spaces (or separators)
59
+ [^\]]+? ## note - add non-greedy match
60
+ ) ## end-of-<status-note>
61
61
  [ ]* ## eat-up optional trailing spaces
62
62
  )
63
- |
63
+ |
64
64
  ########################################
65
65
  ## opt 2 - short form only (no note/comments) e.g. [postponed], [Canceled], etc.
66
- ####################################
66
+ ####################################
67
67
  (?<status>
68
68
  ####################
69
69
  ## pre-match (not played)
@@ -71,15 +71,15 @@ STATUS_RE = %r{
71
71
  |
72
72
  #{CANCELED}
73
73
  |
74
- #{WALKOVER}
74
+ #{WALKOVER}
75
75
  |
76
- ######################
76
+ ######################
77
77
  ## pre/post match
78
78
  #{AWARDED}
79
79
  |
80
80
  ########################
81
81
  ## post match - (partially) played
82
- #{SUSPENDED}
82
+ #{SUSPENDED}
83
83
  |
84
84
  #{ABANDONED}
85
85
  |
@@ -88,7 +88,7 @@ STATUS_RE = %r{
88
88
  #{VOIDED} ### note - alternative to annulled
89
89
  |
90
90
  #{REPLAY} ### todo/fix - keep replay - why? why not?
91
- ### prefer replay in round e.g.
91
+ ### prefer replay in round e.g.
92
92
  ## ▪ Round 17, Replay
93
93
  ## ▪ Semi-finals, Replays
94
94
  )
@@ -108,16 +108,16 @@ def self._build_status( m )
108
108
  elsif m[:abandoned] then 'abandoned'
109
109
  elsif m[:annulled] ||
110
110
  m[:voided] then 'annulled'
111
- elsif m[:replay] then 'replay'
111
+ elsif m[:replay] then 'replay'
112
112
  else ## fallback on "generic" status (shouldn't happen)
113
113
  m[:status]
114
114
  end
115
115
 
116
116
  ## includes note? e.g. awarded; originally 2-0
117
- status[:status_note] = m[:status_note] if m[:status_note]
118
-
117
+ status[:status_note] = m[:status_note] if m[:status_note]
118
+
119
119
  status
120
- end
120
+ end
121
121
  def _build_status( m ) self.class._build_status( m ); end
122
122
 
123
123
 
@@ -138,57 +138,55 @@ postponed|postp\.|ppd\.
138
138
  ## [postponed due to tropical storm "Hanna"]
139
139
  ## [postponed from Sep 10-12 due to death Queen Elizabeth II]
140
140
 
141
- cancell?ed|canc.\
142
-
143
- walkover|w/o|wo
144
- ## A victory awarded to one team because the opponent was unable
141
+ cancell?ed|canc\.
142
+
143
+ walkover|w/o|wo
144
+ ## A victory awarded to one team because the opponent was unable
145
145
  ## or unwilling to compete (e.g., failing to show up or being disqualified).
146
146
  ## -or-
147
147
  ## A walkover or "win over" reveals when a team has won a game
148
- ## without it being played.
148
+ ## without it being played.
149
149
  ## -or-
150
150
  ## see <https://en.wikipedia.org/wiki/Walkover>
151
151
 
152
152
 
153
153
 
154
- ######################
154
+ ######################
155
155
  ## pre/post match
156
156
  awarded|awd\.
157
157
 
158
158
  ## e.g. [awarded match to Leones Negros by undue alignment; original result 1-2]
159
159
  ## [awarded 3-0 to Cafetaleros by undue alignment; originally ended 2-0]
160
160
  ## [awarded 3-0; originally 0-2, América used ineligible player (Federico Viñas)]
161
-
162
- ## A result that is decided by a governing body
161
+
162
+ ## A result that is decided by a governing body
163
163
  ## (like FIFA or a domestic league) rather than by the play on the pitch.
164
- ## Usually follows a Forfeit or Walkover.
165
- ## If a team refuses to play, abandons a match, or fields an ineligible player,
164
+ ## Usually follows a Forfeit or Walkover.
165
+ ## If a team refuses to play, abandons a match, or fields an ineligible player,
166
166
  ## the opponent is typically awarded a 3-0 victory.
167
167
 
168
168
  ########################
169
169
  ## post match - (partially) played
170
- suspended|susp\.
170
+ suspended|susp\.
171
171
 
172
- ## e.g. [suspended at 0-0 in 12' due to storm]
172
+ ## e.g. [suspended at 0-0 in 12' due to storm]
173
173
  ## [suspended at 84' by storm; result stood]
174
-
174
+
175
175
  ## The match is temporarily halted but intended to be resumed or restarted later.
176
176
 
177
177
  abandoned|aban\.|abd\.
178
178
 
179
- ## e.g. [abandoned at 1-1 in 65' due to cardiac arrest
179
+ ## e.g. [abandoned at 1-1 in 65' due to cardiac arrest
180
180
  ## Luton player Tom Lockyer]
181
181
  ## [abandoned at 0-0 in 6' due to waterlogged pitch]
182
- ## [abandoned at 5-0 in 80' due to attack
182
+ ## [abandoned at 5-0 in 80' due to attack
183
183
  ## on assistant referee by Cerro; result stood]
184
184
  ## [abandoned at 1-0 in 31']
185
185
  ## [abandoned at 0-1 in 85' due to crowd trouble]
186
186
 
187
- ## The match started but was stopped by the referee before the final whistle
187
+ ## The match started but was stopped by the referee before the final whistle
188
188
  ## (e.g., due to a waterlogged pitch or player injury) and did not resume
189
189
 
190
190
  annulled OR voided|void
191
191
  ## The match result is struck from the record entirely,
192
192
  ## usually due to a team's withdrawal from the league or a severe rule violation.
193
-
194
-
@@ -0,0 +1,112 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+ ## (match) status inline versions
5
+
6
+
7
+
8
+
9
+ ## "inline" match status e.g.
10
+ ## Clapham Rovers w/o Hitchin
11
+ ## Queen's Park bye
12
+
13
+ ## add support for WO or W-0 too - why? why not?
14
+ INLINE_WO_RE = %r{
15
+ (?<inline_wo>
16
+ \b (?: w/o | W/O ) \b
17
+ )}x ## note - NOT case insensitive
18
+
19
+ INLINE_BYE_RE = %r{
20
+ (?<inline_bye>
21
+ \b (?: bye | BYE ) \b
22
+ )}x ## note - NOT case insensitive
23
+
24
+
25
+ ###
26
+ # A n/p B (note - basically an inline short form of A v B [cancelled] )
27
+ # N/P
28
+ INLINE_NP_RE = %r{
29
+ (?<inline_np>
30
+ \b (?: n/p | N/P ) \b
31
+ )}x ## note - NOT case insensitive
32
+
33
+
34
+ ###
35
+ # abd/abd. or aban/aban. [abandoned]
36
+ # ABD/ABAN
37
+ INLINE_ABD_RE = %r{
38
+ (?<inline_abd>
39
+ \b (?: abd\.? |
40
+ aban\.? |
41
+ ABD | ABAN
42
+ )
43
+ ## POSITIVE lookahead - requires space
44
+ (?= [ ])
45
+ )}x ## note - NOT case insensitive
46
+
47
+ ####
48
+ # susp/susp. [suspended]
49
+ # SUSP
50
+ INLINE_SUSP_RE = %r{
51
+ (?<inline_susp>
52
+ \b (?: susp\.? |
53
+ SUSP )
54
+ ## POSITIVE lookahead - requires space
55
+ (?= [ ])
56
+ )}x ## note - NOT case insensitive
57
+
58
+
59
+ ####
60
+ # ppd/ppd. or pst/pst. or pstp/pstp. or postp/postp. [postponed]
61
+ # PPD/PSTP/POSTP/P-P
62
+ # todo/check - add/allow p-p too - why? why not?
63
+ INLINE_PPD_RE = %r{
64
+ (?<inline_ppd>
65
+ \b (?: ppd\.? |
66
+ pst\.? |
67
+ po?stp\.? |
68
+ PPD | PST | PO?STP | P-P
69
+ )
70
+ ## POSITIVE lookahead - requires space
71
+ (?= [ ])
72
+ )}x ## note - NOT case insensitive
73
+
74
+ ####
75
+ # void via x-x X-X
76
+ # todo/check - only allow X-X - why? why not?
77
+ INLINE_VOID_RE = %r{
78
+ (?<inline_void>
79
+ \b (?: x-x |
80
+ X-X
81
+ )
82
+ ## POSITIVE lookahead - requires space
83
+ (?= [ ])
84
+ )}x ## note - NOT case insensitive
85
+
86
+
87
+ ####
88
+ # awd/awd. [awarded]
89
+ # AWD
90
+ # note - recommendation is to always include score
91
+ # thus, use/prefer SCORE_AWD e.g. 0-3 awd
92
+ INLINE_AWD_RE = %r{
93
+ (?<inline_awd>
94
+ \b (?: awd\.? | AWD )
95
+ ## POSITIVE lookahead - requires space
96
+ (?= [ ])
97
+ )}x ## note - NOT case insensitive
98
+
99
+ ###
100
+ # canc/canc. [cancelled]
101
+ # CANC
102
+ INLINE_CANC_RE = %r{
103
+ (?<inline_canc>
104
+ \b (?: canc\.? | CANC )
105
+ ## POSITIVE lookahead - requires space
106
+ (?= [ ])
107
+ )}x ## note - NOT case insensitive
108
+
109
+
110
+
111
+ end # class Lexer
112
+ end # module SportDb