sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -2,14 +2,104 @@ module SportDb
2
2
  class Lexer
3
3
 
4
4
 
5
+
6
+
5
7
  ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
6
8
 
7
9
  #####
8
10
  # english helpers (penalty, extra time, ...)
9
11
  ## note - p must go last (shortest match)
10
12
  # pso = penalty shootout
11
- P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
12
- ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
13
+ ### - note - remove PSO for now (may add later back) - why? why not?
14
+ #
15
+ # todo/fix/clean-up - keep it simple - remove optional trailing dot (.)
16
+ # from pen., p., agg. etc. - why? why not?
17
+ # always use (simply) pen, p, agg
18
+ # (also) remove a.e.t. / a.e.t option - why? why not?
19
+ #
20
+ ## UPDATE mar/2026: added pens too - keep - why? why not?
21
+ ## (4-3 pens)
22
+ ## (4-3 Pens) -- keep mixed Pens/Pen. too - why? why not?
23
+ ## (4-3 Pen.)
24
+ P_EN = '(?-i: PEN | P |' +
25
+ '[Pp]ens | [Pp]en\.? | p\.? )' # e.g. p., p, pen, pen., etc.
26
+
27
+
28
+ ## fix - change ET_EN to AET_EN!!! - why? why not?
29
+ ## check - allow Aet too - why? why not?
30
+ ## or A.e.t ??
31
+ ET_EN = '(?-i: AET | ' +
32
+ 'aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
33
+ # AET_EN = ET_EN
34
+
35
+ ####
36
+ ## after (golden goal/sudden death) extra time - add more options/styles - why? why not?
37
+ AETGG_EN = '(?-i: AET/GG | AGGET | ASDET | ' +
38
+ 'aet/gg | a\.e\.t\.?/g\.g\.? | agget | asdet )'
39
+ ## after (silver goal) extra time
40
+ AETSG_EN = '(?-i: AET/SG | ASGET | ' +
41
+ 'aet/sg | a\.e\.t\.?/s\.g\.? | asget )'
42
+
43
+ ## agg/agg. or AGG
44
+ AGG_EN = '(?-i: AGG | agg\.? )' ## aggregate e..g 4-4 agg etc.
45
+
46
+
47
+
48
+ ## regex score helpers
49
+ ## note - MUST double escape \d e.g. \\d!!! if not "simple" string (e.g. '' but %Q<>)
50
+
51
+ ##
52
+ ## fix - change SCORE_P to SCORE_FULL_P
53
+ ## SCORE_ET to SCORE_FULL_ET
54
+ ##
55
+ ## (re)use SCORE_P, SCORE_ET for score only part!!!
56
+
57
+ SCORE_P = %Q< (?<p1>\\d{1,2}) - (?<p2>\\d{1,2})
58
+ [ ]? #{P_EN}
59
+ >
60
+ SCORE_ET = %Q< (?<et1>\\d{1,2}) - (?<et2>\\d{1,2})
61
+ [ ]? #{ET_EN}
62
+ >
63
+ SCORE_LOOKAHEAD = '(?= [ ,\]] | $)'
64
+
65
+
66
+ ####
67
+ ## after extra-time with golden goal/sudden death & silver goal rule
68
+ ## note - golden goal & silver goal EXCLUDE penalties!!!
69
+ ##
70
+ ## 4-3 a.e.t/g.g.
71
+ ## 4-3 aet/gg
72
+ ## 4-3agget -or- 4-3 asdet
73
+ ## 2-1 aet/sg
74
+ ## -or-
75
+ ## 4-3 aet/gg (3-3, 2-1)
76
+ SCORE__ET_GG_SG__RE = %r{
77
+ (?<score_full>
78
+ \b
79
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
80
+ [ ]? (?:
81
+ (?<aetgg> #{AETGG_EN})
82
+ |
83
+ (?<aetsg> #{AETSG_EN})
84
+ )
85
+ ### note:
86
+ ## add optional full-time, half-time score
87
+ (?:
88
+ [ ]+
89
+ \(
90
+ [ ]*
91
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
92
+ [ ]*
93
+ (?:
94
+ , [ ]*
95
+ (?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
96
+ [ ]*
97
+ )?
98
+ )? # note: make half time (HT) score optional for now
99
+ \)
100
+ )?
101
+ #{SCORE_LOOKAHEAD}
102
+ )}ix
13
103
 
14
104
 
15
105
  ## note: allow SPECIAL cases WITHOUT full time scores (just a.e.t or pen. + a.e.t.)
@@ -17,43 +107,83 @@ class Lexer
17
107
  ## 3-4 pen. 2-2 a.e.t.
18
108
  ## 2-2 a.e.t.
19
109
  SCORE__P_ET__RE = %r{
20
- (?<score_more>
110
+ (?<score_full>
21
111
  \b
22
- (?:
23
- (?<p1>\d{1,2}) - (?<p2>\d{1,2})
24
- [ ]* #{P_EN} [ ]+
25
- )? # note: make penalty (P) score optional for now
26
- (?<et1>\d{1,2}) - (?<et2>\d{1,2})
27
- [ ]* #{ET_EN}
28
- (?=[ ,\]]|$)
112
+ (?: #{SCORE_P} [ ]+
113
+ )? ## note: make penalty (P) score optional for now
114
+ #{SCORE_ET}
115
+ #{SCORE_LOOKAHEAD}
29
116
  )}ix
30
117
  ## todo/check: remove lookahead assertion here - why require space?
31
118
  ## note: \b works only after non-alphanum e.g. )
32
119
 
33
120
 
121
+ ## note: allow SPECIAL cases WITHOUT full time scores
122
+ ## AND with pen in last position!
123
+ ## 2-2 a.e.t., 3-4 pen.
124
+ ## 2-2 a.e.t. 3-4 pen. ## or without comma separator - why? why not?
125
+ SCORE__ET_P__RE = %r{
126
+ (?<score_full>
127
+ \b
128
+ #{SCORE_ET}
129
+ (?: [ ]*,[ ]* | [ ]+ )
130
+ #{SCORE_P}
131
+ #{SCORE_LOOKAHEAD}
132
+ )}ix
133
+ ## todo/check: remove lookahead assertion here - why require space?
134
+ ## note: \b works only after non-alphanum e.g. )
135
+
136
+ ### special case (i) - full time with penalties
137
+ ## 2-2, 3-4 pen.
138
+ SCORE__FT_P__RE = %r{
139
+ (?<score_full>
140
+ \b
141
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
142
+ [ ]*,[ ]* ## note - comma required!!!
143
+ #{SCORE_P}
144
+ #{SCORE_LOOKAHEAD}
145
+ )}ix
146
+
147
+ ### special case (ii) - full time & half-time with penalties
148
+ ## 2-2 (1-1), 3-4 pen.
149
+ SCORE__FT_HT_P__RE = %r{
150
+ (?<score_full>
151
+ \b
152
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
153
+ [ ]*
154
+ \(
155
+ (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
156
+ \)
157
+ [ ]*,[ ]* ## note - comma required!!!
158
+ #{SCORE_P}
159
+ #{SCORE_LOOKAHEAD}
160
+ )}ix
161
+
162
+
163
+
164
+
34
165
  ## note: allow SPECIAL with penalty only
35
- ## 3-4 pen.
166
+ ## 3-4 pen. or 3-4p etc.
36
167
  SCORE__P__RE = %r{
37
- (?<score_more>
168
+ (?<score_full>
38
169
  \b
39
- (?<p1>\d{1,2}) - (?<p2>\d{1,2})
40
- [ ]* #{P_EN}
41
- (?=[ ,\]]|$)
170
+ #{SCORE_P}
171
+ #{SCORE_LOOKAHEAD}
42
172
  )}ix
43
173
  ## todo/check: remove lookahead assertion here - why require space?
44
174
  ## note: \b works only after non-alphanum e.g. )
45
175
 
46
176
  ####
47
177
  ## support short all-in-one e.g.
48
- ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) becomes
178
+ ## e.g. 3-4 pen. 2-2 a.e.t. ( 1-1, 1-1 ) becomes
49
179
  ## 3-4 pen. (2-2, 1-1, 1-1)
50
180
 
51
181
  SCORE__P_ET_FT_HT_V2__RE = %r{
52
- (?<score_more>
182
+ (?<score_full>
53
183
  \b
54
- (?<p1>\d{1,2}) - (?<p2>\d{1,2})
55
- [ ]* #{P_EN} [ ]+
184
+ #{SCORE_P} [ ]+
56
185
  \(
186
+ [ ]*
57
187
  (?<et1>\d{1,2}) - (?<et2>\d{1,2})
58
188
  [ ]*, [ ]*
59
189
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
@@ -61,11 +191,35 @@ class Lexer
61
191
  (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
62
192
  [ ]*
63
193
  \)
64
- (?=[ ,\]]|$)
194
+ #{SCORE_LOOKAHEAD}
65
195
  )}ix ## todo/check: remove loakahead assertion here - why require space?
66
196
  ## note: \b works only after non-alphanum e.g. )
67
197
 
68
198
 
199
+ # e.g. 2-2 a.e.t. (1-1, 1-0), 5-1 pen.
200
+ SCORE__ET_FT_HT_P__RE = %r{
201
+ (?<score_full>
202
+ \b
203
+ #{SCORE_ET} [ ]+
204
+ \(
205
+ [ ]*
206
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
207
+ [ ]*
208
+ (?:
209
+ , [ ]*
210
+ (?: (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
211
+ [ ]*
212
+ )?
213
+ )? # note: make half time (HT) score optional for now
214
+ \)
215
+ (?: [ ]*,[ ]* | [ ]+)
216
+ #{SCORE_P}
217
+ #{SCORE_LOOKAHEAD}
218
+ )}ix ## todo/check: remove loakahead assertion here - why require space?
219
+ ## note: \b works only after non-alphanum e.g. )
220
+
221
+
222
+
69
223
  ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
70
224
  ## 3-4p 2-2aet (1-1, ) or
71
225
  ## 3-4 pen. 2-2 a.e.t. (1-1) or
@@ -74,14 +228,12 @@ class Lexer
74
228
  ## 2-2 a.e.t. (1-1)
75
229
 
76
230
  SCORE__P_ET_FT_HT__RE = %r{
77
- (?<score_more>
231
+ (?<score_full>
78
232
  \b
79
233
  (?:
80
- (?<p1>\d{1,2}) - (?<p2>\d{1,2})
81
- [ ]* #{P_EN} [ ]+
82
- )? # note: make penalty (P) score optional for now
83
- (?<et1>\d{1,2}) - (?<et2>\d{1,2})
84
- [ ]* #{ET_EN} [ ]+
234
+ #{SCORE_P} [ ]+
235
+ )? ## note - make penalty (P) score optional for now
236
+ #{SCORE_ET} [ ]+
85
237
  \(
86
238
  [ ]*
87
239
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
@@ -93,7 +245,7 @@ class Lexer
93
245
  )?
94
246
  )? # note: make half time (HT) score optional for now
95
247
  \)
96
- (?=[ ,\]]|$)
248
+ #{SCORE_LOOKAHEAD}
97
249
  )}ix ## todo/check: remove loakahead assertion here - why require space?
98
250
  ## note: \b works only after non-alphanum e.g. )
99
251
 
@@ -101,10 +253,9 @@ class Lexer
101
253
  ## special case for case WITHOUT extra time!!
102
254
  ## same as above (but WITHOUT extra time and pen required)
103
255
  SCORE__P_FT_HT__RE = %r{
104
- (?<score_more>
256
+ (?<score_full>
105
257
  \b
106
- (?<p1>\d{1,2}) - (?<p2>\d{1,2})
107
- [ ]* #{P_EN} [ ]+
258
+ #{SCORE_P} [ ]+
108
259
  \(
109
260
  [ ]*
110
261
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
@@ -116,7 +267,7 @@ class Lexer
116
267
  )?
117
268
  )? # note: make half time (HT) score optional for now
118
269
  \)
119
- (?=[ ,\]]|$)
270
+ #{SCORE_LOOKAHEAD}
120
271
  )}ix ## todo/check: remove loakahead assertion here - why require space?
121
272
  ## note: \b works only after non-alphanum e.g. )
122
273
 
@@ -124,36 +275,33 @@ class Lexer
124
275
  ##########
125
276
  ## e.g. 2-1 (1-1)
126
277
  SCORE__FT_HT__RE = %r{
127
- (?<score_more>
278
+ (?<score_full>
128
279
  \b
129
280
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
130
281
  [ ]+ \( [ ]*
131
282
  (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
132
283
  [ ]* \)
133
- (?=[ ,\]]|$)
284
+ #{SCORE_LOOKAHEAD}
134
285
  )}ix ## todo/check: remove loakahead assertion here - why require space?
135
286
  ## note: \b works only after non-alphanum e.g. )
136
287
 
137
- #####
138
- ## 2-1
139
- SCORE__FT__RE = %r{
140
- (?<score>
141
- \b
142
- (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
143
- \b
144
- )}ix
145
288
 
289
+
290
+
146
291
  #############################################
147
292
  # map tables
148
293
  # note: order matters; first come-first matched/served
149
- #
150
- ## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
151
294
 
152
- SCORE_MORE_RE = Regexp.union(
295
+ SCORE_FULL_RE = Regexp.union(
296
+ SCORE__ET_GG_SG__RE, # e.g. 3-1 aet/gg
153
297
  SCORE__P_ET_FT_HT_V2__RE, # e.g. 5-1 pen. (2-2, 1-1, 1-0)
298
+ SCORE__ET_FT_HT_P__RE, # e.g. 2-2 a.e.t. (1-1, 1-0), 5-1 pen.
154
299
  SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
155
300
  SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
156
- SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
301
+ SCORE__ET_P__RE, # e.g. 2-2 a.e.t., 5-1 pen.
302
+ SCORE__FT_P__RE, # e.g. 2-2, 5-1 pen.
303
+ SCORE__FT_HT_P__RE, # e.g. 2-2 (1-1), 5-1 pen.
304
+ SCORE__P_ET__RE, # e.g. 5-1 pen. 2-2 a.e.t. or 2-2 a.e.t. (w/o pen)
157
305
  SCORE__P__RE, # e.g. 5-1 pen.
158
306
  SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
159
307
  ## note - keep basic score as its own token!!!!
@@ -161,8 +309,136 @@ SCORE_MORE_RE = Regexp.union(
161
309
  ### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
162
310
  )
163
311
 
164
- SCORE_RE = SCORE__FT__RE
165
-
312
+
313
+ ###
314
+ ##
315
+ ## add support for score awarded (inline style)
316
+ ## 3-0 awd 3-0 awd. 3-0awd
317
+ ## 0-1 awd or 0-1 AWD etc.
318
+
319
+ ##
320
+ ## note - keep AWD w/o dot - why? why not?
321
+
322
+ SCORE_AWD_RE = %r{
323
+ (?<score_awd>
324
+ \b
325
+ (?<score1>\d{1,2}) - (?<score2>\d{1,2})
326
+ [ ]?
327
+ (?-i: awd\.? | AWD )
328
+ ## POSITIVE lookahead - requires space
329
+ (?= [ ])
330
+ )}ix
331
+
332
+ ###
333
+ ##
334
+ ## add support for score abandoned (inline style)
335
+ ## 2-1 abd. or 2-1 ABD
336
+ SCORE_ABD_RE = %r{
337
+ (?<score_abd>
338
+ \b
339
+ (?<score1>\d{1,2}) - (?<score2>\d{1,2})
340
+ [ ]?
341
+ (?-i: abd\.? | ABD )
342
+ ## POSITIVE lookahead - requires space
343
+ (?= [ ])
344
+ )}ix
345
+
346
+ #####
347
+ ## 2-1
348
+ ###
349
+ ### note - was SCORE__FT__RE
350
+ ### changed to "generic" SCORE_RE
351
+ ### and
352
+ ## (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
353
+ ## changed
354
+ ## (?<score1>\d{1,2}) - (?<score2>\d{1,2})
355
+ ## to
356
+ ## pattern match not necessarily the full-time (ft) scoreline!!!
357
+ ## - pattern also used for goal seq(uence) e.g. 1-0 Kane, 1-1 Johnson
358
+ SCORE_RE = %r{
359
+ (?<score>
360
+ \b
361
+ (?<score1>\d{1,2}) - (?<score2>\d{1,2})
362
+ \b
363
+ )}ix
364
+
365
+
366
+ ######
367
+ # add support for "split" score
368
+ # note - for now (2) 1 is REQUIRED
369
+
370
+ SCORE_TEAM_RE = %r{
371
+ (?<score_team>
372
+ \(
373
+ (?<score_i> \d{1,2})
374
+ \)
375
+ [ ]* ## note - space optional- why? why not?
376
+ (?<score_ii> \d{1,2})
377
+ \b
378
+ )
379
+ }ix
380
+
381
+ # "penalty"-style (4) is assumed penalty score
382
+ # note - for now 1 (4) is REQUIRED
383
+
384
+ SCORE_TEAM_PEN_RE = %r{
385
+ (?<score_team_pen>
386
+ \b
387
+ (?<score_i> \d{1,2})
388
+ \b
389
+ [ ]* ## note - space optional- why? why not?
390
+ \(
391
+ (?<score_pen> \d{1,2})
392
+ \)
393
+ )
394
+ }ix
395
+
396
+ ########
397
+ ## note - score_team_num (<100) e.g. 0, 1, .., 10, 11, .. 99
398
+ ## use a different name - why? why not?
399
+ ## note - must be surrounded by space
400
+ SCORE_TEAM_NUM_RE = %r{
401
+ ## positive lookbehind
402
+ (?<= [ ])
403
+
404
+ (?<score_team_num> \d{1,2} )
405
+
406
+ ## positive lookahead
407
+ (?= [ ]|\z)
408
+ }x
409
+
410
+
411
+
412
+ def self._build_score_team( m )
413
+ score = {}
414
+ ## note - score team is "generic"
415
+ ## might be full-time (ft) or
416
+ ## after extra-time (aet) or such
417
+ ## or even undecided/unknown
418
+ ## thus, use score_i/score_ii
419
+ score[:score] = [m[:score_i].to_i(10),
420
+ m[:score_ii].to_i(10)]
421
+ score
422
+ end
423
+ def _build_score_team( m ) self.class._build_score_team( m ); end
424
+
425
+
426
+ def self._build_score_team_pen( m )
427
+ score = {}
428
+ score[:score] = [m[:score_i].to_i(10),
429
+ m[:score_pen].to_i(10)]
430
+ score
431
+ end
432
+ def _build_score_team_pen( m ) self.class._build_score_team_pen( m ); end
433
+
434
+
435
+ def self._build_score_team_num( m )
436
+ score = {}
437
+ score[:score] = m[:score_team_num].to_i(10)
438
+ score
439
+ end
440
+ def _build_score_team_num( m ) self.class._build_score_team_num( m ); end
441
+
166
442
 
167
443
  end # class Lexer
168
444
  end # module SportDb