sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -0,0 +1,435 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+ ###
5
+ # alternate (more literate) score style
6
+ # found in books & magazines
7
+ # (with no half-time score)
8
+ #
9
+ # 4-4 (aet, 6-6 on aggregate, win 3-5 on pens)
10
+ # -or-
11
+ # 4-4 (aet, 6-6 on aggregate, 3-5 on pens)
12
+ # -or-
13
+ # 4-4 (aet, 6-6agg, 3-5p)
14
+ #
15
+ # 2-2 (3-3 on aggregate, win on away goals)
16
+ # -or-
17
+ # 2-2 (3-3 agg, away goals)
18
+ # etc.
19
+
20
+
21
+ ## regex score helpers
22
+ ## note - MUST double-escape backslashes (e.g. \\d instead of \d) in interpolating %Q<> strings (unlike "simple" single-quoted '' strings)
23
+
24
+ def self._mk_score_fuller_agg( win: ) ## with optional win - true|false
25
+ %Q<
26
+ (?:
27
+ ############
28
+ ## opt 1) with win
29
+ (?:
30
+ #{ win ? '(?: win [ ] )?' : '' }
31
+ (?<agg1>\\d{1,2}) - (?<agg2>\\d{1,2})
32
+ [ ] on [ ] agg (?: regate )?
33
+ )
34
+ |
35
+ #####
36
+ ## opt 2) "classic" (post)
37
+ (?:
38
+ (?<agg1>\\d{1,2}) - (?<agg2>\\d{1,2})
39
+ [ ]*
40
+ #{AGG_EN}
41
+ )
42
+ |
43
+ #####
44
+ ## opt 3) agg up-front (pre)
45
+ (?:
46
+ agg [ ]
47
+ (?<agg1>\\d{1,2}) - (?<agg2>\\d{1,2})
48
+ )
49
+ )
50
+ >
51
+ end
52
+
53
+ def self._mk_score_fuller_p( win: ) ## with optional win - true|false
54
+ %Q<
55
+ (?:
56
+ ############
57
+ ## opt 1) with win
58
+ (?:
59
+ #{ win ? '(?: win [ ] )?' : '' }
60
+ (?<p1>\\d{1,2}) - (?<p2>\\d{1,2})
61
+ [ ] on [ ] pens
62
+ )
63
+ |
64
+ #####
65
+ ## opt 2) "classic" (post)
66
+ (?:
67
+ (?<p1>\\d{1,2}) - (?<p2>\\d{1,2})
68
+ [ ]*
69
+ #{P_EN}
70
+ )
71
+ |
72
+ #####
73
+ ## opt 3) up-front (pre)
74
+ (?:
75
+ (?: pen|p) [ ]
76
+ (?<p1>\\d{1,2}) - (?<p2>\\d{1,2})
77
+ )
78
+ )
79
+ >
80
+ end
81
+
82
+
83
+ SCORE_FULLER_AGG = _mk_score_fuller_agg( win: false )
84
+ SCORE_FULLER_AGG_WIN = _mk_score_fuller_agg( win: true )
85
+
86
+ SCORE_FULLER_P = _mk_score_fuller_p( win: false )
87
+ SCORE_FULLER_P_WIN = _mk_score_fuller_p( win: true )
88
+
89
+ SCORE_FULLER_AWAY_WIN = %Q<
90
+ (?:
91
+ (?<away>
92
+ ############
93
+ ## opt 1) with win
94
+ (?:
95
+ (?: win [ ] )?
96
+ (?: (?<away1>\\d{1,2}) - (?<away2>\\d{1,2}) [ ] )?
97
+ on [ ] away [ ] goals? # goal or goals
98
+ )
99
+ |
100
+ #####
101
+ ## opt 2) "classic" (post)
102
+ (?:
103
+ (?: (?<away1>\\d{1,2}) - (?<away2>\\d{1,2}) [ ] )?
104
+ [ ]* away
105
+ )
106
+ |
107
+ #####
108
+ ## opt 3) up-front (pre)
109
+ (?:
110
+ away
111
+ (?: [ ]
112
+ (?<away1>\\d{1,2}) - (?<away2>\\d{1,2})
113
+ )?
114
+ )
115
+ ))
116
+ >
117
+
118
+
119
+
120
+ SCORE_FULLER_HT_OPT = %Q<
121
+ (?: HT [ ]
122
+ (?: (?<ht1>\\d{1,2}) - (?<ht2>\\d{1,2}))
123
+ [ ]*,[ ]*
124
+ )? ## note - make optional
125
+ >
126
+
127
+ SCORE_FULLER_FT_OPT = %Q<
128
+ (?: FT [ ]
129
+ (?: (?<ft1>\\d{1,2}) - (?<ft2>\\d{1,2}))
130
+ [ ]*,[ ]*
131
+ )? ## note - make optional
132
+ >
133
+
134
+
135
+ #############
136
+ # 4-4 (HT 2-1)
137
+ # or
138
+ # Team A 4-1 Team B (HT 2-1)
139
+
140
+ SCORE_FULLER__HT = %Q<
141
+ \\( HT [ ]
142
+ (?<ht1>\\d{1,2}) - (?<ht2>\\d{1,2})
143
+ \\)
144
+ >
145
+
146
+ SCORE_FULLER__HT_FT__RE = %r{
147
+ (?<score_fuller>
148
+ \b
149
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
150
+ [ ]+
151
+ #{SCORE_FULLER__HT}
152
+ )}ix
153
+
154
+ SCORE_FULLER_MORE__HT_FT__RE = %r{
155
+ (?<score_fuller_more>
156
+ #{SCORE_FULLER__HT}
157
+ )}ix
158
+
159
+
160
+
161
+
162
+ ############
163
+ # 4-4 (aet)
164
+ # 4-4 (a.e.t.)
165
+ # or
166
+ # add golden goal/sudden death and silver goal e.g.
167
+ # 5-4 (aet/gg) - note: adds golden (goal) flag
168
+ # 5-4 (a.e.t./s.g.) - note: adds silver (goal) flag
169
+ #
170
+ # Team A 4-4 Team B (aet)
171
+ # Team A 4-4 Team B (a.e.t.)
172
+ #
173
+ # or
174
+ # 4-4 (FT 3-2, AET)
175
+ # 4-4 (HT 2-1, FT 3-2, AET)
176
+
177
+
178
+ SCORE_FULLER__ET = %Q<
179
+ \\(
180
+ #{SCORE_FULLER_HT_OPT}
181
+ #{SCORE_FULLER_FT_OPT}
182
+ (?:
183
+ (?<aetgg> #{AETGG_EN})
184
+ |
185
+ (?<aetsg> #{AETSG_EN})
186
+ |
187
+ (?<aet> #{ET_EN})
188
+ )
189
+ \\)
190
+ >
191
+
192
+ SCORE_FULLER__ET__RE = %r{
193
+ (?<score_fuller>
194
+ \b
195
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
196
+ [ ]+
197
+ #{SCORE_FULLER__ET}
198
+ )}ix
199
+
200
+ SCORE_FULLER_MORE__ET__RE = %r{
201
+ (?<score_fuller_more>
202
+ #{SCORE_FULLER__ET}
203
+ )}ix
204
+
205
+
206
+ #############
207
+ # 4-4 (aet, win 3-5 on pens)
208
+ # 4-4 (aet, 3-5 on pens)
209
+ # 4-4 (aet, 3-5 pen)
210
+ # 4-4 (a.e.t., 3-5 pen.)
211
+ # or
212
+ # Team A 4-4 Team B (aet, win 3-5 on pens)
213
+ # Team A 4-4 Team B (aet, 3-5 on pens)
214
+ # Team A 4-4 Team B (aet, 3-5 pen)
215
+ # Team A 4-4 Team B (a.e.t., 3-5 pen.)
216
+
217
+ SCORE_FULLER__ET_P = %Q<
218
+ \\(
219
+ #{SCORE_FULLER_HT_OPT}
220
+ #{SCORE_FULLER_FT_OPT}
221
+ (?<aet> #{ET_EN})
222
+ [ ]*,[ ]*
223
+ #{SCORE_FULLER_P_WIN}
224
+ \\)
225
+ >
226
+
227
+ SCORE_FULLER__ET_P__RE = %r{
228
+ (?<score_fuller>
229
+ \b
230
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
231
+ [ ]+
232
+ #{SCORE_FULLER__ET_P}
233
+ )}ix
234
+
235
+ SCORE_FULLER_MORE__ET_P__RE = %r{
236
+ (?<score_fuller_more>
237
+ #{SCORE_FULLER__ET_P}
238
+ )}ix
239
+
240
+
241
+ ##########################
242
+ # 4-4 (win 3-5 on pens)
243
+ # 4-4 (3-5 pen)
244
+ # 4-4 (3-5p)
245
+ # or
246
+ # Team A 4-4 Team B (win 3-5 on pens)
247
+ # Team A 4-4 Team B (3-5 pen)
248
+ # Team A 4-4 Team B (3-5p)
249
+
250
+ SCORE_FULLER__FT_P = %Q<
251
+ \\(
252
+ #{SCORE_FULLER_HT_OPT}
253
+ #{SCORE_FULLER_P_WIN}
254
+ \\)
255
+ >
256
+
257
+ SCORE_FULLER__FT_P__RE = %r{
257
+ (?<score_fuller>
258
+ \b
259
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
260
+ [ ]+
261
+ ## note - reuse the shared FT_P pattern (incl. optional HT score prefix)
262
+ ## to stay in sync with SCORE_FULLER_MORE__FT_P__RE
263
+ #{SCORE_FULLER__FT_P}
264
+ )}ix
266
+
267
+ SCORE_FULLER_MORE__FT_P__RE = %r{
268
+ (?<score_fuller_more>
269
+ #{SCORE_FULLER__FT_P}
270
+ )}ix
271
+
272
+
273
+ #####################
274
+ # 3-2 (win 4-5 on aggregate)
275
+ # 3-2 (4-5 on aggregate)
276
+ # 3-2 (4-5 on agg)
277
+ # 3-2 (4-5 agg)
278
+ # 3-2 (4-5 agg.)
279
+ # or
280
+ # 3-2 (agg 4-5)
281
+
282
+ SCORE_FULLER__FT_AGG = %Q<
283
+ \\(
284
+ #{SCORE_FULLER_HT_OPT}
285
+ #{SCORE_FULLER_AGG_WIN}
286
+ \\)
287
+ >
288
+
289
+ SCORE_FULLER__FT_AGG__RE = %r{
290
+ (?<score_fuller>
291
+ \b
292
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
293
+ [ ]+
294
+ #{SCORE_FULLER__FT_AGG}
295
+ )}ix
296
+
297
+ SCORE_FULLER_MORE__FT_AGG__RE = %r{
298
+ (?<score_fuller_more>
299
+ #{SCORE_FULLER__FT_AGG}
300
+ )}ix
301
+
302
+ ####
303
+ # ft + agg + away
304
+ # 2-1 (3-3 on aggregate, win on away goals)
305
+ # 2-1 (3-3 on aggregate, win 2-1 on away goals)
306
+
307
+ SCORE_FULLER__FT_AGG_AWAY = %Q<
308
+ \\(
309
+ #{SCORE_FULLER_HT_OPT}
310
+ #{SCORE_FULLER_AGG}
311
+ [ ]*,[ ]*
312
+ #{SCORE_FULLER_AWAY_WIN}
313
+ \\)
314
+ >
315
+
316
+ SCORE_FULLER__FT_AGG_AWAY__RE = %r{
317
+ (?<score_fuller>
318
+ \b
319
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
320
+ [ ]+
321
+ #{SCORE_FULLER__FT_AGG_AWAY}
322
+ )}ix
323
+
324
+ SCORE_FULLER_MORE__FT_AGG_AWAY__RE = %r{
325
+ (?<score_fuller_more>
326
+ #{SCORE_FULLER__FT_AGG_AWAY}
327
+ )}ix
328
+
329
+
330
+ #####################
331
+ # 2-1 (aet, 3-3 on aggregate, win 5-2 on pens)
332
+ # 2-1 (aet, 3-3 agg, 5-2 pen.)
333
+
334
+ SCORE_FULLER__ET_AGG_P = %Q<
335
+ \\(
336
+ #{SCORE_FULLER_HT_OPT}
337
+ #{SCORE_FULLER_FT_OPT}
338
+ (?<aet> #{ET_EN})
339
+ [ ]*,[ ]*
340
+ #{SCORE_FULLER_AGG}
341
+ [ ]*,[ ]*
342
+ #{SCORE_FULLER_P_WIN}
343
+ \\)
344
+ >
345
+
346
+ SCORE_FULLER__ET_AGG_P__RE = %r{
347
+ (?<score_fuller>
348
+ \b
349
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
350
+ [ ]+
351
+ #{SCORE_FULLER__ET_AGG_P}
352
+ )}ix
353
+
354
+ SCORE_FULLER_MORE__ET_AGG_P__RE = %r{
355
+ (?<score_fuller_more>
356
+ #{SCORE_FULLER__ET_AGG_P}
357
+ )}ix
358
+
359
+
360
+ #############################################
361
+ # map table - union of all "fuller" score formats (score plus bracketed extras)
362
+ # note: order matters - first come-first matched/served
363
+
364
+ SCORE_FULLER_RE = Regexp.union(
365
+ SCORE_FULLER__HT_FT__RE, ## e.g. 3-2 (HT 2-1)
366
+ SCORE_FULLER__ET_P__RE, ## e.g. 2-2 (aet, win 5-3 on pens)
367
+ SCORE_FULLER__ET__RE, ## e.g. 2-3 (aet)
368
+ SCORE_FULLER__FT_P__RE, ## e.g. 2-2 (win 5-3 on pens)
369
+ SCORE_FULLER__FT_AGG__RE, ## e.g. 2-3 (win 5-4 on aggregate)
370
+ SCORE_FULLER__FT_AGG_AWAY__RE, ## e.g. 2-1 (3-3 on aggregate, win 2-1 on away goals)
371
+ SCORE_FULLER__ET_AGG_P__RE, ## e.g. 2-1 (aet, 3-3 on aggregate, win 5-2 on pens)
372
+ )
373
+
374
+
375
+
376
+ ##
377
+ ## add support for "stand-alone" (HT) and (FT) - keep why? why not?
378
+ SCORE_FULLER_MORE__HT__RE = %r{
379
+ (?<score_fuller_more>
380
+ \( (?<ht> ht ) \)
381
+ )}ix
382
+
383
+ SCORE_FULLER_MORE__FT__RE = %r{
384
+ (?<score_fuller_more>
385
+ \( (?<ft> ft ) \)
386
+ )}ix
387
+
388
+
389
+ ###
390
+ # add special for fuller_more
391
+ # (aet 4-3) - core score is ft, and fuller more incl. et!!!
392
+
393
+ SCORE_FULLER_MORE__FT_ET__RE = %r{
394
+ (?<score_fuller_more>
395
+ \(#{ET_EN}
396
+ [ ]
397
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
398
+ \)
399
+ )}ix
400
+
401
+
402
+
403
+ ##
404
+ # note - simply (1-1) !!!!!
405
+ ## note - special attention needed for placement in processing order!!!
406
+ ## make sure it is the last (or one of the last) match(es)
407
+
408
+ SCORE_FULLER_MORE__HT_FT__CLASSIC_RE = %r{
409
+ (?<score_fuller_more>
410
+ \(
411
+ (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
412
+ \)
413
+ )}ix
414
+
415
+
416
+ SCORE_FULLER_MORE_RE = Regexp.union(
417
+ SCORE_FULLER_MORE__FT__RE, ## e.g. (ft)
418
+ SCORE_FULLER_MORE__HT__RE, ## e.g. (ht)
419
+ SCORE_FULLER_MORE__HT_FT__RE, ## e.g. (HT 2-1)
420
+ SCORE_FULLER_MORE__ET_P__RE, ## e.g. (aet, win 5-3 on pens)
421
+ SCORE_FULLER_MORE__ET__RE, ## e.g. (aet)
422
+ SCORE_FULLER_MORE__FT_ET__RE, ## e.g. (aet 3-2) - (SPECIAL) incl. after extra-time score!!
423
+ SCORE_FULLER_MORE__FT_P__RE, ## e.g. (win 5-3 on pens)
424
+ SCORE_FULLER_MORE__FT_AGG__RE, ## e.g. (win 5-4 on aggregate)
425
+ SCORE_FULLER_MORE__FT_AGG_AWAY__RE, ## e.g. (3-3 on aggregate, win 2-1 on away goals)
426
+ SCORE_FULLER_MORE__ET_AGG_P__RE, ## e.g. (aet, 3-3 on aggregate, win 5-2 on pens)
427
+
428
+ SCORE_FULLER_MORE__HT_FT__CLASSIC_RE, ## e.g. (2-1) half-time !!!! - keep last (matches any bare bracketed score)
429
+ )
430
+
431
+
432
+
433
+ end # class Lexer
434
+ end # module SportDb
435
+
@@ -0,0 +1,59 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ##
6
+ ## note - for now only two legs (1st,2nd) supported
7
+ ## maybe more in the future (if there is a real-world sample/use)
8
+
9
+ ##
10
+ ## win on away goals
11
+ ## aet
12
+ ##
13
+
14
+ SCORE_LEGS_RE = %r{
15
+ (?<score_legs>
16
+ \b
17
+ (?<leg1_ft1>\d{1,2}) - (?<leg1_ft2>\d{1,2})
18
+ (?: [ ]+ | [ ]*,[ ]*) # separate by spaces OR comma
19
+ (?:
20
+ ## opt 1 - after extra-time (et) score
21
+ (?<leg2_et1>\d{1,2}) - (?<leg2_et2>\d{1,2})
22
+ [ ]? #{ET_EN} ## a.e.t./aet
23
+ ### note - might end in dot (.) not alpha
24
+ ### thus, wordboundary NOT working
25
+ #{SCORE_LOOKAHEAD}
26
+ |
27
+ ## opt 2 - full-time (ft)
28
+ (?<leg2_ft1>\d{1,2}) - (?<leg2_ft2>\d{1,2})
29
+ \b
30
+ )
31
+ (?: ## check optional aggregate e.g. (agg 4-4)
32
+ [ ]+
33
+ \(
34
+ agg [ ]
35
+ (?<agg1>\d{1,2}) - (?<agg2>\d{1,2})
36
+
37
+ ### add win options
38
+ (?:
39
+ ## opt 1 - on away goals
40
+ (?<away> [ ]*,[ ]*
41
+ (?:win [ ])? on [ ] away [ ] goals?
42
+ )
43
+ |
44
+ ## opt 2 - on penalties
45
+ (?:
46
+ [ ]*,[ ]*
47
+ (?:win [ ])?
48
+ (?<leg2_p1>\d{1,2}) - (?<leg2_p2>\d{1,2})
49
+ [ ] on [ ] pens
50
+ )
51
+ )?
52
+ \)
53
+ )?
54
+ )}ix
55
+
56
+
57
+
58
+ end # class Lexer
59
+ end # module SportDb