sportdb-parser 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +17 -4
  4. data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
  5. data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
  6. data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
  7. data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
  8. data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
  9. data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
  10. data/lib/sportdb/parser/lexer-on_top.rb +125 -0
  11. data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
  12. data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
  13. data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
  14. data/lib/sportdb/parser/lexer.rb +133 -1363
  15. data/lib/sportdb/parser/lexer_buffer.rb +8 -37
  16. data/lib/sportdb/parser/lexer_token.rb +126 -0
  17. data/lib/sportdb/parser/parser.rb +1104 -1403
  18. data/lib/sportdb/parser/racc_parser.rb +36 -32
  19. data/lib/sportdb/parser/racc_tree.rb +65 -98
  20. data/lib/sportdb/parser/token-date--helpers.rb +130 -0
  21. data/lib/sportdb/parser/token-date--names.rb +108 -0
  22. data/lib/sportdb/parser/token-date.rb +20 -192
  23. data/lib/sportdb/parser/token-date_duration.rb +8 -27
  24. data/lib/sportdb/parser/token-geo.rb +16 -16
  25. data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
  26. data/lib/sportdb/parser/token-goals.rb +103 -249
  27. data/lib/sportdb/parser/token-group.rb +8 -22
  28. data/lib/sportdb/parser/token-prop.rb +138 -124
  29. data/lib/sportdb/parser/token-prop_name.rb +48 -39
  30. data/lib/sportdb/parser/token-round.rb +21 -35
  31. data/lib/sportdb/parser/token-score--helpers.rb +189 -0
  32. data/lib/sportdb/parser/token-score.rb +9 -393
  33. data/lib/sportdb/parser/token-score_full.rb +331 -0
  34. data/lib/sportdb/parser/token-status.rb +44 -46
  35. data/lib/sportdb/parser/token-status_inline.rb +112 -0
  36. data/lib/sportdb/parser/token-text.rb +41 -31
  37. data/lib/sportdb/parser/token-time.rb +29 -26
  38. data/lib/sportdb/parser/token.rb +58 -159
  39. data/lib/sportdb/parser/version.rb +1 -1
  40. data/lib/sportdb/parser.rb +45 -17
  41. metadata +19 -6
  42. data/lib/sportdb/parser/blocktxt.rb +0 -99
  43. data/lib/sportdb/parser/lexer_tty.rb +0 -111
  44. data/lib/sportdb/parser/token-table.rb +0 -149
  45. data/lib/sportdb/parser/token_helpers.rb +0 -92
@@ -3,32 +3,13 @@ class Lexer
3
3
 
4
4
 
5
5
  ######################################################
6
- ## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
6
+ ## goal mode
7
+ ## note - must be enclosed in ()!!!
7
8
  ##
8
- ## note - must be enclosed in ()!!!
9
9
  ## todo - add () in basics - why? why not?
10
10
 
11
11
 
12
12
 
13
- ##
14
- ## todo/fix - split up BASICS!!!
15
- ## break out SPACES_RE for general reuse!!!
16
- ## makes it easier to use "custom" symbols (<sym>)
17
-
18
-
19
- GOAL_BASICS_RE = %r{
20
- (?<spaces> [ ]{2,}) |
21
- (?<space> [ ])
22
- |
23
- (?<sym>
24
- [;,)] ## add (-) dash too - why? why not?
25
- )
26
- }ix
27
-
28
-
29
-
30
-
31
-
32
13
 
33
14
  ## note - assume lines starting with opening ( are goal lines!!!!
34
15
  ## note - use \A (instead of ^) - \A strictly matches the start of the string.
@@ -39,37 +20,25 @@ GOAL_BASICS_RE = %r{
39
20
  ## todo/fix -- exclude (a), (h), (n) - TEAM_AWAY, TEAM_HOME, TEAM_NEUTRAL tokens!!
40
21
 
41
22
  START_GOAL_LINE_RE = %r{
42
- \A
43
- [ ]* ## ignore leading spaces (if any)
23
+ \A
24
+ [ ]* ## ignore leading spaces (if any)
44
25
  \(
45
26
 
46
27
  # check NEGATIVE lookahead
47
- (?!
28
+ (?!
48
29
  ## exclude (a), (h), (n)
49
30
  ## TEAM_AWAY, TEAM_HOME, TEAM_NEUTRAL
50
- (?: a|h|n )
51
- \)
31
+ (?: a|h|n )
32
+ \)
52
33
  )
53
34
 
54
35
  }xi
55
36
 
56
- =begin
57
- # check NEGATIVE lookahead
58
- (?!
59
- ## exclude ord
60
- (?: \d+ \))
61
- |
62
- ## exclude score - goal_line_alt!!!
63
- (?: [ ]* \b
64
- \d-\d ## score e.g. 1-0
65
- \b )
66
- )
67
- =end
68
37
 
69
38
 
70
39
  #############
71
40
  ## check for goal compat(ility) "legacy" line
72
- ## e.g.
41
+ ## e.g.
73
42
  ## (6' Puskás 0-1, 9' Czibor 0-2, 11' Morlock 1-2, 18' Rahn 2-2,
74
43
  ## 84' Rahn 3-2)
75
44
  ## (6 Puskás 0-1, 9 Czibor 0-2, 11 Morlock 1-2, 18 Rahn 2-2,
@@ -78,47 +47,52 @@ START_GOAL_LINE_RE = %r{
78
47
 
79
48
  START_GOAL_LINE_COMPAT_RE = %r{
80
49
  \A
81
- [ ]* ## ignore leading spaces (if any)
82
- \(
83
-
50
+ [ ]* ## ignore leading spaces (if any)
51
+ \(
52
+
84
53
  ## (i) check NEGATIVE lookahead
85
- ## exclude score e.g. 1-1 etc.
54
+ ## exclude score e.g. 1-1 etc.
86
55
  (?! [ ]* \b \d-\d \b)
87
56
 
88
- ## (ii) check POSITIVE lookahead
57
+ ## (ii) check POSITIVE lookahead
89
58
  (?= [ ]*
90
59
  \d{1,3}
91
60
  '? ## optional minute marker
92
61
  (?: \+
93
- \d{1,2}
62
+ \d{1,2}
94
63
  '? ## optional minute marker
95
- )?
96
- )
64
+ )?
65
+ )
97
66
  }xi
98
67
 
99
68
 
100
69
 
101
70
  ###
102
71
  ## check for goal line (alternate syntax)
103
- ## (1-0 Player, 1-1 Player, ...)
104
- # must start-off OR yes, include score
105
- ##
106
- ## note - allow "centered" style e.g.
107
- ## ( Player 44' (p) 1-0
72
+ ## (1-0 Player, 1-1 Player, ...)
73
+ # must start-off OR yes, include score
74
+ ##
75
+ ## note - allow "centered" style e.g.
76
+ ## ( Player 44' (p) 1-0
108
77
  ## 1-1 Player 64' )
109
78
  START_GOAL_LINE_ALT_RE = %r{
110
79
  \A
111
- [ ]* ## ignore leading spaces (if any)
112
- \(
113
-
114
- # check POSITIVE lookahead
115
- (?= .*? ## note - non-greedy
116
- \b \d-\d \b ## score e.g. 0-1
117
- )
80
+ [ ]* ## ignore leading spaces (if any)
81
+ \(
82
+
83
+ # check POSITIVE lookahead
84
+ (?= .*? ## note - non-greedy
85
+ \b \d-\d \b ## score e.g. 0-1
86
+ )
118
87
  }xi
119
88
 
120
89
 
121
90
 
91
+
92
+
93
+
94
+
95
+
122
96
  ###
123
97
  ## e.g. (-; Metzger)
124
98
  GOAL_NONE_RE = %r{ (?<goals_none>
@@ -132,10 +106,10 @@ GOAL_NONE_RE = %r{ (?<goals_none>
132
106
  # e.g. (Metzger - Krämer (2), Cichy, Rahn)
133
107
  # (Brunnenmeier 17 - Gerwien 74)
134
108
  # (Brunnenmeier - Gerwien)
135
- # that is, NOT allowed
109
+ # that is, NOT allowed
136
110
  # e.g. (Metzger 83-Krämer 29, 88, Cichy 33, Rahn 37)
137
111
  # (Brunnenmeier 17-Gerwien 74)
138
- # (Brunnenmeier-Gerwien)
112
+ # (Brunnenmeier-Gerwien)
139
113
  #
140
114
  # note - allow split by - e.g.
141
115
  # Frankfurt 4-2 Schalke (Kreß 45, Solz 55, Trimhold 58, Huberts 73 p -
@@ -151,11 +125,11 @@ GOAL_SEP_ALT_RE = %r{
151
125
 
152
126
 
153
127
  ## e.g. (2)
154
- ## (2/p), (2/pen.), (3/2p), (3/ 2 pen.)
128
+ ## (2/p), (2/pen.), (3/2p), (3/ 2 pen.)
155
129
  ## -or- (2,1pen), (3, 2 pens)
156
- ##
157
- ## (p), (pen.) (2 pen.), (2p)
158
- ## (og), (o.g.),
130
+ ##
131
+ ## (p), (pen.) (2 pen.), (2p)
132
+ ## (og), (o.g.),
159
133
  ## (2og), (2 o.g.), (2ogs)
160
134
  #
161
135
  ##
@@ -173,8 +147,8 @@ GOAL_COUNT_RE = %r{
173
147
  ## opt own goals (og)
174
148
  (?<og>
175
149
  (?: (?<og_value> \d{1,2}) [ ]? )?
176
- (?:ogs?|o\.g\.|o)
177
- )
150
+ (?:ogs?|o\.g\.|o)
151
+ )
178
152
  |
179
153
  ## opt fallback - classic count/number
180
154
  (?: (?<value> [1-9])
@@ -185,50 +159,19 @@ GOAL_COUNT_RE = %r{
185
159
  (?:pens|pen\.?|p)
186
160
  )?
187
161
  )
188
- )
162
+ )
189
163
  \)
190
164
  )}ix
191
165
 
192
166
 
193
167
 
194
168
 
195
-
196
-
197
- ## minute variant for N/A not/available
198
- ## todo/check - find a better syntax - why? why not?
199
- ##
200
- ## note "??".to_i(10) returns 0 or
201
- ## "__".to_i(10) returns 0
202
- ## quick hack - assume 0 for n/a for now
203
-
204
- MINUTE_NA_RE = %r{
205
- (?<minute>
206
- (?<=[ (]) # positive lookbehind for space or opening
207
- (?<value> \?{2} | _{2} )
208
- ' ## must have minute marker!!!!
209
- )
210
- }ix
211
-
212
- =begin
213
- MINUTE_RE = %r{
214
- (?<minute>
215
- (?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
216
- # todo - add more lookbehinds e.g. ,) etc. - why? why not?
217
- (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
218
- (?: \+
219
- (?<value2>\d{1,3})
220
- )?
221
- ' ## must have minute marker!!!!
222
- )
223
- }ix
224
- =end
225
-
226
-
227
169
  ##
228
170
  ## note - inline \b check in MINUTE_RE excludes
229
- ## 85pen or 90+4pen or 38p (possible and NOT excluded in GOAL_MINUTE_RE !!!)
171
+ ## 85pen or 90+4pen or 38p
172
+ ## (possible and NOT excluded in GOAL_MINUTE_RE !!!)
230
173
  ##
231
- ## minute with optional stoppage
174
+ ## minute with optional stoppage (offset)
232
175
 
233
176
  MINUTE_RE = %r{
234
177
  (?<minute>
@@ -236,19 +179,59 @@ MINUTE_RE = %r{
236
179
  (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
237
180
  \b
238
181
  '? ## optional minute marker
239
-
240
- (?: \+ (?<value2>\d{1,2})
241
- \b
182
+
183
+ (?: \+ (?<value2>\d{1,2})
184
+ \b
242
185
  '? ## optional minute marker
243
186
  )?
244
-
245
187
  )
246
188
  }ix
247
189
 
190
+
191
+
192
+ ##
193
+ ## keep separate? or add simply inside GOAL_MINUTE_RE - why? why not?
194
+ ## fix-fix-fix - move into GOAL_MINUTE_RE !!!
195
+
196
+ GOAL_MINUTE_NA_RE = %r{
197
+ (?<goal_minute_na>
198
+
199
+ # positive lookbehind
200
+ (?<=[ ,;])
201
+
202
+ (?<value> \?{1,2})
203
+ '? ## optional minute marker
204
+ ## note - add goal minute qualifiers here inline!!!
205
+ (?:
206
+ (?: [ ]? (?<og> (?: \((?:og|o\.g\.|o)\)) ## allow (og)
207
+ |
208
+ (?: (?:og|o\.g\.|o)) ## allow plain og
209
+ )
210
+ )
211
+ |
212
+ (?: [ ]? (?<pen> (?: \((?:pen\.?|p)\)) ## allow ()
213
+ |
214
+ (?: (?:pen\.?|p))
215
+ )
216
+ )
217
+ |
218
+ ## add experimental header qualifier
219
+ (?: [ ]? (?<hdr> \( (?:hdr\.?|h ) \) | (?: hdr\.?|h ) ))
220
+ |
221
+ ## add experimental free kick qualifier
222
+ (?: [ ]? (?<fk> \( (?:fk\.?|f ) \) | (?: fk\.?|f) ))
223
+ )?
224
+
225
+ ## note - check positive lookahead
226
+ (?=[ ,;)]|$)
227
+ )
228
+ }ix
229
+
230
+
248
231
  ## goal types
249
232
  # (pen.) or (pen) or (p.) or (p)
250
233
  ## (o.g.) or (og)
251
- ## todo/check - keep case-insensitive
234
+ ## todo/check - keep case-insensitive
252
235
  ## or allow OG or P or PEN or
253
236
  ## only lower case - why? why not?
254
237
  ##
@@ -260,12 +243,12 @@ GOAL_MINUTE_RE = %r{
260
243
  \b
261
244
  (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
262
245
  '? ## optional minute marker
263
-
246
+
264
247
  (?: \+ (?<value2>\d{1,2})
265
248
  '? ## optional minute marker
266
- )?
267
-
268
- ## note - add goal minute qualifiers here inline!!!
249
+ )?
250
+
251
+ ## note - add goal minute qualifiers here inline!!!
269
252
  (?:
270
253
  (?: [ ]? (?<og> (?: \((?:og|o\.g\.|o)\)) ## allow (og)
271
254
  |
@@ -276,7 +259,7 @@ GOAL_MINUTE_RE = %r{
276
259
  (?: [ ]? (?<pen> (?: \((?:pen\.?|p)\)) ## allow ()
277
260
  |
278
261
  (?: (?:pen\.?|p))
279
- )
262
+ )
280
263
  )
281
264
  |
282
265
  ## add experimental header qualifier
@@ -287,45 +270,33 @@ GOAL_MINUTE_RE = %r{
287
270
  )?
288
271
 
289
272
  ## add experimental seconds
290
- ## e.g. (95 secs) or (95sec) etc.
273
+ ## e.g. (95 secs) or (95sec) etc.
291
274
  (?: [ ]* \(
292
275
  (?<secs>\d{1,3})
293
276
  [ ]?secs?
294
- \)
277
+ \)
295
278
  )?
296
279
  )
297
280
 
298
- ## note - check positive lookahead
299
- (?=[ ,;)]|$)
281
+ ## note - check positive lookahead
282
+ (?=[ ,;)]|$)
300
283
  }ix
301
284
 
302
285
 
303
286
 
304
287
 
305
288
 
306
-
307
- GOAL_RE = Regexp.union(
308
- GOAL_BASICS_RE,
309
- GOAL_NONE_RE,
310
- GOAL_MINUTE_RE,
311
- GOAL_COUNT_RE,
312
- ## MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
313
- ## GOAL_OG_RE, GOAL_PEN_RE,
314
- ## SCORE_RE, ## add back in v2 (level 3) or such!!
315
- PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
316
- GOAL_SEP_ALT_RE,
317
- ## todo/fix - add ANY_RE !!!!
318
- )
319
-
289
+ ###
290
+ ## more regex for goal alt
320
291
 
321
292
 
322
293
  GOAL_TYPE_RE = %r{
323
294
  (?<goal_type>
324
295
  \(
325
296
  (?:
326
- (?<og> og|o\.g\.|o )
297
+ (?<og> og|o\.g\.|o )
327
298
  |
328
- (?<pen> pen\.?|p )
299
+ (?<pen> pen\.?|p )
329
300
  |
330
301
  ## add experimental header qualifier
331
302
  (?<hdr> hdr\.?|h )
@@ -338,123 +309,6 @@ GOAL_TYPE_RE = %r{
338
309
 
339
310
 
340
311
 
341
- GOAL_ALT_RE = Regexp.union(
342
- GOAL_BASICS_RE,
343
- SCORE_RE, ## e.g. 1-0, 0-1, etc.
344
- GOAL_MINUTE_RE,
345
- GOAL_TYPE_RE,
346
- PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
347
- ## todo/fix - add ANY_RE !!!!
348
- )
349
-
350
- GOAL_COMPAT_RE = Regexp.union(
351
- GOAL_BASICS_RE,
352
- SCORE_RE, ## e.g. 1-0, 0-1, etc.
353
- MINUTE_RE, ## note - matches minute e.g. 92, 7, 7' 7+3, 46+, etc.
354
- GOAL_TYPE_RE,
355
- PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
356
- ## todo/fix - add ANY_RE !!!!
357
- )
358
-
359
- =begin
360
- ## note - leave out n/a minute in goals - make minutes optional!!!
361
- PROP_GOAL_RE = Regexp.union(
362
- GOAL_BASICS_RE,
363
- MINUTE_RE,
364
- ## MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
365
- GOAL_OG_RE, GOAL_PEN_RE,
366
- SCORE_RE,
367
- PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
368
- )
369
- =end
370
-
371
-
372
-
373
-
374
- def self._parse_goal_minute( str )
375
- ## note - strip - leading/trailing spaces
376
- m = GOAL_MINUTE_RE.match( str.strip )
377
- if m && m.pre_match == '' && m.post_match == ''
378
- _build_goal_minute( m )
379
- elsif m
380
- ## note - match BUT not anchored to start and end-of-string!!!
381
- ## report, error somehow??
382
- nil
383
- else
384
- nil ## no match - return nil
385
- end
386
- end
387
-
388
-
389
- def self._build_goal_minute( m )
390
- minute = {}
391
-
392
- minute[:m] = m[:value].to_i(10) ## always required
393
-
394
- ## stoppage/injury time (offset)
395
- minute[:offset] = m[:value2].to_i(10) if m[:value2]
396
-
397
- minute[:og] = true if m[:og]
398
- minute[:pen] = true if m[:pen]
399
- minute[:freekick] = true if m[:fk]
400
- minute[:header] = true if m[:hdr]
401
-
402
- minute[:secs] = m[:secs].to_i(10) if m[:secs]
403
-
404
- minute
405
- end
406
- def _build_goal_minute( m ) self.class._build_goal_minute( m ); end
407
-
408
-
409
- def self._build_minute( m )
410
- minute = {}
411
- minute[:m] = m[:value].to_i(10) ## always required
412
-
413
- ## stoppage/injury time (offset)
414
- minute[:offset] = m[:value2].to_i(10) if m[:value2]
415
-
416
- minute
417
- end
418
- def _build_minute( m ) self.class._build_minute( m ); end
419
-
420
-
421
-
422
- def self._parse_goal_count( str )
423
- ## note - strip - leading/trailing spaces
424
- m = GOAL_COUNT_RE.match( str.strip )
425
- if m && m.pre_match == '' && m.post_match == ''
426
- _build_goal_count( m )
427
- elsif m
428
- ## note - match BUT not anchored to start and end-of-string!!!
429
- ## report, error somehow??
430
- nil
431
- else
432
- nil ## no match - return nil
433
- end
434
- end
435
-
436
- def self._build_goal_count( m )
437
- count = {}
438
- count[:count] = m[:value].to_i(10) if m[:value]
439
- count[:og] = m[:og_value] ? m[:og_value].to_i(10) : 1 if m[:og] ## check flag
440
- count[:pen] = m[:pen_value] ? m[:pen_value].to_i(10) : 1 if m[:pen] ## check flag
441
- count
442
- end
443
- def _build_goal_count( m ) self.class._build_goal_count( m ); end
444
-
445
-
446
-
447
-
448
- def self._build_goal_type( m )
449
- goal = {}
450
- goal[:og] = true if m[:og]
451
- goal[:pen] = true if m[:pen]
452
- goal[:freekick] = true if m[:fk]
453
- goal[:header] = true if m[:hdr]
454
- goal
455
- end
456
- def _build_goal_type( m ) self.class._build_goal_type( m ); end
457
-
458
312
 
459
313
  end # class Lexer
460
314
  end # module SportDb
@@ -8,36 +8,22 @@ class Lexer
8
8
  # Group 1 : ....
9
9
  # Group A2 | ....
10
10
  ## note - use \A (instead of ^) - \A strictly matches the start of the string.
11
- GROUP_DEF_LINE_RE = %r{ \A
11
+
12
+ START_WITH_GROUP_DEF_LINE_RE = %r{
13
+ \A
12
14
  [ ]* ## ignore leading spaces (if any)
13
15
  (?<group_def>
14
16
  Group
15
17
  [ ]
16
- [a-z0-9]+ ## todo/check - allow dot (.) too e.g. 1.A etc.- why? why not?
18
+ [a-z0-9]+ ## todo/check - allow dot (.) too e.g. 1.A etc.- why? why not?
17
19
  )
18
- ### possitive lookahead MUST be : OR |
20
+ ### positive lookahead MUST be : OR |
19
21
  (?= [ ]*
20
- [:|]
21
- [ ]) ## note: requires space for now after [:|] - keep - why? why not?
22
- }ix
23
-
24
- GROUP_DEF_BASICS_RE = %r{
25
- (?<spaces> [ ]{2,}) |
26
- (?<space> [ ])
27
- |
28
- (?<sym> [:|,] ) ### note - add comma (,) as optional separator
29
- }ix
30
-
31
-
32
- GROUP_DEF_RE = Regexp.union( GROUP_DEF_BASICS_RE,
33
- TEXT_RE,
34
- ANY_RE,
35
- )
36
-
37
-
22
+ [:|]
23
+ [ ]) ## note: requires space for now after [:|] - keep - why? why not?
24
+ }ix
38
25
 
39
26
 
40
-
41
27
 
42
28
  end # class Lexer
43
29
  end # module SportDb