sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -14,7 +14,7 @@ class Lexer
14
14
  GEO_TEXT_RE = %r{
15
15
  ## must start with alpha (allow unicode letters!!)
16
16
  (?<text>
17
- ## positive lookbehind - for now space (or beginning of line - for testing) only
17
+ ## positive lookbehind - for now space (or beginning of line - for testing) only
18
18
  ## (MUST be fixed number of chars - no quantifier e.g. +? etc.)
19
19
  (?<= [ ,›>\[\]]|^)
20
20
  (?:
@@ -40,21 +40,42 @@ GEO_TEXT_RE = %r{
40
40
  ## for single spaces only (and _/ MUST not be surround by spaces)
41
41
 
42
42
  (?:
43
- [ ]? # only single spaces allowed inline!!!
44
43
  (?:
45
- \p{L} | \d | [.&'°]
46
- |
47
- (?: (?<! [ ]) ## no space allowed before (but possible after)
48
- [-]
49
- )
50
- |
51
- (?: (?<! [ ]) ## no spaces allowed around these characters
52
- [_/]
53
- (?! [ ])
54
- )
55
- )+
56
- )*
57
-
44
+ [ ]? # only single (inline) space allowed - double spaces are breaks!!!
45
+ (?:
46
+ \p{L} | \d | [.&'°]
47
+ |
48
+ (?: (?<! [ ]) ## no space allowed before (but possible after)
49
+ [-]
50
+ )
51
+ |
52
+ (?: (?<! [ ]) ## no spaces allowed around these characters
53
+ [_/]
54
+ (?! [ ])
55
+ )
56
+ )+
57
+ )
58
+ |
59
+ ## for now allow auto-add optional
60
+ ## parenthesis enclosed closed text
61
+ ## e.g. Dublin (Dalymount Park)
62
+ ## Bucuresti (23 August)
63
+ ## Paris (Parc des Princes)
64
+ ## Ost-Berlin (Walter-Ulbricht)
65
+ ## Athinai (OAKA - Maroussi)
66
+ ##
67
+ ## or Valencia (Spain) or Solna
68
+ (?:
69
+ [ ]
70
+ \(
71
+ [^()\[\],;:›<>]+ ## todo - add more special chars
72
+ ## maybe list only allowed ones??
73
+ ## make pattern more strict - why? why not?
74
+ \)
75
+ )
76
+ )*
77
+
78
+
58
79
  ## must NOT end with space or dash(-)
59
80
  ## todo/fix - possible in regex here
60
81
  ## only end in alphanum a-z0-9 (not dot or & ???)
@@ -63,56 +84,15 @@ GEO_TEXT_RE = %r{
63
84
  ## must be space!!!
64
85
  ## (or comma or start/end of string)
65
86
  ## kind of \b !!!
66
- ## positive lookahead
87
+ ## POSITIVE lookahead
67
88
  (?=[ ,›>\[\]]|$)
68
- )
69
- }ix
70
-
71
-
72
89
 
73
- ##
74
- # for timezone format use for now:
75
- # (BRT/UTC-3) (e.g. brazil time)
76
- #
77
- # (CET/UTC+1) - central european time
78
- # (CEST/UTC+2) - central european summer time - daylight saving time (DST).
79
- # (EET/UTC+1) - eastern european time
80
- # (EEST/UTC+2) - eastern european summer time - daylight saving time (DST).
81
- #
82
- # UTC+3
83
- # UTC+4
84
- # UTC+0
85
- # UTC+00
86
- # UTC+0000
87
- #
88
- # - allow +01 or +0100 - why? why not
89
- # - +0130 (01:30)
90
- #
91
- # see
92
- # https://en.wikipedia.org/wiki/Time_zone
93
- # https://en.wikipedia.org/wiki/List_of_UTC_offsets
94
- # https://en.wikipedia.org/wiki/UTC−04:00 etc.
95
- #
96
- # e.g. (UTC-2) or (CEST/UTC-2) etc.
97
- # todo check - only allow upcase
98
- # or (utc-2) and (cest/utc-2) too - why? why not?
99
-
100
- TIMEZONE_RE = %r{
101
- (?<timezone>
102
- \(
103
- ## optional "local" timezone name eg. BRT or CEST etc.
104
- (?: [a-z]+
105
- /
106
- )?
107
- [a-z]+
108
- [+-]
109
- \d{1,4} ## e.g. 0 or 00 or 0000
110
- \)
111
90
  )
112
91
  }ix
113
92
 
114
93
 
115
94
 
95
+
116
96
  GEO_BASICS_RE = %r{
117
97
  (?<spaces> [ ]{2,}) |
118
98
  (?<space> [ ])
@@ -121,10 +101,30 @@ GEO_BASICS_RE = %r{
121
101
  }ix
122
102
 
123
103
 
104
+ ## note - add "hacky" check for comma that is followed by a prop(erty)
105
+ ##
106
+ ## make sure to NOT match
107
+ ## props e.g. att: 18000
108
+ ## July 10 @ Paris, Parc des Princes, att: 18000
109
+ ## July 10 @ Paris, Parc des Princes, att: 18000
110
+ ##
111
+
112
+
113
+ GEO_END_RE = %r{
114
+ (?<geo_end>
115
+ ,
116
+ )
117
+ ## POSITIVE lookahead for props
118
+ (?=
119
+ [ ]* ## optional spaces
120
+ (?: att|ref) ## todo/fix - use generic [a-z]+ - why? why not?
121
+ :
122
+ )
123
+ }ix
124
124
 
125
125
 
126
126
  GEO_RE = Regexp.union(
127
- TIMEZONE_RE,
127
+ GEO_END_RE,
128
128
  GEO_BASICS_RE,
129
129
  GEO_TEXT_RE,
130
130
  ANY_RE,
@@ -0,0 +1,460 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ######################################################
6
+ ## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
7
+ ##
8
+ ## note - must be enclosed in ()!!!
9
+ ## todo - add () in basics - why? why not?
10
+
11
+
12
+
13
+ ##
14
+ ## todo/fix - split up BASICS!!!
15
+ ## break out SPACES_RE for general reuse!!!
16
+ ## makes it easier to use "custom" symbols (<sym>)
17
+
18
+
19
+ GOAL_BASICS_RE = %r{
20
+ (?<spaces> [ ]{2,}) |
21
+ (?<space> [ ])
22
+ |
23
+ (?<sym>
24
+ [;,)] ## add (-) dash too - why? why not?
25
+ )
26
+ }ix
27
+
28
+
29
+
30
+
31
+
32
+
33
+ ## note - assume lines starting with opening ( are goal lines!!!!
34
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
35
+ ##
36
+ ## note - check for negative lookahead
37
+ ## to exclude ord (numbers) e.g. (1), (42), etc.!!!
38
+ ##
39
+ ## note - excludes (a), (h), (n) - TEAM_AWAY, TEAM_HOME, TEAM_NEUTRAL tokens (see NEGATIVE lookahead below)!!
40
+
41
+ START_GOAL_LINE_RE = %r{
42
+ \A
43
+ [ ]* ## ignore leading spaces (if any)
44
+ \(
45
+
46
+ # check NEGATIVE lookahead
47
+ (?!
48
+ ## exclude (a), (h), (n)
49
+ ## TEAM_AWAY, TEAM_HOME, TEAM_NEUTRAL
50
+ (?: a|h|n )
51
+ \)
52
+ )
53
+
54
+ }xi
55
+
56
+ =begin
57
+ # check NEGATIVE lookahead
58
+ (?!
59
+ ## exclude ord
60
+ (?: \d+ \))
61
+ |
62
+ ## exclude score - goal_line_alt!!!
63
+ (?: [ ]* \b
64
+ \d-\d ## score e.g. 1-0
65
+ \b )
66
+ )
67
+ =end
68
+
69
+
70
+ #############
71
+ ## check for goal compat(ility) "legacy" line
72
+ ## e.g.
73
+ ## (6' Puskás 0-1, 9' Czibor 0-2, 11' Morlock 1-2, 18' Rahn 2-2,
74
+ ## 84' Rahn 3-2)
75
+ ## (6 Puskás 0-1, 9 Czibor 0-2, 11 Morlock 1-2, 18 Rahn 2-2,
76
+ ## 84 Rahn 3-2)
77
+
78
+
79
+ START_GOAL_LINE_COMPAT_RE = %r{
80
+ \A
81
+ [ ]* ## ignore leading spaces (if any)
82
+ \(
83
+
84
+ ## (i) check NEGATIVE lookahead
85
+ ## exclude score e.g. 1-1 etc.
86
+ (?! [ ]* \b \d-\d \b)
87
+
88
+ ## (ii) check POSITIVE lookahead
89
+ (?= [ ]*
90
+ \d{1,3}
91
+ '? ## optional minute marker
92
+ (?: \+
93
+ \d{1,2}
94
+ '? ## optional minute marker
95
+ )?
96
+ )
97
+ }xi
98
+
99
+
100
+
101
+ ###
102
+ ## check for goal line (alternate syntax)
103
+ ## (1-0 Player, 1-1 Player, ...)
104
+ # must start off with OR (yes) at least include a score e.g. 1-0
105
+ ##
106
+ ## note - allow "centered" style e.g.
107
+ ## ( Player 44' (p) 1-0
108
+ ## 1-1 Player 64' )
109
+ START_GOAL_LINE_ALT_RE = %r{
110
+ \A
111
+ [ ]* ## ignore leading spaces (if any)
112
+ \(
113
+
114
+ # check POSITIVE lookahead
115
+ (?= .*? ## note - non-greedy
116
+ \b \d-\d \b ## score e.g. 0-1
117
+ )
118
+ }xi
119
+
120
+
121
+
122
+ ###
123
+ ## e.g. (-; Metzger)
124
+ GOAL_NONE_RE = %r{ (?<goals_none>
125
+ -[ ]*;
126
+ )
127
+ }x
128
+
129
+ ###
130
+ # note - alternate goal separator dash (-) MUST have leading and trailing space!!!
131
+ # e.g. (Metzger 83 - Krämer 29, 88, Cichy 33, Rahn 37)
132
+ # e.g. (Metzger - Krämer (2), Cichy, Rahn)
133
+ # (Brunnenmeier 17 - Gerwien 74)
134
+ # (Brunnenmeier - Gerwien)
135
+ # that is, NOT allowed
136
+ # e.g. (Metzger 83-Krämer 29, 88, Cichy 33, Rahn 37)
137
+ # (Brunnenmeier 17-Gerwien 74)
138
+ # (Brunnenmeier-Gerwien)
139
+ #
140
+ # note - allow split by - e.g.
141
+ # Frankfurt 4-2 Schalke (Kreß 45, Solz 55, Trimhold 58, Huberts 73 p -
142
+ # Berz 7, Herrmann 74)
143
+
144
+
145
+ GOAL_SEP_ALT_RE = %r{
146
+ (?<goal_sep_alt>
147
+ (?<=[ ]) ## positive lookbehind - space required
148
+ -
149
+ (?=[ ]|\z) ## positive lookahead - space (or end of string) required
150
+ )}x
151
+
152
+
153
+ ## e.g. (2)
154
+ ## (2/p), (2/pen.), (3/2p), (3/ 2 pen.)
155
+ ## -or- (2,1pen), (3, 2 pens)
156
+ ##
157
+ ## (p), (pen.) (2 pen.), (2p)
158
+ ## (og), (o.g.),
159
+ ## (2og), (2 o.g.), (2ogs)
160
+ #
161
+ ##
162
+
163
+ GOAL_COUNT_RE = %r{
164
+ (?<goal_count>
165
+ \(
166
+ (?:
167
+ ## opt penalties
168
+ (?<pen>
169
+ (?: (?<pen_value> \d{1,2}) [ ]? )?
170
+ (?:pens|pen\.?|p)
171
+ )
172
+ |
173
+ ## opt own goals (og)
174
+ (?<og>
175
+ (?: (?<og_value> \d{1,2}) [ ]? )?
176
+ (?:ogs?|o\.g\.|o)
177
+ )
178
+ |
179
+ ## opt fallback - classic count/number
180
+ (?: (?<value> [1-9])
181
+ ## check for optional penalties
182
+ (?<pen>
183
+ [,/] [ ]*
184
+ (?: (?<pen_value> \d{1,2}) [ ]? )?
185
+ (?:pens|pen\.?|p)
186
+ )?
187
+ )
188
+ )
189
+ \)
190
+ )}ix
191
+
192
+
193
+
194
+
195
+
196
+
197
+ ## minute variant for N/A not/available
198
+ ## todo/check - find a better syntax - why? why not?
199
+ ##
200
+ ## note "??".to_i(10) returns 0 or
201
+ ## "__".to_i(10) returns 0
202
+ ## quick hack - assume 0 for n/a for now
203
+
204
+ MINUTE_NA_RE = %r{
205
+ (?<minute>
206
+ (?<=[ (]) # positive lookbehind for space or opening
207
+ (?<value> \?{2} | _{2} )
208
+ ' ## must have minute marker!!!!
209
+ )
210
+ }ix
211
+
212
+ =begin
213
+ MINUTE_RE = %r{
214
+ (?<minute>
215
+ (?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
216
+ # todo - add more lookbehinds e.g. ,) etc. - why? why not?
217
+ (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
218
+ (?: \+
219
+ (?<value2>\d{1,3})
220
+ )?
221
+ ' ## must have minute marker!!!!
222
+ )
223
+ }ix
224
+ =end
225
+
226
+
227
+ ##
228
+ ## note - inline \b check in MINUTE_RE excludes
229
+ ## 85pen or 90+4pen or 38p (possible and NOT excluded in GOAL_MINUTE_RE !!!)
230
+ ##
231
+ ## minute with optional stoppage
232
+
233
+ MINUTE_RE = %r{
234
+ (?<minute>
235
+ \b
236
+ (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
237
+ \b
238
+ '? ## optional minute marker
239
+
240
+ (?: \+ (?<value2>\d{1,2})
241
+ \b
242
+ '? ## optional minute marker
243
+ )?
244
+
245
+ )
246
+ }ix
247
+
248
+ ## goal types
249
+ # (pen.) or (pen) or (p.) or (p)
250
+ ## (o.g.) or (og)
251
+ ## todo/check - keep case-insensitive
252
+ ## or allow OG or P or PEN or
253
+ ## only lower case - why? why not?
254
+ ##
255
+ ## add (gg) for golden goal - why? why not?
256
+ ## add (sg) for silver goal - why? why not??
257
+
258
+ GOAL_MINUTE_RE = %r{
259
+ (?<goal_minute>
260
+ \b
261
+ (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
262
+ '? ## optional minute marker
263
+
264
+ (?: \+ (?<value2>\d{1,2})
265
+ '? ## optional minute marker
266
+ )?
267
+
268
+ ## note - add goal minute qualifiers here inline!!!
269
+ (?:
270
+ (?: [ ]? (?<og> (?: \((?:og|o\.g\.|o)\)) ## allow (og)
271
+ |
272
+ (?: (?:og|o\.g\.|o)) ## allow plain og
273
+ )
274
+ )
275
+ |
276
+ (?: [ ]? (?<pen> (?: \((?:pen\.?|p)\)) ## allow ()
277
+ |
278
+ (?: (?:pen\.?|p))
279
+ )
280
+ )
281
+ |
282
+ ## add experimental header qualifier
283
+ (?: [ ]? (?<hdr> \( (?:hdr\.?|h ) \) | (?: hdr\.?|h ) ))
284
+ |
285
+ ## add experimental free kick qualifier
286
+ (?: [ ]? (?<fk> \( (?:fk\.?|f ) \) | (?: fk\.?|f) ))
287
+ )?
288
+
289
+ ## add experimental seconds
290
+ ## e.g. (95 secs) or (95sec) etc.
291
+ (?: [ ]* \(
292
+ (?<secs>\d{1,3})
293
+ [ ]?secs?
294
+ \)
295
+ )?
296
+ )
297
+
298
+ ## note - check positive lookahead
299
+ (?=[ ,;)]|$)
300
+ }ix
301
+
302
+
303
+
304
+
305
+
306
+
307
+ GOAL_RE = Regexp.union(
308
+ GOAL_BASICS_RE,
309
+ GOAL_NONE_RE,
310
+ GOAL_MINUTE_RE,
311
+ GOAL_COUNT_RE,
312
+ ## MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
313
+ ## GOAL_OG_RE, GOAL_PEN_RE,
314
+ ## SCORE_RE, ## add back in v2 (level 3) or such!!
315
+ PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
316
+ GOAL_SEP_ALT_RE,
317
+ ## todo/fix - add ANY_RE !!!!
318
+ )
319
+
320
+
321
+
322
+ GOAL_TYPE_RE = %r{
323
+ (?<goal_type>
324
+ \(
325
+ (?:
326
+ (?<og> og|o\.g\.|o )
327
+ |
328
+ (?<pen> pen\.?|p )
329
+ |
330
+ ## add experimental header qualifier
331
+ (?<hdr> hdr\.?|h )
332
+ |
333
+ ## add experimental free kick qualifier
334
+ (?<fk> fk\.?|f )
335
+ )
336
+ \)
337
+ )}xi
338
+
339
+
340
+
341
+ GOAL_ALT_RE = Regexp.union(
342
+ GOAL_BASICS_RE,
343
+ SCORE_RE, ## e.g. 1-0, 0-1, etc.
344
+ GOAL_MINUTE_RE,
345
+ GOAL_TYPE_RE,
346
+ PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
347
+ ## todo/fix - add ANY_RE !!!!
348
+ )
349
+
350
+ GOAL_COMPAT_RE = Regexp.union(
351
+ GOAL_BASICS_RE,
352
+ SCORE_RE, ## e.g. 1-0, 0-1, etc.
353
+ MINUTE_RE, ## note - matches minute e.g. 92, 7, 7' 7+3, 46+, etc.
354
+ GOAL_TYPE_RE,
355
+ PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
356
+ ## todo/fix - add ANY_RE !!!!
357
+ )
358
+
359
+ =begin
360
+ ## note - leave out n/a minute in goals - make minutes optional!!!
361
+ PROP_GOAL_RE = Regexp.union(
362
+ GOAL_BASICS_RE,
363
+ MINUTE_RE,
364
+ ## MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
365
+ GOAL_OG_RE, GOAL_PEN_RE,
366
+ SCORE_RE,
367
+ PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
368
+ )
369
+ =end
370
+
371
+
372
+
373
+
374
+ def self._parse_goal_minute( str )
375
+ ## note - strip - leading/trailing spaces
376
+ m = GOAL_MINUTE_RE.match( str.strip )
377
+ if m && m.pre_match == '' && m.post_match == ''
378
+ _build_goal_minute( m )
379
+ elsif m
380
+ ## note - match BUT not anchored to start and end-of-string!!!
381
+ ## report, error somehow??
382
+ nil
383
+ else
384
+ nil ## no match - return nil
385
+ end
386
+ end
387
+
388
+
389
+ def self._build_goal_minute( m )
390
+ minute = {}
391
+
392
+ minute[:m] = m[:value].to_i(10) ## always required
393
+
394
+ ## stoppage/injury time (offset)
395
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
396
+
397
+ minute[:og] = true if m[:og]
398
+ minute[:pen] = true if m[:pen]
399
+ minute[:freekick] = true if m[:fk]
400
+ minute[:header] = true if m[:hdr]
401
+
402
+ minute[:secs] = m[:secs].to_i(10) if m[:secs]
403
+
404
+ minute
405
+ end
406
+ def _build_goal_minute( m ) self.class._build_goal_minute( m ); end
407
+
408
+
409
+ def self._build_minute( m )
410
+ minute = {}
411
+ minute[:m] = m[:value].to_i(10) ## always required
412
+
413
+ ## stoppage/injury time (offset)
414
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
415
+
416
+ minute
417
+ end
418
+ def _build_minute( m ) self.class._build_minute( m ); end
419
+
420
+
421
+
422
+ def self._parse_goal_count( str )
423
+ ## note - strip - leading/trailing spaces
424
+ m = GOAL_COUNT_RE.match( str.strip )
425
+ if m && m.pre_match == '' && m.post_match == ''
426
+ _build_goal_count( m )
427
+ elsif m
428
+ ## note - match BUT not anchored to start and end-of-string!!!
429
+ ## report, error somehow??
430
+ nil
431
+ else
432
+ nil ## no match - return nil
433
+ end
434
+ end
435
+
436
+ def self._build_goal_count( m )
437
+ count = {}
438
+ count[:count] = m[:value].to_i(10) if m[:value]
439
+ count[:og] = m[:og_value] ? m[:og_value].to_i(10) : 1 if m[:og] ## check flag
440
+ count[:pen] = m[:pen_value] ? m[:pen_value].to_i(10) : 1 if m[:pen] ## check flag
441
+ count
442
+ end
443
+ def _build_goal_count( m ) self.class._build_goal_count( m ); end
444
+
445
+
446
+
447
+
448
+ def self._build_goal_type( m )
449
+ goal = {}
450
+ goal[:og] = true if m[:og]
451
+ goal[:pen] = true if m[:pen]
452
+ goal[:freekick] = true if m[:fk]
453
+ goal[:header] = true if m[:hdr]
454
+ goal
455
+ end
456
+ def _build_goal_type( m ) self.class._build_goal_type( m ); end
457
+
458
+
459
+ end # class Lexer
460
+ end # module SportDb
@@ -0,0 +1,43 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ###
6
+ # check for start of group def line e.g.
7
+ # Group A | ...
8
+ # Group 1 : ....
9
+ # Group A2 | ....
10
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
11
+ GROUP_DEF_LINE_RE = %r{ \A
12
+ [ ]* ## ignore leading spaces (if any)
13
+ (?<group_def>
14
+ Group
15
+ [ ]
16
+ [a-z0-9]+ ## todo/check - allow dot (.) too e.g. 1.A etc.- why? why not?
17
+ )
18
+ ### positive lookahead MUST be : OR |
19
+ (?= [ ]*
20
+ [:|]
21
+ [ ]) ## note: requires space for now after [:|] - keep - why? why not?
22
+ }ix
23
+
24
+ GROUP_DEF_BASICS_RE = %r{
25
+ (?<spaces> [ ]{2,}) |
26
+ (?<space> [ ])
27
+ |
28
+ (?<sym> [:|,] ) ### note - add comma (,) as optional separator
29
+ }ix
30
+
31
+
32
+ GROUP_DEF_RE = Regexp.union( GROUP_DEF_BASICS_RE,
33
+ TEXT_RE,
34
+ ANY_RE,
35
+ )
36
+
37
+
38
+
39
+
40
+
41
+
42
+ end # class Lexer
43
+ end # module SportDb
@@ -0,0 +1,40 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ### fix - use (?<text>) - text capture for inner text!!
6
+ ## use (?<note> for complete match as a convention!! )
7
+ NOTE_RE = %r{
8
+ \[
9
+ (?<note>
10
+ [^\[\]\#]*? ## note - non-greedy/lazy operator
11
+ ## exclude comments inside note block - why? why not?
12
+ )
13
+ \]
14
+ }xi
15
+
16
+
17
+ ####
18
+ ## fix - change NOTE_RE to MATCH_NOTE_RE !!!!
19
+ ## and change NOTA_BENE_RE to NOTE_RE !!!
20
+
21
+
22
+
23
+ ## check for "literal" (multi-line) note blocks
24
+ ## eg. nb: or note:
25
+ ## space required after colon (:) - why? why not?
26
+ ##
27
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
28
+ NOTA_BENE_RE = %r{ \A
29
+ [ ]* ## ignore leading spaces (if any)
30
+ (?: nb | note) [ ]* : [ ]+
31
+ (?<nota_bene>
32
+ .+? ## use non-greedy
33
+ )
34
+ [ ]* ## ignore trailing spaces (if any)
35
+ \z
36
+ }xi
37
+
38
+
39
+ end # class Lexer
40
+ end # module SportDb