sportdb-parser 0.5.9 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/lexer.rb +101 -36
- data/lib/sportdb/parser/parser.rb +561 -387
- data/lib/sportdb/parser/racc_parser.rb +5 -3
- data/lib/sportdb/parser/racc_tree.rb +12 -5
- data/lib/sportdb/parser/token-date.rb +81 -13
- data/lib/sportdb/parser/token-minute.rb +45 -0
- data/lib/sportdb/parser/token-prop.rb +133 -0
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token-text.rb +9 -2
- data/lib/sportdb/parser/token.rb +51 -176
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +2 -0
- metadata +4 -2
@@ -14,7 +14,7 @@ def initialize( txt, debug: false )
|
|
14
14
|
|
15
15
|
### todo:
|
16
16
|
## - pass along debug flag
|
17
|
-
lexer = SportDb::Lexer.new( txt )
|
17
|
+
lexer = SportDb::Lexer.new( txt, debug: debug )
|
18
18
|
## note - use tokenize_with_errors and add/collect tokenize errors
|
19
19
|
@tokens, @errors = lexer.tokenize_with_errors
|
20
20
|
## pp @tokens
|
@@ -68,12 +68,14 @@ def initialize( txt, debug: false )
|
|
68
68
|
|
69
69
|
|
70
70
|
def on_error(error_token_id, error_value, value_stack)
|
71
|
-
|
71
|
+
## auto-add error_token (as string)
|
72
|
+
error_token = Racc_token_to_s_table[error_token_id]
|
73
|
+
args = [error_token, error_token_id, error_value, value_stack]
|
72
74
|
puts
|
73
75
|
puts "!! on parse error:"
|
74
76
|
puts "args=#{args.pretty_inspect}"
|
75
77
|
|
76
|
-
@errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
78
|
+
@errors << "parse error on token: #{error_token} (#{error_token_id}) with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
77
79
|
## exit 1 ## exit for now - get and print more info about context etc.!!
|
78
80
|
end
|
79
81
|
|
@@ -71,10 +71,14 @@ RoundDef = Struct.new( :name, :date, :duration ) do
|
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
74
|
-
DateHeader = Struct.new( :date ) do
|
74
|
+
DateHeader = Struct.new( :date, :time, :geo, :timezone ) do
|
75
75
|
def pretty_print( printer )
|
76
76
|
printer.text( "<DateHeader " )
|
77
|
-
printer.text( "#{self.date.pretty_inspect}
|
77
|
+
printer.text( "#{self.date.pretty_inspect}" )
|
78
|
+
printer.text( " time=#{self.time.pretty_inspect}" ) if self.time
|
79
|
+
printer.text( " geo=#{self.geo.pretty_inspect}" ) if self.geo
|
80
|
+
printer.text( " timezone=#{self.timezone}") if self.timezone
|
81
|
+
printer.text( ">")
|
78
82
|
end
|
79
83
|
end
|
80
84
|
|
@@ -85,14 +89,17 @@ GroupHeader = Struct.new( :name ) do
|
|
85
89
|
end
|
86
90
|
end
|
87
91
|
|
88
|
-
RoundHeader = Struct.new( :names ) do
|
92
|
+
RoundHeader = Struct.new( :names, :group ) do
|
89
93
|
def pretty_print( printer )
|
90
94
|
printer.text( "<RoundHeader " )
|
91
|
-
printer.text( "#{self.names.join(', ')}
|
95
|
+
printer.text( "#{self.names.join(', ')}" )
|
96
|
+
printer.text( " group=#{self.group}") if self.group
|
97
|
+
printer.text( ">" )
|
92
98
|
end
|
93
99
|
end
|
94
100
|
|
95
|
-
|
101
|
+
|
102
|
+
MatchLine = Struct.new( :ord, :date, :time, :wday,
|
96
103
|
:team1, :team2, :score,
|
97
104
|
:status,
|
98
105
|
:geo,
|
@@ -146,12 +146,29 @@ DATE_II_RE = %r{
|
|
146
146
|
)}ix
|
147
147
|
|
148
148
|
|
149
|
+
# e.g. iso-date - 2011-08-25
|
150
|
+
## note - allow/support ("shortcuts") e.g 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
|
151
|
+
DATE_III_RE = %r{
|
152
|
+
(?<date>
|
153
|
+
\b
|
154
|
+
(?<year>\d{4})
|
155
|
+
-
|
156
|
+
(?<month>\d{1,2})
|
157
|
+
-
|
158
|
+
(?<day>\d{1,2})
|
159
|
+
\b
|
160
|
+
)}ix
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
|
149
165
|
#############################################
|
150
166
|
# map tables
|
151
167
|
# note: order matters; first come-first matched/served
|
152
168
|
DATE_RE = Regexp.union(
|
153
169
|
DATE_I_RE,
|
154
|
-
DATE_II_RE
|
170
|
+
DATE_II_RE,
|
171
|
+
DATE_III_RE,
|
155
172
|
)
|
156
173
|
|
157
174
|
|
@@ -197,29 +214,36 @@ end
|
|
197
214
|
#
|
198
215
|
# Sun Jun/23 - Wed Jun/26 -- YES
|
199
216
|
# Jun/23 - Jun/26 -- YES
|
200
|
-
#
|
201
|
-
|
202
|
-
#
|
203
|
-
# Jun/25
|
217
|
+
# Jun/25 - 26 - why? why not??? - YES - see variant ii below!!!
|
218
|
+
|
219
|
+
# Tue Jun/25 + Wed Jun/26 -- NO
|
220
|
+
# Jun/25 + Jun/26 -- NO
|
204
221
|
# Jun/25 .. 26 - why? why not???
|
205
222
|
# Jun/25 to 26 - why? why not???
|
206
223
|
# Jun/25 + 26 - add - why? why not???
|
207
224
|
# Sun-Wed Jun/23-26 - add - why? why not???
|
208
225
|
# Wed+Thu Jun/26+27 2024 - add - why? why not???
|
209
226
|
#
|
210
|
-
# maybe use
|
227
|
+
# maybe use comma and plus for list of dates
|
211
228
|
# Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
|
212
229
|
# Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
|
213
230
|
#
|
214
231
|
# add back optional comma (before) year - why? why not?
|
232
|
+
#
|
215
233
|
|
216
234
|
|
217
235
|
##
|
218
236
|
# todo add plus later on - why? why not?
|
237
|
+
### todo/fix add optional comma (,) before year
|
238
|
+
|
239
|
+
### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
|
240
|
+
## because alternation (|) has lowest priority/binding
|
241
|
+
|
219
242
|
|
220
243
|
DURATION_I_RE = %r{
|
221
244
|
(?<duration>
|
222
245
|
\b
|
246
|
+
(?:
|
223
247
|
## optional day name
|
224
248
|
((?<day_name1>#{DAY_NAMES})
|
225
249
|
[ ]
|
@@ -228,12 +252,13 @@ DURATION_I_RE = %r{
|
|
228
252
|
(?: \/|[ ] )
|
229
253
|
(?<day1>\d{1,2})
|
230
254
|
## optional year
|
231
|
-
(
|
255
|
+
( ,? # optional comma
|
256
|
+
[ ]
|
232
257
|
(?<year1>\d{4})
|
233
258
|
)?
|
234
259
|
|
235
260
|
## support - only for now (add + or .. or such - why??)
|
236
|
-
[ ]*
|
261
|
+
[ ]* - [ ]*
|
237
262
|
|
238
263
|
## optional day name
|
239
264
|
((?<day_name2>#{DAY_NAMES})
|
@@ -243,20 +268,28 @@ DURATION_I_RE = %r{
|
|
243
268
|
(?: \/|[ ] )
|
244
269
|
(?<day2>\d{1,2})
|
245
270
|
## optional year
|
246
|
-
(
|
271
|
+
( ,? # optional comma
|
272
|
+
[ ]
|
247
273
|
(?<year2>\d{4})
|
248
274
|
)?
|
275
|
+
)
|
249
276
|
\b
|
250
277
|
)}ix
|
251
278
|
|
252
279
|
|
280
|
+
|
281
|
+
# FIX - remove this variant
|
282
|
+
# "standardize on month day [year]" !!!!
|
283
|
+
|
284
|
+
=begin
|
253
285
|
###
|
254
286
|
# variant ii
|
255
287
|
# e.g. 26 July - 27 July
|
256
|
-
|
257
|
-
|
288
|
+
# 26 July,
|
289
|
+
XXX_DURATION_II_RE = %r{
|
258
290
|
(?<duration>
|
259
291
|
\b
|
292
|
+
(?
|
260
293
|
## optional day name
|
261
294
|
((?<day_name1>#{DAY_NAMES})
|
262
295
|
[ ]
|
@@ -265,7 +298,8 @@ DURATION_II_RE = %r{
|
|
265
298
|
[ ]
|
266
299
|
(?<month_name1>#{MONTH_NAMES})
|
267
300
|
## optional year
|
268
|
-
(
|
301
|
+
(
|
302
|
+
[ ]
|
269
303
|
(?<year1>\d{4})
|
270
304
|
)?
|
271
305
|
|
@@ -283,16 +317,50 @@ DURATION_II_RE = %r{
|
|
283
317
|
( [ ]
|
284
318
|
(?<year2>\d{4})
|
285
319
|
)?
|
320
|
+
)
|
321
|
+
\b
|
322
|
+
)}ix
|
323
|
+
=end
|
324
|
+
|
325
|
+
|
326
|
+
# variant ii
|
327
|
+
# add support for shorthand
|
328
|
+
# August 16-18, 2011
|
329
|
+
# September 13-15, 2011
|
330
|
+
# October 18-20, 2011
|
331
|
+
# March/6-8, 2012
|
332
|
+
# March 6-8 2012
|
333
|
+
# March 6-8
|
334
|
+
#
|
335
|
+
# - add support for August 16+17 or such (and check 16+18)
|
336
|
+
# use <op> to check if day2 is a plus or range or such - why? why not?
|
337
|
+
|
338
|
+
DURATION_II_RE = %r{
|
339
|
+
(?<duration>
|
340
|
+
\b
|
341
|
+
(?:
|
342
|
+
(?<month_name1>#{MONTH_NAMES})
|
343
|
+
[ /]
|
344
|
+
(?<day1>\d{1,2})
|
345
|
+
-
|
346
|
+
(?<day2>\d{1,2})
|
347
|
+
(?:
|
348
|
+
,? ## optional comma
|
349
|
+
[ ]
|
350
|
+
(?<year1>\d{4})
|
351
|
+
)? ## optional year
|
352
|
+
)
|
286
353
|
\b
|
287
354
|
)}ix
|
288
355
|
|
289
356
|
|
357
|
+
|
290
358
|
#############################################
|
291
359
|
# map tables
|
292
360
|
# note: order matters; first come-first matched/served
|
293
361
|
DURATION_RE = Regexp.union(
|
294
362
|
DURATION_I_RE,
|
295
|
-
DURATION_II_RE
|
363
|
+
DURATION_II_RE,
|
296
364
|
)
|
297
365
|
|
298
366
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class Lexer
|
4
|
+
|
5
|
+
#
|
6
|
+
# todo/check - move goal type regexes to goal or somewhere else?
|
7
|
+
#
|
8
|
+
|
9
|
+
## goal types
|
10
|
+
# (pen.) or (pen) or (p.) or (p)
|
11
|
+
## (o.g.) or (og)
|
12
|
+
## todo/check - keep case-insensitive
|
13
|
+
## or allow OG or P or PEN or
|
14
|
+
## only lower case - why? why not?
|
15
|
+
GOAL_PEN_RE = %r{
|
16
|
+
(?<pen> \(
|
17
|
+
(?:pen|p)\.?
|
18
|
+
\)
|
19
|
+
)
|
20
|
+
}ix
|
21
|
+
GOAL_OG_RE = %r{
|
22
|
+
(?<og> \(
|
23
|
+
(?:og|o\.g\.)
|
24
|
+
\)
|
25
|
+
)
|
26
|
+
}ix
|
27
|
+
|
28
|
+
|
29
|
+
MINUTE_RE = %r{
|
30
|
+
(?<minute>
|
31
|
+
(?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
|
32
|
+
# todo - add more lookbehinds e.g. ,) etc. - why? why not?
|
33
|
+
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
34
|
+
(?: \+
|
35
|
+
(?<value2>\d{1,3})
|
36
|
+
)?
|
37
|
+
' ## must have minute marker!!!!
|
38
|
+
)
|
39
|
+
}ix
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
end # class Lexer
|
45
|
+
end # module SportDb
|
@@ -0,0 +1,133 @@
|
|
1
|
+
###
|
2
|
+
## team prop mode e.g.
|
3
|
+
##
|
4
|
+
##
|
5
|
+
## Fri Jun 14 21:00 @ München Fußball Arena, München
|
6
|
+
## (1) Germany v Scotland 5-1 (3-0)
|
7
|
+
## Wirtz 10' Musiala 19' Havertz 45+1' (pen.) Füllkrug 68' Can 90+3'; Rüdiger 87' (o.g.)
|
8
|
+
##
|
9
|
+
## Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt - Andrich [Y] (Groß 46'),
|
10
|
+
## Kroos (Can 80') - Musiala (Müller 74'), Gündogan, Wirtz (Sane 63') -
|
11
|
+
## Havertz (Füllkrug 63')
|
12
|
+
## Scotland: Gunn - Porteous [R 44'], Hendry, Tierney (McKenna 78') - Ralston [Y],
|
13
|
+
## McTominay, McGregor (Gilmour 67'), Robertson - Christie (Shankland 82'),
|
14
|
+
## Adams (Hanley 46'), McGinn (McLean 67')
|
15
|
+
|
16
|
+
|
17
|
+
module SportDb
|
18
|
+
class Lexer
|
19
|
+
|
20
|
+
|
21
|
+
## name different from text (does NOT allow number in name/text)
|
22
|
+
|
23
|
+
PROP_NAME_RE = %r{
|
24
|
+
(?<prop_name> \b
|
25
|
+
(?<name>
|
26
|
+
\p{L}+
|
27
|
+
\.? ## optional dot
|
28
|
+
(?:
|
29
|
+
[ ]? # only single spaces allowed inline!!!
|
30
|
+
(?:
|
31
|
+
(?:
|
32
|
+
(?<=\p{L}) ## use lookbehind
|
33
|
+
[/'-] ## must be surrounded by letters
|
34
|
+
## e.g. One/Two NOT
|
35
|
+
## One/ Two or One / Two or One /Two etc.
|
36
|
+
(?=\p{L}) ## use lookahead
|
37
|
+
)
|
38
|
+
|
|
39
|
+
(?:
|
40
|
+
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
41
|
+
['] ## must be surrounded by leading space and
|
42
|
+
## trailing letters (e.g. UDI 'Beter Bed)
|
43
|
+
(?=\p{L}) ## use lookahead
|
44
|
+
)
|
45
|
+
|
|
46
|
+
(?:
|
47
|
+
(?<=\p{L}) ## use lookbehind
|
48
|
+
['] ## must be surrounded by leading letter and
|
49
|
+
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
50
|
+
(?=[ ]\p{L}) ## use lookahead (space WITH letter
|
51
|
+
)
|
52
|
+
| ## standard case with letter(s) and optinal dot
|
53
|
+
(?: \p{L}+
|
54
|
+
\.? ## optional dot
|
55
|
+
)
|
56
|
+
)+
|
57
|
+
)*
|
58
|
+
)
|
59
|
+
## add lookahead - must be non-alphanum
|
60
|
+
(?=[ ,;\]\)]|$)
|
61
|
+
)
|
62
|
+
}ix
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
##############
|
68
|
+
# add support for props/ attributes e.g.
|
69
|
+
#
|
70
|
+
# Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt - Andrich [Y] (46' Groß),
|
71
|
+
# Kroos (80' Can) - Musiala (74' Müller), Gündogan,
|
72
|
+
# Wirtz (63' Sane) - Havertz (63' Füllkrug)
|
73
|
+
# Scotland: Gunn - Porteous [R 44'], Hendry, Tierney (78' McKenna) - Ralston [Y],
|
74
|
+
# McTominay, McGregor (67' Gilmour), Robertson - Christie (82' Shankland),
|
75
|
+
# Adams (46' Hanley), McGinn (67' McLean)
|
76
|
+
#
|
77
|
+
## note: colon (:) MUST be followed by one (or more) spaces
|
78
|
+
## make sure mon feb 12 18:10 will not match
|
79
|
+
## allow 1. FC Köln etc.
|
80
|
+
## Mainz 05:
|
81
|
+
## limit to 30 chars max
|
82
|
+
## only allow chars incl. intl but (NOT ()[]/;)
|
83
|
+
##
|
84
|
+
## todo/fix:
|
85
|
+
## check if St. Pölten works; with starting St. ???
|
86
|
+
|
87
|
+
|
88
|
+
PROP_KEY_RE = %r{
|
89
|
+
(?<prop_key> \b
|
90
|
+
(?<key>
|
91
|
+
(?:\p{L}+
|
92
|
+
|
|
93
|
+
\d+ # check for num lookahead (MUST be space or dot)
|
94
|
+
## MUST be followed by (optional dot) and
|
95
|
+
## required space !!!
|
96
|
+
## MUST be followed by a to z!!!!
|
97
|
+
\.? ## optional dot
|
98
|
+
[ ]? ## make space optional too - why? why not?
|
99
|
+
## yes - eg. 1st, 2nd, 5th etc.
|
100
|
+
\p{L}+
|
101
|
+
)
|
102
|
+
[\d\p{L}'/° -]*? ## allow almost anyting
|
103
|
+
## fix - add negative lookahead
|
104
|
+
## no space and dash etc.
|
105
|
+
## only allowed "inline" not at the end
|
106
|
+
## must end with letter or digit!
|
107
|
+
)
|
108
|
+
[ ]*? # slurp trailing spaces
|
109
|
+
:
|
110
|
+
(?=[ ]+) ## possitive lookahead (must be followed by space!!)
|
111
|
+
)
|
112
|
+
}ix
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
PROP_BASICS_RE = %r{
|
117
|
+
(?<spaces> [ ]{2,}) |
|
118
|
+
(?<space> [ ])
|
119
|
+
|
|
120
|
+
(?<sym>
|
121
|
+
[;,\(\)\[\]-]
|
122
|
+
)
|
123
|
+
}ix
|
124
|
+
|
125
|
+
PROP_RE = Regexp.union(
|
126
|
+
PROP_BASICS_RE,
|
127
|
+
MINUTE_RE,
|
128
|
+
PROP_NAME_RE,
|
129
|
+
)
|
130
|
+
|
131
|
+
|
132
|
+
end # class Lexer
|
133
|
+
end # module SportDb
|
@@ -17,7 +17,7 @@ class Lexer
|
|
17
17
|
## 3-4 pen. 2-2 a.e.t.
|
18
18
|
## 2-2 a.e.t.
|
19
19
|
SCORE__P_ET__RE = %r{
|
20
|
-
(?<
|
20
|
+
(?<score_more>
|
21
21
|
\b
|
22
22
|
(?:
|
23
23
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
@@ -34,7 +34,7 @@ class Lexer
|
|
34
34
|
## note: allow SPECIAL with penalty only
|
35
35
|
## 3-4 pen.
|
36
36
|
SCORE__P__RE = %r{
|
37
|
-
(?<
|
37
|
+
(?<score_more>
|
38
38
|
\b
|
39
39
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
40
|
[ ]* #{P_EN}
|
@@ -52,7 +52,7 @@ class Lexer
|
|
52
52
|
## 2-2 a.e.t. (1-1)
|
53
53
|
|
54
54
|
SCORE__P_ET_FT_HT__RE = %r{
|
55
|
-
(?<
|
55
|
+
(?<score_more>
|
56
56
|
\b
|
57
57
|
(?:
|
58
58
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
@@ -79,7 +79,7 @@ class Lexer
|
|
79
79
|
## special case for case WITHOUT extra time!!
|
80
80
|
## same as above (but WITHOUT extra time and pen required)
|
81
81
|
SCORE__P_FT_HT__RE = %r{
|
82
|
-
(?<
|
82
|
+
(?<score_more>
|
83
83
|
\b
|
84
84
|
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
85
85
|
[ ]* #{P_EN} [ ]+
|
@@ -99,36 +99,47 @@ class Lexer
|
|
99
99
|
## note: \b works only after non-alphanum e.g. )
|
100
100
|
|
101
101
|
|
102
|
-
|
103
|
-
## e.g. 2-1 (1-1)
|
104
|
-
## 2-1
|
105
|
-
|
102
|
+
##########
|
103
|
+
## e.g. 2-1 (1-1)
|
106
104
|
SCORE__FT_HT__RE = %r{
|
107
|
-
(?<
|
105
|
+
(?<score_more>
|
108
106
|
\b
|
109
107
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
110
|
-
(?:
|
111
108
|
[ ]+ \( [ ]*
|
112
109
|
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
113
110
|
[ ]* \)
|
114
|
-
)? # note: make half time (HT) score optional for now
|
115
111
|
(?=[ ,\]]|$)
|
116
112
|
)}ix ## todo/check: remove loakahead assertion here - why require space?
|
117
113
|
## note: \b works only after non-alphanum e.g. )
|
118
114
|
|
119
|
-
|
115
|
+
#####
|
116
|
+
## 2-1
|
117
|
+
SCORE__FT__RE = %r{
|
118
|
+
(?<score>
|
119
|
+
\b
|
120
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
121
|
+
\b
|
122
|
+
)}ix
|
120
123
|
|
121
124
|
#############################################
|
122
125
|
# map tables
|
123
126
|
# note: order matters; first come-first matched/served
|
127
|
+
#
|
128
|
+
## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
|
124
129
|
|
125
|
-
|
130
|
+
SCORE_MORE_RE = Regexp.union(
|
126
131
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
127
132
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
128
133
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
129
134
|
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
-
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
135
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0)
|
136
|
+
## note - keep basic score as its own token!!!!
|
137
|
+
## that is, SCORE & SCORE_MORE
|
138
|
+
### SCORE__FT__RE, # e.g. 1-1 -- note - must go last!!!
|
131
139
|
)
|
132
140
|
|
141
|
+
SCORE_RE = SCORE__FT__RE
|
142
|
+
|
143
|
+
|
133
144
|
end # class Lexer
|
134
145
|
end # module SportDb
|
@@ -24,6 +24,13 @@ class Lexer
|
|
24
24
|
# allow Cote'd Ivoir or such
|
25
25
|
## e.g. add '
|
26
26
|
|
27
|
+
## note:
|
28
|
+
## make sure these do NOT match!!!
|
29
|
+
## TEXT => "Matchday 1 / Group A"
|
30
|
+
## TEXT => "Matchday 2 / Group A"
|
31
|
+
## TEXT => "Matchday 3 / Group A"
|
32
|
+
|
33
|
+
|
27
34
|
|
28
35
|
TEXT_RE = %r{
|
29
36
|
## must start with alpha (allow unicode letters!!)
|
@@ -59,11 +66,11 @@ TEXT_RE = %r{
|
|
59
66
|
## AND switch to case-sensitive (via -i!!!)
|
60
67
|
)
|
61
68
|
| # only single spaces allowed inline!!!
|
62
|
-
[
|
69
|
+
[-/]
|
63
70
|
)?
|
64
71
|
(?:
|
65
72
|
\p{L} |
|
66
|
-
[
|
73
|
+
[&'°]
|
67
74
|
|
|
68
75
|
(?:
|
69
76
|
\d+
|