sportdb-parser 0.6.20 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +14 -8
- data/Rakefile +1 -1
- data/lib/sportdb/parser/blocktxt.rb +99 -0
- data/lib/sportdb/parser/lexer.rb +958 -395
- data/lib/sportdb/parser/lexer_buffer.rb +97 -0
- data/lib/sportdb/parser/lexer_tty.rb +111 -0
- data/lib/sportdb/parser/parser.rb +1768 -855
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +327 -41
- data/lib/sportdb/parser/token-date.rb +160 -178
- data/lib/sportdb/parser/token-date_duration.rb +190 -0
- data/lib/sportdb/parser/token-geo.rb +59 -59
- data/lib/sportdb/parser/token-goals.rb +460 -0
- data/lib/sportdb/parser/token-group.rb +43 -0
- data/lib/sportdb/parser/token-note.rb +40 -0
- data/lib/sportdb/parser/token-prop.rb +70 -54
- data/lib/sportdb/parser/token-prop_name.rb +74 -0
- data/lib/sportdb/parser/token-round.rb +102 -0
- data/lib/sportdb/parser/token-score.rb +323 -47
- data/lib/sportdb/parser/token-score_fuller.rb +435 -0
- data/lib/sportdb/parser/token-score_legs.rb +59 -0
- data/lib/sportdb/parser/token-status.rb +157 -160
- data/lib/sportdb/parser/token-table.rb +149 -0
- data/lib/sportdb/parser/token-text.rb +72 -23
- data/lib/sportdb/parser/token-time.rb +141 -0
- data/lib/sportdb/parser/token.rb +242 -105
- data/lib/sportdb/parser/token_helpers.rb +92 -0
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +24 -2
- metadata +18 -18
- data/config/rounds_de.txt +0 -125
- data/config/rounds_en.txt +0 -29
- data/config/rounds_es.txt +0 -26
- data/config/rounds_misc.txt +0 -25
- data/config/rounds_pt.txt +0 -4
- data/config/zones_en.txt +0 -20
- data/lib/sportdb/parser/lang.rb +0 -298
- data/lib/sportdb/parser/token-minute.rb +0 -205
|
@@ -106,49 +106,104 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
|
|
109
|
-
## todo - add more date variants !!!!
|
|
109
|
+
## todo - add more date variants !!!! why? why not?
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
|
|
112
|
+
# e.g. Fri Aug 9
|
|
113
|
+
# Fri Aug 9
|
|
114
|
+
## Fri, Aug 9
|
|
115
|
+
## Fri, Aug 9 2024
|
|
116
|
+
## Fri, Aug 9, 2024
|
|
117
|
+
## Aug 9, 2024
|
|
118
|
+
## Aug 9, 2024
|
|
119
|
+
## note - eat-up optional comma after DAY_NAMES!!
|
|
120
|
+
##
|
|
121
|
+
## note - Fri Aug/9 no longer supported!!!
|
|
112
122
|
DATE_I_RE = %r{
|
|
113
123
|
(?<date>
|
|
114
124
|
\b
|
|
115
125
|
## optional day name
|
|
116
126
|
((?<day_name>#{DAY_NAMES})
|
|
117
|
-
|
|
127
|
+
(?: ,?[ ]+)
|
|
118
128
|
)?
|
|
119
129
|
(?<month_name>#{MONTH_NAMES})
|
|
120
|
-
|
|
130
|
+
[ ]
|
|
121
131
|
(?<day>\d{1,2})
|
|
132
|
+
\b
|
|
122
133
|
## optional year
|
|
123
|
-
(
|
|
124
|
-
|
|
134
|
+
( ,? [ ] ## note - comma optinal with single space required for now
|
|
135
|
+
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
136
|
+
\b
|
|
125
137
|
)?
|
|
138
|
+
)}ix
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
### todo/fix - add (opt) day_name later
|
|
142
|
+
## add (opt) year later
|
|
143
|
+
# e.g. Aug 9 & Aug 10
|
|
144
|
+
### note - allow shortcut e.g. Aug 9 & 10
|
|
145
|
+
DATE_LEGS_I_RE = %r{
|
|
146
|
+
(?<date_legs>
|
|
147
|
+
\b
|
|
148
|
+
(?<month_name1>#{MONTH_NAMES})
|
|
149
|
+
[ ]
|
|
150
|
+
(?<day1>\d{1,2})
|
|
151
|
+
[ ] & [ ]
|
|
152
|
+
(?:
|
|
153
|
+
(?<month_name2>#{MONTH_NAMES})
|
|
154
|
+
[ ]
|
|
155
|
+
)? ## note - make 2nd month_name optional
|
|
156
|
+
(?<day2>\d{1,2})
|
|
126
157
|
\b
|
|
127
158
|
)}ix
|
|
128
159
|
|
|
129
160
|
|
|
161
|
+
###
|
|
130
162
|
# e.g. 3 June or 10 June
|
|
163
|
+
## note - allow more spaces between DAY_NAMES and DAY e.g.
|
|
164
|
+
## Sun 1 Mar
|
|
165
|
+
## Wed 4 Mar
|
|
166
|
+
## Sat 14 Mar
|
|
167
|
+
## Sat 11 Apr
|
|
168
|
+
## Sat 11 Apr 2021
|
|
169
|
+
## Sat 11 Apr 21
|
|
170
|
+
##
|
|
171
|
+
## Sat, 11 Apr
|
|
172
|
+
## note - eat-up optional comma after DAY_NAMES!!
|
|
173
|
+
##
|
|
174
|
+
## note - Sat 14 Mar 17:30
|
|
175
|
+
## check two-digit year (with NEGATIVE lookahead for time!!!)
|
|
176
|
+
|
|
131
177
|
DATE_II_RE = %r{
|
|
132
178
|
(?<date>
|
|
133
179
|
\b
|
|
134
180
|
## optional day name
|
|
135
181
|
((?<day_name>#{DAY_NAMES})
|
|
136
|
-
|
|
182
|
+
(?: ,?[ ]+)
|
|
137
183
|
)?
|
|
138
184
|
(?<day>\d{1,2})
|
|
139
185
|
[ ]
|
|
140
186
|
(?<month_name>#{MONTH_NAMES})
|
|
187
|
+
\b
|
|
141
188
|
## optional year
|
|
142
189
|
( [ ]
|
|
143
|
-
(
|
|
190
|
+
(?:
|
|
191
|
+
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
192
|
+
|
|
|
193
|
+
(?:
|
|
194
|
+
(?<yy>\d{2}) ## optional year 25 (yy)
|
|
195
|
+
## check NEGATIVE lookahead
|
|
196
|
+
(?! :|[:h]\d{2})
|
|
197
|
+
)
|
|
198
|
+
)
|
|
199
|
+
\b
|
|
144
200
|
)?
|
|
145
|
-
\b
|
|
146
201
|
)}ix
|
|
147
202
|
|
|
148
203
|
|
|
149
204
|
# e.g. iso-date - 2011-08-25
|
|
150
205
|
## note - allow/support ("shortcuts") e.g. 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
|
|
151
|
-
|
|
206
|
+
DATE_III_A_RE = %r{
|
|
152
207
|
(?<date>
|
|
153
208
|
\b
|
|
154
209
|
(?<year>\d{4})
|
|
@@ -159,11 +214,33 @@ DATE_III_RE = %r{
|
|
|
159
214
|
\b
|
|
160
215
|
)}ix
|
|
161
216
|
|
|
217
|
+
## starting w/ day/month/year e.g. 25-08-2011
|
|
218
|
+
DATE_III_B_RE = %r{
|
|
219
|
+
(?<date>
|
|
220
|
+
\b
|
|
221
|
+
## optional day name
|
|
222
|
+
((?<day_name>#{DAY_NAMES})
|
|
223
|
+
(?: ,?[ ]+)
|
|
224
|
+
)?
|
|
225
|
+
(?<day>\d{1,2})
|
|
226
|
+
-
|
|
227
|
+
(?<month>\d{1,2})
|
|
228
|
+
-
|
|
229
|
+
(?<year>\d{4})
|
|
230
|
+
\b
|
|
231
|
+
)}ix
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
|
|
162
235
|
## allow (short)"european" style 8.8.
|
|
163
236
|
## note - assume day/month!!!
|
|
164
237
|
DATE_IIII_RE = %r{
|
|
165
238
|
(?<date>
|
|
166
239
|
\b
|
|
240
|
+
## optional day name
|
|
241
|
+
((?<day_name>#{DAY_NAMES})
|
|
242
|
+
(?: ,?[ ]+)
|
|
243
|
+
)?
|
|
167
244
|
(?<day>\d{1,2})
|
|
168
245
|
\.
|
|
169
246
|
(?<month>\d{1,2})
|
|
@@ -179,6 +256,33 @@ DATE_IIII_RE = %r{
|
|
|
179
256
|
}ix
|
|
180
257
|
|
|
181
258
|
|
|
259
|
+
####################
|
|
260
|
+
### 04/03/2026 or 4/3/2026
|
|
261
|
+
## 04/03/26 or 4/3/26
|
|
262
|
+
## 04/03 or 4/3
|
|
263
|
+
DATE_IIIII_RE = %r{
|
|
264
|
+
(?<date>
|
|
265
|
+
\b
|
|
266
|
+
## optional day name
|
|
267
|
+
((?<day_name>#{DAY_NAMES})
|
|
268
|
+
(?: ,?[ ]+)
|
|
269
|
+
)?
|
|
270
|
+
(?<day>\d{1,2})
|
|
271
|
+
/
|
|
272
|
+
(?<month>\d{1,2})
|
|
273
|
+
\b
|
|
274
|
+
(?:
|
|
275
|
+
/
|
|
276
|
+
(?:
|
|
277
|
+
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
278
|
+
|
|
|
279
|
+
(?<yy>\d{2}) ## optional year 25 (yy)
|
|
280
|
+
)
|
|
281
|
+
\b
|
|
282
|
+
)?
|
|
283
|
+
)
|
|
284
|
+
}ix
|
|
285
|
+
|
|
182
286
|
|
|
183
287
|
|
|
184
288
|
#############################################
|
|
@@ -187,13 +291,56 @@ DATE_IIII_RE = %r{
|
|
|
187
291
|
DATE_RE = Regexp.union(
|
|
188
292
|
DATE_I_RE,
|
|
189
293
|
DATE_II_RE,
|
|
190
|
-
|
|
294
|
+
DATE_III_A_RE, ## e.g. 1973-08-14
|
|
295
|
+
DATE_III_B_RE,
|
|
191
296
|
DATE_IIII_RE, ## e.g. 8.8. or 8.13.79 or 08.14.1973
|
|
297
|
+
DATE_IIIII_RE, ## e.g. 08/14/1973
|
|
192
298
|
)
|
|
193
299
|
|
|
300
|
+
## todo - add more format style here; change to Regexp.union later!!!
|
|
301
|
+
DATE_LEGS_RE = DATE_LEGS_I_RE
|
|
194
302
|
|
|
195
|
-
|
|
196
|
-
##
|
|
303
|
+
|
|
304
|
+
## "internal" date helpers
|
|
305
|
+
def self._build_date( m )
|
|
306
|
+
date = {}
|
|
307
|
+
## map month names
|
|
308
|
+
## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
|
|
309
|
+
date[:y] = m[:year].to_i(10) if m[:year]
|
|
310
|
+
## check - use y too for two-digit year or keep separate - why? why not?
|
|
311
|
+
date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
|
|
312
|
+
date[:m] = m[:month].to_i(10) if m[:month]
|
|
313
|
+
date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
|
|
314
|
+
date[:d] = m[:day].to_i(10) if m[:day]
|
|
315
|
+
date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
|
|
316
|
+
|
|
317
|
+
date
|
|
318
|
+
end
|
|
319
|
+
def _build_date( m ) self.class._build_date( m ); end
|
|
320
|
+
|
|
321
|
+
def self._build_date_legs( m )
|
|
322
|
+
legs = {}
|
|
323
|
+
## map month names
|
|
324
|
+
## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
|
|
325
|
+
date = {}
|
|
326
|
+
date[:m] = MONTH_MAP[ m[:month_name1].downcase ]
|
|
327
|
+
date[:d] = m[:day1].to_i(10)
|
|
328
|
+
legs[:date1] = date
|
|
329
|
+
|
|
330
|
+
date = {}
|
|
331
|
+
date[:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
|
|
332
|
+
date[:d] = m[:day2].to_i(10)
|
|
333
|
+
legs[:date2] = date
|
|
334
|
+
|
|
335
|
+
legs
|
|
336
|
+
end
|
|
337
|
+
def _build_date_legs( m ) self.class._build_date_legs( m ); end
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
#############
|
|
343
|
+
## "top-level" add a date parser helper
|
|
197
344
|
def self.parse_date( str, start: )
|
|
198
345
|
if m=DATE_RE.match( str )
|
|
199
346
|
|
|
@@ -220,171 +367,6 @@ def self.parse_date( str, start: )
|
|
|
220
367
|
end
|
|
221
368
|
|
|
222
369
|
|
|
223
|
-
|
|
224
|
-
###
|
|
225
|
-
# date duration
|
|
226
|
-
# use - or + as separator
|
|
227
|
-
# in theory plus( +) only if dates
|
|
228
|
-
# are two days next to each other
|
|
229
|
-
#
|
|
230
|
-
# otherwise define new dates type in the future? why? why not?
|
|
231
|
-
#
|
|
232
|
-
# check for plus (+) if dates are next to each other (t+1) - why? why not?
|
|
233
|
-
|
|
234
|
-
#
|
|
235
|
-
# Sun Jun/23 - Wed Jun/26 -- YES
|
|
236
|
-
# Jun/23 - Jun/26 -- YES
|
|
237
|
-
#     Jun/25 - 26  - why? why not??? - YES - see below variant iii!!!
|
|
238
|
-
|
|
239
|
-
# Tue Jun/25 + Wed Jun/26 -- NO
|
|
240
|
-
# Jun/25 + Jun/26 -- NO
|
|
241
|
-
# Jun/25 .. 26 - why? why not???
|
|
242
|
-
# Jun/25 to 26 - why? why not???
|
|
243
|
-
# Jun/25 + 26 - add - why? why not???
|
|
244
|
-
# Sun-Wed Jun/23-26 - add - why? why not???
|
|
245
|
-
# Wed+Thu Jun/26+27 2024 - add - why? why not???
|
|
246
|
-
#
|
|
247
|
-
# maybe use comma and plus for list of dates
|
|
248
|
-
# Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
|
|
249
|
-
# Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
|
|
250
|
-
#
|
|
251
|
-
# add back optional comma (before) year - why? why not?
|
|
252
|
-
#
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
##
|
|
256
|
-
# todo add plus later on - why? why not?
|
|
257
|
-
### todo/fix add optional comma (,) before year
|
|
258
|
-
|
|
259
|
-
### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
|
|
260
|
-
## because alternation (|) has lowest priority/binding
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
DURATION_I_RE = %r{
|
|
264
|
-
(?<duration>
|
|
265
|
-
\b
|
|
266
|
-
(?:
|
|
267
|
-
## optional day name
|
|
268
|
-
((?<day_name1>#{DAY_NAMES})
|
|
269
|
-
[ ]
|
|
270
|
-
)?
|
|
271
|
-
(?<month_name1>#{MONTH_NAMES})
|
|
272
|
-
(?: \/|[ ] )
|
|
273
|
-
(?<day1>\d{1,2})
|
|
274
|
-
## optional year
|
|
275
|
-
( ,? # optional comma
|
|
276
|
-
[ ]
|
|
277
|
-
(?<year1>\d{4})
|
|
278
|
-
)?
|
|
279
|
-
|
|
280
|
-
## support + and - (add .. or such - why??)
|
|
281
|
-
[ ]* - [ ]*
|
|
282
|
-
|
|
283
|
-
## optional day name
|
|
284
|
-
((?<day_name2>#{DAY_NAMES})
|
|
285
|
-
[ ]
|
|
286
|
-
)?
|
|
287
|
-
(?<month_name2>#{MONTH_NAMES})
|
|
288
|
-
(?: \/|[ ] )
|
|
289
|
-
(?<day2>\d{1,2})
|
|
290
|
-
## optional year
|
|
291
|
-
( ,? # optional comma
|
|
292
|
-
[ ]
|
|
293
|
-
(?<year2>\d{4})
|
|
294
|
-
)?
|
|
295
|
-
)
|
|
296
|
-
\b
|
|
297
|
-
)}ix
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
# FIX - remove this variant
|
|
302
|
-
# "standardize on month day [year]" !!!!
|
|
303
|
-
|
|
304
|
-
=begin
|
|
305
|
-
###
|
|
306
|
-
# variant ii
|
|
307
|
-
# e.g. 26 July - 27 July
|
|
308
|
-
# 26 July,
|
|
309
|
-
XXX_DURATION_II_RE = %r{
|
|
310
|
-
(?<duration>
|
|
311
|
-
\b
|
|
312
|
-
(?
|
|
313
|
-
## optional day name
|
|
314
|
-
((?<day_name1>#{DAY_NAMES})
|
|
315
|
-
[ ]
|
|
316
|
-
)?
|
|
317
|
-
(?<day1>\d{1,2})
|
|
318
|
-
[ ]
|
|
319
|
-
(?<month_name1>#{MONTH_NAMES})
|
|
320
|
-
## optional year
|
|
321
|
-
(
|
|
322
|
-
[ ]
|
|
323
|
-
(?<year1>\d{4})
|
|
324
|
-
)?
|
|
325
|
-
|
|
326
|
-
## support + and - (add .. or such - why??)
|
|
327
|
-
[ ]*[-][ ]*
|
|
328
|
-
|
|
329
|
-
## optional day name
|
|
330
|
-
((?<day_name2>#{DAY_NAMES})
|
|
331
|
-
[ ]
|
|
332
|
-
)?
|
|
333
|
-
(?<day2>\d{1,2})
|
|
334
|
-
[ ]
|
|
335
|
-
(?<month_name2>#{MONTH_NAMES})
|
|
336
|
-
## optional year
|
|
337
|
-
( [ ]
|
|
338
|
-
(?<year2>\d{4})
|
|
339
|
-
)?
|
|
340
|
-
)
|
|
341
|
-
\b
|
|
342
|
-
)}ix
|
|
343
|
-
=end
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
# variant ii
|
|
347
|
-
# add support for shorthand
|
|
348
|
-
# August 16-18, 2011
|
|
349
|
-
# September 13-15, 2011
|
|
350
|
-
# October 18-20, 2011
|
|
351
|
-
# March/6-8, 2012
|
|
352
|
-
# March 6-8 2012
|
|
353
|
-
# March 6-8
|
|
354
|
-
#
|
|
355
|
-
# - add support for August 16+17 or such (and check 16+18)
|
|
356
|
-
# use <op> to check if day2 is a plus or range or such - why? why not?
|
|
357
|
-
|
|
358
|
-
DURATION_II_RE = %r{
|
|
359
|
-
(?<duration>
|
|
360
|
-
\b
|
|
361
|
-
(?:
|
|
362
|
-
(?<month_name1>#{MONTH_NAMES})
|
|
363
|
-
[ /]
|
|
364
|
-
(?<day1>\d{1,2})
|
|
365
|
-
-
|
|
366
|
-
(?<day2>\d{1,2})
|
|
367
|
-
(?:
|
|
368
|
-
,? ## optional comma
|
|
369
|
-
[ ]
|
|
370
|
-
(?<year1>\d{4})
|
|
371
|
-
)? ## optional year
|
|
372
|
-
)
|
|
373
|
-
\b
|
|
374
|
-
)}ix
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
#############################################
|
|
379
|
-
# map tables
|
|
380
|
-
# note: order matters; first come-first matched/served
|
|
381
|
-
DURATION_RE = Regexp.union(
|
|
382
|
-
DURATION_I_RE,
|
|
383
|
-
DURATION_II_RE,
|
|
384
|
-
)
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
370
|
end # class Lexer
|
|
389
371
|
end # module SportDb
|
|
390
372
|
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
###
|
|
8
|
+
# date duration
|
|
9
|
+
# use - or + as separator
|
|
10
|
+
# in theory plus( +) only if dates
|
|
11
|
+
# are two days next to each other
|
|
12
|
+
#
|
|
13
|
+
# otherwise define new dates type in the future? why? why not?
|
|
14
|
+
#
|
|
15
|
+
# check for plus (+) if dates are next to each other (t+1) - why? why not?
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# Sun Jun 23 - Wed Jun 26 -- YES
|
|
19
|
+
# Jun 23 - Jun 26 -- YES
|
|
20
|
+
#     Jun 25 - 26  - why? why not??? - YES - see below variant iii!!!
|
|
21
|
+
|
|
22
|
+
# Tue Jun 25 + Wed Jun 26 -- NO
|
|
23
|
+
# Jun 25 + Jun 26 -- NO
|
|
24
|
+
# Jun 25 .. 26 - why? why not???
|
|
25
|
+
# Jun 25 to 26 - why? why not???
|
|
26
|
+
# Jun 25 + 26 - add - why? why not???
|
|
27
|
+
# Sun-Wed Jun 23-26 - add - why? why not???
|
|
28
|
+
# Wed+Thu Jun 26+27 2024 - add - why? why not???
|
|
29
|
+
#
|
|
30
|
+
# maybe use comma and plus for list of dates
|
|
31
|
+
# Tue Jun 25, Wed Jun 26, Thu Jun 27 ??
|
|
32
|
+
# Tue Jun 25 + Wed Jun 26 + Thu Jun 27 ??
|
|
33
|
+
#
|
|
34
|
+
# add back optional comma (before) year - why? why not?
|
|
35
|
+
#
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
##
|
|
39
|
+
# todo add plus later on - why? why not?
|
|
40
|
+
### todo/fix add optional comma (,) before year
|
|
41
|
+
|
|
42
|
+
### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
|
|
43
|
+
## because alternation (|) has lowest priority/binding
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
DURATION_I_RE = %r{
|
|
47
|
+
(?<duration>
|
|
48
|
+
\b
|
|
49
|
+
(?:
|
|
50
|
+
## optional day name
|
|
51
|
+
((?<day_name1>#{DAY_NAMES})
|
|
52
|
+
[ ]
|
|
53
|
+
)?
|
|
54
|
+
(?<month_name1>#{MONTH_NAMES})
|
|
55
|
+
[ ]
|
|
56
|
+
(?<day1>\d{1,2})
|
|
57
|
+
## optional year
|
|
58
|
+
( ,? # optional comma
|
|
59
|
+
[ ]
|
|
60
|
+
(?<year1>\d{4})
|
|
61
|
+
)?
|
|
62
|
+
|
|
63
|
+
## support + and - (add .. or such - why??)
|
|
64
|
+
[ ]* - [ ]*
|
|
65
|
+
|
|
66
|
+
## optional day name
|
|
67
|
+
((?<day_name2>#{DAY_NAMES})
|
|
68
|
+
[ ]
|
|
69
|
+
)?
|
|
70
|
+
(?<month_name2>#{MONTH_NAMES})
|
|
71
|
+
[ ]
|
|
72
|
+
(?<day2>\d{1,2})
|
|
73
|
+
## optional year
|
|
74
|
+
( ,? # optional comma
|
|
75
|
+
[ ]
|
|
76
|
+
(?<year2>\d{4})
|
|
77
|
+
)?
|
|
78
|
+
)
|
|
79
|
+
\b
|
|
80
|
+
)}ix
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# FIX - remove this variant
|
|
85
|
+
# "standardize on month day [year]" !!!!
|
|
86
|
+
|
|
87
|
+
=begin
|
|
88
|
+
###
|
|
89
|
+
# variant ii
|
|
90
|
+
# e.g. 26 July - 27 July
|
|
91
|
+
# 26 July,
|
|
92
|
+
XXX_DURATION_II_RE = %r{
|
|
93
|
+
(?<duration>
|
|
94
|
+
\b
|
|
95
|
+
(?
|
|
96
|
+
## optional day name
|
|
97
|
+
((?<day_name1>#{DAY_NAMES})
|
|
98
|
+
[ ]
|
|
99
|
+
)?
|
|
100
|
+
(?<day1>\d{1,2})
|
|
101
|
+
[ ]
|
|
102
|
+
(?<month_name1>#{MONTH_NAMES})
|
|
103
|
+
## optional year
|
|
104
|
+
(
|
|
105
|
+
[ ]
|
|
106
|
+
(?<year1>\d{4})
|
|
107
|
+
)?
|
|
108
|
+
|
|
109
|
+
## support + and - (add .. or such - why??)
|
|
110
|
+
[ ]*[-][ ]*
|
|
111
|
+
|
|
112
|
+
## optional day name
|
|
113
|
+
((?<day_name2>#{DAY_NAMES})
|
|
114
|
+
[ ]
|
|
115
|
+
)?
|
|
116
|
+
(?<day2>\d{1,2})
|
|
117
|
+
[ ]
|
|
118
|
+
(?<month_name2>#{MONTH_NAMES})
|
|
119
|
+
## optional year
|
|
120
|
+
( [ ]
|
|
121
|
+
(?<year2>\d{4})
|
|
122
|
+
)?
|
|
123
|
+
)
|
|
124
|
+
\b
|
|
125
|
+
)}ix
|
|
126
|
+
=end
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# variant ii
|
|
130
|
+
# add support for shorthand
|
|
131
|
+
# August 16-18, 2011
|
|
132
|
+
# September 13-15, 2011
|
|
133
|
+
# October 18-20, 2011
|
|
134
|
+
# March 6-8 2012
|
|
135
|
+
# March 6-8
|
|
136
|
+
#
|
|
137
|
+
# - add support for August 16+17 or such (and check 16+18)
|
|
138
|
+
# use <op> to check if day2 is a plus or range or such - why? why not?
|
|
139
|
+
|
|
140
|
+
DURATION_II_RE = %r{
|
|
141
|
+
(?<duration>
|
|
142
|
+
\b
|
|
143
|
+
(?:
|
|
144
|
+
(?<month_name1>#{MONTH_NAMES})
|
|
145
|
+
[ ]
|
|
146
|
+
(?<day1>\d{1,2})
|
|
147
|
+
-
|
|
148
|
+
(?<day2>\d{1,2})
|
|
149
|
+
(?:
|
|
150
|
+
,? ## optional comma
|
|
151
|
+
[ ]
|
|
152
|
+
(?<year1>\d{4})
|
|
153
|
+
)? ## optional year
|
|
154
|
+
)
|
|
155
|
+
\b
|
|
156
|
+
)}ix
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
#############################################
|
|
161
|
+
# map tables
|
|
162
|
+
# note: order matters; first come-first matched/served
|
|
163
|
+
DURATION_RE = Regexp.union(
|
|
164
|
+
DURATION_I_RE,
|
|
165
|
+
DURATION_II_RE,
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def self._build_duration( m )
|
|
170
|
+
## todo/check/fix - if end: works for kwargs!!!!!
|
|
171
|
+
duration = { start: {}, end: {}}
|
|
172
|
+
|
|
173
|
+
duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
|
|
174
|
+
duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
|
|
175
|
+
duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
|
|
176
|
+
duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
|
|
177
|
+
|
|
178
|
+
duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
|
|
179
|
+
duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
|
|
180
|
+
duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
|
|
181
|
+
duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
|
|
182
|
+
|
|
183
|
+
duration
|
|
184
|
+
end
|
|
185
|
+
def _build_duration(m) self.class._build_duration( m ); end
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
end # class Lexer
|
|
189
|
+
end # module SportDb
|
|
190
|
+
|