sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -106,49 +106,104 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
106
106
 
107
107
 
108
108
 
109
- ## todo - add more date variants !!!!
109
+ ## todo - add more date variants !!!! why? why not?
110
110
 
111
- # e.g. Fri Aug/9 or Fri Aug 9
111
+
112
+ # e.g. Fri Aug 9
113
+ # Fri Aug 9
114
+ ## Fri, Aug 9
115
+ ## Fri, Aug 9 2024
116
+ ## Fri, Aug 9, 2024
117
+ ## Aug 9, 2024
118
+ ## Aug 9, 2024
119
+ ## note - eat-up optional comma after DAY_NAMES!!
120
+ ##
121
+ ## note - Fri Aug/9 no longer supported!!!
112
122
  DATE_I_RE = %r{
113
123
  (?<date>
114
124
  \b
115
125
  ## optional day name
116
126
  ((?<day_name>#{DAY_NAMES})
117
- [ ]
127
+ (?: ,?[ ]+)
118
128
  )?
119
129
  (?<month_name>#{MONTH_NAMES})
120
- (?: \/|[ ] )
130
+ [ ]
121
131
  (?<day>\d{1,2})
132
+ \b
122
133
  ## optional year
123
- ( [ ]
124
- (?<year>\d{4})
134
+ ( ,? [ ] ## note - comma optional with single space required for now
135
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
136
+ \b
125
137
  )?
138
+ )}ix
139
+
140
+
141
+ ### todo/fix - add (opt) day_name later
142
+ ## add (opt) year later
143
+ # e.g. Aug 9 & Aug 10
144
+ ### note - allow shortcut e.g. Aug 9 & 10
145
+ DATE_LEGS_I_RE = %r{
146
+ (?<date_legs>
147
+ \b
148
+ (?<month_name1>#{MONTH_NAMES})
149
+ [ ]
150
+ (?<day1>\d{1,2})
151
+ [ ] & [ ]
152
+ (?:
153
+ (?<month_name2>#{MONTH_NAMES})
154
+ [ ]
155
+ )? ## note - make 2nd month_name optional
156
+ (?<day2>\d{1,2})
126
157
  \b
127
158
  )}ix
128
159
 
129
160
 
161
+ ###
130
162
  # e.g. 3 June or 10 June
163
+ ## note - allow more spaces between DAY_NAMES and DAY e.g.
164
+ ## Sun 1 Mar
165
+ ## Wed 4 Mar
166
+ ## Sat 14 Mar
167
+ ## Sat 11 Apr
168
+ ## Sat 11 Apr 2021
169
+ ## Sat 11 Apr 21
170
+ ##
171
+ ## Sat, 11 Apr
172
+ ## note - eat-up optional comma after DAY_NAMES!!
173
+ ##
174
+ ## note - Sat 14 Mar 17:30
175
+ ## check two-digit year (with NEGATIVE lookahead for time!!!)
176
+
131
177
  DATE_II_RE = %r{
132
178
  (?<date>
133
179
  \b
134
180
  ## optional day name
135
181
  ((?<day_name>#{DAY_NAMES})
136
- [ ]
182
+ (?: ,?[ ]+)
137
183
  )?
138
184
  (?<day>\d{1,2})
139
185
  [ ]
140
186
  (?<month_name>#{MONTH_NAMES})
187
+ \b
141
188
  ## optional year
142
189
  ( [ ]
143
- (?<year>\d{4})
190
+ (?:
191
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
192
+ |
193
+ (?:
194
+ (?<yy>\d{2}) ## optional year 25 (yy)
195
+ ## check NEGATIVE lookahead
196
+ (?! :|[:h]\d{2})
197
+ )
198
+ )
199
+ \b
144
200
  )?
145
- \b
146
201
  )}ix
147
202
 
148
203
 
149
204
  # e.g. iso-date - 2011-08-25
150
205
  ## note - allow/support ("shortcuts") e.g 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
151
- DATE_III_RE = %r{
206
+ DATE_III_A_RE = %r{
152
207
  (?<date>
153
208
  \b
154
209
  (?<year>\d{4})
@@ -159,11 +214,33 @@ DATE_III_RE = %r{
159
214
  \b
160
215
  )}ix
161
216
 
217
+ ## starting w/ day/month/year e.g. 25-08-2011
218
+ DATE_III_B_RE = %r{
219
+ (?<date>
220
+ \b
221
+ ## optional day name
222
+ ((?<day_name>#{DAY_NAMES})
223
+ (?: ,?[ ]+)
224
+ )?
225
+ (?<day>\d{1,2})
226
+ -
227
+ (?<month>\d{1,2})
228
+ -
229
+ (?<year>\d{4})
230
+ \b
231
+ )}ix
232
+
233
+
234
+
162
235
  ## allow (short)"european" style 8.8.
163
236
  ## note - assume day/month!!!
164
237
  DATE_IIII_RE = %r{
165
238
  (?<date>
166
239
  \b
240
+ ## optional day name
241
+ ((?<day_name>#{DAY_NAMES})
242
+ (?: ,?[ ]+)
243
+ )?
167
244
  (?<day>\d{1,2})
168
245
  \.
169
246
  (?<month>\d{1,2})
@@ -179,6 +256,33 @@ DATE_IIII_RE = %r{
179
256
  }ix
180
257
 
181
258
 
259
+ ####################
260
+ ### 04/03/2026 or 4/3/2026
261
+ ## 04/03/26 or 4/3/26
262
+ ## 04/03 or 4/3
263
+ DATE_IIIII_RE = %r{
264
+ (?<date>
265
+ \b
266
+ ## optional day name
267
+ ((?<day_name>#{DAY_NAMES})
268
+ (?: ,?[ ]+)
269
+ )?
270
+ (?<day>\d{1,2})
271
+ /
272
+ (?<month>\d{1,2})
273
+ \b
274
+ (?:
275
+ /
276
+ (?:
277
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
278
+ |
279
+ (?<yy>\d{2}) ## optional year 25 (yy)
280
+ )
281
+ \b
282
+ )?
283
+ )
284
+ }ix
285
+
182
286
 
183
287
 
184
288
  #############################################
@@ -187,13 +291,56 @@ DATE_IIII_RE = %r{
187
291
  DATE_RE = Regexp.union(
188
292
  DATE_I_RE,
189
293
  DATE_II_RE,
190
- DATE_III_RE,
294
+ DATE_III_A_RE, ## e.g. 1973-08-14
295
+ DATE_III_B_RE,
191
296
  DATE_IIII_RE, ## e.g. 8.8. or 8.13.79 or 08.14.1973
297
+ DATE_IIIII_RE, ## e.g. 14/08/1973 (day/month/year)
192
298
  )
193
299
 
300
+ ## todo - add more format style here; change to Regexp.union later!!!
301
+ DATE_LEGS_RE = DATE_LEGS_I_RE
194
302
 
195
- ##
196
- ## add a date parser helper
303
+
304
+ ## "internal" date helpers
305
+ def self._build_date( m )
306
+ date = {}
307
+ ## map month names
308
+ ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
309
+ date[:y] = m[:year].to_i(10) if m[:year]
310
+ ## check - use y too for two-digit year or keep separate - why? why not?
311
+ date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
312
+ date[:m] = m[:month].to_i(10) if m[:month]
313
+ date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
314
+ date[:d] = m[:day].to_i(10) if m[:day]
315
+ date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
316
+
317
+ date
318
+ end
319
+ def _build_date( m ) self.class._build_date( m ); end
320
+
321
+ def self._build_date_legs( m )
322
+ legs = {}
323
+ ## map month names
324
+ ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
325
+ date = {}
326
+ date[:m] = MONTH_MAP[ m[:month_name1].downcase ]
327
+ date[:d] = m[:day1].to_i(10)
328
+ legs[:date1] = date
329
+
330
+ date = {}
331
+ date[:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
332
+ date[:d] = m[:day2].to_i(10)
333
+ legs[:date2] = date
334
+
335
+ legs
336
+ end
337
+ def _build_date_legs( m ) self.class._build_date_legs( m ); end
338
+
339
+
340
+
341
+
342
+ #############
343
+ ## "top-level" add a date parser helper
197
344
  def self.parse_date( str, start: )
198
345
  if m=DATE_RE.match( str )
199
346
 
@@ -220,171 +367,6 @@ def self.parse_date( str, start: )
220
367
  end
221
368
 
222
369
 
223
-
224
- ###
225
- # date duration
226
- # use - or + as separator
227
- # in theory plus( +) only if dates
228
- # are two days next to each other
229
- #
230
- # otherwise define new dates type in the future? why? why not?
231
- #
232
- # check for plus (+) if dates are next to each other (t+1) - why? why not?
233
-
234
- #
235
- # Sun Jun/23 - Wed Jun/26 -- YES
236
- # Jun/23 - Jun/26 -- YES
237
- # Jun/25 - 26 - why? why not??? - YES - see blow variant iii!!!
238
-
239
- # Tue Jun/25 + Wed Jun/26 -- NO
240
- # Jun/25 + Jun/26 -- NO
241
- # Jun/25 .. 26 - why? why not???
242
- # Jun/25 to 26 - why? why not???
243
- # Jun/25 + 26 - add - why? why not???
244
- # Sun-Wed Jun/23-26 - add - why? why not???
245
- # Wed+Thu Jun/26+27 2024 - add - why? why not???
246
- #
247
- # maybe use comma and plus for list of dates
248
- # Tue Jun/25, Wed Jun/26, Thu Jun/27 ??
249
- # Tue Jun/25 + Wed Jun/26 + Thu Jun/27 ??
250
- #
251
- # add back optional comma (before) year - why? why not?
252
- #
253
-
254
-
255
- ##
256
- # todo add plus later on - why? why not?
257
- ### todo/fix add optional comma (,) before year
258
-
259
- ### regex note/tip/remindr - \b () \b MUST always get enclosed in parantheses
260
- ## because alternation (|) has lowest priority/binding
261
-
262
-
263
- DURATION_I_RE = %r{
264
- (?<duration>
265
- \b
266
- (?:
267
- ## optional day name
268
- ((?<day_name1>#{DAY_NAMES})
269
- [ ]
270
- )?
271
- (?<month_name1>#{MONTH_NAMES})
272
- (?: \/|[ ] )
273
- (?<day1>\d{1,2})
274
- ## optional year
275
- ( ,? # optional comma
276
- [ ]
277
- (?<year1>\d{4})
278
- )?
279
-
280
- ## support + and - (add .. or such - why??)
281
- [ ]* - [ ]*
282
-
283
- ## optional day name
284
- ((?<day_name2>#{DAY_NAMES})
285
- [ ]
286
- )?
287
- (?<month_name2>#{MONTH_NAMES})
288
- (?: \/|[ ] )
289
- (?<day2>\d{1,2})
290
- ## optional year
291
- ( ,? # optional comma
292
- [ ]
293
- (?<year2>\d{4})
294
- )?
295
- )
296
- \b
297
- )}ix
298
-
299
-
300
-
301
- # FIX - remove this variant
302
- # "standardize on month day [year]" !!!!
303
-
304
- =begin
305
- ###
306
- # variant ii
307
- # e.g. 26 July - 27 July
308
- # 26 July,
309
- XXX_DURATION_II_RE = %r{
310
- (?<duration>
311
- \b
312
- (?
313
- ## optional day name
314
- ((?<day_name1>#{DAY_NAMES})
315
- [ ]
316
- )?
317
- (?<day1>\d{1,2})
318
- [ ]
319
- (?<month_name1>#{MONTH_NAMES})
320
- ## optional year
321
- (
322
- [ ]
323
- (?<year1>\d{4})
324
- )?
325
-
326
- ## support + and - (add .. or such - why??)
327
- [ ]*[-][ ]*
328
-
329
- ## optional day name
330
- ((?<day_name2>#{DAY_NAMES})
331
- [ ]
332
- )?
333
- (?<day2>\d{1,2})
334
- [ ]
335
- (?<month_name2>#{MONTH_NAMES})
336
- ## optional year
337
- ( [ ]
338
- (?<year2>\d{4})
339
- )?
340
- )
341
- \b
342
- )}ix
343
- =end
344
-
345
-
346
- # variant ii
347
- # add support for shorthand
348
- # August 16-18, 2011
349
- # September 13-15, 2011
350
- # October 18-20, 2011
351
- # March/6-8, 2012
352
- # March 6-8 2012
353
- # March 6-8
354
- #
355
- # - add support for August 16+17 or such (and check 16+18)
356
- # use <op> to check if day2 is a plus or range or such - why? why not?
357
-
358
- DURATION_II_RE = %r{
359
- (?<duration>
360
- \b
361
- (?:
362
- (?<month_name1>#{MONTH_NAMES})
363
- [ /]
364
- (?<day1>\d{1,2})
365
- -
366
- (?<day2>\d{1,2})
367
- (?:
368
- ,? ## optional comma
369
- [ ]
370
- (?<year1>\d{4})
371
- )? ## optional year
372
- )
373
- \b
374
- )}ix
375
-
376
-
377
-
378
- #############################################
379
- # map tables
380
- # note: order matters; first come-first matched/served
381
- DURATION_RE = Regexp.union(
382
- DURATION_I_RE,
383
- DURATION_II_RE,
384
- )
385
-
386
-
387
-
388
370
  end # class Lexer
389
371
  end # module SportDb
390
372
 
@@ -0,0 +1,190 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+
6
+
7
+ ###
8
+ # date duration
9
+ # use - or + as separator
10
+ # in theory plus( +) only if dates
11
+ # are two days next to each other
12
+ #
13
+ # otherwise define new dates type in the future? why? why not?
14
+ #
15
+ # check for plus (+) if dates are next to each other (t+1) - why? why not?
16
+
17
+ #
18
+ # Sun Jun 23 - Wed Jun 26 -- YES
19
+ # Jun 23 - Jun 26 -- YES
20
+ # Jun 25 - 26 - why? why not??? - YES - see below variant iii!!!
21
+
22
+ # Tue Jun 25 + Wed Jun 26 -- NO
23
+ # Jun 25 + Jun 26 -- NO
24
+ # Jun 25 .. 26 - why? why not???
25
+ # Jun 25 to 26 - why? why not???
26
+ # Jun 25 + 26 - add - why? why not???
27
+ # Sun-Wed Jun 23-26 - add - why? why not???
28
+ # Wed+Thu Jun 26+27 2024 - add - why? why not???
29
+ #
30
+ # maybe use comma and plus for list of dates
31
+ # Tue Jun 25, Wed Jun 26, Thu Jun 27 ??
32
+ # Tue Jun 25 + Wed Jun 26 + Thu Jun 27 ??
33
+ #
34
+ # add back optional comma (before) year - why? why not?
35
+ #
36
+
37
+
38
+ ##
39
+ # todo add plus later on - why? why not?
40
+ ### todo/fix add optional comma (,) before year
41
+
42
+ ### regex note/tip/reminder - \b () \b MUST always get enclosed in parentheses
43
+ ## because alternation (|) has lowest priority/binding
44
+
45
+
46
+ DURATION_I_RE = %r{
47
+ (?<duration>
48
+ \b
49
+ (?:
50
+ ## optional day name
51
+ ((?<day_name1>#{DAY_NAMES})
52
+ [ ]
53
+ )?
54
+ (?<month_name1>#{MONTH_NAMES})
55
+ [ ]
56
+ (?<day1>\d{1,2})
57
+ ## optional year
58
+ ( ,? # optional comma
59
+ [ ]
60
+ (?<year1>\d{4})
61
+ )?
62
+
63
+ ## support + and - (add .. or such - why??)
64
+ [ ]* - [ ]*
65
+
66
+ ## optional day name
67
+ ((?<day_name2>#{DAY_NAMES})
68
+ [ ]
69
+ )?
70
+ (?<month_name2>#{MONTH_NAMES})
71
+ [ ]
72
+ (?<day2>\d{1,2})
73
+ ## optional year
74
+ ( ,? # optional comma
75
+ [ ]
76
+ (?<year2>\d{4})
77
+ )?
78
+ )
79
+ \b
80
+ )}ix
81
+
82
+
83
+
84
+ # FIX - remove this variant
85
+ # "standardize on month day [year]" !!!!
86
+
87
+ =begin
88
+ ###
89
+ # variant ii
90
+ # e.g. 26 July - 27 July
91
+ # 26 July,
92
+ XXX_DURATION_II_RE = %r{
93
+ (?<duration>
94
+ \b
95
+ (?
96
+ ## optional day name
97
+ ((?<day_name1>#{DAY_NAMES})
98
+ [ ]
99
+ )?
100
+ (?<day1>\d{1,2})
101
+ [ ]
102
+ (?<month_name1>#{MONTH_NAMES})
103
+ ## optional year
104
+ (
105
+ [ ]
106
+ (?<year1>\d{4})
107
+ )?
108
+
109
+ ## support + and - (add .. or such - why??)
110
+ [ ]*[-][ ]*
111
+
112
+ ## optional day name
113
+ ((?<day_name2>#{DAY_NAMES})
114
+ [ ]
115
+ )?
116
+ (?<day2>\d{1,2})
117
+ [ ]
118
+ (?<month_name2>#{MONTH_NAMES})
119
+ ## optional year
120
+ ( [ ]
121
+ (?<year2>\d{4})
122
+ )?
123
+ )
124
+ \b
125
+ )}ix
126
+ =end
127
+
128
+
129
+ # variant ii
130
+ # add support for shorthand
131
+ # August 16-18, 2011
132
+ # September 13-15, 2011
133
+ # October 18-20, 2011
134
+ # March 6-8 2012
135
+ # March 6-8
136
+ #
137
+ # - add support for August 16+17 or such (and check 16+18)
138
+ # use <op> to check if day2 is a plus or range or such - why? why not?
139
+
140
+ DURATION_II_RE = %r{
141
+ (?<duration>
142
+ \b
143
+ (?:
144
+ (?<month_name1>#{MONTH_NAMES})
145
+ [ ]
146
+ (?<day1>\d{1,2})
147
+ -
148
+ (?<day2>\d{1,2})
149
+ (?:
150
+ ,? ## optional comma
151
+ [ ]
152
+ (?<year1>\d{4})
153
+ )? ## optional year
154
+ )
155
+ \b
156
+ )}ix
157
+
158
+
159
+
160
+ #############################################
161
+ # map tables
162
+ # note: order matters; first come-first matched/served
163
+ DURATION_RE = Regexp.union(
164
+ DURATION_I_RE,
165
+ DURATION_II_RE,
166
+ )
167
+
168
+
169
+ def self._build_duration( m )
170
+ ## todo/check/fix - if end: works for kwargs!!!!!
171
+ duration = { start: {}, end: {}}
172
+
173
+ duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
174
+ duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
175
+ duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
176
+ duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
177
+
178
+ duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
179
+ duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
180
+ duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
181
+ duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
182
+
183
+ duration
184
+ end
185
+ def _build_duration(m) self.class._build_duration( m ); end
186
+
187
+
188
+ end # class Lexer
189
+ end # module SportDb
190
+