sportdb-parser 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,7 @@ TEXT_RE = %r{
54
54
  )
55
55
 
56
56
  (?:(?: (?:[ ]
57
- (?!vs?\.?[ ]) ## note - exclude (v[ ]/vs[ ]/v.[ ]/vs.[ ])
57
+ (?!vs?[ ]) ## note - exclude (v[ ]/vs[ ])
58
58
  )
59
59
  | # only single spaces allowed inline!!!
60
60
  [-]
@@ -68,29 +68,46 @@ BASICS_RE = %r{
68
68
  (?<vs>
69
69
  (?<=[ ]) # Positive lookbehind for space
70
70
  (?:
71
- vs\.?| ## allow optional dot (eg. vs. v.)
72
- v\.?|
73
- -
74
- ) # not bigger match first e.g. vs than v etc.
71
+ vs|v
72
+ )
73
+ # note - bigger match first e.g. vs before v etc.
74
+ # todo/fix - make vs|v case sensitive!!! only match v/vs - why? why not?
75
75
  (?=[ ]) # positive lookahead for space
76
76
  )
77
77
  |
78
+ (?<spaces> [ ]{2,}) |
79
+ (?<space> [ ])
80
+ |
81
+ (?<sym>[;,@|\[\]-])
82
+ }ix
83
+
84
+
85
+ ## removed from basics
86
+ =begin
78
87
  (?<none>
79
88
  (?<=[ \[]|^) # Positive lookbehind for space or [
80
89
  -
81
90
  (?=[ ]*;) # positive lookahead for space
82
91
  )
83
92
  |
84
- (?<spaces> [ ]{2,}) |
85
- (?<space> [ ])
86
- |
87
- (?<sym>[;,@|\[\]])
88
- }ix
93
+ (?<vs>
94
+ (?<=[ ]) # Positive lookbehind for space
95
+ (?:
96
+ vs\.?| ## allow optional dot (eg. vs. v.)
97
+ v\.?|
98
+ -
99
+ ) # not bigger match first e.g. vs than v etc.
100
+ (?=[ ]) # positive lookahead for space
101
+ )
102
+ |
103
+
104
+ make - into a simple symbol !!!
105
+ =end
89
106
 
90
107
 
91
108
  MINUTE_RE = %r{
92
109
  (?<minute>
93
- (?<=[ ]) # Positive lookbehind for space required
110
+ (?<=[ (]) # Positive lookbehind for space or opening ( e.g. (61') required
94
111
  (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
95
112
  (?: \+
96
113
  (?<value2>\d{1,3})
@@ -119,7 +136,125 @@ GOAL_OG_RE = %r{
119
136
 
120
137
 
121
138
 
122
- RE = Regexp.union( STATUS_RE,
139
+
140
+
141
+ PROP_BASICS_RE = %r{
142
+ (?<spaces> [ ]{2,}) |
143
+ (?<space> [ ])
144
+ |
145
+ (?<sym>[.;,\(\)\[\]-]) ## note - dot (.) is the (all-important) end-of-prop marker!!!
146
+ }ix
147
+
148
+
149
+ ## name different from text (does not allow number in name/text)
150
+ ##
151
+ ## note - includes special handling for dot (.) if at the end of line!!!
152
+ ## end-of-line dot (.) is the end-of-prop marker - do NOT eat-up!!!
153
+
154
+ PROP_NAME_RE = %r{
155
+ (?<prop_name> \b
156
+ (?<name>
157
+ \p{L}+
158
+ (?: \. (?: (?![ ]*$) )
159
+ )? ## edge case - check for end of prop marker! (e.g. Stop.)
160
+ (?:
161
+ [ ]? # only single spaces allowed inline!!!
162
+ (?:
163
+ (?:
164
+ (?<=\p{L}) ## use lookbehind
165
+ [/'-] ## must be surrounded by letters
166
+ ## e.g. One/Two NOT
167
+ ## One/ Two or One / Two or One /Two etc.
168
+ (?=\p{L}) ## use lookahead
169
+ )
170
+ |
171
+ (?:
172
+ (?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
173
+ ['] ## must be surrounded by leading space and
174
+ ## trailing letters (e.g. UDI 'Beter Bed)
175
+ (?=\p{L}) ## use lookahead
176
+ )
177
+ |
178
+ (?:
179
+ (?<=\p{L}) ## use lookbehind
180
+ ['] ## must be surrounded by leading letter and
181
+ ## trailing space PLUS letter (e.g. UDI' Beter Bed)
182
+ (?=[ ]\p{L}) ## use lookahead (space WITH letter
183
+ )
184
+ |
185
+ (?: \p{L}+
186
+ (?: \.
187
+ (?: (?![ ]*$) )
188
+ )? ## last dot is delimiter!!!
189
+ )
190
+ )+
191
+ )*
192
+ )
193
+ ## add lookahead - must be non-alphanum (or dot)
194
+ (?=[ .,;\]\)]|$)
195
+ )
196
+ }ix
197
+
198
+
199
+
200
+
201
+ ##############
202
+ # add support for props/ attributes e.g.
203
+ #
204
+ # Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt – Andrich [Y] (46' Groß),
205
+ # Kroos (80' Can) – Musiala (74' Müller), Gündogan,
206
+ # Wirtz (63' Sane) – Havertz (63' Füllkrug).
207
+ # Scotland: Gunn – Porteous [R 44'], Hendry, Tierney (78' McKenna) – Ralston [Y],
208
+ # McTominay, McGregor (67' Gilmour), Robertson – Christie (82' Shankland),
209
+ # Adams (46' Hanley), McGinn (67' McLean).
210
+ #
211
+ ## note: colon (:) MUST be followed by one (or more) spaces
212
+ ## make sure mon feb 12 18:10 will not match
213
+ ## allow 1. FC Köln etc.
214
+ ## Mainz 05:
215
+ ## limit to 30 chars max
216
+ ## only allow chars incl. intl but (NOT ()[]/;)
217
+
218
+
219
+ PROP_KEY_RE = %r{
220
+ (?<prop_key> \b
221
+ (?<key>
222
+ (?:\p{L}+
223
+ |
224
+ \d+ # check for num lookahead (MUST be space or dot)
225
+ ## MUST be followed by (optional dot) and
226
+ ## required space !!!
227
+ ## MUST be followed by a to z!!!!
228
+ \.? ## optional dot
229
+ [ ]? ## make space optional too - why? why not?
230
+ ## yes - eg. 1st, 2nd, 5th etc.
231
+ \p{L}+
232
+ )
233
+ [\d\p{L}'/° -]*? ## allow almost anything
234
+ ## fix - add negative lookahead
235
+ ## no space and dash etc.
236
+ ## only allowed "inline" not at the end
237
+ ## must end with letter or digit!
238
+ )
239
+ [ ]*? # slurp trailing spaces
240
+ :
241
+ (?=[ ]+) ## positive lookahead (must be followed by space!!)
242
+ )
243
+ }ix
244
+
245
+
246
+
247
+
248
+ PROP_RE = Regexp.union(
249
+ PROP_BASICS_RE,
250
+ MINUTE_RE,
251
+ PROP_NAME_RE,
252
+ )
253
+
254
+
255
+
256
+ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should switch into prop mode!!!)
257
+ STATUS_RE,
123
258
  TIMEZONE_RE,
124
259
  TIME_RE,
125
260
  DURATION_RE, # note - duration MUST match before date
@@ -130,225 +265,5 @@ RE = Regexp.union( STATUS_RE,
130
265
  TEXT_RE )
131
266
 
132
267
 
133
- def log( msg )
134
- ## append msg to ./logs.txt
135
- ## use ./errors.txt - why? why not?
136
- File.open( './logs.txt', 'a:utf-8' ) do |f|
137
- f.write( msg )
138
- f.write( "\n" )
139
- end
140
- end
141
-
142
-
143
-
144
- def tokenize_with_errors( line, typed: false,
145
- debug: false )
146
- tokens = []
147
- errors = [] ## keep a list of errors - why? why not?
148
-
149
- puts ">#{line}<" if debug
150
-
151
- pos = 0
152
- ## track last offsets - to report error on no match
153
- ## or no match in end of string
154
- offsets = [0,0]
155
- m = nil
156
-
157
- while m = RE.match( line, pos )
158
- if debug
159
- pp m
160
- puts "pos: #{pos}"
161
- end
162
- offsets = [m.begin(0), m.end(0)]
163
-
164
- if offsets[0] != pos
165
- ## match NOT starting at start/begin position!!!
166
- ## report parse error!!!
167
- msg = "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
168
- puts msg
169
-
170
- errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
171
- log( msg )
172
- end
173
-
174
- ##
175
- ## todo/fix - also check if possible
176
- ## if no match but not yet end off string!!!!
177
- ## report skipped text run too!!!
178
-
179
- pos = offsets[1]
180
-
181
- pp offsets if debug
182
-
183
- t = if m[:space]
184
- ## skip space
185
- nil
186
- elsif m[:spaces]
187
- ## skip spaces
188
- nil
189
- elsif m[:text]
190
- [:text, m[:text]] ## keep pos - why? why not?
191
- elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
192
- if m[:status_note] ## includes note? e.g. awarded; originally 2-0
193
- [:status, m[:status], {note:m[:status_note]}]
194
- else
195
- [:status, m[:status]]
196
- end
197
- elsif m[:time]
198
- if typed
199
- ## unify to iso-format
200
- ### 12.40 => 12:40
201
- ## 12h40 => 12:40 etc.
202
- ## keep string (no time-only type in ruby)
203
- hour = m[:hour].to_i(10) ## allow 08/07/etc.
204
- minute = m[:minute].to_i(10)
205
- ## check if valid - 0:00 - 24:00
206
- ## check if 24:00 possible? or only 0:00 (23:59)
207
- if (hour >= 0 && hour <= 24) &&
208
- (minute >=0 && minute <= 59)
209
- ## note - for debugging keep (pass along) "literal" time
210
- ## might use/add support for am/pm later
211
- [:time, m[:time], {h:hour,m:minute}]
212
- else
213
- raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
214
- end
215
- else
216
- [:time, m[:time]]
217
- end
218
- elsif m[:date]
219
- if typed
220
- date = {}
221
- =begin
222
- ((?<day_name>#{DAY_NAMES})
223
- [ ]
224
- )?
225
- (?<month_name>#{MONTH_NAMES})
226
- (?: \/|[ ] )
227
- (?<day>\d{1,2})
228
- ## optional year
229
- ( [ ]
230
- (?<year>\d{4})
231
- )?
232
- =end
233
- ## map month names
234
- ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
235
- date[:y] = m[:year].to_i(10) if m[:year]
236
- date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
237
- date[:d] = m[:day].to_i(10) if m[:day]
238
- date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
239
- ## note - for debugging keep (pass along) "literal" date
240
- [:date, m[:date], date]
241
- else
242
- [:date, m[:date]]
243
- end
244
- elsif m[:timezone]
245
- [:timezone, m[:timezone]]
246
- elsif m[:duration]
247
- if typed
248
- duration = { start: {}, end: {}}
249
- duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
250
- duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
251
- duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
252
- duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
253
- duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
254
- duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
255
- duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
256
- duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
257
- ## note - for debugging keep (pass along) "literal" duration
258
- [:duration, m[:duration], duration]
259
- else
260
- [:duration, m[:duration]]
261
- end
262
- elsif m[:num]
263
- if typed
264
- ## note - strip enclosing () and convert to integer
265
- [:num, m[:value].to_i(10)]
266
- else
267
- [:num, m[:num]]
268
- end
269
- elsif m[:score]
270
- if typed
271
- score = {}
272
- ## check for pen
273
- score[:p] = [m[:p1].to_i(10),
274
- m[:p2].to_i(10)] if m[:p1] && m[:p2]
275
- score[:et] = [m[:et1].to_i(10),
276
- m[:et2].to_i(10)] if m[:et1] && m[:et2]
277
- score[:ft] = [m[:ft1].to_i(10),
278
- m[:ft2].to_i(10)] if m[:ft1] && m[:ft2]
279
- score[:ht] = [m[:ht1].to_i(10),
280
- m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
281
-
282
- ## note - for debugging keep (pass along) "literal" score
283
- [:score, m[:score], score]
284
- else
285
- [:score, m[:score]]
286
- end
287
- elsif m[:minute]
288
- if typed
289
- minute = {}
290
- minute[:m] = m[:value].to_i(10)
291
- minute[:offset] = m[:value2].to_i(10) if m[:value2]
292
- ## note - for debugging keep (pass along) "literal" minute
293
- [:minute, m[:minute], minute]
294
- else
295
- [:minute, m[:minute]]
296
- end
297
- elsif m[:og]
298
- typed ? [:og] : [:og, m[:og]] ## for typed drop - string version/variants
299
- elsif m[:pen]
300
- typed ? [:pen] : [:pen, m[:pen]]
301
- elsif m[:vs]
302
- typed ? [:vs] : [:vs, m[:vs]]
303
- elsif m[:none]
304
- typed ? [:none] : [:none, m[:none]]
305
- elsif m[:sym]
306
- sym = m[:sym]
307
- ## return symbols "inline" as is - why? why not?
308
- case sym
309
- when ',' then [:',']
310
- when ';' then [:';']
311
- when '@' then [:'@']
312
- when '|' then [:'|']
313
- else
314
- nil ## ignore others (e.g. brackets [])
315
- end
316
- else
317
- ## report error
318
- nil
319
- end
320
-
321
- tokens << t if t
322
-
323
- if debug
324
- print ">"
325
- print "*" * pos
326
- puts "#{line[pos..-1]}<"
327
- end
328
- end
329
-
330
- ## check if no match in end of string
331
- if offsets[1] != line.size
332
- msg = "!! WARN - parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
333
- puts msg
334
- log( msg )
335
-
336
- errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
337
- end
338
-
339
-
340
- [tokens,errors]
341
- end
342
-
343
-
344
- ### convience helper - ignore errors by default
345
- def tokenize( line, typed: false,
346
- debug: false )
347
- tokens, _ = tokenize_with_errors( line, typed: typed,
348
- debug: debug )
349
- tokens
350
- end
351
-
352
-
353
268
  end # class Parser
354
269
  end # module SportDb
@@ -0,0 +1,262 @@
1
+
2
+ module SportDb
3
+ class Parser
4
+
5
+
6
+
7
+ def log( msg )
8
+ ## append msg to ./logs.txt
9
+ ## use ./errors.txt - why? why not?
10
+ File.open( './logs.txt', 'a:utf-8' ) do |f|
11
+ f.write( msg )
12
+ f.write( "\n" )
13
+ end
14
+ end
15
+
16
+
17
+
18
+ def tokenize_with_errors( line, debug: false )
19
+ tokens = []
20
+ errors = [] ## keep a list of errors - why? why not?
21
+
22
+ puts ">#{line}<" if debug
23
+
24
+ pos = 0
25
+ ## track last offsets - to report error on no match
26
+ ## or no match in end of string
27
+ offsets = [0,0]
28
+ m = nil
29
+
30
+
31
+ ####
32
+ ## quick hack - keep re state/mode between tokenize calls!!!
33
+ @re ||= RE ## note - switch between RE & PROP_RE
34
+
35
+
36
+ while m = @re.match( line, pos )
37
+ if debug
38
+ pp m
39
+ puts "pos: #{pos}"
40
+ end
41
+ offsets = [m.begin(0), m.end(0)]
42
+
43
+ if offsets[0] != pos
44
+ ## match NOT starting at start/begin position!!!
45
+ ## report parse error!!!
46
+ msg = "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
47
+ puts msg
48
+
49
+ errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
50
+ log( msg )
51
+ end
52
+
53
+ ##
54
+ ## todo/fix - also check if possible
55
+ ## if no match but not yet end of string!!!!
56
+ ## report skipped text run too!!!
57
+
58
+ pos = offsets[1]
59
+
60
+ pp offsets if debug
61
+
62
+ ##
63
+ ## note: racc requires pairs e.g. [:TOKEN, VAL]
64
+ ## for VAL use "text" or ["text", { opts }] array
65
+
66
+
67
+ t = if @re == PROP_RE
68
+ if m[:space]
69
+ ## skip space
70
+ nil
71
+ elsif m[:spaces]
72
+ ## skip spaces
73
+ nil
74
+ elsif m[:prop_name]
75
+ if m[:name] == 'Y'
76
+ [:YELLOW_CARD, m[:name]]
77
+ elsif m[:name] == 'R'
78
+ [:RED_CARD, m[:name]]
79
+ else
80
+ [:PROP_NAME, m[:name]]
81
+ end
82
+ elsif m[:minute]
83
+ minute = {}
84
+ minute[:m] = m[:value].to_i(10)
85
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
86
+ ## note - for debugging keep (pass along) "literal" minute
87
+ [:MINUTE, [m[:minute], minute]]
88
+ elsif m[:sym]
89
+ sym = m[:sym]
90
+ ## return symbols "inline" as is - why? why not?
91
+ ## (?<sym>[.;,\(\)\[\]-])
92
+
93
+ case sym
94
+ when ',' then [:',']
95
+ when ';' then [:';']
96
+ when '[' then [:'[']
97
+ when ']' then [:']']
98
+ when '(' then [:'(']
99
+ when ')' then [:')']
100
+ when '-' then [:'-']
101
+ when '.' then
102
+ ## switch back to top-level mode!!
103
+ puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"
104
+ @re = RE
105
+ [:'.']
106
+ else
107
+ nil ## ignore any other symbol (brackets [] are handled above)
108
+ end
109
+ else
110
+ ## report error
111
+ puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
112
+ nil
113
+ end
114
+ else ## assume TOP_LEVEL (a.k.a. RE) machinery
115
+ if m[:space]
116
+ ## skip space
117
+ nil
118
+ elsif m[:spaces]
119
+ ## skip spaces
120
+ nil
121
+ elsif m[:prop_key]
122
+ ## switch context to PROP_RE
123
+ @re = PROP_RE
124
+ puts " ENTER PROP_RE MODE"
125
+ [:PROP, m[:key]]
126
+ elsif m[:text]
127
+ [:TEXT, m[:text]] ## keep pos - why? why not?
128
+ elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
129
+ ## todo/check - add text (or status)
130
+ # to opts hash {} by default (for value)
131
+ if m[:status_note] ## includes note? e.g. awarded; originally 2-0
132
+ [:STATUS, [m[:status], {status: m[:status],
133
+ note: m[:status_note]} ]]
134
+ else
135
+ [:STATUS, [m[:status], {status: m[:status] } ]]
136
+ end
137
+ elsif m[:time]
138
+ ## unify to iso-format
139
+ ### 12.40 => 12:40
140
+ ## 12h40 => 12:40 etc.
141
+ ## keep string (no time-only type in ruby)
142
+ hour = m[:hour].to_i(10) ## allow 08/07/etc.
143
+ minute = m[:minute].to_i(10)
144
+ ## check if valid - 0:00 - 24:00
145
+ ## check if 24:00 possible? or only 0:00 (23:59)
146
+ if (hour >= 0 && hour <= 24) &&
147
+ (minute >=0 && minute <= 59)
148
+ ## note - for debugging keep (pass along) "literal" time
149
+ ## might use/add support for am/pm later
150
+ [:TIME, [m[:time], {h:hour,m:minute}]]
151
+ else
152
+ raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
153
+ end
154
+ elsif m[:date]
155
+ date = {}
156
+ ## map month names
157
+ ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
158
+ date[:y] = m[:year].to_i(10) if m[:year]
159
+ date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
160
+ date[:d] = m[:day].to_i(10) if m[:day]
161
+ date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
162
+ ## note - for debugging keep (pass along) "literal" date
163
+ [:DATE, [m[:date], date]]
164
+ elsif m[:timezone]
165
+ [:TIMEZONE, m[:timezone]]
166
+ elsif m[:duration]
167
+ ## todo/check/fix - if end: works for kwargs!!!!!
168
+ duration = { start: {}, end: {}}
169
+ duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
170
+ duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
171
+ duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
172
+ duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
173
+ duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
174
+ duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
175
+ duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
176
+ duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
177
+ ## note - for debugging keep (pass along) "literal" duration
178
+ [:DURATION, [m[:duration], duration]]
179
+ elsif m[:num] ## fix - change to ord (for ordinal number!!!)
180
+ ## note - strip enclosing () and convert to integer
181
+ [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
182
+ elsif m[:score]
183
+ score = {}
184
+ ## check for pen
185
+ score[:p] = [m[:p1].to_i(10),
186
+ m[:p2].to_i(10)] if m[:p1] && m[:p2]
187
+ score[:et] = [m[:et1].to_i(10),
188
+ m[:et2].to_i(10)] if m[:et1] && m[:et2]
189
+ score[:ft] = [m[:ft1].to_i(10),
190
+ m[:ft2].to_i(10)] if m[:ft1] && m[:ft2]
191
+ score[:ht] = [m[:ht1].to_i(10),
192
+ m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
193
+
194
+ ## note - for debugging keep (pass along) "literal" score
195
+ [:SCORE, [m[:score], score]]
196
+ elsif m[:minute]
197
+ minute = {}
198
+ minute[:m] = m[:value].to_i(10)
199
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
200
+ ## note - for debugging keep (pass along) "literal" minute
201
+ [:MINUTE, [m[:minute], minute]]
202
+ elsif m[:og]
203
+ [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
204
+ elsif m[:pen]
205
+ [:PEN, m[:pen]]
206
+ elsif m[:vs]
207
+ [:VS, m[:vs]]
208
+ elsif m[:sym]
209
+ sym = m[:sym]
210
+ ## return symbols "inline" as is - why? why not?
211
+ ## (?<sym>[;,@|\[\]-])
212
+
213
+ case sym
214
+ when ',' then [:',']
215
+ when ';' then [:';']
216
+ when '@' then [:'@']
217
+ when '|' then [:'|']
218
+ when '[' then [:'[']
219
+ when ']' then [:']']
220
+ when '-' then [:'-']
221
+ else
222
+ nil ## ignore any other symbol (brackets [] are handled above)
223
+ end
224
+ else
225
+ ## report error
226
+ puts "!!! TOKENIZE ERROR - no match found"
227
+ nil
228
+ end
229
+ end
230
+
231
+
232
+ tokens << t if t
233
+
234
+ if debug
235
+ print ">"
236
+ print "*" * pos
237
+ puts "#{line[pos..-1]}<"
238
+ end
239
+ end
240
+
241
+ ## check if no match in end of string
242
+ if offsets[1] != line.size
243
+ msg = "!! WARN - parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
244
+ puts msg
245
+ log( msg )
246
+
247
+ errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
248
+ end
249
+
250
+
251
+ [tokens,errors]
252
+ end
253
+
254
+
255
+ ### convenience helper - ignore errors by default
256
+ def tokenize( line, debug: false )
257
+ tokens, _ = tokenize_with_errors( line, debug: debug )
258
+ tokens
259
+ end
260
+
261
+ end # class Parser
262
+ end # module SportDb
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 4
7
- PATCH = 0
6
+ MINOR = 5
7
+ PATCH = 1
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version