sportdb-parser 0.5.9 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/lexer.rb +47 -28
- data/lib/sportdb/parser/parser.rb +421 -344
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +12 -5
- data/lib/sportdb/parser/token-date.rb +18 -1
- data/lib/sportdb/parser/token-minute.rb +45 -0
- data/lib/sportdb/parser/token-prop.rb +133 -0
- data/lib/sportdb/parser/token-text.rb +9 -2
- data/lib/sportdb/parser/token.rb +43 -177
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9af6317b144478400067502e60de2f8d6232ebf6e036b0f99b78f9c29922dba2
|
4
|
+
data.tar.gz: 7066483378693f6376f9c30ec71e5d4172c817c11025dd4e669da6d581b6ad54
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '039fdc82039d05ae8f51847a79dd77c0a657e316e8b0705a28bdf2f8e594f37531ea07a230c9e1a9133f96293975190dba070f50515d58bc9926e4ef3e8e152f'
|
7
|
+
data.tar.gz: e0f6483cd26ba7ef4800ecf76efd6f05e01e3a2458dbc6b65fe6582654c8d28627facbfc8228655e361df84c43418c9583826908cdcf3d61bf06d186288c56fa
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -14,6 +14,8 @@ lib/sportdb/parser/parser.rb
|
|
14
14
|
lib/sportdb/parser/racc_parser.rb
|
15
15
|
lib/sportdb/parser/racc_tree.rb
|
16
16
|
lib/sportdb/parser/token-date.rb
|
17
|
+
lib/sportdb/parser/token-minute.rb
|
18
|
+
lib/sportdb/parser/token-prop.rb
|
17
19
|
lib/sportdb/parser/token-score.rb
|
18
20
|
lib/sportdb/parser/token-status.rb
|
19
21
|
lib/sportdb/parser/token-text.rb
|
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -247,6 +247,11 @@ def tokenize_with_errors
|
|
247
247
|
## flatten tokens
|
248
248
|
tokens = []
|
249
249
|
tokens_by_line.each do |tok|
|
250
|
+
|
251
|
+
if debug?
|
252
|
+
pp tok
|
253
|
+
end
|
254
|
+
|
250
255
|
tokens += tok
|
251
256
|
tokens << [:NEWLINE, "\n"] ## auto-add newlines
|
252
257
|
end
|
@@ -260,7 +265,7 @@ def _tokenize_line( line )
|
|
260
265
|
tokens = []
|
261
266
|
errors = [] ## keep a list of errors - why? why not?
|
262
267
|
|
263
|
-
puts ">#{line}<" if debug?
|
268
|
+
puts "line: >#{line}<" if debug?
|
264
269
|
|
265
270
|
pos = 0
|
266
271
|
## track last offsets - to report error on no match
|
@@ -275,10 +280,10 @@ def _tokenize_line( line )
|
|
275
280
|
|
276
281
|
|
277
282
|
while m = @re.match( line, pos )
|
278
|
-
if debug?
|
279
|
-
pp m
|
280
|
-
puts "pos: #{pos}"
|
281
|
-
end
|
283
|
+
# if debug?
|
284
|
+
# pp m
|
285
|
+
# puts "pos: #{pos}"
|
286
|
+
# end
|
282
287
|
offsets = [m.begin(0), m.end(0)]
|
283
288
|
|
284
289
|
if offsets[0] != pos
|
@@ -298,7 +303,7 @@ def _tokenize_line( line )
|
|
298
303
|
|
299
304
|
pos = offsets[1]
|
300
305
|
|
301
|
-
pp offsets if debug?
|
306
|
+
# pp offsets if debug?
|
302
307
|
|
303
308
|
##
|
304
309
|
## note: racc requires pairs e.g. [:TOKEN, VAL]
|
@@ -306,12 +311,8 @@ def _tokenize_line( line )
|
|
306
311
|
|
307
312
|
|
308
313
|
t = if @re == PROP_RE
|
309
|
-
if m[:space]
|
310
|
-
## skip space
|
311
|
-
nil
|
312
|
-
elsif m[:spaces]
|
313
|
-
## skip spaces
|
314
|
-
nil
|
314
|
+
if m[:space] || m[:spaces]
|
315
|
+
nil ## skip space(s)
|
315
316
|
elsif m[:prop_name]
|
316
317
|
if m[:name] == 'Y'
|
317
318
|
[:YELLOW_CARD, m[:name]]
|
@@ -339,11 +340,11 @@ def _tokenize_line( line )
|
|
339
340
|
when '(' then [:'(']
|
340
341
|
when ')' then [:')']
|
341
342
|
when '-' then [:'-']
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
343
|
+
# when '.' then
|
344
|
+
# ## switch back to top-level mode!!
|
345
|
+
# puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
|
346
|
+
# @re = RE
|
347
|
+
# [:'.']
|
347
348
|
else
|
348
349
|
nil ## ignore others (e.g. brackets [])
|
349
350
|
end
|
@@ -353,12 +354,8 @@ def _tokenize_line( line )
|
|
353
354
|
nil
|
354
355
|
end
|
355
356
|
else ## assume TOP_LEVEL (a.k.a. RE) machinery
|
356
|
-
if m[:space]
|
357
|
-
## skip space
|
358
|
-
nil
|
359
|
-
elsif m[:spaces]
|
360
|
-
## skip spaces
|
361
|
-
nil
|
357
|
+
if m[:space] || m[:spaces]
|
358
|
+
nil ## skip space(s)
|
362
359
|
elsif m[:prop_key]
|
363
360
|
## switch context to PROP_RE
|
364
361
|
@re = PROP_RE
|
@@ -397,6 +394,7 @@ def _tokenize_line( line )
|
|
397
394
|
## map month names
|
398
395
|
## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
|
399
396
|
date[:y] = m[:year].to_i(10) if m[:year]
|
397
|
+
date[:m] = m[:month].to_i(10) if m[:month]
|
400
398
|
date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
|
401
399
|
date[:d] = m[:day].to_i(10) if m[:day]
|
402
400
|
date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
|
@@ -417,6 +415,8 @@ def _tokenize_line( line )
|
|
417
415
|
duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
|
418
416
|
## note - for debugging keep (pass along) "literal" duration
|
419
417
|
[:DURATION, [m[:duration], duration]]
|
418
|
+
elsif m[:wday] ## standalone weekday e.g. Mo/Tu/We/etc.
|
419
|
+
[:WDAY, [m[:wday], { wday: DAY_MAP[ m[:day_name].downcase ] } ]]
|
420
420
|
elsif m[:num] ## fix - change to ord (for ordinal number!!!)
|
421
421
|
## note - strip enclosing () and convert to integer
|
422
422
|
[:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
|
@@ -454,6 +454,7 @@ def _tokenize_line( line )
|
|
454
454
|
case sym
|
455
455
|
when ',' then [:',']
|
456
456
|
when ';' then [:';']
|
457
|
+
when '/' then [:'/']
|
457
458
|
when '@' then [:'@']
|
458
459
|
when '|' then [:'|']
|
459
460
|
when '[' then [:'[']
|
@@ -472,11 +473,11 @@ def _tokenize_line( line )
|
|
472
473
|
|
473
474
|
tokens << t if t
|
474
475
|
|
475
|
-
if debug?
|
476
|
-
print ">"
|
477
|
-
print "*" * pos
|
478
|
-
puts "#{line[pos..-1]}<"
|
479
|
-
end
|
476
|
+
# if debug?
|
477
|
+
# print ">"
|
478
|
+
# print "*" * pos
|
479
|
+
# puts "#{line[pos..-1]}<"
|
480
|
+
# end
|
480
481
|
end
|
481
482
|
|
482
483
|
## check if no match in end of string
|
@@ -489,6 +490,24 @@ def _tokenize_line( line )
|
|
489
490
|
end
|
490
491
|
|
491
492
|
|
493
|
+
##
|
494
|
+
## if in prop mode continue if last token is [,-]
|
495
|
+
## otherwise change back to "standard" mode
|
496
|
+
if @re == PROP_RE
|
497
|
+
if [:',', :'-'].include?( tokens[-1][0] )
|
498
|
+
## continue/stay in PROP_RE mode
|
499
|
+
## todo/check - auto-add PROP_CONT token or such
|
500
|
+
## to help parser with possible NEWLINE
|
501
|
+
## conflicts - why? why not?
|
502
|
+
else
|
503
|
+
## switch back to top-level mode!!
|
504
|
+
puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
|
505
|
+
@re = RE
|
506
|
+
## note - auto-add PROP_END (<PROP_END>)
|
507
|
+
tokens << [:PROP_END, "<PROP_END>"]
|
508
|
+
end
|
509
|
+
end
|
510
|
+
|
492
511
|
[tokens,errors]
|
493
512
|
end
|
494
513
|
|