sportdb-parser 0.6.0 → 0.6.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +55 -9
- data/lib/sportdb/parser/parser.rb +493 -396
- data/lib/sportdb/parser/racc_parser.rb +4 -2
- data/lib/sportdb/parser/token-date.rb +66 -15
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token.rb +11 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b299ddece5e64b86bb7ee6b55578099b0624b11d8e5f10721363f45d6ef5d8d8
+  data.tar.gz: 5712c99b200e6116c9f07fba1215a4bf2560e5bd848c3c8cc48959aa17997b85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5d2fce54482e12542c35abd46a292d7f5e6b4db894bba3a7f911269f116d9fa530653d36ace4295e2f819bb974093b5567a5494a2d50b54ee3f250b314d40a73
+  data.tar.gz: 7b6ef8aaafa2d20c0356fcdc048211f24a04cc4f95819ad8d225b2c9a4a29e44d8f415190acfbe3e31b2f9cc457a12f8e75c460394e984d5b9b1f476f0f8e30f
```
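To check a downloaded copy against the published SHA256 values, a rough sketch; the local gem file name is an assumption:

```ruby
require 'digest'
require 'rubygems/package'

## a .gem is a tar archive containing metadata.gz, data.tar.gz, checksums.yaml.gz;
## recompute the SHA256 digests of the two embedded archives for comparison
tar = Gem::Package::TarReader.new( File.open( 'sportdb-parser-0.6.1.gem', 'rb' ) )
tar.each do |entry|
  next unless %w[metadata.gz data.tar.gz].include?( entry.full_name )
  puts "#{entry.full_name}: #{Digest::SHA256.hexdigest( entry.read )}"
end
tar.close
```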
data/CHANGELOG.md
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
```diff
@@ -147,12 +147,15 @@ def initialize( lines, debug: false )
     ## strip lines with comments and empty lines striped / removed
     ## keep empty lines? why? why not?
     ## keep leading spaces (indent) - why?
+    ##
+    ## note - KEEP empty lines (get turned into BLANK token!!!!)
+
     @txt = String.new
     txt_pre.each_line do |line|   ## preprocess
       line = line.strip
-      next if line.
+      next if line.start_with?('#')   ### skip comments
 
-      line = line.sub( /#.*/, '' ).strip
+      line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too
 
       @txt << line
       @txt << "\n"
```
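A standalone sketch of the new preprocessing rules, using made-up input lines; note how the empty line now survives (to become a BLANK token later) while both comment styles get dropped:

```ruby
txt_pre = <<~TXT
  # full-line comment - skipped
  Arsenal 1-1 Chelsea   # end-of-line comment - cut off

  Leeds 2-0 Fulham
TXT

txt = String.new
txt_pre.each_line do |line|
  line = line.strip
  next if line.start_with?('#')        ## skip full-line comments
  line = line.sub( /#.*/, '' ).strip   ## cut off end-of-line comments
  txt << line << "\n"
end

print txt   ## the blank line between the two matches is kept
```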
```diff
@@ -193,6 +196,18 @@ def tokenize_with_errors
       t
     end
 
+    ### check for "section" starters e.g. Teams or such
+    t = tokens[0]
+    if t[0] == :TEXT
+      text = t[1]
+      if text =~ /^teams$/i
+        t[0] = :TEAMS
+      elsif text =~ /^blank$/i   ### todo/fix -- remove!!! add real blanks!!
+        t[0] = :BLANK
+      else
+      end
+    end
+
     #################
     ## pass 2
     ## transform tokens (using simple patterns)
```
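A minimal sketch of the new section-starter check over a line's first token; the token shapes follow the diff, the sample line is made up:

```ruby
tokens = [[:TEXT, 'Teams'], [:NEWLINE, "\n"]]

t = tokens[0]
if t[0] == :TEXT
  if t[1] =~ /^teams$/i        ## case-insensitive whole-word match
    t[0] = :TEAMS
  elsif t[1] =~ /^blank$/i
    t[0] = :BLANK
  end
end

pp tokens[0]   ## => [:TEAMS, "Teams"]
```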
```diff
@@ -227,13 +242,22 @@ def tokenize_with_errors
       end
 
 
-      if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
+      if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
         nodes << [:TEAM, buf.next[1]]
         nodes << buf.next
         nodes << [:TEAM, buf.next[1]]
       elsif buf.match?( :TEXT, :MINUTE )
         nodes << [:PLAYER, buf.next[1]]
         nodes << buf.next
+      elsif buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
+        date = buf.next[1]
+        time = buf.next[1]
+        ## puts "DATETIME:"
+        ## pp date, time
+        val = [date[0] + ' ' + time[0],   ## concat string of two tokens
+               { date: date[1], time: time[1] }
+              ]
+        nodes << [:DATETIME, val]
       else
         ## pass through
         nodes << buf.next
```
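The merged token keeps the concatenated literal plus both attribute hashes. A sketch of the resulting shape; the date/time attribute keys here are made-up placeholders, not the gem's real ones:

```ruby
## [literal, attrs] token payloads - attribute keys are placeholders
date = ['Sat Aug/17', { m: 8, d: 17 }]
time = ['16:30',      { h: 16, min: 30 }]

val = [date[0] + ' ' + time[0],          ## concat string of two tokens
       { date: date[1], time: time[1] }]

pp [:DATETIME, val]
## => [:DATETIME, ["Sat Aug/17 16:30", {date: {m: 8, d: 17}, time: {h: 16, min: 30}}]]
```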
```diff
@@ -253,7 +277,8 @@ def tokenize_with_errors
       end
 
       tokens += tok
-
+      ## auto-add newlines (unless BLANK!!)
+      tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
     end
 
     [tokens,errors]
```
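A sketch of the auto-newline rule; the token contents are made up, the `tok[0][0] == :BLANK` guard is straight from the diff:

```ruby
tokens = []
[[[:TEXT, 'Arsenal']], [[:BLANK, '<|BLANK|>']]].each do |tok|
  tokens += tok
  ## append a NEWLINE per line - except after a BLANK line
  tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
end

pp tokens
## => [[:TEXT, "Arsenal"], [:NEWLINE, "\n"], [:BLANK, "<|BLANK|>"]]
```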
```diff
@@ -267,6 +292,17 @@ def _tokenize_line( line )
 
     puts "line: >#{line}<"   if debug?
 
+
+    ### special case for empty line (aka BLANK)
+    if line.empty?
+      ## note - blank always resets parser mode to std/top-level!!!
+      @re = RE
+
+      tokens << [:BLANK, '<|BLANK|>']
+      return [tokens, errors]
+    end
+
+
     pos = 0
     ## track last offsets - to report error on no match
     ##   or no match in end of string
```
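A trimmed-down sketch of the new empty-line short-circuit; the real method also resets the `@re` lexer mode and threads an errors array alongside the tokens:

```ruby
def tokenize_line( line )   ## simplified stand-in for _tokenize_line
  tokens = []
  if line.empty?
    ## an empty line always short-circuits - no pattern matching runs
    tokens << [:BLANK, '<|BLANK|>']   ## sentinel literal from the diff
    return tokens
  end
  ## ... the regular pattern-matching loop would run here ...
  tokens
end

pp tokenize_line( '' )   ## => [[:BLANK, "<|BLANK|>"]]
```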
```diff
@@ -420,7 +456,7 @@ def _tokenize_line( line )
       elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
         ## note - strip enclosing () and convert to integer
         [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
-      elsif m[:
+      elsif m[:score_more]
         score = {}
         ## check for pen
         score[:p] = [m[:p1].to_i(10),
```
```diff
@@ -433,8 +469,15 @@ def _tokenize_line( line )
                       m[:ht2].to_i(10)]   if m[:ht1] && m[:ht2]
 
         ## note - for debugging keep (pass along) "literal" score
-        [:
-      elsif m[:
+        [:SCORE_MORE, [m[:score_more], score]]
+      elsif m[:score]
+        score = {}
+        ## must always have ft for now e.g. 1-1 or such
+        score[:ft] = [m[:ft1].to_i(10),
+                      m[:ft2].to_i(10)]
+        ## note - for debugging keep (pass along) "literal" score
+        [:SCORE, [m[:score], score]]
+      elsif m[:minute]
         minute = {}
         minute[:m] = m[:value].to_i(10)
         minute[:offset] = m[:value2].to_i(10)   if m[:value2]
```
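The old single score branch is now split: `SCORE_MORE` covers results with extra parts (penalties, extra time, halftime), while the new `SCORE` branch handles only a plain fulltime result. A sketch of that plain branch; the regex is a simplified stand-in for the gem's real score pattern, only the capture names match the diff:

```ruby
SCORE_RE = /(?<score>(?<ft1>\d{1,2})-(?<ft2>\d{1,2}))/   ## simplified stand-in pattern

m = SCORE_RE.match( 'Arsenal 2-1 Chelsea' )
if m && m[:score]
  score = {}
  ## must always have ft for now e.g. 1-1 or such
  score[:ft] = [m[:ft1].to_i(10), m[:ft2].to_i(10)]
  pp [:SCORE, [m[:score], score]]   ## => [:SCORE, ["2-1", {ft: [2, 1]}]]
end
```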
```diff
@@ -459,7 +502,10 @@ def _tokenize_line( line )
       when '|' then [:'|']
       when '[' then [:'[']
       when ']' then [:']']
-      when '-' then [:'-']
+      when '-' then [:'-']         # level 1  OR (classic) dash
+      when '--' then [:'--']       # level 2
+      when '---' then [:'---']     # level 3
+      when '----' then [:'----']   # level 4
       else
         nil   ## ignore others (e.g. brackets [])
       end
```
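Per the inline comments, the four dash tokens mark levels 1 through 4; a tiny sketch of that mapping:

```ruby
## dash run length maps to the (heading) level named in the comments above
{ '-' => 1, '--' => 2, '---' => 3, '----' => 4 }.each do |dash, level|
  pp [dash.to_sym, level]   ## e.g. [:-, 1] ... [:"----", 4]
end
```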
```diff
@@ -504,7 +550,7 @@ def _tokenize_line( line )
         puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
         @re = RE
         ## note - auto-add PROP_END (<PROP_END>)
-        tokens << [:PROP_END, "
+        tokens << [:PROP_END, "<|PROP_END|>"]
       end
     end
 
```
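The `PROP_END` sentinel now uses the same `<|...|>` literal style as `<|BLANK|>`. A sketch of the auto-added closer; the surrounding property tokens are invented, only the sentinel comes from the diff:

```ruby
## token stream while in PROP_RE mode (contents are made up)
tokens = [[:PROP, 'round'], [:TEXT, '1']]

## on leaving PROP_RE mode the lexer auto-appends the closer
tokens << [:PROP_END, '<|PROP_END|>']
pp tokens.last   ## => [:PROP_END, "<|PROP_END|>"]
```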