sportdb-parser 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -1
- data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} +38 -29
- data/lib/sportdb/parser/parser.rb +334 -314
- data/lib/sportdb/parser/racc_parser.rb +23 -10
- data/lib/sportdb/parser/racc_tree.rb +1 -1
- data/lib/sportdb/parser/token-date.rb +2 -2
- data/lib/sportdb/parser/token-score.rb +2 -2
- data/lib/sportdb/parser/token-status.rb +2 -2
- data/lib/sportdb/parser/token-text.rb +2 -2
- data/lib/sportdb/parser/token.rb +2 -2
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +31 -12
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3c102d758209b64a04033a772faad7cdaaa4631f5079e56b92dccdfc4b84292
|
4
|
+
data.tar.gz: 4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78faffba17eff5ff5dd4b665099cee8eff8addd5e8263433f0662da8a88bd4fa5fa80ed83968dcdb4b7c95ab4254508b156dc09c79ee2d58e556e20cf2168aba
|
7
|
+
data.tar.gz: 4a9a5546ccff399028a2e629a87e86e7c3ee505b7c5b16a15de0053918ddc8e7704c2c8c75517feaab0a9bb5648a8bcf63c66b2cc34800f9d41335748e336f66
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -9,6 +9,7 @@ config/rounds_misc.txt
|
|
9
9
|
config/rounds_pt.txt
|
10
10
|
lib/sportdb/parser.rb
|
11
11
|
lib/sportdb/parser/lang.rb
|
12
|
+
lib/sportdb/parser/lexer.rb
|
12
13
|
lib/sportdb/parser/parser.rb
|
13
14
|
lib/sportdb/parser/racc_parser.rb
|
14
15
|
lib/sportdb/parser/racc_tree.rb
|
@@ -17,5 +18,4 @@ lib/sportdb/parser/token-score.rb
|
|
17
18
|
lib/sportdb/parser/token-status.rb
|
18
19
|
lib/sportdb/parser/token-text.rb
|
19
20
|
lib/sportdb/parser/token.rb
|
20
|
-
lib/sportdb/parser/tokenizer.rb
|
21
21
|
lib/sportdb/parser/version.rb
|
@@ -1,6 +1,6 @@
|
|
1
1
|
|
2
2
|
module SportDb
|
3
|
-
class Parser
|
3
|
+
class Lexer
|
4
4
|
|
5
5
|
|
6
6
|
|
@@ -14,6 +14,20 @@ def log( msg )
|
|
14
14
|
end
|
15
15
|
|
16
16
|
|
17
|
+
###
|
18
|
+
## todo/fix - use LangHelper or such
|
19
|
+
## e.g. class Lexer
|
20
|
+
## include LangHelper
|
21
|
+
## end
|
22
|
+
##
|
23
|
+
## merge back Lang into Lexer - why? why not?
|
24
|
+
## keep "old" access to checking for group, round & friends
|
25
|
+
## for now for compatibility
|
26
|
+
def is_group?( text ) Lang.is_group?( text ); end
|
27
|
+
def is_round?( text ) Lang.is_round?( text ); end
|
28
|
+
def is_leg?( text ) Lang.is_leg?( text ); end
|
29
|
+
|
30
|
+
|
17
31
|
## transforms
|
18
32
|
##
|
19
33
|
## Netherlands 1-2 (1-1) England
|
@@ -107,15 +121,11 @@ end # class Tokens
|
|
107
121
|
|
108
122
|
|
109
123
|
|
110
|
-
|
111
|
-
def tokenize( lines, debug: false )
|
112
|
-
tokens, _ = tokenize_with_errors( lines, debug: debug )
|
113
|
-
tokens
|
114
|
-
end
|
124
|
+
def debug?() @debug == true; end
|
115
125
|
|
116
|
-
def tokenize_with_errors( lines, debug: false )
|
126
|
+
def initialize( lines, debug: false )
|
127
|
+
@debug = debug
|
117
128
|
|
118
|
-
##
|
119
129
|
## note - for convenience - add support
|
120
130
|
## comments (incl. inline end-of-line comments) and empty lines here
|
121
131
|
## why? why not?
|
@@ -137,33 +147,33 @@ def tokenize_with_errors( lines, debug: false )
|
|
137
147
|
## strip lines with comments and empty lines striped / removed
|
138
148
|
## keep empty lines? why? why not?
|
139
149
|
## keep leading spaces (indent) - why?
|
140
|
-
txt = String.new
|
150
|
+
@txt = String.new
|
141
151
|
txt_pre.each_line do |line| ## preprocess
|
142
152
|
line = line.strip
|
143
153
|
next if line.empty? || line.start_with?('#') ### skip empty lines and comments
|
144
154
|
|
145
155
|
line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
|
146
156
|
|
147
|
-
txt << line
|
148
|
-
txt << "\n"
|
157
|
+
@txt << line
|
158
|
+
@txt << "\n"
|
149
159
|
end
|
150
|
-
|
160
|
+
end
|
161
|
+
|
151
162
|
|
163
|
+
|
164
|
+
def tokenize_with_errors
|
152
165
|
tokens_by_line = [] ## note: add tokens line-by-line (flatten later)
|
153
166
|
errors = [] ## keep a list of errors - why? why not?
|
154
167
|
|
155
|
-
txt.each_line do |line|
|
168
|
+
@txt.each_line do |line|
|
156
169
|
line = line.rstrip ## note - MUST remove/strip trailing newline (spaces optional)!!!
|
157
170
|
|
158
|
-
more_tokens, more_errors = _tokenize_line( line, debug: debug )
|
171
|
+
more_tokens, more_errors = _tokenize_line( line )
|
159
172
|
|
160
173
|
tokens_by_line << more_tokens
|
161
174
|
errors += more_errors
|
162
175
|
end # each line
|
163
176
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
177
|
tokens_by_line = tokens_by_line.map do |tokens|
|
168
178
|
#############
|
169
179
|
## pass 1
|
@@ -246,11 +256,11 @@ end # method tokenize_with_errors
|
|
246
256
|
|
247
257
|
|
248
258
|
|
249
|
-
def _tokenize_line( line, debug: false )
|
259
|
+
def _tokenize_line( line )
|
250
260
|
tokens = []
|
251
261
|
errors = [] ## keep a list of errors - why? why not?
|
252
262
|
|
253
|
-
puts ">#{line}<" if debug
|
263
|
+
puts ">#{line}<" if debug?
|
254
264
|
|
255
265
|
pos = 0
|
256
266
|
## track last offsets - to report error on no match
|
@@ -265,7 +275,7 @@ def _tokenize_line( line, debug: false )
|
|
265
275
|
|
266
276
|
|
267
277
|
while m = @re.match( line, pos )
|
268
|
-
if debug
|
278
|
+
if debug?
|
269
279
|
pp m
|
270
280
|
puts "pos: #{pos}"
|
271
281
|
end
|
@@ -274,10 +284,10 @@ def _tokenize_line( line, debug: false )
|
|
274
284
|
if offsets[0] != pos
|
275
285
|
## match NOT starting at start/begin position!!!
|
276
286
|
## report parse error!!!
|
277
|
-
msg = "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
287
|
+
msg = "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
278
288
|
puts msg
|
279
289
|
|
280
|
-
errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
|
290
|
+
errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
281
291
|
log( msg )
|
282
292
|
end
|
283
293
|
|
@@ -288,7 +298,7 @@ def _tokenize_line( line, debug: false )
|
|
288
298
|
|
289
299
|
pos = offsets[1]
|
290
300
|
|
291
|
-
pp offsets if debug
|
301
|
+
pp offsets if debug?
|
292
302
|
|
293
303
|
##
|
294
304
|
## note: racc requires pairs e.g. [:TOKEN, VAL]
|
@@ -331,7 +341,7 @@ def _tokenize_line( line, debug: false )
|
|
331
341
|
when '-' then [:'-']
|
332
342
|
when '.' then
|
333
343
|
## switch back to top-level mode!!
|
334
|
-
puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug
|
344
|
+
puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
|
335
345
|
@re = RE
|
336
346
|
[:'.']
|
337
347
|
else
|
@@ -352,7 +362,7 @@ def _tokenize_line( line, debug: false )
|
|
352
362
|
elsif m[:prop_key]
|
353
363
|
## switch context to PROP_RE
|
354
364
|
@re = PROP_RE
|
355
|
-
puts " ENTER PROP_RE MODE" if debug
|
365
|
+
puts " ENTER PROP_RE MODE" if debug?
|
356
366
|
[:PROP, m[:key]]
|
357
367
|
elsif m[:text]
|
358
368
|
[:TEXT, m[:text]] ## keep pos - why? why not?
|
@@ -462,7 +472,7 @@ def _tokenize_line( line, debug: false )
|
|
462
472
|
|
463
473
|
tokens << t if t
|
464
474
|
|
465
|
-
if debug
|
475
|
+
if debug?
|
466
476
|
print ">"
|
467
477
|
print "*" * pos
|
468
478
|
puts "#{line[pos..-1]}<"
|
@@ -475,13 +485,12 @@ def _tokenize_line( line, debug: false )
|
|
475
485
|
puts msg
|
476
486
|
log( msg )
|
477
487
|
|
478
|
-
errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
|
488
|
+
errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
|
479
489
|
end
|
480
490
|
|
481
491
|
|
482
492
|
[tokens,errors]
|
483
493
|
end
|
484
494
|
|
485
|
-
|
486
|
-
end # class Parser
|
495
|
+
end # class Lexer
|
487
496
|
end # module SportDb
|