sportdb-parser 0.5.7 → 0.5.8
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -1
- data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} +38 -29
- data/lib/sportdb/parser/parser.rb +334 -314
- data/lib/sportdb/parser/racc_parser.rb +23 -10
- data/lib/sportdb/parser/racc_tree.rb +1 -1
- data/lib/sportdb/parser/token-date.rb +2 -2
- data/lib/sportdb/parser/token-score.rb +2 -2
- data/lib/sportdb/parser/token-status.rb +2 -2
- data/lib/sportdb/parser/token-text.rb +2 -2
- data/lib/sportdb/parser/token.rb +2 -2
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +31 -12
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b3c102d758209b64a04033a772faad7cdaaa4631f5079e56b92dccdfc4b84292
+  data.tar.gz: 4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 78faffba17eff5ff5dd4b665099cee8eff8addd5e8263433f0662da8a88bd4fa5fa80ed83968dcdb4b7c95ab4254508b156dc09c79ee2d58e556e20cf2168aba
+  data.tar.gz: 4a9a5546ccff399028a2e629a87e86e7c3ee505b7c5b16a15de0053918ddc8e7704c2c8c75517feaab0a9bb5648a8bcf63c66b2cc34800f9d41335748e336f66
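The SHA256/SHA512 digests above cover the two members of the gem archive, metadata.gz and data.tar.gz. Since a .gem file is a plain tar archive, the recorded SHA256 values can be recomputed with stock Ruby. A minimal sketch, assuming the gem was fetched first (e.g. `gem fetch sportdb-parser -v 0.5.8`) and sits in the current directory:

    ## Recompute the SHA256 digests recorded in checksums.yaml.
    require "digest"
    require "rubygems/package"

    File.open( "sportdb-parser-0.5.8.gem", "rb" ) do |io|
      Gem::Package::TarReader.new( io ) do |tar|
        tar.each do |entry|
          ## only the two archive members that checksums.yaml hashes
          next unless %w[metadata.gz data.tar.gz].include?( entry.full_name )
          puts "#{entry.full_name}: #{Digest::SHA256.hexdigest( entry.read )}"
        end
      end
    end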
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -9,6 +9,7 @@ config/rounds_misc.txt
 config/rounds_pt.txt
 lib/sportdb/parser.rb
 lib/sportdb/parser/lang.rb
+lib/sportdb/parser/lexer.rb
 lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
@@ -17,5 +18,4 @@ lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-status.rb
 lib/sportdb/parser/token-text.rb
 lib/sportdb/parser/token.rb
-lib/sportdb/parser/tokenizer.rb
 lib/sportdb/parser/version.rb
data/lib/sportdb/parser/{tokenizer.rb → lexer.rb}
RENAMED
@@ -1,6 +1,6 @@
 
 module SportDb
-class Parser
+class Lexer
 
 
 
@@ -14,6 +14,20 @@ def log( msg )
   end
 
 
+  ###
+  ## todo/fix - use LangHelper or such
+  ##   e.g. class Lexer
+  ##           include LangHelper
+  ##        end
+  ##
+  ##  merge back Lang into Lexer - why? why not?
+  ##  keep "old" access to checking for group, round & friends
+  ##    for now for compatibility
+  def is_group?( text )  Lang.is_group?( text ); end
+  def is_round?( text )  Lang.is_round?( text ); end
+  def is_leg?( text )    Lang.is_leg?( text ); end
+
+
   ## transforms
   ##
   ##  Netherlands 1-2 (1-1) England
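The three one-liners added above keep the lexer's old query API while the actual word lists stay in Lang. A usage sketch (the inputs are illustrative only; what counts as a round, group, or leg is defined by Lang's data):

    lexer = SportDb::Lexer.new( "" )   ## constructor takes the input text (see below)

    lexer.is_round?( "Matchday 1" )    ## forwards to Lang.is_round?
    lexer.is_group?( "Group A" )       ## forwards to Lang.is_group?
    lexer.is_leg?( "1st Leg" )         ## forwards to Lang.is_leg?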
@@ -107,15 +121,11 @@ end # class Tokens
 
 
 
-
-  def tokenize( lines, debug: false )
-    tokens, _ = tokenize_with_errors( lines, debug: debug )
-    tokens
-  end
+  def debug?() @debug == true; end
 
-  def tokenize_with_errors( lines, debug: false )
+  def initialize( lines, debug: false )
+    @debug = debug
 
-    ##
     ## note - for convenience - add support
     ##  comments (incl. inline end-of-line comments) and empty lines here
     ##   why? why not?
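With this hunk the tokens-only tokenize wrapper is gone: the input lines and the debug flag are now bound once in the constructor, and the debug? predicate reads the stored flag. A sketch of the call-site change (names as in the diff; lines stands for a multi-line match-schedule string):

    ## before (0.5.7) - tokens only, debug passed per call:
    ##   tokens = tokenize( lines, debug: true )
    ##
    ## after (0.5.8) - state is set up front, tokenize_with_errors takes no args:
    lexer = SportDb::Lexer.new( lines, debug: true )
    tokens, errors = lexer.tokenize_with_errors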
@@ -137,33 +147,33 @@ def tokenize_with_errors( lines, debug: false )
     ##  strip lines with comments and empty lines striped / removed
     ##  keep empty lines? why? why not?
     ##  keep leading spaces (indent) - why?
-    txt = String.new
+    @txt = String.new
     txt_pre.each_line do |line|   ## preprocess
       line = line.strip
       next if line.empty? || line.start_with?('#')   ### skip empty lines and comments
 
       line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too
 
-      txt << line
-      txt << "\n"
+      @txt << line
+      @txt << "\n"
     end
-
+  end
+
 
+
+  def tokenize_with_errors
     tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
     errors = []   ## keep a list of errors - why? why not?
 
-    txt.each_line do |line|
+    @txt.each_line do |line|
       line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
 
-      more_tokens, more_errors = _tokenize_line( line, debug: debug )
+      more_tokens, more_errors = _tokenize_line( line )
 
       tokens_by_line << more_tokens
       errors += more_errors
     end # each line
 
-
-
-
     tokens_by_line = tokens_by_line.map do |tokens|
       #############
       ## pass 1
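The preprocessing that used to run inside tokenize_with_errors now runs in the constructor and fills @txt; tokenize_with_errors then only walks the cleaned buffer. A standalone sketch of that cleanup step, lifted from the hunk above (the method name preprocess is made up for illustration):

    def preprocess( txt_pre )
      txt = String.new
      txt_pre.each_line do |line|
        line = line.strip
        next if line.empty? || line.start_with?('#')   ## skip empty lines and comments
        line = line.sub( /#.*/, '' ).strip             ## cut off end-of-line comments
        txt << line << "\n"
      end
      txt
    end

    preprocess( "# header\nLeeds 1-0 Derby  # opener\n" )
    #=> "Leeds 1-0 Derby\n"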
@@ -246,11 +256,11 @@ end # method tokenize_with_errors
 
 
 
-  def _tokenize_line( line, debug: false )
+  def _tokenize_line( line )
    tokens = []
    errors = []   ## keep a list of errors - why? why not?
 
-    puts ">#{line}<"   if debug
+    puts ">#{line}<"   if debug?
 
    pos = 0
    ## track last offsets - to report error on no match
@@ -265,7 +275,7 @@ def _tokenize_line( line, debug: false )
 
 
    while m = @re.match( line, pos )
-      if debug
+      if debug?
        pp m
        puts "pos: #{pos}"
      end
@@ -274,10 +284,10 @@ def _tokenize_line( line, debug: false )
      if offsets[0] != pos
        ## match NOT starting at start/begin position!!!
        ##  report parse error!!!
-        msg = "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+        msg = "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
        puts msg
 
-        errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
+        errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
        log( msg )
      end
 
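The skip detection behind these messages compares the match start (offsets[0]) against the current scan position (pos): any gap in between is unmatched input, which gets reported and then jumped over. A minimal standalone illustration of the mechanism (the regex and input are stand-ins, not the gem's):

    line = "Arsenal ?? Chelsea"
    re   = /[A-Za-z]+/
    pos  = 0
    while (m = re.match( line, pos ))
      offsets = m.offset( 0 )   ## [start, end] of the whole match
      if offsets[0] != pos      ## gap => input was skipped
        puts "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
      end
      pos = offsets[1]
    end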
@@ -288,7 +298,7 @@ def _tokenize_line( line, debug: false )
 
      pos = offsets[1]
 
-      pp offsets   if debug
+      pp offsets   if debug?
 
      ##
      ##  note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -331,7 +341,7 @@ def _tokenize_line( line, debug: false )
            when '-' then [:'-']
            when '.' then
              ## switch back to top-level mode!!
-              puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug
+              puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
              @re = RE
              [:'.']
            else
@@ -352,7 +362,7 @@ def _tokenize_line( line, debug: false )
      elsif m[:prop_key]
        ## switch context to PROP_RE
        @re = PROP_RE
-        puts "  ENTER PROP_RE MODE"   if debug
+        puts "  ENTER PROP_RE MODE"   if debug?
        [:PROP, m[:key]]
      elsif m[:text]
        [:TEXT, m[:text]]   ## keep pos - why? why not?
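Together with the LEAVE branch in the previous hunk, this is a small two-mode lexer: matching a property key swaps the active regex from the top-level RE to PROP_RE, and a '.' swaps it back. A self-contained sketch of the pattern (the two regexes here are toy stand-ins for the gem's real RE/PROP_RE):

    RE      = /(?<prop_key>[a-z]+)(?=:)|(?<text>\S+)/
    PROP_RE = /(?<num>\d+)|(?<dot>\.)|(?<comma>,)/

    re, pos, line = RE, 0, "round: 1, 2. over"
    while (m = re.match( line, pos ))
      pos = m.end( 0 )
      if re.equal?( RE ) && m[:prop_key]
        re = PROP_RE    ## ENTER PROP_RE MODE
      elsif re.equal?( PROP_RE ) && m[:dot]
        re = RE         ## LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE
      end
    end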
@@ -462,7 +472,7 @@ def _tokenize_line( line, debug: false )
 
      tokens << t   if t
 
-      if debug
+      if debug?
        print ">"
        print "*" * pos
        puts "#{line[pos..-1]}<"
@@ -475,13 +485,12 @@ def _tokenize_line( line, debug: false )
      puts msg
      log( msg )
 
-      errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
+      errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
    end
 
 
    [tokens,errors]
  end
 
-
-end # class Parser
+end # class Lexer
 end # module SportDb