sportdb-parser 0.5.7 → 0.5.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: eb23029ea58744e513c4a6af7112ed46a3440540cb7fe77fd2b480c58bbd5b14
-  data.tar.gz: 2aba09728fa175dbde9e270ad7250ee047639c1b8641068bcb5d159cf7e0da34
+  metadata.gz: b3c102d758209b64a04033a772faad7cdaaa4631f5079e56b92dccdfc4b84292
+  data.tar.gz: 4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740
 SHA512:
-  metadata.gz: aa8dd925bdb7c3982ad18bba5f3864ee190535633043734186560e97a8692cfd589bf999e5e67fae3b23d3f625edd9608604b5fb3eb50222f84bd30a2dc97d0f
-  data.tar.gz: ac3e2c3a6c00a22e142db199c97842fad22f67f5c507456ff117ac82dde0ddcfa5ed0c4f0e0a23c78c491fb39ed9cc8d7058491c32bf645f5296ac76861b9aae
+  metadata.gz: 78faffba17eff5ff5dd4b665099cee8eff8addd5e8263433f0662da8a88bd4fa5fa80ed83968dcdb4b7c95ab4254508b156dc09c79ee2d58e556e20cf2168aba
+  data.tar.gz: 4a9a5546ccff399028a2e629a87e86e7c3ee505b7c5b16a15de0053918ddc8e7704c2c8c75517feaab0a9bb5648a8bcf63c66b2cc34800f9d41335748e336f66
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
-### 0.5.7
+### 0.5.8
 ### 0.0.1 / 2024-07-12
 
 * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -9,6 +9,7 @@ config/rounds_misc.txt
 config/rounds_pt.txt
 lib/sportdb/parser.rb
 lib/sportdb/parser/lang.rb
+lib/sportdb/parser/lexer.rb
 lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
@@ -17,5 +18,4 @@ lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-status.rb
 lib/sportdb/parser/token-text.rb
 lib/sportdb/parser/token.rb
-lib/sportdb/parser/tokenizer.rb
 lib/sportdb/parser/version.rb
data/lib/sportdb/parser/tokenizer.rb → data/lib/sportdb/parser/lexer.rb RENAMED
@@ -1,6 +1,6 @@
 
 module SportDb
-class Parser
+class Lexer
 
 
 
@@ -14,6 +14,20 @@ def log( msg )
 end
 
 
+###
+## todo/fix - use LangHelper or such
+##   e.g. class Lexer
+##          include LangHelper
+##        end
+##
+## merge back Lang into Lexer - why? why not?
+## keep "old" access to checking for group, round & friends
+##   for now for compatibility
+def is_group?( text ) Lang.is_group?( text ); end
+def is_round?( text ) Lang.is_round?( text ); end
+def is_leg?( text )   Lang.is_leg?( text ); end
+
+
 ## transforms
 ##
 ##   Netherlands 1-2 (1-1) England
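
The three added methods simply forward to Lang, so code written against the old query API keeps working on the renamed class. A minimal calling sketch (the heading strings and the empty constructor input are illustrative assumptions, not fixtures from the gem):

    lexer = SportDb::Lexer.new( "" )    ## assumes a plain string is accepted as input
    lexer.is_round?( "Matchday 1" )     ## forwards to Lang.is_round?
    lexer.is_group?( "Group A" )        ## forwards to Lang.is_group?
    lexer.is_leg?( "1st Leg" )          ## forwards to Lang.is_leg?
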
@@ -107,15 +121,11 @@ end # class Tokens
 
 
 
-### convience helper - ignore errors by default
-def tokenize( lines, debug: false )
-  tokens, _ = tokenize_with_errors( lines, debug: debug )
-  tokens
-end
+def debug?() @debug == true; end
 
-def tokenize_with_errors( lines, debug: false )
+def initialize( lines, debug: false )
+  @debug = debug
 
-  ##
   ## note - for convenience - add support
   ##  comments (incl. inline end-of-line comments) and empty lines here
   ##  why? why not?
@@ -137,33 +147,33 @@ def tokenize_with_errors( lines, debug: false )
   ##  strip lines with comments and empty lines striped / removed
   ##  keep empty lines? why? why not?
   ##  keep leading spaces (indent) - why?
-  txt = String.new
+  @txt = String.new
   txt_pre.each_line do |line|   ## preprocess
     line = line.strip
     next if line.empty? || line.start_with?('#')   ### skip empty lines and comments
 
     line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too
 
-    txt << line
-    txt << "\n"
+    @txt << line
+    @txt << "\n"
   end
-
+end
+
 
+
+def tokenize_with_errors
 
   tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
   errors = []   ## keep a list of errors - why? why not?
 
-  txt.each_line do |line|
+  @txt.each_line do |line|
     line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
 
-    more_tokens, more_errors = _tokenize_line( line, debug: debug )
+    more_tokens, more_errors = _tokenize_line( line )
 
    tokens_by_line << more_tokens
    errors += more_errors
   end # each line
 
-
-
-
   tokens_by_line = tokens_by_line.map do |tokens|
     #############
     ## pass 1
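
The net effect of these two hunks: the input lines and the debug flag now travel via the constructor, the preprocessed text is kept in @txt, tokenize_with_errors takes no arguments, and the tokenize convenience wrapper is gone. A minimal before/after sketch, assuming the method still returns a [tokens, errors] pair as in 0.5.7 and that lines holds match-schedule text:

    lines = "Netherlands 1-2 (1-1) England"

    ## 0.5.7 - input and flag were passed per call
    ##   tokens, errors = parser.tokenize_with_errors( lines, debug: true )

    ## 0.5.8 - input and flag move into the constructor
    lexer = SportDb::Lexer.new( lines, debug: true )
    tokens, errors = lexer.tokenize_with_errors
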
@@ -246,11 +256,11 @@ end # method tokenize_with_errors
 
 
 
-def _tokenize_line( line, debug: false )
+def _tokenize_line( line )
   tokens = []
   errors = []   ## keep a list of errors - why? why not?
 
-  puts ">#{line}<"   if debug
+  puts ">#{line}<"   if debug?
 
   pos = 0
   ## track last offsets - to report error on no match
@@ -265,7 +275,7 @@ def _tokenize_line( line, debug: false )
 
 
   while m = @re.match( line, pos )
-    if debug
+    if debug?
      pp m
      puts "pos: #{pos}"
    end
@@ -274,10 +284,10 @@ def _tokenize_line( line, debug: false )
    if offsets[0] != pos
      ## match NOT starting at start/begin position!!!
      ## report parse error!!!
-      msg = "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+      msg = "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
      puts msg
 
-      errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
+      errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
      log( msg )
    end
 
@@ -288,7 +298,7 @@ def _tokenize_line( line, debug: false )
 
 
    pos = offsets[1]
-    pp offsets   if debug
+    pp offsets   if debug?
 
    ##
    ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -331,7 +341,7 @@ def _tokenize_line( line, debug: false )
          when '-' then [:'-']
          when '.' then
            ## switch back to top-level mode!!
-            puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug
+            puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
            @re = RE
            [:'.']
          else
@@ -352,7 +362,7 @@ def _tokenize_line( line, debug: false )
      elsif m[:prop_key]
        ## switch context to PROP_RE
        @re = PROP_RE
-        puts " ENTER PROP_RE MODE"   if debug
+        puts " ENTER PROP_RE MODE"   if debug?
        [:PROP, m[:key]]
      elsif m[:text]
        [:TEXT, m[:text]]   ## keep pos - why? why not?
@@ -462,7 +472,7 @@ def _tokenize_line( line, debug: false )
 
    tokens << t   if t
 
-    if debug
+    if debug?
      print ">"
      print "*" * pos
      puts "#{line[pos..-1]}<"
@@ -475,13 +485,12 @@ def _tokenize_line( line, debug: false )
    puts msg
    log( msg )
 
-    errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
+    errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
  end
 
 
  [tokens,errors]
 end
 
-
-end # class Parser
+end # class Lexer
 end # module SportDb
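
Both error paths now tag their messages with "(tokenize)" and append the offending line, so each collected entry is self-contained. A short sketch of consuming the result (a sketch only; it assumes tokenize_with_errors returns the [tokens, errors] pair suggested by the 0.5.7 wrapper, and txt is a placeholder for your input):

    tokens, errors = SportDb::Lexer.new( txt ).tokenize_with_errors
    errors.each { |err| puts "!! #{err}" }   ## each entry already names the line
    puts "#{errors.size} error(s)"
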