sportdb-parser 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: eb23029ea58744e513c4a6af7112ed46a3440540cb7fe77fd2b480c58bbd5b14
- data.tar.gz: 2aba09728fa175dbde9e270ad7250ee047639c1b8641068bcb5d159cf7e0da34
+ metadata.gz: b3c102d758209b64a04033a772faad7cdaaa4631f5079e56b92dccdfc4b84292
+ data.tar.gz: 4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740
  SHA512:
- metadata.gz: aa8dd925bdb7c3982ad18bba5f3864ee190535633043734186560e97a8692cfd589bf999e5e67fae3b23d3f625edd9608604b5fb3eb50222f84bd30a2dc97d0f
- data.tar.gz: ac3e2c3a6c00a22e142db199c97842fad22f67f5c507456ff117ac82dde0ddcfa5ed0c4f0e0a23c78c491fb39ed9cc8d7058491c32bf645f5296ac76861b9aae
+ metadata.gz: 78faffba17eff5ff5dd4b665099cee8eff8addd5e8263433f0662da8a88bd4fa5fa80ed83968dcdb4b7c95ab4254508b156dc09c79ee2d58e556e20cf2168aba
+ data.tar.gz: 4a9a5546ccff399028a2e629a87e86e7c3ee505b7c5b16a15de0053918ddc8e7704c2c8c75517feaab0a9bb5648a8bcf63c66b2cc34800f9d41335748e336f66
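
Both archives get fresh digests, as expected for any rebuilt release. As a minimal sketch (the local filenames are assumptions), the published SHA256 values can be re-checked with Ruby's standard Digest library after fetching and unpacking the gem (a .gem file is a plain tar archive containing metadata.gz, data.tar.gz, and checksums.yaml.gz):

    # Sketch only - assumes `gem fetch sportdb-parser -v 0.5.8`
    # followed by `tar -xf sportdb-parser-0.5.8.gem` in the current directory.
    require 'digest'

    expected = '4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740'
    actual   = Digest::SHA256.file( 'data.tar.gz' ).hexdigest

    puts( actual == expected ? 'data.tar.gz checksum OK' : "MISMATCH - got #{actual}" )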
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
- ### 0.5.7
+ ### 0.5.8
  ### 0.0.1 / 2024-07-12
 
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -9,6 +9,7 @@ config/rounds_misc.txt
  config/rounds_pt.txt
  lib/sportdb/parser.rb
  lib/sportdb/parser/lang.rb
+ lib/sportdb/parser/lexer.rb
  lib/sportdb/parser/parser.rb
  lib/sportdb/parser/racc_parser.rb
  lib/sportdb/parser/racc_tree.rb
@@ -17,5 +18,4 @@ lib/sportdb/parser/token-score.rb
  lib/sportdb/parser/token-status.rb
  lib/sportdb/parser/token-text.rb
  lib/sportdb/parser/token.rb
- lib/sportdb/parser/tokenizer.rb
  lib/sportdb/parser/version.rb
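
The Manifest swap mirrors a file rename: lib/sportdb/parser/tokenizer.rb becomes lib/sportdb/parser/lexer.rb. A hedged note for downstream code (most users load the gem's top-level entry point, which is unchanged): anything requiring the old file directly would need the new path.

    ## before (0.5.7)
    # require 'sportdb/parser/tokenizer'
    ## after (0.5.8)
    require 'sportdb/parser/lexer'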
data/lib/sportdb/parser/tokenizer.rb → data/lib/sportdb/parser/lexer.rb RENAMED
@@ -1,6 +1,6 @@
 
  module SportDb
- class Parser
+ class Lexer
 
 
 
@@ -14,6 +14,20 @@ def log( msg )
  end
 
 
+ ###
+ ## todo/fix - use LangHelper or such
+ ## e.g. class Lexer
+ ##        include LangHelper
+ ##      end
+ ##
+ ## merge back Lang into Lexer - why? why not?
+ ## keep "old" access to checking for group, round & friends
+ ##   for now for compatibility
+ def is_group?( text ) Lang.is_group?( text ); end
+ def is_round?( text ) Lang.is_round?( text ); end
+ def is_leg?( text ) Lang.is_leg?( text ); end
+
+
  ## transforms
  ##
  ## Netherlands 1-2 (1-1) England
@@ -107,15 +121,11 @@ end # class Tokens
 
 
 
- ### convience helper - ignore errors by default
- def tokenize( lines, debug: false )
-   tokens, _ = tokenize_with_errors( lines, debug: debug )
-   tokens
- end
+ def debug?() @debug == true; end
 
- def tokenize_with_errors( lines, debug: false )
+ def initialize( lines, debug: false )
+   @debug = debug
 
-   ##
  ## note - for convenience - add support
  ##   comments (incl. inline end-of-line comments) and empty lines here
  ##  why? why not?
@@ -137,33 +147,33 @@ def tokenize_with_errors( lines, debug: false )
  ## strip lines with comments and empty lines striped / removed
  ## keep empty lines? why? why not?
  ## keep leading spaces (indent) - why?
- txt = String.new
+ @txt = String.new
  txt_pre.each_line do |line| ## preprocess
  line = line.strip
  next if line.empty? || line.start_with?('#') ### skip empty lines and comments
 
  line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
 
- txt << line
- txt << "\n"
+ @txt << line
+ @txt << "\n"
  end
-
+ end
+
 
+
+ def tokenize_with_errors
  tokens_by_line = [] ## note: add tokens line-by-line (flatten later)
  errors = [] ## keep a list of errors - why? why not?
 
- txt.each_line do |line|
+ @txt.each_line do |line|
  line = line.rstrip ## note - MUST remove/strip trailing newline (spaces optional)!!!
 
- more_tokens, more_errors = _tokenize_line( line, debug: debug )
+ more_tokens, more_errors = _tokenize_line( line )
 
  tokens_by_line << more_tokens
  errors += more_errors
  end # each line
 
-
-
-
  tokens_by_line = tokens_by_line.map do |tokens|
  #############
  ## pass 1
@@ -246,11 +256,11 @@ end # method tokenize_with_errors
 
 
 
- def _tokenize_line( line, debug: false )
+ def _tokenize_line( line )
  tokens = []
  errors = [] ## keep a list of errors - why? why not?
 
- puts ">#{line}<" if debug
+ puts ">#{line}<" if debug?
 
  pos = 0
  ## track last offsets - to report error on no match
@@ -265,7 +275,7 @@ def _tokenize_line( line, debug: false )
 
 
  while m = @re.match( line, pos )
- if debug
+ if debug?
  pp m
  puts "pos: #{pos}"
  end
@@ -274,10 +284,10 @@ def _tokenize_line( line, debug: false )
  if offsets[0] != pos
  ## match NOT starting at start/begin position!!!
  ## report parse error!!!
- msg = "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+ msg = "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
  puts msg
 
- errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
+ errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
  log( msg )
  end
 
@@ -288,7 +298,7 @@ def _tokenize_line( line, debug: false )
 
  pos = offsets[1]
 
- pp offsets if debug
+ pp offsets if debug?
 
  ##
  ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -331,7 +341,7 @@ def _tokenize_line( line, debug: false )
  when '-' then [:'-']
  when '.' then
  ## switch back to top-level mode!!
- puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug
+ puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
  @re = RE
  [:'.']
  else
@@ -352,7 +362,7 @@ def _tokenize_line( line, debug: false )
  elsif m[:prop_key]
  ## switch context to PROP_RE
  @re = PROP_RE
- puts " ENTER PROP_RE MODE" if debug
+ puts " ENTER PROP_RE MODE" if debug?
  [:PROP, m[:key]]
  elsif m[:text]
  [:TEXT, m[:text]] ## keep pos - why? why not?
@@ -462,7 +472,7 @@ def _tokenize_line( line, debug: false )
 
  tokens << t if t
 
- if debug
+ if debug?
  print ">"
  print "*" * pos
  puts "#{line[pos..-1]}<"
@@ -475,13 +485,12 @@ def _tokenize_line( line, debug: false )
  puts msg
  log( msg )
 
- errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
+ errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
  end
 
 
  [tokens,errors]
  end
 
-
- end # class Parser
+ end # class Lexer
  end # module SportDb
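
Taken together, the lexer.rb changes move the tokenizer from class Parser to class Lexer: the input and the debug flag now go to the constructor (which also does the comment/blank-line preprocessing into @txt), tokenize_with_errors becomes a zero-argument instance method, the ignore-errors tokenize convenience wrapper is dropped, and error strings gain a "(tokenize)" tag plus the full offending line. A minimal before/after usage sketch (the sample input is taken from the file's own comment; the exact wiring in the released gem may differ):

    require 'sportdb/parser'

    txt = "Netherlands 1-2 (1-1) England\n"

    ## before (0.5.7) - input and debug flag passed per call
    # tokens, errors = parser.tokenize_with_errors( txt, debug: true )

    ## after (0.5.8) - input and debug flag move to the constructor
    lexer = SportDb::Lexer.new( txt, debug: true )
    tokens, errors = lexer.tokenize_with_errors

    errors.each { |err| puts err }
    ## e.g. "parse error (tokenize) - skipping >...< @3,7 in line >...<"

The new is_group?/is_round?/is_leg? methods are thin compatibility shims that delegate to Lang, keeping the "old" query interface available on the Lexer for now.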