sportdb-parser 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9af6317b144478400067502e60de2f8d6232ebf6e036b0f99b78f9c29922dba2
-  data.tar.gz: 7066483378693f6376f9c30ec71e5d4172c817c11025dd4e669da6d581b6ad54
+  metadata.gz: 85526406c8cd97a5b4e8580e64597b60f2046f4667a97080434238e067be2788
+  data.tar.gz: dcd5e6aaa854654974644c026fb99545c31ef2d5929d0518d8418630b5d6ea76
 SHA512:
-  metadata.gz: '039fdc82039d05ae8f51847a79dd77c0a657e316e8b0705a28bdf2f8e594f37531ea07a230c9e1a9133f96293975190dba070f50515d58bc9926e4ef3e8e152f'
-  data.tar.gz: e0f6483cd26ba7ef4800ecf76efd6f05e01e3a2458dbc6b65fe6582654c8d28627facbfc8228655e361df84c43418c9583826908cdcf3d61bf06d186288c56fa
+  metadata.gz: fc086846a66d2657d3debae5562fe20fbd2327741c8af1485972dfd0b8f46b3c649c0345ba173f2d3f40622bd4bddecc7ea0072d4d129bc5dc542554c539ebab
+  data.tar.gz: c0c4653cb40cb89e8086b6dc00ed853c62abf75fc18972cf98230a779cd8a7f73d797098ec53c2e942e6271c00c9d11a1da8f6975704141cbfbba599ec741098
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
-### 0.6.0
+### 0.6.2
 
 ### 0.0.1 / 2024-07-12
 
 * Everything is new. First release.
@@ -147,12 +147,15 @@ def initialize( lines, debug: false )
     ## strip lines with comments and empty lines striped / removed
     ## keep empty lines? why? why not?
     ## keep leading spaces (indent) - why?
+    ##
+    ##  note - KEEP empty lines (get turned into BLANK token!!!!)
+
     @txt = String.new
     txt_pre.each_line do |line|   ## preprocess
       line = line.strip
-      next if line.empty? || line.start_with?('#')   ### skip empty lines and comments
+      next if line.start_with?('#')    ### skip comments
 
-      line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too
+      line = line.sub( /#.*/, '' ).strip        ### cut-off end-of line comments too
 
       @txt << line
       @txt << "\n"
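
Note: the preprocessing above now keeps empty lines (only comment lines are dropped), so the tokenizer can later turn them into BLANK tokens. A standalone sketch of the new behavior, with made-up sample input:

    txt_pre = "Arsenal 2-1 Chelsea\n" \
              "\n" \
              "# a full-line comment - still skipped\n"

    txt = String.new
    txt_pre.each_line do |line|
      line = line.strip
      next if line.start_with?('#')        ## skip full-line comments only
      line = line.sub( /#.*/, '' ).strip   ## cut off end-of-line comments
      txt << line
      txt << "\n"
    end
    p txt   #=> "Arsenal 2-1 Chelsea\n\n"  - the empty line survives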
@@ -193,6 +196,18 @@ def tokenize_with_errors
         t
       end
 
+      ### check for "section" starters e.g. Teams or such
+      t = tokens[0]
+      if t[0] == :TEXT
+        text = t[1]
+        if text =~ /^teams$/i
+          t[0] = :TEAMS
+        elsif text =~ /^blank$/i    ### todo/fix -- remove!!! add real blanks!!
+          t[0] = :BLANK
+        else
+        end
+      end
+
       #################
       ## pass 2
       ##  transform tokens (using simple patterns)
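
Note: pass 1 now retags a line whose leading TEXT token reads "Teams" (case-insensitive) as a :TEAMS section starter. A minimal sketch of the retagging idea, assuming a TEXT token carries its literal string as the value:

    tokens = [[:TEXT, 'Teams']]      ## a lone section-header line
    t = tokens[0]
    if t[0] == :TEXT && t[1] =~ /^teams$/i
      t[0] = :TEAMS
    end
    pp tokens   #=> [[:TEAMS, "Teams"]]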
@@ -227,13 +242,22 @@ def tokenize_with_errors
       end
 
 
-      if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
+      if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
         nodes << [:TEAM, buf.next[1]]
         nodes << buf.next
         nodes << [:TEAM, buf.next[1]]
       elsif buf.match?( :TEXT, :MINUTE )
         nodes << [:PLAYER, buf.next[1]]
         nodes << buf.next
+      elsif buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
+        date = buf.next[1]
+        time = buf.next[1]
+        ## puts "DATETIME:"
+        ## pp date, time
+        val = [date[0] + ' ' + time[0],   ## concat string of two tokens
+               { date: date[1], time: time[1] }
+              ]
+        nodes << [:DATETIME, val]
       else
         ## pass through
         nodes << buf.next
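
Note: pass 2 now folds adjacent DATE and TIME tokens into a single DATETIME token, concatenating the literal strings and keeping both value hashes. Roughly, with illustrative values (the real hashes come from the date/time lexer rules):

    date = ['Sat Aug/10', { m: 8, d: 10 }]   ## [literal, value hash]
    time = ['15:30',      { h: 15, min: 30 }]

    val  = [date[0] + ' ' + time[0],         ## "Sat Aug/10 15:30"
            { date: date[1], time: time[1] }]
    pp [:DATETIME, val]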
@@ -253,7 +277,8 @@ def tokenize_with_errors
     end
 
     tokens += tok
-    tokens << [:NEWLINE, "\n"]   ## auto-add newlines
+    ## auto-add newlines (unless BLANK!!)
+    tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
   end
 
   [tokens,errors]
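
Note: with BLANK in play, the per-line loop no longer appends a NEWLINE after a blank line - the BLANK token itself is the separator. An isolated illustration of that guard:

    tok     = [[:BLANK, '<|BLANK|>']]   ## tokens for one (blank) line
    tokens  = []
    tokens += tok
    tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
    pp tokens   #=> [[:BLANK, "<|BLANK|>"]]  - no trailing NEWLINE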
@@ -267,6 +292,17 @@ def _tokenize_line( line )
 
   puts "line: >#{line}<"   if debug?
 
+
+  ### special case for empty line (aka BLANK)
+  if line.empty?
+    ##  note - blank always resets parser mode to std/top-level!!!
+    @re = RE
+
+    tokens << [:BLANK, '<|BLANK|>']
+    return [tokens, errors]
+  end
+
+
   pos = 0
   ## track last offsets - to report error on no match
   ##                        or no match in end of string
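
Note: _tokenize_line now short-circuits on an empty line - it emits a single BLANK pseudo-token and, importantly, drops back from any special mode (e.g. PROP_RE) to the top-level machinery. A minimal runnable sketch, with stand-in regexps for the gem's real RE / PROP_RE:

    RE      = /\w+/      ## stand-in for the top-level rules
    PROP_RE = /[\w:]+/   ## stand-in for the property-block rules

    @re  = PROP_RE       ## pretend the lexer is inside a prop block
    line = ''            ## an empty (blank) line arrives

    tokens = []
    if line.empty?
      @re = RE                           ## blank always resets to top-level
      tokens << [:BLANK, '<|BLANK|>']
    end
    pp tokens    #=> [[:BLANK, "<|BLANK|>"]]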
@@ -353,7 +389,9 @@ def _tokenize_line( line )
       puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
       nil
     end
-  else   ## assume TOP_LEVEL (a.k.a. RE) machinery
+  ###################################################
+  ## assume TOP_LEVEL (a.k.a. RE) machinery
+  else
     if m[:space] || m[:spaces]
       nil   ## skip space(s)
     elsif m[:prop_key]
@@ -372,6 +410,11 @@ def _tokenize_line( line )
     else
       [:STATUS, [m[:status], {status: m[:status] } ]]
     end
+  elsif m[:note]
+    ### todo/check:
+    ##   use value hash - why? why not?  or simplify to:
+    ##     [:NOTE, m[:note]]
+    [:NOTE, [m[:note], {note: m[:note] } ]]
   elsif m[:time]
     ## unify to iso-format
     ###  12.40  => 12:40
@@ -420,7 +463,7 @@ def _tokenize_line( line )
   elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
     ## note - strip enclosing () and convert to integer
     [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
-  elsif m[:score]
+  elsif m[:score_more]
     score = {}
     ## check for pen
     score[:p] = [m[:p1].to_i(10),
@@ -433,8 +476,15 @@ def _tokenize_line( line )
                  m[:ht2].to_i(10)]   if m[:ht1] && m[:ht2]
 
     ## note - for debugging keep (pass along) "literal" score
-    [:SCORE, [m[:score], score]]
-  elsif m[:minute]
+    [:SCORE_MORE, [m[:score_more], score]]
+  elsif m[:score]
+    score = {}
+    ## must always have ft for now e.g. 1-1 or such
+    score[:ft] = [m[:ft1].to_i(10),
+                  m[:ft2].to_i(10)]
+    ## note - for debugging keep (pass along) "literal" score
+    [:SCORE, [m[:score], score]]
+  elsif m[:minute]
     minute = {}
     minute[:m] = m[:value].to_i(10)
     minute[:offset] = m[:value2].to_i(10)   if m[:value2]
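
Note: the single score rule is split in two - a plain full-time result (e.g. 1-1) now matches a simpler score rule that builds only score[:ft], while results carrying halftime, extra-time, or penalty parts go through score_more. A hedged sketch with stand-in regexps (the gem's real patterns are more involved):

    SCORE_RE      = /(?<score>(?<ft1>\d+)-(?<ft2>\d+))/
    SCORE_MORE_RE = /(?<score_more>(?<ft1>\d+)-(?<ft2>\d+)\s+\((?<ht1>\d+)-(?<ht2>\d+)\))/

    if m = SCORE_MORE_RE.match( '2-1 (1-0)' )        ## full-time plus halftime
      score = { ft: [m[:ft1].to_i(10), m[:ft2].to_i(10)],
                ht: [m[:ht1].to_i(10), m[:ht2].to_i(10)] }
      pp [:SCORE_MORE, [m[:score_more], score]]
    end

    if m = SCORE_RE.match( '1-1' )                   ## plain full-time only
      pp [:SCORE, [m[:score], { ft: [m[:ft1].to_i(10), m[:ft2].to_i(10)] }]]
    end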
@@ -459,7 +509,10 @@ def _tokenize_line( line )
     when '|' then [:'|']
     when '[' then [:'[']
     when ']' then [:']']
-    when '-' then [:'-']
+    when '-'    then [:'-']      # level 1  OR (classic) dash
+    when '--'   then [:'--']     # level 2
+    when '---'  then [:'---']    # level 3
+    when '----' then [:'----']   # level 4
     else
       nil  ## ignore others (e.g. brackets [])
     end
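
Note: dash runs of up to four are now kept as distinct tokens instead of all collapsing to :'-', so the grammar can tell marker depths apart. A minimal sketch (how the grammar consumes the levels may differ):

    def dash_token( str )
      case str
      when '-'    then [:'-']      # level 1 or (classic) dash
      when '--'   then [:'--']     # level 2
      when '---'  then [:'---']    # level 3
      when '----' then [:'----']   # level 4
      end
    end

    pp %w[- -- --- ----].map { |str| dash_token( str ) }
    #=> [[:"-"], [:"--"], [:"---"], [:"----"]]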
@@ -504,7 +557,7 @@ def _tokenize_line( line )
       puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
       @re = RE
       ## note - auto-add PROP_END (<PROP_END>)
-      tokens << [:PROP_END, "<PROP_END>"]
+      tokens << [:PROP_END, "<|PROP_END|>"]
     end
   end