sportdb-parser 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: d1dd9f29584bb09508c0fb6ad253a84ff299c7f33650d27195abbc679c1e893f
- data.tar.gz: 6af07bc6f008e1b6c53cd796253f66bd68f2f28299a750896be83176bae7d085
+ metadata.gz: 00ec5bcacfe56b29e9589507c11d3bfe361caed4b45ebdfa3b05901c8229b019
+ data.tar.gz: bd36a9b6c0b84a9a033d721c4adf086ff25703ed31dde3fc8265c421ba6273c1
  SHA512:
- metadata.gz: f8adae46063426008e23f01ef3d625a3477c01a827b942edadddfad7e774e8689de3545c1c6ae8e32d0fa43b2bfbf5c9dc9962947c97be79ab325a41a7896c48
- data.tar.gz: e0b843c9b2eee0ebfec203889ed5ea90f94eb22f80ca0df58d8865f05dd08f4d4bc3547b59f0fbe00e323a09df9c13289954ddea34bf437cad6d196c29a2e436
+ metadata.gz: db5568eb30b924f0e963402ed3089edc1b1cdbcf31cfaa4177da6215b82e0646badd428e94a14a42abfbb4688c7543f8b04f698eb1551082e9731b1937a23e19
+ data.tar.gz: d3619f1ea496cf4fdc08d0904afee295679d6f1aa0c35aacd09ccf8e19ad00beb78f832efef3f24189b8793b0dd24064544854e84d7d26653398962d79bda311
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
- ### 0.6.3
+ ### 0.6.4
  ### 0.0.1 / 2024-07-12

  * Everything is new. First release.
@@ -246,9 +246,10 @@ def tokenize_with_errors
  nodes << [:TEAM, buf.next[1]]
  nodes << buf.next
  nodes << [:TEAM, buf.next[1]]
- elsif buf.match?( :TEXT, :MINUTE )
- nodes << [:PLAYER, buf.next[1]]
- nodes << buf.next
+ # note - now handled (upstream) with GOAL_RE mode!!!
+ # elsif buf.match?( :TEXT, :MINUTE )
+ # nodes << [:PLAYER, buf.next[1]]
+ # nodes << buf.next
  elsif buf.match?( :DATE, :TIME ) ## merge DATE TIME into DATETIME
  date = buf.next[1]
  time = buf.next[1]
@@ -315,11 +316,85 @@ def _tokenize_line( line )
  @re ||= RE ## note - switch between RE & INSIDE_RE


+ if @re == RE ## top-level
+ ### check for modes once (per line) here to speed-up parsing
+ ### for now goals only possible for start of line!!
+ ### fix - remove optional [] - why? why not?
+
+ ## start with prop key (match will switch into prop mode!!!)
+ ## - fix - remove leading spaces in regex (upstream) - why? why not?
+ m = PROP_KEY_RE.match( line )
+ if m
+ ### switch into new mode
+ ## switch context to PROP_RE
+ @re = PROP_RE
+ puts " ENTER PROP_RE MODE" if debug?
+ tokens << [:PROP, m[:key]]
+
+ offsets = [m.begin(0), m.end(0)]
+ pos = offsets[1] ## update pos
+ end
+
+ m = PLAYER_WITH_SCORE_RE.match( line )
+ if m
+ ## switch context to GOAL_RE (goalline(s)
+ ## split token (automagically) into two!! - player AND minute!!!
+ @re = GOAL_RE
+ puts " ENTER GOAL_RE MODE" if debug?
+
+ score = {}
+ ## must always have ft for now e.g. 1-1 or such
+ ### change to (generic) score from ft -
+ ## might be score a.e.t. or such - why? why not?
+ score[:ft] = [m[:ft1].to_i(10),
+ m[:ft2].to_i(10)]
+ ## note - for debugging keep (pass along) "literal" score
+ tokens << [:SCORE, [m[:score], score]]
+
+ ## auto-add player token
+ tokens << [:PLAYER, m[:name]]
+
+ offsets = [m.begin(0), m.end(0)]
+ pos = offsets[1] ## update pos
+ end
+
+ m = PLAYER_WITH_MINUTE_RE.match( line )
+ if m
+ ## switch context to GOAL_RE (goalline(s)
+ ## split token (automagically) into two!! - player AND minute!!!
+ @re = GOAL_RE
+ puts " ENTER GOAL_RE MODE" if debug?
+
+ ## check for optional open_bracket
+ tokens << [:'['] if m[:open_bracket]
+
+ ## check for -; (none with separator)
+ ## todo - find a better way? how possible?
+ tokens << [:NONE, "<|NONE|>"] if m[:none]
+
+
+
+ ## auto-add player token first
+ tokens << [:PLAYER, m[:name]]
+ ## minute props
+ minute = {}
+ minute[:m] = m[:value].to_i(10)
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
+ ## t is minute only
+ tokens << [:MINUTE, [m[:minute], minute]]
+
+ offsets = [m.begin(0), m.end(0)]
+ pos = offsets[1] ## update pos
+ end
+ end
+
+
+
  while m = @re.match( line, pos )
- # if debug?
- # pp m
- # puts "pos: #{pos}"
- # end
+ # if debug?
+ # pp m
+ # puts "pos: #{pos}"
+ # end
  offsets = [m.begin(0), m.end(0)]

  if offsets[0] != pos
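
The hunk above pre-scans each line once before the main match loop: if the line opens with a prop key, a player-plus-score, or a player-plus-minute pattern, the lexer switches its active regex (@re) to PROP_RE or GOAL_RE, pushes the leading tokens directly, and advances pos past the match so the loop continues from there. A minimal sketch of that line-start mode switch, using made-up regexes and token shapes rather than the gem's actual PROP_KEY_RE / PLAYER_WITH_MINUTE_RE / GOAL_RE definitions:

    ## illustrative sketch only - simplified stand-ins, not the gem's regexes
    TOP_RE        = /(?<space>[ \t]+)|(?<text>[A-Za-z]+)/
    GOALLINE_RE   = /(?<space>[ \t]+)|(?<minute>(?<value>\d{1,3})')|(?<name>[A-Za-z]+)/
    GOAL_START_RE = /\A(?<name>[A-Za-z]+) +(?<value>\d{1,3})'/

    def tokenize_line( line )
      tokens = []
      pos    = 0
      re     = TOP_RE                 ## start in top-level mode

      ## check once (per line) - switch into goal mode if the line starts with "<player> <minute>'"
      if (m = GOAL_START_RE.match( line ))
        re = GOALLINE_RE
        tokens << [:PLAYER, m[:name]]
        tokens << [:MINUTE, m[:value].to_i(10)]
        pos = m.end(0)                ## continue scanning after the matched prefix
      end

      while (m = re.match( line, pos ))
        pos = m.end(0)
        next if m[:space]             ## skip space(s)
        if re == GOALLINE_RE && m[:minute]
          tokens << [:MINUTE, m[:value].to_i(10)]
        elsif re == GOALLINE_RE && m[:name]
          tokens << [:PLAYER, m[:name]]
        elsif m[:text]
          tokens << [:TEXT, m[:text]]
        end
      end
      tokens
    end

    pp tokenize_line( "Salah 45' 67'" )   ## => [[:PLAYER, "Salah"], [:MINUTE, 45], [:MINUTE, 67]]

Checking these patterns once per line (instead of inside every loop iteration) keeps the per-token scan cheap and limits goal handling to lines that actually start like goal lines, which matches the diff's own "check for modes once (per line) here to speed-up parsing" note.
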
@@ -389,16 +464,53 @@ def _tokenize_line( line )
  puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
  nil
  end
+ elsif @re == GOAL_RE
+ if m[:space] || m[:spaces]
+ nil ## skip space(s)
+ elsif m[:prop_name] ## note - change prop_name to player
+ [:PLAYER, m[:name]]
+ elsif m[:minute]
+ minute = {}
+ minute[:m] = m[:value].to_i(10)
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
+ ## note - for debugging keep (pass along) "literal" minute
+ [:MINUTE, [m[:minute], minute]]
+ elsif m[:score]
+ score = {}
+ ## must always have ft for now e.g. 1-1 or such
+ ### change to (generic) score from ft -
+ ## might be score a.e.t. or such - why? why not?
+ score[:ft] = [m[:ft1].to_i(10),
+ m[:ft2].to_i(10)]
+ ## note - for debugging keep (pass along) "literal" score
+ [:SCORE, [m[:score], score]]
+ elsif m[:og]
+ [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
+ elsif m[:pen]
+ [:PEN, m[:pen]]
+ elsif m[:sym]
+ sym = m[:sym]
+ ## return symbols "inline" as is - why? why not?
+ ## (?<sym>[;,@|\[\]-])
+
+ case sym
+ when ',' then [:',']
+ when ';' then [:';']
+ when '[' then [:'[']
+ when ']' then [:']']
+ else
+ nil ## ignore others (e.g. brackets [])
+ end
+ else
+ ## report error
+ puts "!!! TOKENIZE ERROR (GOAL_RE) - no match found"
+ nil
+ end
  ###################################################
  ## assume TOP_LEVEL (a.k.a. RE) machinery
  else
  if m[:space] || m[:spaces]
  nil ## skip space(s)
- elsif m[:prop_key]
- ## switch context to PROP_RE
- @re = PROP_RE
- puts " ENTER PROP_RE MODE" if debug?
- [:PROP, m[:key]]
  elsif m[:text]
  [:TEXT, m[:text]] ## keep pos - why? why not?
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
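
In GOAL_RE mode the lexer turns named captures into small structured payloads: a minute becomes a hash with :m and an optional :offset (stoppage time such as 45+2'), a score becomes {ft: [home, away]}, and the literal matched string is kept alongside for debugging. A tiny sketch of that capture-to-hash step, with hypothetical patterns that are not the gem's actual GOAL_RE:

    ## illustrative sketch only - hypothetical named-capture patterns
    MINUTE_RE = /(?<minute>(?<value>\d{1,3})(?:\+(?<value2>\d{1,2}))?')/
    SCORE_RE  = /(?<score>(?<ft1>\d{1,2})-(?<ft2>\d{1,2}))/

    def minute_token( m )
      minute = { m: m[:value].to_i(10) }
      minute[:offset] = m[:value2].to_i(10) if m[:value2]   ## e.g. stoppage time 45+2'
      [:MINUTE, [m[:minute], minute]]   ## keep the "literal" minute for debugging
    end

    def score_token( m )
      score = { ft: [m[:ft1].to_i(10), m[:ft2].to_i(10)] }
      [:SCORE, [m[:score], score]]      ## keep the "literal" score for debugging
    end

    pp minute_token( MINUTE_RE.match( "45+2'" ) )   ## => [:MINUTE, ["45+2'", { m: 45, offset: 2 }]]
    pp score_token( SCORE_RE.match( "1-1" ) )       ## => [:SCORE, ["1-1", { ft: [1, 1] }]]

Keeping the raw capture next to the parsed hash mirrors the [:MINUTE, [m[:minute], minute]] and [:SCORE, [m[:score], score]] token shape used throughout the diff.
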
@@ -436,7 +548,9 @@ def _tokenize_line( line )
  date = {}
  ## map month names
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
- date[:y] = m[:year].to_i(10) if m[:year]
+ date[:y] = m[:year].to_i(10) if m[:year]
+ ## check - use y too for two-digit year or keep separate - why? why not?
+ date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
  date[:m] = m[:month].to_i(10) if m[:month]
  date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
  date[:d] = m[:day].to_i(10) if m[:day]
@@ -480,6 +594,8 @@ def _tokenize_line( line )
  elsif m[:score]
  score = {}
  ## must always have ft for now e.g. 1-1 or such
+ ### change to (generic) score from ft -
+ ## might be score a.e.t. or such - why? why not?
  score[:ft] = [m[:ft1].to_i(10),
  m[:ft2].to_i(10)]
  ## note - for debugging keep (pass along) "literal" score
@@ -490,10 +606,6 @@ def _tokenize_line( line )
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
  ## note - for debugging keep (pass along) "literal" minute
  [:MINUTE, [m[:minute], minute]]
- elsif m[:og]
- [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
- elsif m[:pen]
- [:PEN, m[:pen]]
  elsif m[:vs]
  [:VS, m[:vs]]
  elsif m[:sym]
@@ -514,8 +626,13 @@ def _tokenize_line( line )
  when '---' then [:'---'] # level 3
  when '----' then [:'----'] # level 4
  else
+ puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
  nil ## ignore others (e.g. brackets [])
  end
+ elsif m[:any]
+ ## todo/check log error
+ puts "!!! TOKENIZE ERROR (any) - no match found >#{m[:any]}<"
+ nil
  else
  ## report error
  puts "!!! TOKENIZE ERROR - no match found"
@@ -560,6 +677,12 @@ def _tokenize_line( line )
  tokens << [:PROP_END, "<|PROP_END|>"]
  end
  end
+
+
+ if @re == GOAL_RE ### ALWAYS switch back to top level mode
+ puts " LEAVE GOAL_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
+ @re = RE
+ end

  [tokens,errors]
  end
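
The final hunk makes GOAL_RE a strictly per-line mode: whatever happened inside the line, the lexer drops back to the top-level RE before the next line is tokenized (prop mode, by contrast, is closed explicitly with a PROP_END token just above). A small sketch of that per-line reset, with an illustrative lexer class rather than the gem's actual API:

    ## illustrative sketch only - shows the per-line mode reset, not the gem's lexer
    class LineLexer
      TOP_RE  = /(?<word>[A-Za-z]+)|(?<num>\d+(?:-\d+)?)/
      GOAL_RE = /(?<word>[A-Za-z]+)|(?<minute>\d{1,3}')/

      def initialize
        @re = TOP_RE
      end

      def tokenize_line( line )
        ## enter goal mode only for lines that contain a minute marker like 45'
        @re = GOAL_RE if line.match?( /\d{1,3}'/ )

        tokens = []
        pos    = 0
        while (m = @re.match( line, pos ))
          pos = m.end(0)
          if m[:word]
            tokens << [:WORD, m[:word]]
          elsif @re == GOAL_RE
            tokens << [:MINUTE, m[:minute]]
          else
            tokens << [:NUM, m[:num]]
          end
        end

        @re = TOP_RE if @re == GOAL_RE   ## ALWAYS switch back to top-level mode
        tokens
      end
    end

    lexer = LineLexer.new
    pp lexer.tokenize_line( "Salah 45'" )             ## goal mode for this line only
    pp lexer.tokenize_line( "Arsenal 2-0 Chelsea" )   ## next line is back in top-level mode

Resetting at the end of tokenization keeps the mode from leaking across lines even when a goal line ends mid-pattern or hits a tokenize error.
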