sportdb-parser 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d1dd9f29584bb09508c0fb6ad253a84ff299c7f33650d27195abbc679c1e893f
4
- data.tar.gz: 6af07bc6f008e1b6c53cd796253f66bd68f2f28299a750896be83176bae7d085
3
+ metadata.gz: 7cb697dbecb7802a9701c60527bb67125dc7ac550b6c5c80f67044e51683eb15
4
+ data.tar.gz: d53cc9c126b93a4702b2c633daf863a3e4d4b63397f7c5ac60bd0646ab1d2da9
5
5
  SHA512:
6
- metadata.gz: f8adae46063426008e23f01ef3d625a3477c01a827b942edadddfad7e774e8689de3545c1c6ae8e32d0fa43b2bfbf5c9dc9962947c97be79ab325a41a7896c48
7
- data.tar.gz: e0b843c9b2eee0ebfec203889ed5ea90f94eb22f80ca0df58d8865f05dd08f4d4bc3547b59f0fbe00e323a09df9c13289954ddea34bf437cad6d196c29a2e436
6
+ metadata.gz: 341aa9f1b25259db42452f28bd69f5643ecb5a3b45755a88565aac2b7a2842bc34d8d2a53e7d3b7a4b96c15c199c000167f4e94e608486a4ae9fefa9756bc7a0
7
+ data.tar.gz: d983116b79d9f361e136ef989ac02041ea735cfda42614f0ff08f5fd43899d7deebc135a82c020ff6dc7c9c6cf6ee413bbff7cdc9f65e41a682b3bf5cb5b082f
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.6.3
1
+ ### 0.6.5
2
2
  ### 0.0.1 / 2024-07-12
3
3
 
4
4
  * Everything is new. First release.
@@ -246,9 +246,10 @@ def tokenize_with_errors
246
246
  nodes << [:TEAM, buf.next[1]]
247
247
  nodes << buf.next
248
248
  nodes << [:TEAM, buf.next[1]]
249
- elsif buf.match?( :TEXT, :MINUTE )
250
- nodes << [:PLAYER, buf.next[1]]
251
- nodes << buf.next
249
+ # note - now handled (upstream) with GOAL_RE mode!!!
250
+ # elsif buf.match?( :TEXT, :MINUTE )
251
+ # nodes << [:PLAYER, buf.next[1]]
252
+ # nodes << buf.next
252
253
  elsif buf.match?( :DATE, :TIME ) ## merge DATE TIME into DATETIME
253
254
  date = buf.next[1]
254
255
  time = buf.next[1]
@@ -315,11 +316,103 @@ def _tokenize_line( line )
315
316
  @re ||= RE ## note - switch between RE & INSIDE_RE
316
317
 
317
318
 
319
+ if @re == RE ## top-level
320
+ ### check for modes once (per line) here to speed-up parsing
321
+ ### for now goals only possible for start of line!!
322
+ ### fix - remove optional [] - why? why not?
323
+
324
+ ## start with prop key (match will switch into prop mode!!!)
325
+ ## - fix - remove leading spaces in regex (upstream) - why? why not?
326
+ m = PROP_KEY_RE.match( line )
327
+ if m
328
+ ### switch into new mode
329
+ ## switch context to PROP_RE
330
+ puts " ENTER PROP_RE MODE" if debug?
331
+ key = m[:key]
332
+
333
+
334
+ ### todo - add prop yellow/red cards too - why? why not?
335
+ if ['sent off', 'red cards'].include?( key.downcase)
336
+ @re = PROP_CARDS_RE ## use CARDS_RE ???
337
+ tokens << [:PROP_REDCARDS, m[:key]]
338
+ elsif ['yellow cards'].include?( key.downcase )
339
+ @re = PROP_CARDS_RE
340
+ tokens << [:PROP_YELLOWCARDS, m[:key]]
341
+ elsif ['ref', 'referee'].include?( key.downcase )
342
+ @re = PROP_RE ## (re)use prop setup for now - why? why not?
343
+ tokens << [:PROP_REFEREE, m[:key]]
344
+ elsif ['goals'].include?( key.downcase )
345
+ @re = PROP_GOAL_RE
346
+ tokens << [:PROP_GOALS, m[:key]]
347
+ else ## assume (team) line-up
348
+ @re = PROP_RE ## use LINEUP_RE ???
349
+ tokens << [:PROP, m[:key]]
350
+ end
351
+
352
+ offsets = [m.begin(0), m.end(0)]
353
+ pos = offsets[1] ## update pos
354
+ end
355
+
356
+ m = PLAYER_WITH_SCORE_RE.match( line )
357
+ if m
358
+ ## switch context to GOAL_RE (goal line(s))
359
+ ## split token (automagically) into two!! - player AND minute!!!
360
+ @re = GOAL_RE
361
+ puts " ENTER GOAL_RE MODE" if debug?
362
+
363
+ score = {}
364
+ ## must always have ft for now e.g. 1-1 or such
365
+ ### change to (generic) score from ft -
366
+ ## might be score a.e.t. or such - why? why not?
367
+ score[:ft] = [m[:ft1].to_i(10),
368
+ m[:ft2].to_i(10)]
369
+ ## note - for debugging keep (pass along) "literal" score
370
+ tokens << [:SCORE, [m[:score], score]]
371
+
372
+ ## auto-add player token
373
+ tokens << [:PLAYER, m[:name]]
374
+
375
+ offsets = [m.begin(0), m.end(0)]
376
+ pos = offsets[1] ## update pos
377
+ end
378
+
379
+ m = PLAYER_WITH_MINUTE_RE.match( line )
380
+ if m
381
+ ## switch context to GOAL_RE (goal line(s))
382
+ ## split token (automagically) into two!! - player AND minute!!!
383
+ @re = GOAL_RE
384
+ puts " ENTER GOAL_RE MODE" if debug?
385
+
386
+ ## check for optional open_bracket
387
+ tokens << [:'['] if m[:open_bracket]
388
+
389
+ ## check for -; (none with separator)
390
+ ## todo - find a better way? how possible?
391
+ tokens << [:NONE, "<|NONE|>"] if m[:none]
392
+
393
+
394
+
395
+ ## auto-add player token first
396
+ tokens << [:PLAYER, m[:name]]
397
+ ## minute props
398
+ minute = {}
399
+ minute[:m] = m[:value].to_i(10)
400
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
401
+ ## t is minute only
402
+ tokens << [:MINUTE, [m[:minute], minute]]
403
+
404
+ offsets = [m.begin(0), m.end(0)]
405
+ pos = offsets[1] ## update pos
406
+ end
407
+ end
408
+
409
+
410
+
318
411
  while m = @re.match( line, pos )
319
- # if debug?
320
- # pp m
321
- # puts "pos: #{pos}"
322
- # end
412
+ # if debug?
413
+ # pp m
414
+ # puts "pos: #{pos}"
415
+ # end
323
416
  offsets = [m.begin(0), m.end(0)]
324
417
 
325
418
  if offsets[0] != pos
@@ -345,10 +438,44 @@ def _tokenize_line( line )
345
438
  ## note: racc requires pairs e.g. [:TOKEN, VAL]
346
439
  ## for VAL use "text" or ["text", { opts }] array
347
440
 
348
-
349
- t = if @re == PROP_RE
441
+ t = if @re == PROP_CARDS_RE
442
+ if m[:space] || m[:spaces]
443
+ nil ## skip space(s)
444
+ elsif m[:prop_name]
445
+ [:PROP_NAME, m[:name]]
446
+ elsif m[:minute]
447
+ minute = {}
448
+ minute[:m] = m[:value].to_i(10)
449
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
450
+ ## note - for debugging keep (pass along) "literal" minute
451
+ [:MINUTE, [m[:minute], minute]]
452
+ elsif m[:sym]
453
+ sym = m[:sym]
454
+ case sym
455
+ when ',' then [:',']
456
+ when ';' then [:';']
457
+ when '-' then [:'-']
458
+ else
459
+ nil ## ignore others (e.g. brackets [])
460
+ end
461
+ else
462
+ ## report error
463
+ puts "!!! TOKENIZE ERROR (PROP_CARDS_RE) - no match found"
464
+ nil
465
+ end
466
+ elsif @re == PROP_RE ### todo/fix - change to LINEUP_RE !!!!
350
467
  if m[:space] || m[:spaces]
351
468
  nil ## skip space(s)
469
+ elsif m[:prop_key] ## check for inline prop keys
470
+ key = m[:key]
471
+ ## supported for now coach/trainer (add manager?)
472
+ if ['coach',
473
+ 'trainer'].include?( key.downcase )
474
+ [:COACH, m[:key]] ## use COACH_KEY or such - why? why not?
475
+ else
476
+ ## report error - for unknown (inline) prop key in lineup
477
+ nil
478
+ end
352
479
  elsif m[:prop_name]
353
480
  if m[:name] == 'Y'
354
481
  [:YELLOW_CARD, m[:name]]
@@ -376,11 +503,6 @@ def _tokenize_line( line )
376
503
  when '(' then [:'(']
377
504
  when ')' then [:')']
378
505
  when '-' then [:'-']
379
- # when '.' then
380
- # ## switch back to top-level mode!!
381
- # puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
382
- # @re = RE
383
- # [:'.']
384
506
  else
385
507
  nil ## ignore others (e.g. brackets [])
386
508
  end
@@ -389,16 +511,53 @@ def _tokenize_line( line )
389
511
  puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
390
512
  nil
391
513
  end
514
+ elsif @re == GOAL_RE || @re == PROP_GOAL_RE
515
+ if m[:space] || m[:spaces]
516
+ nil ## skip space(s)
517
+ elsif m[:prop_name] ## note - change prop_name to player
518
+ [:PLAYER, m[:name]]
519
+ elsif m[:minute]
520
+ minute = {}
521
+ minute[:m] = m[:value].to_i(10)
522
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
523
+ ## note - for debugging keep (pass along) "literal" minute
524
+ [:MINUTE, [m[:minute], minute]]
525
+ elsif m[:score]
526
+ score = {}
527
+ ## must always have ft for now e.g. 1-1 or such
528
+ ### change to (generic) score from ft -
529
+ ## might be score a.e.t. or such - why? why not?
530
+ score[:ft] = [m[:ft1].to_i(10),
531
+ m[:ft2].to_i(10)]
532
+ ## note - for debugging keep (pass along) "literal" score
533
+ [:SCORE, [m[:score], score]]
534
+ elsif m[:og]
535
+ [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
536
+ elsif m[:pen]
537
+ [:PEN, m[:pen]]
538
+ elsif m[:sym]
539
+ sym = m[:sym]
540
+ ## return symbols "inline" as is - why? why not?
541
+ ## (?<sym>[;,@|\[\]-])
542
+
543
+ case sym
544
+ when ',' then [:',']
545
+ when ';' then [:';']
546
+ when '[' then [:'[']
547
+ when ']' then [:']']
548
+ else
549
+ nil ## ignore others (note - brackets [] are handled above)
550
+ end
551
+ else
552
+ ## report error
553
+ puts "!!! TOKENIZE ERROR (GOAL_RE) - no match found"
554
+ nil
555
+ end
392
556
  ###################################################
393
557
  ## assume TOP_LEVEL (a.k.a. RE) machinery
394
558
  else
395
559
  if m[:space] || m[:spaces]
396
560
  nil ## skip space(s)
397
- elsif m[:prop_key]
398
- ## switch context to PROP_RE
399
- @re = PROP_RE
400
- puts " ENTER PROP_RE MODE" if debug?
401
- [:PROP, m[:key]]
402
561
  elsif m[:text]
403
562
  [:TEXT, m[:text]] ## keep pos - why? why not?
404
563
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
@@ -436,7 +595,9 @@ def _tokenize_line( line )
436
595
  date = {}
437
596
  ## map month names
438
597
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
439
- date[:y] = m[:year].to_i(10) if m[:year]
598
+ date[:y] = m[:year].to_i(10) if m[:year]
599
+ ## check - use y too for two-digit year or keep separate - why? why not?
600
+ date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
440
601
  date[:m] = m[:month].to_i(10) if m[:month]
441
602
  date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
442
603
  date[:d] = m[:day].to_i(10) if m[:day]
@@ -480,6 +641,8 @@ def _tokenize_line( line )
480
641
  elsif m[:score]
481
642
  score = {}
482
643
  ## must always have ft for now e.g. 1-1 or such
644
+ ### change to (generic) score from ft -
645
+ ## might be score a.e.t. or such - why? why not?
483
646
  score[:ft] = [m[:ft1].to_i(10),
484
647
  m[:ft2].to_i(10)]
485
648
  ## note - for debugging keep (pass along) "literal" score
@@ -490,10 +653,6 @@ def _tokenize_line( line )
490
653
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
491
654
  ## note - for debugging keep (pass along) "literal" minute
492
655
  [:MINUTE, [m[:minute], minute]]
493
- elsif m[:og]
494
- [:OG, m[:og]] ## for typed drop - string version/variants ?? why? why not?
495
- elsif m[:pen]
496
- [:PEN, m[:pen]]
497
656
  elsif m[:vs]
498
657
  [:VS, m[:vs]]
499
658
  elsif m[:sym]
@@ -514,8 +673,13 @@ def _tokenize_line( line )
514
673
  when '---' then [:'---'] # level 3
515
674
  when '----' then [:'----'] # level 4
516
675
  else
676
+ puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
517
677
  nil ## ignore others (e.g. brackets [])
518
678
  end
679
+ elsif m[:any]
680
+ ## todo/check log error
681
+ puts "!!! TOKENIZE ERROR (any) - no match found >#{m[:any]}<"
682
+ nil
519
683
  else
520
684
  ## report error
521
685
  puts "!!! TOKENIZE ERROR - no match found"
@@ -546,8 +710,8 @@ def _tokenize_line( line )
546
710
  ##
547
711
  ## if in prop mode continue if last token is [,-]
548
712
  ## otherwise change back to "standard" mode
549
- if @re == PROP_RE
550
- if [:',', :'-'].include?( tokens[-1][0] )
713
+ if @re == PROP_RE || @re == PROP_CARDS_RE || @re == PROP_GOAL_RE
714
+ if [:',', :'-', :';'].include?( tokens[-1][0] )
551
715
  ## continue/stay in PROP_RE mode
552
716
  ## todo/check - auto-add PROP_CONT token or such
553
717
  ## to help parser with possible NEWLINE
@@ -560,6 +724,12 @@ def _tokenize_line( line )
560
724
  tokens << [:PROP_END, "<|PROP_END|>"]
561
725
  end
562
726
  end
727
+
728
+
729
+ if @re == GOAL_RE ### ALWAYS switch back to top level mode
730
+ puts " LEAVE GOAL_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
731
+ @re = RE
732
+ end
563
733
 
564
734
  [tokens,errors]
565
735
  end