RubyGems - sportdb-parser - Versions diffs - 0.6.15 → 0.6.17 - Mend

sportdb-parser 0.6.15 → 0.6.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/lib/sportdb/parser/lexer.rb +21 -2
data/lib/sportdb/parser/token-minute.rb +7 -2
data/lib/sportdb/parser/token-score.rb +24 -1
data/lib/sportdb/parser/token-text.rb +19 -6
data/lib/sportdb/parser/token.rb +2 -1
data/lib/sportdb/parser/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d907a1b09e04c9c17884afe7881961b597b547ca9ab742ecb6e06f89f8bfe151
-  data.tar.gz: da287bc13de7217bf3c6a2ffcfdd8d7f3d376baff413b79a58dd544bb2bfcc3a
+  metadata.gz: 1ba31cc4284de6a4ea05615ad37a30546375c5e3218847a8f69c3c359074fb9c
+  data.tar.gz: 5b8c73196c6bd08a399c7687cd65f4e05f4c3a66580eaeecd6dbaad33837b822
 SHA512:
-  metadata.gz: 3be1466a2d4ef5a5d1129f6b0fa9f67c6258c5a1cc60aab831ac5c4c7121e691fa64a9f33e71508d91b9809bcdc1b77b152fc2f7c1580172937662865e8ee33e
-  data.tar.gz: fe14835fc3195b5e441bdd3f9763bfe1303aa471e041a908cafc027becfbfd5ffd2e9696948e2dd88647ef7de88ebabf39674e42ba6656144e478150822e699e
+  metadata.gz: cf4bc5b5a112effc59895c405e3484224d4d94aadf4771fecff90458b01aa63b43703632fdada70df9ee906aa8e517b1736dbdb2de795ba53b3b8e05d6c1ba4c
+  data.tar.gz: 46b6a97cb0af77debec3bd58ee24bd12b1236680dc53ff76a50b725621961bb9aabdfc151d9379d01672ebbef2423c3ce4f4d948adc506789c846491a7f30f15

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,4 @@
-### 0.6.15
+### 0.6.17
 ### 0.0.1 / 2024-07-12
 * Everything is new. First release.

data/lib/sportdb/parser/lexer.rb CHANGED Viewed

@@ -290,6 +290,20 @@ end   # method tokenize_with_errors
+### add a QUICK_PLAYER_WITH_MINUTE  check
+QUICK_PLAYER_WITH_MINUTE_RE = %r{
+      \b
+         \d{1,3}      ## constrain numbers to 0 to 999!!!
+        (?: (?:
+                \+\d{1,3}
+            )?
+            |
+            (?: \?{2} | _{2} )  ## add support for n/a (not/available)
+        )
+        '   ## must have minute marker!!!!
+}ix
 def _tokenize_line( line )
   tokens = []
   errors = []   ## keep a list of errors - why? why not?
@@ -387,7 +401,12 @@ def _tokenize_line( line )
       offsets = [m.begin(0), m.end(0)]
       pos = offsets[1]    ## update pos
-    elsif (m = PLAYER_WITH_MINUTE_RE.match( line ))
+    ####  FIX/FIX/TODO
+    ### matching looks to hang in PLAYER_WITH_MINUTE_RE (player with minute) - guard with quick pre-check for now
+    ###  FIX - improve / rework PLAYER_WITH_MINUTE_RE  regex!!!!
+    elsif (_quick = QUICK_PLAYER_WITH_MINUTE_RE.match(line) &&
+                m = PLAYER_WITH_MINUTE_RE.match( line ))
       ##  switch context to GOAL_RE (goalline(s))
       ##   split token (automagically) into two!! - player AND minute!!!
       @re = GOAL_RE
@@ -414,7 +433,7 @@ def _tokenize_line( line )
     end
   end
   old_pos = -1   ## allows to backtrack to old pos (used in geo)

data/lib/sportdb/parser/token-minute.rb CHANGED Viewed

@@ -69,10 +69,12 @@ MINUTE_RE = %r{
 #                          or others with first matching position
 #                          or if chars get eaten-up?
 #                        let us know if \G is required here or not
+#
+##  note - use \A (instead of ^) - \A strictly matches the start of the string.
 PLAYER_WITH_MINUTE_RE = %r{
-           ^    ### note - MUST start line; leading spaces optional (eat-up)
+           \A    ### note - MUST start line; leading spaces optional (eat-up)
            [ ]*
              (?:      # optional open bracket ([) -- remove later
                 (?<open_bracket> \[ )
@@ -143,8 +145,11 @@ PLAYER_WITH_MINUTE_RE = %r{
 }ix
+##  note - use \A (instead of ^) - \A strictly matches the start of the string.
 PLAYER_WITH_SCORE_RE = %r{
-           ^    ### note - MUST start line; leading spaces optional (eat-up)
+           \A    ### note - MUST start line; leading spaces optional (eat-up)
            [ ]*
    (?<player_with_score>
                    (?<score>

data/lib/sportdb/parser/token-score.rb CHANGED Viewed

@@ -43,6 +43,28 @@ class Lexer
                 ## todo/check:  remove lookahead assertion here - why require space?
                 ## note: \b works only after non-alphanum e.g. )
+   ####
+   ## support short all-in-one format (extra time score moved inside the brackets)
+   ##  e.g.      3-4 pen. 2-2 a.e.t. (1-1, 1-1) becomes
+   ##            3-4 pen. (2-2, 1-1, 1-1)
+   SCORE__P_ET_FT_HT_V2__RE = %r{
+          (?<score_more>
+               \b
+                (?<p1>\d{1,2}) - (?<p2>\d{1,2})
+                   [ ]* #{P_EN} [ ]+
+                   \(
+               (?<et1>\d{1,2}) - (?<et2>\d{1,2})
+                   [ ]*, [ ]*
+               (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
+                   [ ]*, [ ]*
+               (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
+                   [ ]*
+                \)
+               (?=[ ,\]]|$)
+            )}ix       ## todo/check:  remove lookahead assertion here - why require space?
+                               ## note: \b works only after non-alphanum e.g. )
     ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1)  or
     ##      3-4p 2-2aet (1-1, )     or
@@ -128,7 +150,8 @@ class Lexer
 ## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
 SCORE_MORE_RE = Regexp.union(
-  SCORE__P_ET_FT_HT__RE,  # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
+  SCORE__P_ET_FT_HT_V2__RE,  # e.g. 5-1 pen. (2-2, 1-1, 1-0)
+  SCORE__P_ET_FT_HT__RE,    # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
   SCORE__P_FT_HT__RE,     # e.g. 5-1 pen. (1-1)
   SCORE__P_ET__RE,        # e.g. 2-2 a.e.t.  or  5-1 pen. 2-2 a.e.t.
   SCORE__P__RE,           # e.g. 5-1 pen.

data/lib/sportdb/parser/token-text.rb CHANGED Viewed

@@ -60,6 +60,10 @@ TEXT_RE = %r{
                     1/ \d{1,2} [ ] \p{L}+
                   |
                 ## opt 4 - add another weirdo case
+                ##   e.g.   's Gravenwezel-Schilde
+                    '[s]
+                  |
+                ## opt 5 - add another weirdo case
                 ##   e.g. 5.-8. Platz Playoffs  - keep - why? why not?
                     \d+\.-\d+\.  [ ]? \p{L}+
                )
@@ -70,18 +74,27 @@ TEXT_RE = %r{
                                ##    AND switch to case-sensitive (via -i!!!)
                       )
                       |     # only single spaces allowed inline!!!
-                     [-/]
+                     [_/]
                   )?
                 (?:
-                  \p{L} |
+                  \p{L}
+                     |
                   [&'°]
-                    |
+                     |
+                   (?:  (?<! [ ])   ## todo - check regex - make sure lookbehind is always first/before!!
+                      [-]  ### allow e.g. Sport-  if lookbehind is unicode letter or dot (.)
+                          ###       or    U.N.A.M.-Pumas
+                          ##         (?<= [\p{L}.] )
+                            ## try more flexible (use negative lookbehind - no space)
+                    )
+                     |
                  (?:
                    \d+
                    (?!
-                     [0-9h'+-] |    ## protected break on 12h / 12' / 1-1
-                                    ##  check usage for 3+4 - possible? where ? why?
-                     (?:[.:]\d)     ## protected/exclude/break on 12.03 / 12:03
+                     [0-9h'+] |    ## protected break on 12h / 12' / 3+4  (1-1 now covered by [.:-]\d below)
+                                    ##  check usage for 3+4 - possible? where ? why?
+                     (?:[.:-]\d)     ## protected/exclude/break on 12.03 / 12:03 / 12-12
+                                     ##  BUT allow Park21-Arena for example e.g. 21-A :-)
                     )
                    ## negative lookahead for numbers
                    ##   note - include digits itself!!!

data/lib/sportdb/parser/token.rb CHANGED Viewed

@@ -199,7 +199,8 @@ PROP_GOAL_RE =  Regexp.union(
 ####
 #
-ROUND_OUTLINE_RE = %r{  ^
+##  note - use \A (instead of ^) - \A strictly matches the start of the string.
+ROUND_OUTLINE_RE = %r{   \A
                            [ ]*  ## ignore leading spaces (if any)
                          (?: »|>> )
                            [ ]+

data/lib/sportdb/parser/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module SportDb
     module Parser
   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
   MINOR = 6
-  PATCH = 15
+  PATCH = 17
   VERSION = [MAJOR,MINOR,PATCH].join('.')
   def self.version

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.6.15
+  version: 0.6.17
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-03-09 00:00:00.000000000 Z
+date: 2025-03-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos