RubyGems - sportdb-parser - Versions diffs - 0.6.16 → 0.6.18 - Mend

sportdb-parser 0.6.16 → 0.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +1 -0
data/lib/sportdb/parser/token-geo.rb +134 -0
data/lib/sportdb/parser/token-minute.rb +7 -2
data/lib/sportdb/parser/token-text.rb +27 -11
data/lib/sportdb/parser/token.rb +2 -65
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +1 -0
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3da9280d27bf1e4662eb10f9451679e4aace18b9a0e1bfa29dd1e7b6bcbdc5e5
-  data.tar.gz: e6786f648848cd075ef3e0f6d8d7fda2d31743f989653c0fcf2312a33a223357
+  metadata.gz: 45e7965659ecd817aa2f54d04f81c673c6b7be3c1ea3294bf978b25462786726
+  data.tar.gz: c897aac96c5229c589af5966e68497d1dcd28b748241a15d0f386a2b65265b6a
 SHA512:
-  metadata.gz: 04250d17d120c12dc0b3980ff971b02fa178e617f35af70651f86011d9f5d4cad1d81df84a1f5af97ab73cb9023cc6cb190b13c420af71f3bcb2af7df6a526f1
-  data.tar.gz: 120486063a9a82891a63914654965b799aef774680695de8bda3bb52894399d0800b98efb852f87f672dbe303dc6c415b91a10a094989e94ddf3e319b3183cc9
+  metadata.gz: f918e333fba1dae8abcc3f143db0ef39b9c64e4fcaf89469482173c5b2944ba9a4e58cb9e71642c6fa5f14069609a40df23a62f6a0cc697bd22b465e73a17c97
+  data.tar.gz: 0c39baca8b5bd3d05720c1ade1c492361b186268ab549819e2428d41d2c2bab34424deefe328ca63504cf1bc802694edbea58e438e6225d7f2c69e325b662213

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,4 @@
-### 0.6.16
+### 0.6.18
 ### 0.0.1 / 2024-07-12
 * Everything is new. First release.

data/Manifest.txt CHANGED Viewed

@@ -15,6 +15,7 @@ lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
 lib/sportdb/parser/token-date.rb
+lib/sportdb/parser/token-geo.rb
 lib/sportdb/parser/token-minute.rb
 lib/sportdb/parser/token-prop.rb
 lib/sportdb/parser/token-score.rb

data/lib/sportdb/parser/token-geo.rb ADDED Viewed

@@ -0,0 +1,134 @@
+module SportDb
+class Lexer
+##
+#  allow Côte d'Ivoire or such
+##   e.g. add '
+## todo/fix - make geo text regex more generic
+##               only care about two space rule
+GEO_TEXT_RE = %r{
+    ## must start with alpha (allow unicode letters!!)
+    (?<text>
+           ## positive lookbehind -  for now space (or beginning of line - for testing) only
+           ##  (MUST be fixed number of chars - no quantifier e.g. +? etc.)
+            (?<= [ ,›>\[\]]|^)
+            (?:
+                # opt 1 - start with alpha
+                 \p{L}+    ## all unicode letters (e.g. [a-z])
+                   |
+                # opt 2 - start with num!! -
+                     \d+  # check for num lookahead (MUST be space or dot)
+                      ## MAY be followed by (optional space) !
+                      ## MUST be followed by a to z!!!!
+                      [ ]?   ## make space optional too  - why? why not?
+                             ##  yes - eg. 1st, 2nd, 5th etc.
+                       \p{L}+
+                  |
+                ## opt 3 - add another weirdo case
+                ##   e.g.   's Gravenwezel-Schilde
+                ##   add more letters (or sequences here - why? why not?)
+                    '\p{L}+
+               )
+               ##
+               ## todo/check - find a different "more intuitive" regex/rule if possible?
+               ##    for single spaces only (and _/ MUST not be surround by spaces)
+              (?:
+                  [ ]?   # only single spaces allowed inline!!!
+                  (?:
+                     \p{L} | \d | [.&'°]
+                      |
+                     (?: (?<! [ ])  ## no space allowed before (but possible after)
+                          [-]
+                     )
+                       |
+                     (?: (?<! [ ])  ## no spaces allowed around these characters
+                          [_/]
+                         (?! [ ])
+                     )
+                  )+
+              )*
+              ## must NOT end with space or dash(-)
+              ##  todo/fix - possible in regex here
+              ##     only end in alphanum a-z0-9 (not dot or & ???)
+            ## add lookahead/lookbehind
+           ##    must be space!!!
+           ##   (or comma or  start/end of string)
+           ##   kind of \b !!!
+            ## positive lookahead
+            (?=[ ,›>\[\]]|$)
+   )
+}ix
+##
+# for timezone format use for now:
+# (BRT/UTC-3)      (e.g. brazil time)
+#
+# (CET/UTC+1)   - central european time
+# (CEST/UTC+2)  - central european summer time  - daylight saving time (DST).
+# (EET/UTC+2)  - eastern european time
+# (EEST/UTC+3)  - eastern european summer time  - daylight saving time (DST).
+#
+# UTC+3
+# UTC+4
+# UTC+0
+# UTC+00
+# UTC+0000
+#
+#  - allow +01 or +0100  - why? why not
+#  -       +0130 (01:30)
+#
+# see
+#   https://en.wikipedia.org/wiki/Time_zone
+#   https://en.wikipedia.org/wiki/List_of_UTC_offsets
+#   https://en.wikipedia.org/wiki/UTC−04:00  etc.
+#
+#  e.g. (UTC-2) or (CEST/UTC-2) etc.
+#    todo check - only allow upcase
+#    or  (utc-2) and (cest/utc-2) too - why? why not?
+TIMEZONE_RE = %r{
+   (?<timezone>
+      \(
+           ## optional "local" timezone name eg. BRT or CEST etc.
+           (?:  [a-z]+
+                 /
+           )?
+            [a-z]+
+            [+-]
+            \d{1,4}   ## e.g. 0 or 00 or 0000
+      \)
+   )
+}ix
+GEO_BASICS_RE = %r{
+    (?<spaces> [ ]{2,}) |
+    (?<space>  [ ])
+        |
+    (?<sym> [,›>\[] )
+}ix
+GEO_RE = Regexp.union(
+                    TIMEZONE_RE,
+                    GEO_BASICS_RE,
+                    GEO_TEXT_RE,
+                    ANY_RE,
+                      )
+end # class Lexer
+end # module SportDb

data/lib/sportdb/parser/token-minute.rb CHANGED Viewed

@@ -69,10 +69,12 @@ MINUTE_RE = %r{
 #                          or others with first matching position
 #                          or if chars get eaten-up?
 #                        let us know if \G is required here or not
+#
+##  note - use \A (instead of ^) - \A strictly matches the start of the string.
 PLAYER_WITH_MINUTE_RE = %r{
-           ^    ### note - MUST start line; leading spaces optional (eat-up)
+           \A    ### note - MUST start string (not just line); leading spaces optional (eat-up)
            [ ]*
              (?:      # optional open bracket ([) -- remove later
                 (?<open_bracket> \[ )
@@ -143,8 +145,11 @@ PLAYER_WITH_MINUTE_RE = %r{
 }ix
+##  note - use \A (instead of ^) - \A strictly matches the start of the string.
 PLAYER_WITH_SCORE_RE = %r{
-           ^    ### note - MUST start line; leading spaces optional (eat-up)
+           \A    ### note - MUST start string (not just line); leading spaces optional (eat-up)
            [ ]*
    (?<player_with_score>
                    (?<score>

data/lib/sportdb/parser/token-text.rb CHANGED Viewed

@@ -2,6 +2,17 @@ module SportDb
 class Lexer
+## todo - move ANY_RE to token_commons or such - since it is shared by many?
+## general catch-all  (RECOMMENDED (ALWAYS) use as last entry in union)
+##   to avoid advance of pos match!!!
+ANY_RE = %r{
+               (?<any> .)
+          }ix
 ##  note - do NOT allow single alpha text for now
 ##   add later??      A - B    C - D  - why?
 ## opt 1) one alpha
@@ -60,34 +71,39 @@ TEXT_RE = %r{
                     1/ \d{1,2} [ ] \p{L}+
                   |
                 ## opt 4 - add another weirdo case
+                ##   e.g.   's Gravenwezel-Schilde
+                    '[s]
+                  |
+                ## opt 5 - add another weirdo case
                 ##   e.g. 5.-8. Platz Playoffs  - keep - why? why not?
                     \d+\.-\d+\.  [ ]? \p{L}+
                )
-              (?:(?:  (?:[ ]
+              (?:(?:  (?:[ ]   # only single spaces allowed inline!!!
                         (?! (?-i: vs?[ ])
                           )    ## note - exclude (v[ ]/vs[ ])
                                ##    AND switch to case-sensitive (via -i!!!)
                       )
-                      |     # only single spaces allowed inline!!!
-                     [-/]
+                      |
+                     [/-]   ## must NOT be surrounded by spaces
                   )?
                 (?:
-                  \p{L} |
-                  [&'°]
-                    |
+                  \p{L}
+                     |
+                  [.&'°]
+                     |
                  (?:
                    \d+
                    (?!
-                     [0-9h'+-] |    ## protected break on 12h / 12' / 1-1
-                                    ##  check usege for 3+4 - possible? where ? why?
-                     (?:[.:]\d)     ## protected/exclude/break on 12.03 / 12:03
+                     [0-9h'+] |    ## protected break on 12h / 12' / 1-1
+                                    ##  check usage for 3+4 - possible? where ? why?
+                     (?:[.:-]\d)     ## protected/exclude/break on 12.03 / 12:03 / 12-12
+                                     ##  BUT allow Park21-Arena for example e.g. 21-A :-)
                     )
                    ## negative lookahead for numbers
                    ##   note - include digits itself!!!
                    ##   note - remove / (slash) e.g. allows UDI'19/Beter Bed
-                 )|
-                 \.
+                 )
                )
               )*  ## must NOT end with space or dash(-)
               ##  todo/fix - possible in regex here

data/lib/sportdb/parser/token.rb CHANGED Viewed

@@ -21,49 +21,6 @@ TIME_RE = %r{
-##
-# for timezone format use for now:
-# (BRT/UTC-3)      (e.g. brazil time)
-#
-# (CET/UTC+1)   - central european time
-# (CEST/UTC+2)  - central european summer time  - daylight saving time (DST).
-# (EET/UTC+1)  - eastern european time
-# (EEST/UTC+2)  - eastern european summer time  - daylight saving time (DST).
-#
-# UTC+3
-# UTC+4
-# UTC+0
-# UTC+00
-# UTC+0000
-#
-#  - allow +01 or +0100  - why? why not
-#  -       +0130 (01:30)
-#
-# see
-#   https://en.wikipedia.org/wiki/Time_zone
-#   https://en.wikipedia.org/wiki/List_of_UTC_offsets
-#   https://en.wikipedia.org/wiki/UTC−04:00  etc.
-#
-#  e.g. (UTC-2) or (CEST/UTC-2) etc.
-#    todo check - only allow upcase
-#    or  (utc-2) and (cest/utc-2) too - why? why not?
-TIMEZONE_RE = %r{
-   (?<timezone>
-      \(
-           ## optional "local" timezone name eg. BRT or CEST etc.
-           (?:  [a-z]+
-                 /
-           )?
-            [a-z]+
-            [+-]
-            \d{1,4}   ## e.g. 0 or 00 or 0000
-      \)
-   )
-}ix
 ## add wday / stand-alone week day - as separate regex or
 ##          use TEXT with is_wday? check or such with
 ##                requirement of beginning of line (anchored to line) only??
@@ -124,12 +81,6 @@ BASICS_RE = %r{
 }ix
-## general catch-all  (RECOMMENDED (ALWAYS) use as last entry in union)
-##   to avoid advance of pos match!!!
-ANY_RE = %r{
-               (?<any> .)
-          }ix
 RE = Regexp.union(
                     STATUS_RE,
@@ -148,21 +99,6 @@ RE = Regexp.union(
                       )
-GEO_BASICS_RE = %r{
-    (?<spaces> [ ]{2,}) |
-    (?<space>  [ ])
-        |
-    (?<sym> [,›>\[] )
-}ix
-GEO_RE = Regexp.union(
-                    TIMEZONE_RE,
-                    GEO_BASICS_RE,
-                    TEXT_RE,
-                    ANY_RE,
-                      )
 ######################################################
 ## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
@@ -199,7 +135,8 @@ PROP_GOAL_RE =  Regexp.union(
 ####
 #
-ROUND_OUTLINE_RE = %r{  ^
+##  note - use \A (instead of ^) - \A strictly matches the start of the string.
+ROUND_OUTLINE_RE = %r{   \A
                            [ ]*  ## ignore leading spaces (if any)
                          (?: »|>> )
                            [ ]+

data/lib/sportdb/parser/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module SportDb
     module Parser
   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
   MINOR = 6
-  PATCH = 16
+  PATCH = 18
   VERSION = [MAJOR,MINOR,PATCH].join('.')
   def self.version

data/lib/sportdb/parser.rb CHANGED Viewed

@@ -23,6 +23,7 @@ require_relative 'parser/token-text'
 require_relative 'parser/token-status'
 require_relative 'parser/token-minute'
 require_relative 'parser/token-prop'    ## team prop(erty) mode (note - must be before token)
+require_relative 'parser/token-geo'
 require_relative 'parser/token'
 require_relative 'parser/lexer'

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.6.16
+  version: 0.6.18
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-03-09 00:00:00.000000000 Z
+date: 2025-03-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos
@@ -104,6 +104,7 @@ files:
 - lib/sportdb/parser/racc_parser.rb
 - lib/sportdb/parser/racc_tree.rb
 - lib/sportdb/parser/token-date.rb
+- lib/sportdb/parser/token-geo.rb
 - lib/sportdb/parser/token-minute.rb
 - lib/sportdb/parser/token-prop.rb
 - lib/sportdb/parser/token-score.rb