RubyGems - sportdb-parser - Versions diffs - 0.6.0 → 0.6.1 - Mend

sportdb-parser 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/lib/sportdb/parser/lexer.rb +55 -9
data/lib/sportdb/parser/parser.rb +493 -396
data/lib/sportdb/parser/racc_parser.rb +4 -2
data/lib/sportdb/parser/token-date.rb +66 -15
data/lib/sportdb/parser/token-score.rb +25 -14
data/lib/sportdb/parser/token.rb +11 -2
data/lib/sportdb/parser/version.rb +1 -1
metadata +2 -2

data/lib/sportdb/parser/racc_parser.rb CHANGED Viewed

@@ -68,12 +68,14 @@ def initialize( txt,  debug: false )
   def on_error(error_token_id, error_value, value_stack)
-    args = [error_token_id, error_value, value_stack]
+    ## auto-add error_token (as string)
+    error_token = Racc_token_to_s_table[error_token_id]
+    args = [error_token, error_token_id, error_value, value_stack]
     puts
     puts "!! on parse error:"
     puts "args=#{args.pretty_inspect}"
-    @errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
+    @errors << "parse error on token: #{error_token} (#{error_token_id}) with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
     ## exit 1  ##   exit for now  -  get and print more info about context etc.!!
   end

data/lib/sportdb/parser/token-date.rb CHANGED Viewed

@@ -147,15 +147,15 @@ DATE_II_RE = %r{
 # e.g. iso-date  -  2011-08-25
-##    todo/check - allow  2011-8-25  or 2011-8-3 / 2011-08-03 etc. - why? why not?
+##   note - allow/support ("shortcuts") e.g 2011-8-25  or 2011-8-3 / 2011-08-03 etc.
 DATE_III_RE = %r{
 (?<date>
   \b
    (?<year>\d{4})
        -
-   (?<month>\d{2})
+   (?<month>\d{1,2})
        -
-   (?<day>\d{2})
+   (?<day>\d{1,2})
   \b
 )}ix
@@ -214,29 +214,36 @@ end
 #
 #  Sun Jun/23 - Wed Jun/26   -- YES
 #  Jun/23 - Jun/26           -- YES
-#  Tue Jun/25 + Wed Jun/26   -- YES
-#  Jun/25 + Jun/26           -- YES
-#
-#  Jun/25 - 26        - why? why not???
+#  Jun/25 - 26        - why? why not???  - YES - see below variant iii!!!
+#  Tue Jun/25 + Wed Jun/26   -- NO
+#  Jun/25 + Jun/26           -- NO
 #  Jun/25 .. 26        - why? why not???
 #  Jun/25 to 26        - why? why not???
 #  Jun/25 + 26        - add - why? why not???
 #  Sun-Wed Jun/23-26  -  add - why? why not???
 #  Wed+Thu Jun/26+27 2024  -  add - why? why not???
 #
-#  maybe use comman and plus for list of dates
+#  maybe use comma and plus for list of dates
 #    Tue Jun/25, Wed Jun/26, Thu Jun/27  ??
 #    Tue Jun/25 + Wed Jun/26 + Thu Jun/27  ??
 #
 #   add back optional comma (before) year - why? why not?
+#
 ##
 #   todo add plus later on - why? why not?
+###   todo/fix  add optional comma (,) before year
+### regex note/tip/reminder -  \b () \b MUST always get enclosed in parentheses
+##                                     because alternation (|) has lowest priority/binding
 DURATION_I_RE =  %r{
 (?<duration>
     \b
+  (?:
    ## optional day name
    ((?<day_name1>#{DAY_NAMES})
       [ ]
@@ -245,12 +252,13 @@ DURATION_I_RE =  %r{
       (?: \/|[ ] )
    (?<day1>\d{1,2})
    ## optional year
-   ( [ ]
+   (  ,?   # optional comma
+      [ ]
       (?<year1>\d{4})
    )?
    ## support + and -  (add .. or such - why??)
-   [ ]*[-][ ]*
+   [ ]* - [ ]*
    ## optional day name
    ((?<day_name2>#{DAY_NAMES})
@@ -260,20 +268,28 @@ DURATION_I_RE =  %r{
       (?: \/|[ ] )
    (?<day2>\d{1,2})
    ## optional year
-   ( [ ]
+   (  ,?   # optional comma
+      [ ]
       (?<year2>\d{4})
    )?
+  )
    \b
 )}ix
+#   FIX - remove this variant
+#         "standardize on month day [year]" !!!!
+=begin
 ###
 #   variant ii
 # e.g. 26 July - 27 July
-DURATION_II_RE =  %r{
+#      26 July,
+XXX_DURATION_II_RE =  %r{
 (?<duration>
     \b
+  (?
    ## optional day name
    ((?<day_name1>#{DAY_NAMES})
       [ ]
@@ -282,7 +298,8 @@ DURATION_II_RE =  %r{
       [ ]
    (?<month_name1>#{MONTH_NAMES})
    ## optional year
-   ( [ ]
+   (
+       [ ]
       (?<year1>\d{4})
    )?
@@ -300,16 +317,50 @@ DURATION_II_RE =  %r{
    ( [ ]
       (?<year2>\d{4})
    )?
+  )
+   \b
+)}ix
+=end
+#  variant ii
+#  add support for shorthand
+#     August 16-18, 2011
+#     September 13-15, 2011
+#      October 18-20, 2011
+#      March/6-8, 2012
+#      March 6-8 2012
+#      March 6-8
+#
+#   - add support for August 16+17 or such (and check 16+18)
+#       use <op> to check if day2 is a plus or range or such - why? why not?
+DURATION_II_RE =  %r{
+(?<duration>
+    \b
+   (?:
+       (?<month_name1>#{MONTH_NAMES})
+           [ /]
+        (?<day1>\d{1,2})
+             -
+        (?<day2>\d{1,2})
+          (?:
+            ,?     ## optional comma
+            [ ]
+            (?<year1>\d{4})
+          )?     ## optional year
+   )
    \b
 )}ix
 #############################################
 # map tables
 #  note: order matters; first come-first matched/served
 DURATION_RE = Regexp.union(
    DURATION_I_RE,
-   DURATION_II_RE
+   DURATION_II_RE,
 )

data/lib/sportdb/parser/token-score.rb CHANGED Viewed

@@ -17,7 +17,7 @@ class Lexer
     ##      3-4 pen.   2-2 a.e.t.
     ##               2-2 a.e.t.
     SCORE__P_ET__RE = %r{
-        (?<score>
+        (?<score_more>
            \b
             (?:
                (?<p1>\d{1,2}) - (?<p2>\d{1,2})
@@ -34,7 +34,7 @@ class Lexer
     ##  note: allow SPECIAL with penalty only
     ##      3-4 pen.
     SCORE__P__RE = %r{
-        (?<score>
+        (?<score_more>
            \b
               (?<p1>\d{1,2}) - (?<p2>\d{1,2})
                 [ ]* #{P_EN}
@@ -52,7 +52,7 @@ class Lexer
     ##               2-2 a.e.t. (1-1)
     SCORE__P_ET_FT_HT__RE = %r{
-          (?<score>
+          (?<score_more>
                \b
                (?:
                 (?<p1>\d{1,2}) - (?<p2>\d{1,2})
@@ -79,7 +79,7 @@ class Lexer
     ##   special case for case WITHOUT extra time!!
     ##     same as above (but WITHOUT extra time and pen required)
     SCORE__P_FT_HT__RE = %r{
-             (?<score>
+             (?<score_more>
                 \b
      (?<p1>\d{1,2}) - (?<p2>\d{1,2})
         [ ]* #{P_EN} [ ]+
@@ -99,36 +99,47 @@ class Lexer
             ## note: \b works only after non-alphanum e.g. )
-    ## e.g. 2-1 (1-1) or
-    ##      2-1
+    ##########
+    ## e.g. 2-1 (1-1)
     SCORE__FT_HT__RE = %r{
-            (?<score>
+            (?<score_more>
               \b
               (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
-               (?:
                    [ ]+ \( [ ]*
                 (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
                    [ ]* \)
-               )?   # note: make half time (HT) score optional for now
              (?=[ ,\]]|$)
              )}ix    ## todo/check:  remove lookahead assertion here - why require space?
                     ## note: \b works only after non-alphanum e.g. )
+    #####
+    ##      2-1
+    SCORE__FT__RE = %r{
+            (?<score>
+              \b
+              (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
+              \b
+             )}ix
 #############################################
 # map tables
 #  note: order matters; first come-first matched/served
+#
+## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
-SCORE_RE = Regexp.union(
+SCORE_MORE_RE = Regexp.union(
   SCORE__P_ET_FT_HT__RE,  # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
   SCORE__P_FT_HT__RE,     # e.g. 5-1 pen. (1-1)
   SCORE__P_ET__RE,        # e.g. 2-2 a.e.t.  or  5-1 pen. 2-2 a.e.t.
   SCORE__P__RE,           # e.g. 5-1 pen.
-  SCORE__FT_HT__RE,        # e.g. 1-1 (1-0) or 1-1  -- note - must go last!!!
+  SCORE__FT_HT__RE,        # e.g. 1-1 (1-0)
+  ##  note - keep basic score as its own token!!!!
+  ##   that is, SCORE & SCORE_MORE
+  ### SCORE__FT__RE,           # e.g. 1-1  -- note - must go last!!!
 )
+SCORE_RE   =   SCORE__FT__RE
 end  #  class Lexer
 end  # module SportDb

data/lib/sportdb/parser/token.rb CHANGED Viewed

@@ -111,7 +111,15 @@ BASICS_RE = %r{
     (?<spaces> [ ]{2,}) |
     (?<space>  [ ])
         |
-    (?<sym>[;,/@|\[\]-])
+    (?<sym>  (?<=^|[ ])  ## positive lookbehind
+                  (?: ----|
+                      ---|
+                      --
+                  )
+             (?=[ ])   ## positive lookahead
+    )
+        |
+    (?<sym> [;,/@|\[\]-] )
 }ix
@@ -124,7 +132,8 @@ RE = Regexp.union(  PROP_KEY_RE, ##  start with prop key (match will/should swit
                      DURATION_RE,  # note - duration MUST match before date
                     DATE_RE,
                     WDAY_RE,   # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
-                    SCORE_RE,
+                    SCORE_MORE_RE,
+                    SCORE_RE,   ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
                     BASICS_RE,
                     MINUTE_RE,
                     GOAL_OG_RE, GOAL_PEN_RE,

data/lib/sportdb/parser/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module SportDb
     module Parser
   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
   MINOR = 6
-  PATCH = 0
+  PATCH = 1
   VERSION = [MAJOR,MINOR,PATCH].join('.')
   def self.version

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 0.6.1
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-01-30 00:00:00.000000000 Z
+date: 2025-02-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos