sportdb-parser 0.5.9 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/lexer.rb +101 -36
- data/lib/sportdb/parser/parser.rb +561 -387
- data/lib/sportdb/parser/racc_parser.rb +5 -3
- data/lib/sportdb/parser/racc_tree.rb +12 -5
- data/lib/sportdb/parser/token-date.rb +81 -13
- data/lib/sportdb/parser/token-minute.rb +45 -0
- data/lib/sportdb/parser/token-prop.rb +133 -0
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token-text.rb +9 -2
- data/lib/sportdb/parser/token.rb +51 -176
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b299ddece5e64b86bb7ee6b55578099b0624b11d8e5f10721363f45d6ef5d8d8
+  data.tar.gz: 5712c99b200e6116c9f07fba1215a4bf2560e5bd848c3c8cc48959aa17997b85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5d2fce54482e12542c35abd46a292d7f5e6b4db894bba3a7f911269f116d9fa530653d36ace4295e2f819bb974093b5567a5494a2d50b54ee3f250b314d40a73
+  data.tar.gz: 7b6ef8aaafa2d20c0356fcdc048211f24a04cc4f95819ad8d225b2c9a4a29e44d8f415190acfbe3e31b2f9cc457a12f8e75c460394e984d5b9b1f476f0f8e30f
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -14,6 +14,8 @@ lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
 lib/sportdb/parser/token-date.rb
+lib/sportdb/parser/token-minute.rb
+lib/sportdb/parser/token-prop.rb
 lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-status.rb
 lib/sportdb/parser/token-text.rb
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -147,12 +147,15 @@ def initialize( lines, debug: false )
     ## strip lines with comments and empty lines striped / removed
     ##   keep empty lines? why? why not?
     ##   keep leading spaces (indent) - why?
+    ##
+    ##  note - KEEP empty lines (get turned into BLANK token!!!!)
+
     @txt = String.new
     txt_pre.each_line do |line|    ## preprocess
       line = line.strip
-      next if line.
+      next if line.start_with?('#')   ### skip comments
 
-      line = line.sub( /#.*/, '' ).strip
+      line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too
 
       @txt << line
       @txt << "\n"
@@ -193,6 +196,18 @@ def tokenize_with_errors
            t
          end
 
+    ### check for "section" starters e.g. Teams or such
+    t = tokens[0]
+    if t[0] == :TEXT
+      text = t[1]
+      if text =~ /^teams$/i
+        t[0] = :TEAMS
+      elsif text =~ /^blank$/i   ### todo/fix -- remove!!! add real blanks!!
+        t[0] = :BLANK
+      else
+      end
+    end
+
     #################
     ## pass 2
     ## transform tokens (using simple patterns)
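The new pass-1 check re-tags a line-leading TEXT token when its text is a section keyword. A minimal standalone sketch of the same rewrite (token pairs hand-built here, not produced by the gem):

```ruby
## re-tag the first token of a line if its text is a section keyword
tokens = [[:TEXT, 'Teams'], [:NEWLINE, "\n"]]

t = tokens[0]
if t[0] == :TEXT
  case t[1]
  when /^teams$/i then t[0] = :TEAMS
  when /^blank$/i then t[0] = :BLANK
  end
end

pp tokens   #=> [[:TEAMS, "Teams"], [:NEWLINE, "\n"]]
```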
@@ -227,13 +242,22 @@ def tokenize_with_errors
         end
 
 
-        if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
+        if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
           nodes << [:TEAM, buf.next[1]]
           nodes << buf.next
           nodes << [:TEAM, buf.next[1]]
         elsif buf.match?( :TEXT, :MINUTE )
           nodes << [:PLAYER, buf.next[1]]
           nodes << buf.next
+        elsif buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
+          date = buf.next[1]
+          time = buf.next[1]
+          ## puts "DATETIME:"
+          ## pp date, time
+          val = [date[0] + ' ' + time[0],   ## concat string of two tokens
+                 { date: date[1], time: time[1] }
+                ]
+          nodes << [:DATETIME, val]
         else
           ## pass through
           nodes << buf.next
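Token values are [literal, attributes] pairs, so the new DATE+TIME merge concatenates the two literals and nests both attribute hashes under one DATETIME token. A hedged sketch with hand-built values (the real hashes come from the date and time regex captures, which are not shown in this diff):

```ruby
date = ['Sat Aug/10 2024', { y: 2024, m: 8, d: 10 }]   ## assumed value shape
time = ['18.30',           { h: 18, m: 30 }]           ## assumed value shape

val = [date[0] + ' ' + time[0],             ## concat string of two tokens
       { date: date[1], time: time[1] }]

pp [:DATETIME, val]
#=> [:DATETIME, ["Sat Aug/10 2024 18.30",
#                {:date=>{:y=>2024, :m=>8, :d=>10}, :time=>{:h=>18, :m=>30}}]]
```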
@@ -247,8 +271,14 @@ def tokenize_with_errors
     ## flatten tokens
     tokens = []
     tokens_by_line.each do |tok|
+
+      if debug?
+        pp tok
+      end
+
       tokens += tok
-
+      ## auto-add newlines (unless BLANK!!)
+      tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
     end
 
     [tokens,errors]
@@ -260,7 +290,18 @@ def _tokenize_line( line )
     tokens = []
     errors = []   ## keep a list of errors - why? why not?
 
-    puts ">#{line}<"   if debug?
+    puts "line: >#{line}<"   if debug?
+
+
+    ### special case for empty line (aka BLANK)
+    if line.empty?
+      ## note - blank always resets parser mode to std/top-level!!!
+      @re = RE
+
+      tokens << [:BLANK, '<|BLANK|>']
+      return [tokens, errors]
+    end
+
 
     pos = 0
     ## track last offsets - to report error on no match
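Combined with the auto-added NEWLINE in the flatten step above, an empty input line now yields exactly one BLANK token and resets the lexer to top-level mode. A rough sketch of the observable behavior (a simplified stand-in, not the gem's API):

```ruby
## empty line  =>  single BLANK token (and mode reset to top-level)
def tokenize_line( line )
  return [[:BLANK, '<|BLANK|>']]   if line.empty?
  ## ... the regular regex-driven scan would run here ...
  [[:TEXT, line]]
end

pp tokenize_line( '' )                    #=> [[:BLANK, "<|BLANK|>"]]
pp tokenize_line( 'Arsenal v Chelsea' )   #=> [[:TEXT, "Arsenal v Chelsea"]]
```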
@@ -275,10 +316,10 @@ def _tokenize_line( line )
 
 
     while m = @re.match( line, pos )
-      if debug?
-        pp m
-        puts "pos: #{pos}"
-      end
+      #  if debug?
+      #    pp m
+      #    puts "pos: #{pos}"
+      #  end
       offsets = [m.begin(0), m.end(0)]
 
       if offsets[0] != pos
@@ -298,7 +339,7 @@ def _tokenize_line( line )
 
       pos = offsets[1]
 
-      pp offsets   if debug?
+      # pp offsets   if debug?
 
       ##
       ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -306,12 +347,8 @@ def _tokenize_line( line )
 
 
       t = if @re == PROP_RE
-             if m[:space]
-               ## skip space
-               nil
-             elsif m[:spaces]
-               ## skip spaces
-               nil
+             if m[:space] || m[:spaces]
+               nil   ## skip space(s)
             elsif m[:prop_name]
               if m[:name] == 'Y'
                 [:YELLOW_CARD, m[:name]]
@@ -339,11 +376,11 @@ def _tokenize_line( line )
               when '(' then [:'(']
               when ')' then [:')']
               when '-' then [:'-']
-
-
-
-
-
+              # when '.' then
+              #   ## switch back to top-level mode!!
+              #   puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
+              #   @re = RE
+              #   [:'.']
               else
                 nil   ## ignore others (e.g. brackets [])
               end
@@ -353,12 +390,8 @@ def _tokenize_line( line )
               nil
             end
           else   ## assume TOP_LEVEL (a.k.a. RE) machinery
-            if m[:space]
-              ## skip space
-              nil
-            elsif m[:spaces]
-              ## skip spaces
-              nil
+            if m[:space] || m[:spaces]
+              nil   ## skip space(s)
            elsif m[:prop_key]
              ## switch context to PROP_RE
              @re = PROP_RE
@@ -397,6 +430,7 @@ def _tokenize_line( line )
              ## map month names
              ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
              date[:y] = m[:year].to_i(10)   if m[:year]
+             date[:m] = m[:month].to_i(10)  if m[:month]
              date[:m] = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
              date[:d] = m[:day].to_i(10)    if m[:day]
              date[:wday] = DAY_MAP[ m[:day_name].downcase ]    if m[:day_name]
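The added line maps a numeric month capture directly, next to the existing month-name lookup. A tiny illustration of the two paths, with a hash standing in for the regex MatchData and an abbreviated stand-in for the gem's MONTH_MAP table:

```ruby
MONTH_MAP = { 'jan' => 1, 'aug' => 8 }   ## abbreviated stand-in table

m    = { year: '2024', month: nil, month_name: 'Aug', day: '10' }   ## fake MatchData
date = {}
date[:y] = m[:year].to_i(10)                     if m[:year]
date[:m] = m[:month].to_i(10)                    if m[:month]        ## new numeric path
date[:m] = MONTH_MAP[ m[:month_name].downcase ]  if m[:month_name]   ## name-lookup path
date[:d] = m[:day].to_i(10)                      if m[:day]

pp date   #=> {:y=>2024, :m=>8, :d=>10}
```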
@@ -417,10 +451,12 @@ def _tokenize_line( line )
              duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ]   if m[:day_name2]
              ## note - for debugging keep (pass along) "literal" duration
              [:DURATION, [m[:duration], duration]]
+            elsif m[:wday]   ## standalone weekday e.g. Mo/Tu/We/etc.
+              [:WDAY, [m[:wday], { wday: DAY_MAP[ m[:day_name].downcase ] } ]]
            elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
              ## note - strip enclosing () and convert to integer
              [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
-            elsif m[:
+            elsif m[:score_more]
              score = {}
              ## check for pen
              score[:p] = [m[:p1].to_i(10),
@@ -433,8 +469,15 @@ def _tokenize_line( line )
                           m[:ht2].to_i(10)]   if m[:ht1] && m[:ht2]
 
              ## note - for debugging keep (pass along) "literal" score
-              [:
-            elsif m[:
+              [:SCORE_MORE, [m[:score_more], score]]
+            elsif m[:score]
+              score = {}
+              ## must always have ft for now e.g. 1-1 or such
+              score[:ft] = [m[:ft1].to_i(10),
+                            m[:ft2].to_i(10)]
+              ## note - for debugging keep (pass along) "literal" score
+              [:SCORE, [m[:score], score]]
            elsif m[:minute]
              minute = {}
              minute[:m] = m[:value].to_i(10)
              minute[:offset] = m[:value2].to_i(10)   if m[:value2]
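Score handling now splits into two token kinds: a plain full-time result becomes SCORE, while results carrying penalty and/or half-time parts become SCORE_MORE. A sketch of the two value shapes, using hashes as stand-ins for the MatchData (capture names ft1/ft2/p1/p2/ht1/ht2 as in the code above; the literal strings are only illustrative):

```ruby
## plain full-time result, e.g. "2-1"  =>  SCORE
m = { score: '2-1', ft1: '2', ft2: '1' }
score = { ft: [m[:ft1].to_i(10), m[:ft2].to_i(10)] }
pp [:SCORE, [m[:score], score]]   #=> [:SCORE, ["2-1", {:ft=>[2, 1]}]]

## richer result with penalty + half-time parts  =>  SCORE_MORE
m = { score_more: '5-4 pen. (1-1)', p1: '5', p2: '4', ht1: '1', ht2: '1' }
score = {}
score[:p]  = [m[:p1].to_i(10),  m[:p2].to_i(10)]    if m[:p1]  && m[:p2]
score[:ht] = [m[:ht1].to_i(10), m[:ht2].to_i(10)]   if m[:ht1] && m[:ht2]
pp [:SCORE_MORE, [m[:score_more], score]]
```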
@@ -454,11 +497,15 @@ def _tokenize_line( line )
            case sym
            when ',' then [:',']
            when ';' then [:';']
+            when '/' then [:'/']
            when '@' then [:'@']
            when '|' then [:'|']
            when '[' then [:'[']
            when ']' then [:']']
-            when '-' then [:'-']
+            when '-'    then [:'-']      # level 1  OR (classic) dash
+            when '--'   then [:'--']     # level 2
+            when '---'  then [:'---']    # level 3
+            when '----' then [:'----']   # level 4
            else
              nil   ## ignore others (e.g. brackets [])
            end
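For the new multi-dash symbols to tokenize correctly, the (not shown) symbol regex has to try longer dash runs before shorter ones, since Ruby regex alternation is ordered and takes the first branch that matches. A quick illustration of the pitfall:

```ruby
## order matters: the alternation must list the longest run first
re_bad  = /-|--|---|----/
re_good = /----|---|--|-/

p '---'.scan( re_bad )    #=> ["-", "-", "-"]   (three level-1 dashes - wrong)
p '---'.scan( re_good )   #=> ["---"]           (one level-3 dash - right)
```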
@@ -472,11 +519,11 @@ def _tokenize_line( line )
 
       tokens << t   if t
 
-      if debug?
-        print ">"
-        print "*" * pos
-        puts "#{line[pos..-1]}<"
-      end
+      #  if debug?
+      #    print ">"
+      #    print "*" * pos
+      #    puts "#{line[pos..-1]}<"
+      #  end
     end
 
     ## check if no match in end of string
@@ -489,6 +536,24 @@ def _tokenize_line( line )
     end
 
 
+    ##
+    ## if in prop mode continue if last token is [,-]
+    ##   otherwise change back to "standard" mode
+    if @re == PROP_RE
+      if [:',', :'-'].include?( tokens[-1][0] )
+        ## continue/stay in PROP_RE mode
+        ## todo/check - auto-add PROP_CONT token or such
+        ##                to help parser with possible NEWLINE
+        ##                  conflicts - why? why not?
+      else
+        ## switch back to top-level mode!!
+        puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
+        @re = RE
+        ## note - auto-add PROP_END (<PROP_END>)
+        tokens << [:PROP_END, "<|PROP_END|>"]
+      end
+    end
+
     [tokens,errors]
   end
 
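Net effect: a property line ending in ',' or '-' keeps the lexer in prop mode so the list can continue on the next line; any other ending switches back to top-level and appends a synthetic PROP_END marker. A condensed sketch of that end-of-line decision (token pairs hand-built; :PROP and :TEXT are placeholder token names, not the gem's API):

```ruby
## trailing ',' or '-'  =>  the property list continues on the next line;
## otherwise close it with an auto-added PROP_END token
def close_props( tokens )
  unless [:',', :'-'].include?( tokens[-1][0] )
    tokens << [:PROP_END, '<|PROP_END|>']
  end
  tokens
end

pp close_props( [[:PROP, 'goals'], [:',']] )         ## stays open - continues
pp close_props( [[:PROP, 'goals'], [:TEXT, 'x']] )   ## closed with PROP_END
```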