sportdb-parser 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +63 -10
- data/lib/sportdb/parser/parser.rb +521 -404
- data/lib/sportdb/parser/racc_parser.rb +4 -2
- data/lib/sportdb/parser/token-date.rb +66 -15
- data/lib/sportdb/parser/token-minute.rb +19 -4
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token-status.rb +109 -0
- data/lib/sportdb/parser/token.rb +13 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 85526406c8cd97a5b4e8580e64597b60f2046f4667a97080434238e067be2788
+  data.tar.gz: dcd5e6aaa854654974644c026fb99545c31ef2d5929d0518d8418630b5d6ea76
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fc086846a66d2657d3debae5562fe20fbd2327741c8af1485972dfd0b8f46b3c649c0345ba173f2d3f40622bd4bddecc7ea0072d4d129bc5dc542554c539ebab
+  data.tar.gz: c0c4653cb40cb89e8086b6dc00ed853c62abf75fc18972cf98230a779cd8a7f73d797098ec53c2e942e6271c00c9d11a1da8f6975704141cbfbba599ec741098

data/CHANGELOG.md
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -147,12 +147,15 @@ def initialize( lines, debug: false )
     ##  strip lines with comments and empty lines striped / removed
     ##  keep empty lines? why? why not?
     ##  keep leading spaces (indent) - why?
+    ##
+    ##  note - KEEP empty lines (get turned into BLANK token!!!!)
+
     @txt = String.new
     txt_pre.each_line do |line|   ## preprocess
       line = line.strip
-      next if line.
+      next if line.start_with?('#')   ### skip comments

-      line = line.sub( /#.*/, '' ).strip
+      line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too

       @txt << line
       @txt << "\n"

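Note: with this change the preprocessor keeps empty lines (they later become BLANK tokens) while still dropping comments. A minimal standalone sketch of the rule, with a made-up sample input (not the gem's public API):

```ruby
txt = String.new
sample = <<~TXT
  # a full-line comment - skipped
  Arsenal 1-1 Chelsea   # an end-of-line comment - cut off

  Leeds United 2-0 Derby
TXT

sample.each_line do |line|
  line = line.strip
  next if line.start_with?( '#' )       ## skip full-line comments
  line = line.sub( /#.*/, '' ).strip    ## cut off end-of-line comments too
  txt << line << "\n"                   ## note - empty lines kept
end

print txt
## Arsenal 1-1 Chelsea
## (empty line kept)
## Leeds United 2-0 Derby
```
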
@@ -193,6 +196,18 @@ def tokenize_with_errors
         t
       end

+      ### check for "section" starters e.g. Teams or such
+      t = tokens[0]
+      if t[0] == :TEXT
+        text = t[1]
+        if text =~ /^teams$/i
+          t[0] = :TEAMS
+        elsif text =~ /^blank$/i    ### todo/fix -- remove!!! add real blanks!!
+          t[0] = :BLANK
+        else
+        end
+      end
+
       #################
       ## pass 2
       ## transform tokens (using simple patterns)

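Note: the new "section starter" pass promotes a line's leading :TEXT token to a section keyword in place. A hypothetical token-level illustration (token shapes follow the diff):

```ruby
tokens = [[:TEXT, 'Teams'], [:NEWLINE, "\n"]]

t = tokens[0]
t[0] = :TEAMS   if t[0] == :TEXT && t[1] =~ /^teams$/i   ## promote in place

p tokens   #=> [[:TEAMS, "Teams"], [:NEWLINE, "\n"]]
```
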
@@ -227,13 +242,22 @@ def tokenize_with_errors
         end


-        if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
+        if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
           nodes << [:TEAM, buf.next[1]]
           nodes << buf.next
           nodes << [:TEAM, buf.next[1]]
         elsif buf.match?( :TEXT, :MINUTE )
           nodes << [:PLAYER, buf.next[1]]
           nodes << buf.next
+        elsif buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
+          date = buf.next[1]
+          time = buf.next[1]
+          ## puts "DATETIME:"
+          ## pp date, time
+          val = [date[0] + ' ' + time[0],   ## concat string of two tokens
+                 { date: date[1], time: time[1] }
+                ]
+          nodes << [:DATETIME, val]
         else
           ## pass through
           nodes << buf.next

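Note: adjacent :DATE and :TIME tokens now merge into one :DATETIME token that carries both the concatenated literal and the two parsed parts. A sketch with assumed [literal, attributes] value shapes:

```ruby
date = ['Sat Aug/17', { date: 'Sat Aug/17' }]   ## assumed sample values
time = ['15:30',      { time: '15:30' }]

val = [date[0] + ' ' + time[0],                 ## concat string of two tokens
       { date: date[1], time: time[1] }]

p [:DATETIME, val]
#=> [:DATETIME, ["Sat Aug/17 15:30", { date: {...}, time: {...} }]]
```
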
@@ -253,7 +277,8 @@ def tokenize_with_errors
         end

         tokens += tok
-
+        ## auto-add newlines (unless BLANK!!)
+        tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
       end

       [tokens,errors]

@@ -267,6 +292,17 @@ def _tokenize_line( line )

     puts "line: >#{line}<"    if debug?

+
+    ### special case for empty line (aka BLANK)
+    if line.empty?
+      ## note - blank always resets parser mode to std/top-level!!!
+      @re = RE
+
+      tokens << [:BLANK, '<|BLANK|>']
+      return [tokens, errors]
+    end
+
+
     pos = 0
     ## track last offsets - to report error on no match
     ##   or no match in end of string

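Note: together with the auto-added newlines above, an empty line now short-circuits to a single BLANK token, appends no trailing NEWLINE, and resets the lexer to top-level mode. A sketch of the resulting per-line bookkeeping (shapes follow the diff):

```ruby
tok    = [[:BLANK, '<|BLANK|>']]   ## what _tokenize_line returns for ""
tokens = []

tokens += tok
tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK   ## no NEWLINE after BLANK

p tokens   #=> [[:BLANK, "<|BLANK|>"]]
```
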
@@ -353,7 +389,9 @@ def _tokenize_line( line )
          puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
          nil
        end
-
+      ###################################################
+      ## assume TOP_LEVEL (a.k.a. RE) machinery
+      else
        if m[:space] || m[:spaces]
          nil   ## skip space(s)
        elsif m[:prop_key]

@@ -372,6 +410,11 @@ def _tokenize_line( line )
          else
            [:STATUS, [m[:status], {status: m[:status] } ]]
          end
+       elsif m[:note]
+         ### todo/check:
+         ##    use value hash - why? why not? or simplify to:
+         ##      [:NOTE, m[:note]]
+         [:NOTE, [m[:note], {note: m[:note] } ]]
        elsif m[:time]
          ## unify to iso-format
          ###   12.40  =>  12:40

@@ -420,7 +463,7 @@ def _tokenize_line( line )
        elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
          ## note - strip enclosing () and convert to integer
          [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
-       elsif m[:
+       elsif m[:score_more]
          score = {}
          ## check for pen
          score[:p] = [m[:p1].to_i(10),

@@ -433,8 +476,15 @@ def _tokenize_line( line )
                       m[:ht2].to_i(10)]   if m[:ht1] && m[:ht2]

          ## note - for debugging keep (pass along) "literal" score
-         [:
-       elsif m[:
+         [:SCORE_MORE, [m[:score_more], score]]
+       elsif m[:score]
+         score = {}
+         ## must always have ft for now e.g. 1-1 or such
+         score[:ft] = [m[:ft1].to_i(10),
+                       m[:ft2].to_i(10)]
+         ## note - for debugging keep (pass along) "literal" score
+         [:SCORE, [m[:score], score]]
+       elsif m[:minute]
          minute = {}
          minute[:m] = m[:value].to_i(10)
          minute[:offset] = m[:value2].to_i(10)   if m[:value2]

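Note: the score branch now splits into two token shapes. Plain :SCORE always carries a full-time pair; :SCORE_MORE may also carry penalty and halftime pairs. A hypothetical illustration (the literals are made up; only the ft/p/ht keys come from the diff):

```ruby
## plain score, e.g. "1-1" - full-time only
p [:SCORE, ['1-1', { ft: [1, 1] }]]

## "more" score with penalty and halftime parts; literal kept for debugging
p [:SCORE_MORE, ['3-2 pen. (1-1)', { p: [3, 2], ht: [1, 1] }]]
```
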
@@ -459,7 +509,10 @@ def _tokenize_line( line )
          when '|' then [:'|']
          when '[' then [:'[']
          when ']' then [:']']
-         when '-' then [:'-']
+         when '-'    then [:'-']      # level 1  OR (classic) dash
+         when '--'   then [:'--']     # level 2
+         when '---'  then [:'---']    # level 3
+         when '----' then [:'----']   # level 4
          else
            nil   ## ignore others (e.g. brackets [])
          end

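Note: the single dash grows into four levels, so longer dash runs tokenize as distinct separator levels instead of falling through to nil. A minimal sketch (dash_token is a hypothetical wrapper around the case expression above):

```ruby
def dash_token( str )
  case str
  when '-'    then [:'-']      # level 1  OR (classic) dash
  when '--'   then [:'--']     # level 2
  when '---'  then [:'---']    # level 3
  when '----' then [:'----']   # level 4
  else             nil         # ignore others
  end
end

p %w[- -- --- ----].map { |s| dash_token( s ) }
#=> [[:"-"], [:"--"], [:"---"], [:"----"]]
```
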
@@ -504,7 +557,7 @@ def _tokenize_line( line )
          puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
          @re = RE
          ## note - auto-add PROP_END (<PROP_END>)
-         tokens << [:PROP_END, "
+         tokens << [:PROP_END, "<|PROP_END|>"]
        end
      end
