sportdb-parser 0.6.0 → 0.6.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +55 -9
- data/lib/sportdb/parser/parser.rb +493 -396
- data/lib/sportdb/parser/racc_parser.rb +4 -2
- data/lib/sportdb/parser/token-date.rb +66 -15
- data/lib/sportdb/parser/token-score.rb +25 -14
- data/lib/sportdb/parser/token.rb +11 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b299ddece5e64b86bb7ee6b55578099b0624b11d8e5f10721363f45d6ef5d8d8
+  data.tar.gz: 5712c99b200e6116c9f07fba1215a4bf2560e5bd848c3c8cc48959aa17997b85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5d2fce54482e12542c35abd46a292d7f5e6b4db894bba3a7f911269f116d9fa530653d36ace4295e2f819bb974093b5567a5494a2d50b54ee3f250b314d40a73
+  data.tar.gz: 7b6ef8aaafa2d20c0356fcdc048211f24a04cc4f95819ad8d225b2c9a4a29e44d8f415190acfbe3e31b2f9cc457a12f8e75c460394e984d5b9b1f476f0f8e30f
```
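To check a downloaded copy against the published SHA256 values, a rough sketch; the local gem file name is an assumption:

```ruby
require 'digest'
require 'rubygems/package'

## a .gem is a tar archive containing metadata.gz, data.tar.gz, checksums.yaml.gz;
## recompute the SHA256 digests of the two embedded archives for comparison
tar = Gem::Package::TarReader.new( File.open( 'sportdb-parser-0.6.1.gem', 'rb' ) )
tar.each do |entry|
  next unless %w[metadata.gz data.tar.gz].include?( entry.full_name )
  puts "#{entry.full_name}: #{Digest::SHA256.hexdigest( entry.read )}"
end
tar.close
```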
data/CHANGELOG.md
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
```diff
@@ -147,12 +147,15 @@ def initialize( lines, debug: false )
     ## strip lines with comments and empty lines striped / removed
     ## keep empty lines? why? why not?
     ## keep leading spaces (indent) - why?
+    ##
+    ## note - KEEP empty lines (get turned into BLANK token!!!!)
+
     @txt = String.new
     txt_pre.each_line do |line|   ## preprocess
       line = line.strip
-      next if line.
+      next if line.start_with?('#')   ### skip comments
 
-      line = line.sub( /#.*/, '' ).strip
+      line = line.sub( /#.*/, '' ).strip   ### cut-off end-of line comments too
 
       @txt << line
       @txt << "\n"
```
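A standalone sketch of the new preprocessing rules, using made-up input lines; note how the empty line now survives (to become a BLANK token later) while both comment styles get dropped:

```ruby
txt_pre = <<~TXT
  # full-line comment - skipped
  Arsenal 1-1 Chelsea   # end-of-line comment - cut off

  Leeds 2-0 Fulham
TXT

txt = String.new
txt_pre.each_line do |line|
  line = line.strip
  next if line.start_with?('#')        ## skip full-line comments
  line = line.sub( /#.*/, '' ).strip   ## cut off end-of-line comments
  txt << line << "\n"
end

print txt   ## the blank line between the two matches is kept
```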
```diff
@@ -193,6 +196,18 @@ def tokenize_with_errors
       t
     end
 
+    ### check for "section" starters e.g. Teams or such
+    t = tokens[0]
+    if t[0] == :TEXT
+      text = t[1]
+      if text =~ /^teams$/i
+        t[0] = :TEAMS
+      elsif text =~ /^blank$/i   ### todo/fix -- remove!!! add real blanks!!
+        t[0] = :BLANK
+      else
+      end
+    end
+
     #################
     ## pass 2
     ## transform tokens (using simple patterns)
```
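A minimal sketch of the new section-starter check over a line's first token; the token shapes follow the diff, the sample line is made up:

```ruby
tokens = [[:TEXT, 'Teams'], [:NEWLINE, "\n"]]

t = tokens[0]
if t[0] == :TEXT
  if t[1] =~ /^teams$/i        ## case-insensitive whole-word match
    t[0] = :TEAMS
  elsif t[1] =~ /^blank$/i
    t[0] = :BLANK
  end
end

pp tokens[0]   ## => [:TEAMS, "Teams"]
```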
```diff
@@ -227,13 +242,22 @@ def tokenize_with_errors
       end
 
 
-      if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
+      if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
         nodes << [:TEAM, buf.next[1]]
         nodes << buf.next
         nodes << [:TEAM, buf.next[1]]
       elsif buf.match?( :TEXT, :MINUTE )
         nodes << [:PLAYER, buf.next[1]]
         nodes << buf.next
+      elsif buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
+        date = buf.next[1]
+        time = buf.next[1]
+        ## puts "DATETIME:"
+        ## pp date, time
+        val = [date[0] + ' ' + time[0],   ## concat string of two tokens
+               { date: date[1], time: time[1] }
+              ]
+        nodes << [:DATETIME, val]
       else
         ## pass through
         nodes << buf.next
```
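The merged token keeps the concatenated literal plus both attribute hashes. A sketch of the resulting shape; the date/time attribute keys here are made-up placeholders, not the gem's real ones:

```ruby
## [literal, attrs] token payloads - attribute keys are placeholders
date = ['Sat Aug/17', { m: 8, d: 17 }]
time = ['16:30',      { h: 16, min: 30 }]

val = [date[0] + ' ' + time[0],          ## concat string of two tokens
       { date: date[1], time: time[1] }]

pp [:DATETIME, val]
## => [:DATETIME, ["Sat Aug/17 16:30", {date: {m: 8, d: 17}, time: {h: 16, min: 30}}]]
```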
```diff
@@ -253,7 +277,8 @@ def tokenize_with_errors
       end
 
       tokens += tok
-
+      ## auto-add newlines (unless BLANK!!)
+      tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
     end
 
     [tokens,errors]
```
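A sketch of the auto-newline rule; the token contents are made up, the `tok[0][0] == :BLANK` guard is straight from the diff:

```ruby
tokens = []
[[[:TEXT, 'Arsenal']], [[:BLANK, '<|BLANK|>']]].each do |tok|
  tokens += tok
  ## append a NEWLINE per line - except after a BLANK line
  tokens << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
end

pp tokens
## => [[:TEXT, "Arsenal"], [:NEWLINE, "\n"], [:BLANK, "<|BLANK|>"]]
```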
```diff
@@ -267,6 +292,17 @@ def _tokenize_line( line )
 
     puts "line: >#{line}<"   if debug?
 
+
+    ### special case for empty line (aka BLANK)
+    if line.empty?
+      ## note - blank always resets parser mode to std/top-level!!!
+      @re = RE
+
+      tokens << [:BLANK, '<|BLANK|>']
+      return [tokens, errors]
+    end
+
+
     pos = 0
     ## track last offsets - to report error on no match
     ##   or no match in end of string
```
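A trimmed-down sketch of the new empty-line short-circuit; the real method also resets the `@re` lexer mode and threads an errors array alongside the tokens:

```ruby
def tokenize_line( line )   ## simplified stand-in for _tokenize_line
  tokens = []
  if line.empty?
    ## an empty line always short-circuits - no pattern matching runs
    tokens << [:BLANK, '<|BLANK|>']   ## sentinel literal from the diff
    return tokens
  end
  ## ... the regular pattern-matching loop would run here ...
  tokens
end

pp tokenize_line( '' )   ## => [[:BLANK, "<|BLANK|>"]]
```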
```diff
@@ -420,7 +456,7 @@ def _tokenize_line( line )
       elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
         ## note - strip enclosing () and convert to integer
         [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
-      elsif m[:
+      elsif m[:score_more]
         score = {}
         ## check for pen
         score[:p] = [m[:p1].to_i(10),
```
```diff
@@ -433,8 +469,15 @@ def _tokenize_line( line )
                       m[:ht2].to_i(10)]   if m[:ht1] && m[:ht2]
 
         ## note - for debugging keep (pass along) "literal" score
-        [:
-      elsif m[:
+        [:SCORE_MORE, [m[:score_more], score]]
+      elsif m[:score]
+        score = {}
+        ## must always have ft for now e.g. 1-1 or such
+        score[:ft] = [m[:ft1].to_i(10),
+                      m[:ft2].to_i(10)]
+        ## note - for debugging keep (pass along) "literal" score
+        [:SCORE, [m[:score], score]]
+      elsif m[:minute]
         minute = {}
         minute[:m] = m[:value].to_i(10)
         minute[:offset] = m[:value2].to_i(10)   if m[:value2]
```
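The old single score branch is now split: `SCORE_MORE` covers results with extra parts (penalties, extra time, halftime), while the new `SCORE` branch handles only a plain fulltime result. A sketch of that plain branch; the regex is a simplified stand-in for the gem's real score pattern, only the capture names match the diff:

```ruby
SCORE_RE = /(?<score>(?<ft1>\d{1,2})-(?<ft2>\d{1,2}))/   ## simplified stand-in pattern

m = SCORE_RE.match( 'Arsenal 2-1 Chelsea' )
if m && m[:score]
  score = {}
  ## must always have ft for now e.g. 1-1 or such
  score[:ft] = [m[:ft1].to_i(10), m[:ft2].to_i(10)]
  pp [:SCORE, [m[:score], score]]   ## => [:SCORE, ["2-1", {ft: [2, 1]}]]
end
```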
```diff
@@ -459,7 +502,10 @@ def _tokenize_line( line )
       when '|' then [:'|']
       when '[' then [:'[']
       when ']' then [:']']
-      when '-' then [:'-']
+      when '-' then [:'-']         # level 1  OR (classic) dash
+      when '--' then [:'--']       # level 2
+      when '---' then [:'---']     # level 3
+      when '----' then [:'----']   # level 4
       else
         nil   ## ignore others (e.g. brackets [])
       end
```
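Per the inline comments, the four dash tokens mark levels 1 through 4; a tiny sketch of that mapping:

```ruby
## dash run length maps to the (heading) level named in the comments above
{ '-' => 1, '--' => 2, '---' => 3, '----' => 4 }.each do |dash, level|
  pp [dash.to_sym, level]   ## e.g. [:-, 1] ... [:"----", 4]
end
```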
```diff
@@ -504,7 +550,7 @@ def _tokenize_line( line )
         puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"   if debug?
         @re = RE
         ## note - auto-add PROP_END (<PROP_END>)
-        tokens << [:PROP_END, "
+        tokens << [:PROP_END, "<|PROP_END|>"]
       end
     end
 
```
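The `PROP_END` sentinel now uses the same `<|...|>` literal style as `<|BLANK|>`. A sketch of the auto-added closer; the surrounding property tokens are invented, only the sentinel comes from the diff:

```ruby
## token stream while in PROP_RE mode (contents are made up)
tokens = [[:PROP, 'round'], [:TEXT, '1']]

## on leaving PROP_RE mode the lexer auto-appends the closer
tokens << [:PROP_END, '<|PROP_END|>']
pp tokens.last   ## => [:PROP_END, "<|PROP_END|>"]
```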