RubyGems - sportdb-parser - Versions diffs - 0.5.7 → 0.5.8 - Mend

sportdb-parser 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +1 -1
data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} +38 -29
data/lib/sportdb/parser/parser.rb +334 -314
data/lib/sportdb/parser/racc_parser.rb +23 -10
data/lib/sportdb/parser/racc_tree.rb +1 -1
data/lib/sportdb/parser/token-date.rb +2 -2
data/lib/sportdb/parser/token-score.rb +2 -2
data/lib/sportdb/parser/token-status.rb +2 -2
data/lib/sportdb/parser/token-text.rb +2 -2
data/lib/sportdb/parser/token.rb +2 -2
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +31 -12
metadata +3 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: eb23029ea58744e513c4a6af7112ed46a3440540cb7fe77fd2b480c58bbd5b14
-  data.tar.gz: 2aba09728fa175dbde9e270ad7250ee047639c1b8641068bcb5d159cf7e0da34
+  metadata.gz: b3c102d758209b64a04033a772faad7cdaaa4631f5079e56b92dccdfc4b84292
+  data.tar.gz: 4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740
 SHA512:
-  metadata.gz: aa8dd925bdb7c3982ad18bba5f3864ee190535633043734186560e97a8692cfd589bf999e5e67fae3b23d3f625edd9608604b5fb3eb50222f84bd30a2dc97d0f
-  data.tar.gz: ac3e2c3a6c00a22e142db199c97842fad22f67f5c507456ff117ac82dde0ddcfa5ed0c4f0e0a23c78c491fb39ed9cc8d7058491c32bf645f5296ac76861b9aae
+  metadata.gz: 78faffba17eff5ff5dd4b665099cee8eff8addd5e8263433f0662da8a88bd4fa5fa80ed83968dcdb4b7c95ab4254508b156dc09c79ee2d58e556e20cf2168aba
+  data.tar.gz: 4a9a5546ccff399028a2e629a87e86e7c3ee505b7c5b16a15de0053918ddc8e7704c2c8c75517feaab0a9bb5648a8bcf63c66b2cc34800f9d41335748e336f66

data/CHANGELOG.md CHANGED

@@ -1,4 +1,4 @@
-### 0.5.7
+### 0.5.8
 ### 0.0.1 / 2024-07-12
 * Everything is new. First release.

data/Manifest.txt CHANGED

@@ -9,6 +9,7 @@ config/rounds_misc.txt
 config/rounds_pt.txt
 lib/sportdb/parser.rb
 lib/sportdb/parser/lang.rb
+lib/sportdb/parser/lexer.rb
 lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
@@ -17,5 +18,4 @@ lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-status.rb
 lib/sportdb/parser/token-text.rb
 lib/sportdb/parser/token.rb
-lib/sportdb/parser/tokenizer.rb
 lib/sportdb/parser/version.rb

data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} RENAMED

@@ -1,6 +1,6 @@
 module SportDb
-class Parser
+class Lexer
@@ -14,6 +14,20 @@ def log( msg )
 end
+  ###
+  ##  todo/fix -   use LangHelper or such
+  ##   e.g.     class Lexer
+  ##                include LangHelper
+  ##            end
+  ##
+  ##  merge back Lang into Lexer - why? why not?
+  ## keep "old" access to checking for group, round & friends
+  ##    for now for compatibility
+  def is_group?( text )  Lang.is_group?( text ); end
+  def is_round?( text )  Lang.is_round?( text ); end
+  def is_leg?( text )    Lang.is_leg?( text ); end
 ## transforms
 ##
 ##  Netherlands  1-2 (1-1)   England
@@ -107,15 +121,11 @@ end  # class Tokens
-### convience helper - ignore errors by default
-def tokenize( lines, debug: false )
-  tokens, _ = tokenize_with_errors( lines, debug: debug )
-  tokens
-end
+def debug?()  @debug == true; end
-def tokenize_with_errors( lines, debug: false )
+def initialize( lines, debug: false )
+   @debug = debug
-##
 ##  note - for convenience - add support
 ##         comments (incl. inline end-of-line comments) and empty lines here
 ##             why? why not?
@@ -137,33 +147,33 @@ def tokenize_with_errors( lines, debug: false )
     ##   strip lines with comments and empty lines striped / removed
     ##      keep empty lines? why? why not?
     ##      keep leading spaces (indent) - why?
-    txt = String.new
+    @txt = String.new
     txt_pre.each_line do |line|    ## preprocess
        line = line.strip
        next if line.empty? || line.start_with?('#')   ###  skip empty lines and comments
        line = line.sub( /#.*/, '' ).strip             ###  cut-off end-of line comments too
-       txt << line
-       txt << "\n"
+       @txt << line
+       @txt << "\n"
     end
+end
+def tokenize_with_errors
     tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
     errors         = []   ## keep a list of errors - why? why not?
-    txt.each_line do |line|
+    @txt.each_line do |line|
         line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
-        more_tokens, more_errors = _tokenize_line( line, debug: debug )
+        more_tokens, more_errors = _tokenize_line( line )
         tokens_by_line  << more_tokens
         errors          += more_errors
     end # each line
     tokens_by_line = tokens_by_line.map do |tokens|
         #############
         ## pass 1
@@ -246,11 +256,11 @@ end   # method tokenize_with_errors
-def _tokenize_line( line, debug: false )
+def _tokenize_line( line )
   tokens = []
   errors = []   ## keep a list of errors - why? why not?
-  puts ">#{line}<"    if debug
+  puts ">#{line}<"    if debug?
   pos = 0
   ## track last offsets - to report error on no match
@@ -265,7 +275,7 @@ def _tokenize_line( line, debug: false )
   while m = @re.match( line, pos )
-    if debug
+    if debug?
       pp m
       puts "pos: #{pos}"
     end
@@ -274,10 +284,10 @@ def _tokenize_line( line, debug: false )
     if offsets[0] != pos
       ## match NOT starting at start/begin position!!!
       ##  report parse error!!!
-      msg =  "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+      msg =  "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
       puts msg
-      errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
+      errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
       log( msg )
     end
@@ -288,7 +298,7 @@ def _tokenize_line( line, debug: false )
     pos = offsets[1]
-    pp offsets   if debug
+    pp offsets   if debug?
     ##
     ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -331,7 +341,7 @@ def _tokenize_line( line, debug: false )
             when '-' then [:'-']
             when '.' then
                 ## switch back to top-level mode!!
-                puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug
+                puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
                 @re = RE
                 [:'.']
             else
@@ -352,7 +362,7 @@ def _tokenize_line( line, debug: false )
         elsif m[:prop_key]
            ##  switch context  to PROP_RE
            @re = PROP_RE
-           puts "  ENTER PROP_RE MODE"  if debug
+           puts "  ENTER PROP_RE MODE"  if debug?
            [:PROP, m[:key]]
         elsif m[:text]
           [:TEXT, m[:text]]   ## keep pos - why? why not?
@@ -462,7 +472,7 @@ def _tokenize_line( line, debug: false )
     tokens << t    if t
-    if debug
+    if debug?
       print ">"
       print "*" * pos
       puts "#{line[pos..-1]}<"
@@ -475,13 +485,12 @@ def _tokenize_line( line, debug: false )
     puts msg
     log( msg )
-    errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
+    errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
   end
   [tokens,errors]
 end
-end  # class Parser
+end  # class Lexer
 end # module SportDb