RubyGems - sportdb-parser - Versions diffs - 0.5.6 → 0.5.8 - Mend

sportdb-parser 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +1 -1
data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} +38 -29
data/lib/sportdb/parser/parser.rb +340 -320
data/lib/sportdb/parser/racc_parser.rb +40 -12
data/lib/sportdb/parser/racc_tree.rb +1 -1
data/lib/sportdb/parser/token-date.rb +2 -2
data/lib/sportdb/parser/token-score.rb +2 -2
data/lib/sportdb/parser/token-status.rb +2 -2
data/lib/sportdb/parser/token-text.rb +2 -2
data/lib/sportdb/parser/token.rb +2 -2
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +31 -12
metadata +3 -3

data/lib/sportdb/parser/racc_parser.rb CHANGED Viewed

@@ -5,12 +5,18 @@
 class RaccMatchParser
-def initialize( txt )
+def initialize( txt,  debug: false )
     ## puts "==> txt:"
     ## puts txt
-    parser = SportDb::Parser.new
-    @tokens = parser.tokenize( txt )
+    @tree   = []
+    @errors = []
+    ### todo:
+    ##  -  pass along debug flag
+    lexer = SportDb::Lexer.new( txt )
+    ##  note - use tokenize_with_errors and add/collect tokenize errors
+    @tokens, @errors = lexer.tokenize_with_errors
     ## pp @tokens
     ## quick hack - convert to racc format single char literal tokens e.g. '@' etc.
@@ -22,11 +28,22 @@ def initialize( txt )
                  end
                end
   end
+  def debug( value ) @debug = value; end
+  def debug?()  @debug == true; end
+  ## debug - trace / print message
+  def trace( msg )
+     puts "  [parse] " + msg    if debug?
+  end
   def next_token
     tok = @tokens.shift
-    puts "next_token => #{tok.pretty_inspect}"
+    trace( "next_token => #{tok.pretty_inspect}" )
     tok
   end
@@ -34,19 +51,30 @@ def initialize( txt )
 #      puts "Parse error on token: #{error_token_id}, value: #{error_value}"
 #  end
-  def parse
-     puts "parse:"
-     @tree = []
+  def parse_with_errors
+     trace( "start parse:" )
      do_parse
-     @tree
+     [@tree, @errors]
+  end
+  def parse  ## convenience shortcut (ignores errors)
+    tree, _ = parse_with_errors
+    tree
   end
-  def on_error(*args)
+  attr_reader :errors
+  def errors?()   @errors.size > 0; end
+  def on_error(error_token_id, error_value, value_stack)
+    args = [error_token_id, error_value, value_stack]
     puts
     puts "!! on parse error:"
     puts "args=#{args.pretty_inspect}"
-    exit 1  ##   exit for now  -  get and print more info about context etc.!!
+    @errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
+    ## exit 1  ##   exit for now  -  get and print more info about context etc.!!
   end

data/lib/sportdb/parser/racc_tree.rb CHANGED Viewed

@@ -66,7 +66,7 @@ RoundDef   = Struct.new( :name, :date, :duration )  do
     printer.text( "<RoundDef " )
     printer.text( self.name )
     printer.text( " date=" + self.date.pretty_inspect ) if date
-    printer.text( " durattion=" + self.duration.pretty_inspect ) if duration
+    printer.text( " duration=" + self.duration.pretty_inspect ) if duration
     printer.text( ">" )
   end
 end

data/lib/sportdb/parser/token-date.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module SportDb
-class Parser
+class Lexer
@@ -297,6 +297,6 @@ DURATION_RE = Regexp.union(
-end  #   class Parser
+end  #   class Lexer
 end  # module SportDb

data/lib/sportdb/parser/token-score.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module SportDb
-class Parser
+class Lexer
     ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
@@ -130,5 +130,5 @@ SCORE_RE = Regexp.union(
   SCORE__FT_HT__RE,        # e.g. 1-1 (1-0) or 1-1  -- note - must go last!!!
 )
-end  #  class Parser
+end  #  class Lexer
 end  # module SportDb

data/lib/sportdb/parser/token-status.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module SportDb
-class Parser
+class Lexer
 ##  (match) status
 ##    note: english usage - cancelled (in UK), canceled (in US)
@@ -40,6 +40,6 @@ STATUS_RE = %r{
 }ix
-end  #  class Parser
+end  #  class Lexer
 end  # module SportDb

data/lib/sportdb/parser/token-text.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module SportDb
-class Parser
+class Lexer
 ##  note - do NOT allow single alpha text for now
@@ -129,5 +129,5 @@ TEXT_RE = %r{
 }ix
-end # class Parser
+end # class Lexer
 end # module SportDb

data/lib/sportdb/parser/token.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module SportDb
-class Parser
+class Lexer
 ##
@@ -265,5 +265,5 @@ RE = Regexp.union(  PROP_KEY_RE, ##  start with prop key (match will/should swit
                      TEXT_RE )
-end  # class Parser
+end  # class Lexer
 end # module SportDb

data/lib/sportdb/parser/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module SportDb
     module Parser
   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
   MINOR = 5
-  PATCH = 6
+  PATCH = 8
   VERSION = [MAJOR,MINOR,PATCH].join('.')
   def self.version

data/lib/sportdb/parser.rb CHANGED Viewed

@@ -22,7 +22,7 @@ require_relative 'parser/token-date'
 require_relative 'parser/token-text'
 require_relative 'parser/token-status'
 require_relative 'parser/token'
-require_relative 'parser/tokenizer'
+require_relative 'parser/lexer'
 require_relative 'parser/parser'   ## auto-generated by racc (from parser.y)
 require_relative 'parser/racc_parser'
@@ -46,18 +46,37 @@ end  # module SportDb
 module SportDb
-###
-##  todo/fix -   use LangHelper or such
-##   e.g.     class Parser
-##                include LangHelper
-##            end
 class Parser
-  ## keep "old" access to checking for group, round & friends
-  ##    for now for compatibility
-  def is_group?( text )  Lang.is_group?( text ); end
-  def is_round?( text )  Lang.is_round?( text ); end
-  def is_leg?( text )    Lang.is_leg?( text ); end
-end
+####################
+#  "default" lexer & parser  (wraps RaccMatchParser)
+  def tokenize_with_errors( lines, debug: false )
+     lexer = Lexer.new( lines )
+     tokens, errors = lexer.tokenize_with_errors
+     [tokens, errors]
+  end
+  ### convenience helper - ignore errors by default
+  def tokenize( lines, debug: false )
+    tokens, _ = tokenize_with_errors( lines, debug: debug )
+    tokens
+  end
+  def parse_with_errors( lines, debug: false )
+    ## todo/check - if lines needs to check for array of lines and such
+    ##                        or handled by tokenizer???
+    parser = RaccMatchParser.new( lines )
+    tree, errors = parser.parse_with_errors
+    [tree, errors]
+  end
+  ### convenience helper - ignore errors by default
+  def parse( lines, debug: false )
+    tree, _ = parse_with_errors( lines, debug: debug )
+    tree
+  end
+end  # class Parser
 end  # module SportDb

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.5.6
+  version: 0.5.8
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-01-20 00:00:00.000000000 Z
+date: 2025-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos
@@ -97,6 +97,7 @@ files:
 - config/rounds_pt.txt
 - lib/sportdb/parser.rb
 - lib/sportdb/parser/lang.rb
+- lib/sportdb/parser/lexer.rb
 - lib/sportdb/parser/parser.rb
 - lib/sportdb/parser/racc_parser.rb
 - lib/sportdb/parser/racc_tree.rb
@@ -105,7 +106,6 @@ files:
 - lib/sportdb/parser/token-status.rb
 - lib/sportdb/parser/token-text.rb
 - lib/sportdb/parser/token.rb
-- lib/sportdb/parser/tokenizer.rb
 - lib/sportdb/parser/version.rb
 homepage: https://github.com/sportdb/sport.db
 licenses: