RubyGems - ebnf - Versions diffs - 0.0.1 → 0.1.0 - Mend

ebnf 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

data/lib/ebnf/ll1/lexer.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-module RDF::LL1
-  require 'rdf/ll1/scanner'    unless defined?(Scanner)
+module EBNF::LL1
+  require 'ebnf/ll1/scanner'    unless defined?(Scanner)
   ##
   # A lexical analyzer
@@ -10,13 +10,13 @@ module RDF::LL1
   #     ...
   #   ]
   #   ttl = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
-  #   lexer = RDF::LL1::Lexer.tokenize(ttl, terminals)
+  #   lexer = EBNF::LL1::Lexer.tokenize(ttl, terminals)
   #   lexer.each_token do |token|
   #     puts token.inspect
   #   end
   #
   # @example Tokenizing and returning a token stream
-  #   lexer = RDF::LL1::Lexer.tokenize(...)
+  #   lexer = EBNF::LL1::Lexer.tokenize(...)
   #   while :some-condition
   #     token = lexer.first # Get the current token
   #     token = lexer.shift # Get the current token and shift to the next
@@ -24,8 +24,8 @@ module RDF::LL1
   #
   # @example Handling error conditions
   #   begin
-  #     RDF::Turtle::Lexer.tokenize(query)
-  #   rescue RDF::Turtle::Lexer::Error => error
+  #     EBNF::LL1::Lexer.tokenize(query)
+  #   rescue EBNF::LL1::Lexer::Error => error
   #     warn error.inspect
   #   end
   #
@@ -307,7 +307,7 @@ module RDF::LL1
     # Represents a lexer token.
     #
     # @example Creating a new token
-    #   token = RDF::LL1::Lexer::Token.new(:LANGTAG, "en")
+    #   token = EBNF::LL1::Lexer::Token.new(:LANGTAG, "en")
     #   token.type   #=> :LANGTAG
     #   token.value  #=> "en"
     #
@@ -369,10 +369,10 @@ module RDF::LL1
       # of this token.
       #
       # @example Matching using the symbolic type
-      #   RDF::LL1::Lexer::Token.new(:NIL) === :NIL     #=> true
+      #   EBNF::LL1::Lexer::Token.new(:NIL) === :NIL     #=> true
       #
       # @example Matching using the string value
-      #   RDF::LL1::Lexer::Token.new(nil, "{") === "{"  #=> true
+      #   EBNF::LL1::Lexer::Token.new(nil, "{") === "{"  #=> true
       #
       # @param  [Symbol, String] value
       # @return [Boolean]
@@ -425,7 +425,7 @@ module RDF::LL1
     # Raised for errors during lexical analysis.
     #
     # @example Raising a lexer error
-    #   raise RDF::LL1::Lexer::Error.new(
+    #   raise EBNF::LL1::Lexer::Error.new(
     #     "invalid token '%' on line 10",
     #     :input => query, :token => '%', :lineno => 9)
     #
@@ -472,4 +472,4 @@ module RDF::LL1
       end
     end
   end # class Lexer
-end # module RDF::Turtle
+end # module EBNF::LL1

data/lib/ebnf/ll1/parser.rb CHANGED Viewed

@@ -1,7 +1,6 @@
-require 'rdf'
-require 'rdf/ll1/lexer'
+require 'ebnf/ll1/lexer'
-module RDF::LL1
+module EBNF::LL1
   ##
   # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
   module Parser
@@ -33,9 +32,9 @@ module RDF::LL1
       #
       # @param [Symbol] term
       #   Term which is a key in the branch table
-      # @yield [reader, phase, input, current]
-      # @yieldparam [RDF::Reader] reader
-      #   Reader instance
+      # @yield [parser, phase, input, current]
+      # @yieldparam [Object] parser
+      #   Parser instance
       # @yieldparam [Symbol] phase
       #   Phase of parsing, one of :start, or :finish
       # @yieldparam [Hash] input
@@ -45,7 +44,7 @@ module RDF::LL1
       #   may be initialized with data to pass to further productions,
       #   during :finish, it contains data placed by earlier productions
       # @yieldparam [Prod] block
-      #   Block passed to initialization for yielding to calling reader.
+      #   Block passed to initialization for yielding to calling parser.
       #   Should conform to the yield specs for #initialize
       # Yield to generate a triple
       def production(term, &block)
@@ -66,9 +65,9 @@ module RDF::LL1
       # @param [Hash] options
       # @option options [Boolean] :unescape
       #   Cause strings and codepoints to be unescaped.
-      # @yield [reader, term, token, input]
-      # @yieldparam [RDF::Reader] reader
-      #   Reader instance
+      # @yield [parser, term, token, input]
+      # @yieldparam [Object] parser
+      #   Parser instance
       # @yieldparam [Symbol] term
       #   A symbol indicating the production which referenced this terminal
       # @yieldparam [String] token
@@ -76,7 +75,7 @@ module RDF::LL1
       # @yieldparam [Hash] input
       #   A Hash containing input from the parent production
       # @yieldparam [Prod] block
-      #   Block passed to initialization for yielding to calling reader.
+      #   Block passed to initialization for yielding to calling parser.
       #   Should conform to the yield specs for #initialize
       def terminal(term, regexp, options = {}, &block)
         @@patterns ||= []
@@ -96,10 +95,10 @@ module RDF::LL1
     # @example
     #   require 'ebnf/ll1/parser'
     #
-    #   class Reader << RDF::Reader
-    #     include RDF::LL1::Parser
+    #   class MyParser
+    #     include EBNF::LL1::Parser
     #
-    #     branch      RDF::Turtle::Reader::BRANCH
+    #     branch      MyParser::BRANCH
     #
     #     ##
     #     # Defines a production called during different phases of parsing
@@ -107,14 +106,14 @@ module RDF::LL1
     #     # current production
     #     #
     #     # Yield to generate a triple
-    #     production :object do |reader, phase, input, current|
+    #     production :object do |parser, phase, input, current|
     #       object = current[:resource]
     #       yield :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
     #     end
     #
     #     ##
     #     # Defines the pattern for a terminal node
-    #     terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |reader, production, token, input|
+    #     terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |parser, production, token, input|
     #       input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
     #     end
     #
@@ -138,29 +137,26 @@ module RDF::LL1
     #   end
     #
     # @param  [String, #to_s]          input
-    # @param [Symbol, #to_s] prod The starting production for the parser.
-    #   It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
+    # @param [Symbol, #to_s] prod The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
     # @param  [Hash{Symbol => Object}] options
-    # @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch
-    #   LL1 branch table.
+    # @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
     # @option options [Hash{Symbol,String => Array<Symbol,String>}] :first ({})
     #   Lists valid terminals that can precede each production (for error recovery).
-    # @option options [HHash{Symbol,String => Array<Symbol,String>}] :follow ({})
+    # @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
     #   Lists valid terminals that can follow each production (for error recovery).
     # @option options [Boolean]  :validate     (false)
-    #   whether to validate the parsed statements and values. If not validating,
-    #   the parser will attempt to recover from errors.
+    #   whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
     # @option options [Boolean] :progress
     #   Show progress of parser productions
     # @option options [Boolean] :debug
     #   Detailed debug output
     # @yield [context, *data]
-    #   Yields for to return data to reader
+    #   Yields to return data to the parser
     # @yieldparam [:statement, :trace] context
     #   Context for block
     # @yieldparam [Symbol] *data
     #   Data specific to the call
-    # @return [RDF::LL1::Parser]
+    # @return [EBNF::LL1::Parser]
     # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
     def parse(input = nil, prod = nil, options = {}, &block)
       @options = options.dup
@@ -179,7 +175,7 @@ module RDF::LL1
       raise Error, "Starting production not defined" unless prod
       @prod_data = [{}]
-      prod = RDF::URI(prod).fragment.to_sym unless prod.is_a?(Symbol)
+      prod = prod.split('#').last.to_sym unless prod.is_a?(Symbol)
       todo_stack = [{:prod => prod, :terms => nil}]
       while !todo_stack.empty?
@@ -216,8 +212,8 @@ module RDF::LL1
             end
             if sequence.nil?
-              if prod_branch.has_key?(:"ebnf:empty")
-                debug("parse(production)", :level => 2) {"empty sequence for ebnf:empty"}
+              if prod_branch.has_key?(:_empty)
+                debug("parse(production)", :level => 2) {"empty sequence for _empty"}
               else
                 # If there is no sequence for this production, we're
                 # in error recovery, and _token_ has been advanced to
@@ -357,7 +353,7 @@ module RDF::LL1
       # If this token can be used by the top production, return it
       # Otherwise, if the branch table allows empty, also return the token
       return token if !@recovering && (
-        (@branch[cur_prod] && @branch[cur_prod].has_key?(:"ebnf:empty")) ||
+        (@branch[cur_prod] && @branch[cur_prod].has_key?(:_empty)) ||
         first.any? {|t| token === t})
       # Otherwise, it's an error condition, and skip either until
@@ -417,7 +413,7 @@ module RDF::LL1
     def get_token
       token = begin
         @lexer.first
-      rescue RDF::LL1::Lexer::Error => e
+      rescue EBNF::LL1::Lexer::Error => e
         # Recover from lexer error
         @lineno = e.lineno
         error("get_token", "With input '#{e.input}': #{e.message}",
@@ -537,5 +533,5 @@ module RDF::LL1
         super(message.to_s)
       end
     end # class Error
-  end # class Reader
-end # module RDF::Turtle
+  end # module Parser
+end # module EBNF::LL1

data/lib/ebnf/ll1/scanner.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'strscan'    unless defined?(StringScanner)
-module RDF::LL1
+module EBNF::LL1
   ##
   # Overload StringScanner with file operations
   #

data/lib/ebnf/parser.rb ADDED Viewed

@@ -0,0 +1,297 @@
+module EBNF
+  module Parser
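+    ##
+    # Iterate over rule strings, yielding each collected rule to the block.
+    # A line that starts with '\[' or '@' starts a new rule; any rule collected
+    # so far is yielded first. The `@terminals` marker is yielded on its own.
+    #
+    # @param [StringScanner] scanner
+    # @yield [rule_string]
+    # @yieldparam [String] rule_string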
+    def eachRule(scanner)
+      cur_lineno = 1
+      r = ''
+      until scanner.eos?
+        case
+        when s = scanner.scan(%r(\s+)m)
+          # Eat whitespace
+          cur_lineno += s.count("\n")
+          #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
+        when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
+          # Eat comments
+          cur_lineno += s.count("\n")
+          debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
+        when s = scanner.scan(%r(^@terminals))
+          #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
+          yield(r) unless r.empty?
+          @lineno = cur_lineno
+          yield(s)
+          r = ''
+        when s = scanner.scan(/@pass/)
+          # Found rule start, if we've already collected a rule, yield it
+          #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
+          yield r unless r.empty?
+          @lineno = cur_lineno
+          r = s
+        when s = scanner.scan(/\[(?=\w+\])/)
+          # Found rule start, if we've already collected a rule, yield it
+          yield r unless r.empty?
+          #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
+          @lineno = cur_lineno
+          r = s
+        else
+          # Collect until end of line, or start of comment
+          s = scanner.scan_until(%r((?:/\*)|$)m)
+          cur_lineno += s.count("\n")
+          #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
+          r += s
+        end
+      end
+      yield r unless r.empty?
+    end
+    ##
+    # Parse a rule of the form `[num] sym ::= expr` into a rule number,
+    # a symbol and an expression.
+    #
+    # @param [String] rule
+    # @return [Rule]
+    def ruleParts(rule)
+      num_sym, expr = rule.split('::=', 2).map(&:strip)
+      num, sym = num_sym.split(']', 2).map(&:strip)
+      num = num[1..-1]
+      r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, :ebnf => self)
+      debug("ruleParts") { r.inspect }
+      r
+    end
+    ##
+    # Parse a string into an expression tree and a remaining string
+    #
+    # @example
+    #     >>> ebnf("a b c")
+    #     ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    #     >>> ebnf("a? b+ c*")
+    #     ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
+    #
+    #     >>> ebnf(" | x xlist")
+    #     ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
+    #
+    #     >>> ebnf("a | (b - c)")
+    #     ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
+    #
+    #     >>> ebnf("a b | c d")
+    #     ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
+    #
+    #     >>> ebnf("a | b | c")
+    #     ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    #     >>> ebnf("a) b c")
+    #     (('id', 'a'), ' b c')
+    #
+    #     >>> ebnf("BaseDecl? PrefixDecl*")
+    #     ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
+    #
+    #     >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
+    #     ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
+    #
+    # @param [String] s
+    # @return [Array]
+    def ebnf(s)
+      debug("ebnf") {"(#{s.inspect})"}
+      e, s = depth {alt(s)}
+      debug {"=> alt returned #{[e, s].inspect}"}
+      unless s.empty?
+        t, ss = depth {terminal(s)}
+        debug {"=> terminal returned #{[t, ss].inspect}"}
+        return [e, ss] if t.is_a?(Array) && t.first == :")"
+      end
+      [e, s]
+    end
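+    ##
+    # Parse alternatives: sequences separated by '|'.
+    #
+    # @example
+    #     >>> alt("a | b | c")
+    #     ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    # @param [String] s
+    # @return [Array] the parsed expression followed by the remaining string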
+    def alt(s)
+      debug("alt") {"(#{s.inspect})"}
+      args = []
+      while !s.empty?
+        e, s = depth {seq(s)}
+        debug {"=> seq returned #{[e, s].inspect}"}
+        if e.to_s.empty?
+          break unless args.empty?
+          e = [:seq, []] # empty sequence
+        end
+        args << e
+        unless s.empty?
+          t, ss = depth {terminal(s)}
+          break unless t[0] == :alt
+          s = ss
+        end
+      end
+      args.length > 1 ? [args.unshift(:alt), s] : [e, s]
+    end
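+    ##
+    # Parse a sequence: consecutive `diff` expressions, collected until one
+    # comes back empty.
+    #
+    # @example
+    #     >>> seq("a b c")
+    #     ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    #     >>> seq("a b? c")
+    #     ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
+    #
+    # @param [String] s
+    # @return [Array] the parsed expression (an empty string if nothing was
+    #   parsed) followed by the remaining string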
+    def seq(s)
+      debug("seq") {"(#{s.inspect})"}
+      args = []
+      while !s.empty?
+        e, ss = depth {diff(s)}
+        debug {"=> diff returned #{[e, ss].inspect}"}
+        unless e.to_s.empty?
+          args << e
+          s = ss
+        else
+          break;
+        end
+      end
+      if args.length > 1
+        [args.unshift(:seq), s]
+      elsif args.length == 1
+        args + [s]
+      else
+        ["", s]
+      end
+    end
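+    ##
+    # Parse a difference: a postfix expression, optionally followed by '-'
+    # and a primary.
+    #
+    # @example
+    #     >>> diff("a - b")
+    #     ((diff, \[('id', 'a'), ('id', 'b')\]), '')
+    #
+    # @param [String] s
+    # @return [Array] the parsed expression followed by the remaining string
+    # @raise [RuntimeError] "Syntax Error" if '-' is not followed by a primary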
+    def diff(s)
+      debug("diff") {"(#{s.inspect})"}
+      e1, s = depth {postfix(s)}
+      debug {"=> postfix returned #{[e1, s].inspect}"}
+      unless e1.to_s.empty?
+        unless s.empty?
+          t, ss = depth {terminal(s)}
+          debug {"diff #{[t, ss].inspect}"}
+          if t.is_a?(Array) && t.first == :diff
+            s = ss
+            e2, s = primary(s)
+            unless e2.to_s.empty?
+              return [[:diff, e1, e2], s]
+            else
+              error("diff", "Syntax Error")
+              raise "Syntax Error"
+            end
+          end
+        end
+      end
+      [e1, s]
+    end
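+    ##
+    # Parse a postfix expression: a primary, optionally followed by one of
+    # '?', '*' or '+'.
+    #
+    # @example
+    #     >>> postfix("a b c")
+    #     (('id', 'a'), ' b c')
+    #
+    #     >>> postfix("a? b c")
+    #     ((opt, ('id', 'a')), ' b c')
+    #
+    # @param [String] s
+    # @return [Array] the parsed expression (an empty string if no primary
+    #   was found) followed by the remaining string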
+    def postfix(s)
+      debug("postfix") {"(#{s.inspect})"}
+      e, s = depth {primary(s)}
+      debug {"=> primary returned #{[e, s].inspect}"}
+      return ["", s] if e.to_s.empty?
+      if !s.empty?
+        t, ss = depth {terminal(s)}
+        debug {"=> #{[t, ss].inspect}"}
+        if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
+          return [[t.first, e], ss]
+        end
+      end
+      [e, s]
+    end
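+    ##
+    # Parse a primary: a single terminal, or an expression introduced by '('.
+    #
+    # @example
+    #     >>> primary("a b c")
+    #     (('id', 'a'), ' b c')
+    #
+    # @param [String] s
+    # @return [Array] the parsed expression (an empty string if the next
+    #   terminal cannot start a primary) followed by the remaining string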
+    def primary(s)
+      debug("primary") {"(#{s.inspect})"}
+      t, s = depth {terminal(s)}
+      debug {"=> terminal returned #{[t, s].inspect}"}
+      if t.is_a?(Symbol) || t.is_a?(String)
+        [t, s]
+      elsif %w(range hex).map(&:to_sym).include?(t.first)
+        [t, s]
+      elsif t.first == :"("
+        e, s = depth {ebnf(s)}
+        debug {"=> ebnf returned #{[e, s].inspect}"}
+        [e, s]
+      else
+        ["", s]
+      end
+    end
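+    ##
+    # Parse one terminal; return the terminal and the remaining string.
+    #
+    # A terminal is represented as a tuple whose 1st item gives the type;
+    # some types have additional info in the tuple. Quoted literals are
+    # returned as a bare String and identifiers as a bare Symbol.
+    #
+    # @example
+    #     >>> terminal("'abc' def")
+    #     (("'", 'abc'), ' def')
+    #
+    #     >>> terminal("[0-9]")
+    #     ((range, '0-9'), '')
+    #
+    #     >>> terminal("#x00B7")
+    #     ((hex, '#x00B7'), '')
+    #
+    #     >>> terminal("\[#x0300-#x036F\]")
+    #     ((range, '#x0300-#x036F'), '')
+    #
+    #     >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
+    #     ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
+    #
+    # @param [String] s
+    # @return [Array] the terminal followed by the remaining string
+    # @raise [RuntimeError] "Syntax Error" if the input does not start with a
+    #   recognized terminal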
+    def terminal(s)
+      s = s.strip
+      case m = s[0,1]
+      when '"', "'"
+        l, s = s[1..-1].split(m, 2)
+        [l, s]
+      when '['
+        l, s = s[1..-1].split(']', 2)
+        [[:range, l], s]
+      when '#'
+        s.match(/(#\w+)(.*)$/)
+        l, s = $1, $2
+        [[:hex, l], s]
+      when /[[:alpha:]]/
+        s.match(/(\w+)(.*)$/)
+        l, s = $1, $2
+        [l.to_sym, s]
+      when '@'
+        s.match(/@(#\w+)(.*)$/)
+        l, s = $1, $2
+        [[:"@", l], s]
+      when '-'
+        [[:diff], s[1..-1]]
+      when '?'
+        [[:opt], s[1..-1]]
+      when '|'
+        [[:alt], s[1..-1]]
+      when '+'
+        [[:plus], s[1..-1]]
+      when '*'
+        [[:star], s[1..-1]]
+      when /[\(\)]/
+        [[m.to_sym], s[1..-1]]
+      else
+        error("terminal", "unrecognized terminal: #{s.inspect}")
+        raise "Syntax Error"
+      end
+    end
+  end
+end