RubyGems - ebnf - Versions diffs - 0.1.0 → 0.2.0 - Mend

ebnf 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

data/lib/ebnf/base.rb CHANGED

@@ -165,7 +165,7 @@ module EBNF
         require 'sxp'
         SXP::Generator.string(ast.sort)
       rescue LoadError
-        ast.to_sxp
+        ast.sort_by{|r| r.num.to_f}.to_sxp
       end
     end
     def to_s; to_sxp; end
@@ -219,7 +219,7 @@ module EBNF
     # Progress output, less than debugging
     def progress(*args)
-      return unless @options[:progress]
+      return unless @options[:progress] || @options[:debug]
       options = args.last.is_a?(Hash) ? args.pop : {}
       depth = options[:depth] || @depth
       args << yield if block_given?

data/lib/ebnf/ll1.rb CHANGED

@@ -50,126 +50,148 @@ module EBNF
       # Comprehnsion rule, create shorter versions of all non-terminal sequences
       comprehensions = []
-      begin
-        comprehensions = []
-        ast.select {|r| r.seq? && r.kind == :rule && r.expr.length > 2}.each do |rule|
-          new_expr = rule.expr[2..-1].unshift(:seq)
-          unless ast.any? {|r| r.expr == new_expr}
-            debug("first_follow") {"add comprehension rule for #{rule.sym} => #{new_expr.inspect}"}
-            new_rule = rule.build(new_expr)
-            rule.comp = new_rule
-            comprehensions << new_rule
+      ittr = 0
+      depth do
+        begin
+          comprehensions = []
+          ast.select {|r| r.seq? && r.kind == :rule && r.expr.length > 2}.each do |rule|
+            new_expr = rule.expr[2..-1].unshift(:seq)
+            unless ast.any? {|r| r.expr == new_expr}
+              debug("FF.c") {"(#{ittr}) add comprehension rule for #{rule.sym} => #{new_expr.inspect}"}
+              new_rule = rule.build(new_expr)
+              rule.comp = new_rule
+              comprehensions << new_rule
+            end
           end
-        end
-        @ast += comprehensions
-        progress("first_follow") {"comprehensions #{comprehensions.length}"}
-      end while !comprehensions.empty?
-      # Fi(a w' ) = { a } for every terminal a
-      # For each rule who's expr's first element of a seq a terminal, or having any element of alt a terminal, add that terminal to the first set for this rule
-      each(:rule) do |rule|
-        each(:terminal) do |terminal|
-          rule.add_first([terminal.sym]) if rule.starts_with(terminal.sym)
-        end
+          @ast += comprehensions
+          progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"}
+          ittr += 1
+        end while !comprehensions.empty?
-        # Add strings to first for strings which are start elements
-        start_strs = rule.starts_with(String)
-        rule.add_first(start_strs) if start_strs
-      end
-      # # Fi(ε) = { ε }
-      # Add _eps as a first of _empty
-      empty = ast.detect {|r| r.sym == :_empty}
-      empty.add_first([:_eps])
-      # Loop until no more first elements are added
-      firsts, follows = 0, 0
-      begin
-        firsts, follows = 0, 0
+        # Fi(a w' ) = { a } for every terminal a
+        # For each rule who's expr's first element of a seq a terminal, or having any element of alt a terminal, add that terminal to the first set for this rule
         each(:rule) do |rule|
-          each(:rule) do |first_rule|
-            next if first_rule == rule || first_rule.first.nil?
-            # Fi(A w' ) = Fi(A) for every nonterminal A with ε not in Fi(A)
-            # For each rule that starts with another rule having firsts, add  the firsts of that rule to this rule, unless it already has those terminals in its first
-            if rule.starts_with(first_rule.sym)
-              depth {debug("FF.1") {"add first #{first_rule.first.inspect} to #{rule.sym}"}}
-              firsts += rule.add_first(first_rule.first)
+          each(:terminal) do |terminal|
+            if rule.starts_with?(terminal.sym)
+              debug("FF.t") {"(0) add first #{terminal.sym} to #{rule.sym}"}
+              rule.add_first([terminal.sym])
             end
+          end
-            # Fi(A w' ) = Fi(A) \ { ε } ∪ Fi(w' ) for every nonterminal A with ε in Fi(A)
-            # For each rule starting with eps, add the terminals for the comprehension of this rule
-            if rule.seq? &&
-               rule.expr.fetch(1, nil) == first_rule &&
-               first_rule.first.include?(:_eps) &&
-               (comp = rule.comp)
+          # Add strings to first for strings which are start elements
+          start_strs = rule.starts_with?(String)
+          if start_strs
+            debug("FF.t") {"(0) add firsts #{start_strs.join(", ")} to #{rule.sym}"}
+            rule.add_first(start_strs)
+          end
+        end
+        # # Fi(ε) = { ε }
+        # Add _eps as a first of _empty
+        find_rule(:_empty).add_first([:_eps])
+        # Loop until no more first elements are added
+        firsts, follows, ittr = 0, 0, 0
+        begin
+          firsts, follows = 0, 0
+          each(:rule) do |rule|
+            each(:rule) do |first_rule|
+              next if first_rule == rule || first_rule.first.nil?
+              # Fi(A w' ) = Fi(A) for every nonterminal A with ε not in Fi(A)
+              # For each rule that starts with another rule having firsts which don't include _eps, add  the firsts of that rule to this rule, unless it already has those terminals in its first.
+              # Note that it's simpler to promote all fi(A) to fi(A w') and exclude _eps, as this covers corner cases of the following rule.
+              if rule.starts_with?(first_rule.sym) && first_rule.first != [:_eps]
+                debug("FF.1") {"(#{ittr}) add first #{first_rule.first.inspect} from #{first_rule.sym} to #{rule.sym}"}
+                firsts += rule.add_first(first_rule.first - [:_eps])
+              end
-              depth {debug("FF.2") {"add first #{first_rule.first.inspect} to #{comp.sym}"}}
-              firsts += comp.add_first(first_rule.first)
+              # Fi(A w' ) = Fi(A) \ { ε } ∪ Fi(w' ) for every nonterminal A with ε in Fi(A)
+              # For each rule starting with eps, add the terminals for the comprehension of this rule
+              if rule.seq? &&
+                 rule.expr.fetch(1, nil) == first_rule.sym &&
+                 first_rule.first_includes_eps? &&
+                 (comp = rule.comp) &&
+                 comp.first &&
+                 !(comp.first - [:_eps]).empty?
+                to_add = comp.first - [:_eps]
+                debug("FF.2") {"(#{ittr}) add first #{to_add.inspect} from #{comp.sym} to #{rule.sym}"}
+                firsts += rule.add_first(to_add)
+              end
             end
-          end
-          # Only run these rules if the rule is a sequence having two or more elements, whos first element is also a sequence and first_rule is the comprehension of rule
-          if rule.seq? && (comp = rule.comp)
-             #if there is a rule of the form Aj → wAiw' , then
-             #
-            if (ai = find_rule(rule.expr[1])) && ai.kind == :rule && comp.first
-              #    * if the terminal a is in Fi(w' ), then add a to Fo(Ai)
-              #
-              # Add follow terminals based on the first terminals
-              # of a comprehension of this rule (having the same
-              # sequence other than the first rule in the sequence)
+            # Only run these rules if the rule is a sequence having two or more elements, whos first element is also a sequence and first_rule is the comprehension of rule
+            if rule.seq? && (comp = rule.comp)
+               #if there is a rule of the form Aj → wAiw' , then
+               #
+              if (ai = find_rule(rule.expr[1])) && ai.kind == :rule && comp.first
+                #    * if the terminal a is in Fi(w' ), then add a to Fo(Ai)
+                #
+                # Add follow terminals based on the first terminals
+                # of a comprehension of this rule (having the same
+                # sequence other than the first rule in the sequence)
+                #
+                # @example
+                #   rule: (seq a b c)
+                #   first_rule: (seq b c)
+                #   if first_rule.first == [T]
+                #   => a.follow += [T]
+                debug("FF.3") {"(#{ittr}) add follow #{comp.first.inspect} from #{comp.sym} to #{ai.sym}"}
+                follows += ai.add_follow(comp.first)
+              end
+              # Follows of a rule are also follows of the comprehension of the rule.
+              if rule.follow
+                debug("FF.4") {"(#{ittr}) add follow #{rule.follow.inspect} to from #{rule.sym} #{comp.sym}"}
+                follows += comp.add_follow(rule.follow)
+              end
+              #    * if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
               #
-              # @example
-              #   rule: (seq a b c)
-              #   first_rule: (seq b c)
-              #   if first_rule.first == [T]
-              #   => a.follow += [T]
-              depth {debug("FF.3") {"add follow #{comp.first.inspect} to #{ai.sym}"}}
-              follows += ai.add_follow(comp.first)
+              # If the comprehension of a sequence has an _eps first, then the follows of the rule also become the follows of the first member of the rule
+              if comp.first && comp.first.include?(:_eps) && rule.first &&
+                 (member = find_rule(rule.expr.fetch(1, nil))) &&
+                 member.kind == :rule
+                debug("FF.5") {"(#{ittr}) add follow #{rule.follow.inspect} from #{rule.sym} to #{member.sym}"}
+                follows += member.add_follow(rule.first)
+              end
             end
-            # Follows of a rule are also follows of the comprehension of the rule.
-            if rule.follow
-              depth {debug("FF.4") {"add follow #{rule.follow.inspect} to #{comp.sym}"}}
-              follows += comp.add_follow(rule.follow)
+            # Firsts of elements of an alt are firsts of the alt
+            if rule.alt?
+              rule.expr[1..-1].map {|s| find_rule(s)}.compact.select(&:first).each do |mem|
+                debug("FF.6") {"(#{ittr}) add first #{mem.first.inspect} from #{mem.sym} to #{rule.sym}"}
+                rule.add_first(mem.first)
+              end
             end
-            #    * if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
-            #
-            # If the comprehension of a sequence has an _eps first, then the follows of the rule also become the follows of the first member of the rule
-            if comp.first && comp.first.include?(:_eps) && rule.first &&
-               (member = find_rule(rule.expr.fetch(1, nil))) &&
+            # Follows of a rule are also follows of the last production in the rule
+            if rule.seq? && rule.follow &&
+               (member = find_rule(rule.expr.last)) &&
                member.kind == :rule
-              depth {debug("FF.5") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
-              follows += member.add_follow(rule.first)
+              debug("FF.7") {"(#{ittr}) add follow #{rule.follow.inspect} to #{member.sym}"}
+              follows += member.add_follow(rule.follow)
             end
-          end
-          # Follows of a rule are also follows of the last production in the rule
-          if rule.seq? && rule.follow &&
-             (member = find_rule(rule.expr.last)) &&
-             member.kind == :rule
-            depth {debug("FF.6") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
-            follows += member.add_follow(rule.follow)
-          end
-          # For alts, anything that follows the rule follows each member of the rule
-          if rule.alt? && rule.follow
-            rule.expr[1..-1].map {|s| find_rule(s)}.each do |mem|
-              if mem && mem.kind == :rule
-                depth {debug("FF.7") {"add follow #{rule.first.inspect} to #{mem.sym}"}}
-                follows += mem.add_follow(rule.follow)
+            # For alts, anything that follows the rule follows each member of the rule
+            if rule.alt? && rule.follow
+              rule.expr[1..-1].map {|s| find_rule(s)}.each do |mem|
+                if mem && mem.kind == :rule
+                  debug("FF.8") {"(#{ittr}) add follow #{rule.first.inspect} to #{mem.sym}"}
+                  follows += mem.add_follow(rule.follow)
+                end
               end
             end
           end
-        end
-        progress("first_follow") {"firsts #{firsts}, follows #{follows}"}
-      end while (firsts + follows) > 0
+          progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
+          ittr += 1
+        end while (firsts + follows) > 0
+      end
     end
     ##
@@ -183,19 +205,19 @@ module EBNF
       @first = ast.
         select(&:first).
         inject({}) {|memo, r|
-          memo[r.sym] = r.first.reject {|t| t == :_eps};
+          memo[r.sym] = r.first if r.first
           memo
         }
       @follow = ast.
         select(&:follow).
         inject({}) {|memo, r|
-          memo[r.sym] = r.first.reject {|t| t == :_eps};
+          memo[r.sym] = r.first if r.first
           memo
         }
       @terminals = ast.map do |r|
         (r.first || []) + (r.follow || [])
       end.flatten.uniq
-      @terminals = (@terminals - [:_eps, :_eof, :_empty]).sort_by(&:to_s)
+      @terminals = (@terminals - [:_eps, :_eof, :_empty]).sort_by(&:inspect)
       @branch = {}
       @already = []
@@ -228,14 +250,14 @@ module EBNF
       if table.is_a?(Hash)
         io.puts "#{ind0}#{name} = {"
-        table.keys.sort_by(&:to_s).each do |prod|
+        table.keys.sort_by(&:inspect).each do |prod|
           case table[prod]
           when Array
             list = table[prod].map(&:inspect).join(",\n#{ind2}")
             io.puts "#{ind1}#{prod.inspect} => [\n#{ind2}#{list}],"
           when Hash
             io.puts "#{ind1}#{prod.inspect} => {"
-            table[prod].keys.sort_by(&:to_s).each do |term|
+            table[prod].keys.sort_by(&:inspect).each do |term|
               list = table[prod][term].map(&:inspect).join(", ")
               io.puts "#{ind2}#{term.inspect} => [#{list}],"
             end
@@ -247,7 +269,7 @@ module EBNF
         io.puts "#{ind0}}.freeze\n"
       else
         io.puts "#{ind0}#{name} = [\n#{ind1}" +
-          table.sort_by(&:to_s).map(&:inspect).join(",\n#{ind1}") +
+          table.sort_by(&:inspect).map(&:inspect).join(",\n#{ind1}") +
           "\n#{ind0}].freeze\n"
       end
     end

data/lib/ebnf/ll1/lexer.rb CHANGED

@@ -71,13 +71,16 @@ module EBNF::LL1
     # @return [String]
     # @see    http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
     def self.unescape_codepoints(string)
+      string = string.dup
+      string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
       # Decode \uXXXX and \UXXXXXXXX code points:
       string = string.gsub(UCHAR) do |c|
         s = [(c[2..-1]).hex].pack('U*')
         s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
       end
-      string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)      # Ruby 1.9+
+      string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
       string
     end
@@ -114,26 +117,26 @@ module EBNF::LL1
     # Initializes a new lexer instance.
     #
     # @param  [String, #to_s]                 input
-    # @param  [Array<Array<Symbol, Regexp>>]  terminals
+    # @param  [Array<Array<Symbol, Regexp>, Terminal>]  terminals
     #   Array of symbol, regexp pairs used to match terminals.
     #   If the symbol is nil, it defines a Regexp to match string terminals.
     # @param  [Hash{Symbol => Object}]        options
     # @option options [Regexp]                :whitespace (WS)
-    # @option options [Regexp]                :comment (COMMENT)
-    # @option options [Array<Symbol>]         :unescape_terms ([])
     #   Regular expression matching the beginning of terminals that may cross newlines
+    # @option options [Regexp]                :comment (COMMENT)
     def initialize(input = nil, terminals = nil, options = {})
       @options        = options.dup
       @whitespace     = @options[:whitespace]     || WS
       @comment        = @options[:comment]        || COMMENT
-      @unescape_terms = @options[:unescape_terms] || []
-      @terminals      = terminals
+      @terminals      = terminals.map do |term|
+        term.is_a?(Array) ? Terminal.new(*term) : term
+      end
       raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
       @lineno = 1
       @scanner = Scanner.new(input) do |string|
-        string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)      # Ruby 1.9+
+        string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
         string
       end
     end
@@ -209,7 +212,7 @@ module EBNF::LL1
         token
       end
     rescue ArgumentError, Encoding::CompatibilityError => e
-      raise Error.new("#{e.message} on line #{lineno + 1}",
+      raise Error.new(e.message,
         :input => (scanner.rest[0..100] rescue '??'), :token => lexme, :lineno => lineno)
     rescue Error
       raise
@@ -248,13 +251,6 @@ module EBNF::LL1
     # @return [StringScanner]
     attr_reader :scanner
-    # Perform string and codepoint unescaping
-    # @param [String] string
-    # @return [String]
-    def unescape(string)
-      self.class.unescape_string(self.class.unescape_codepoints(string))
-    end
     ##
     # Skip whitespace or comments, as defined through input options or defaults
     def skip_whitespace
@@ -270,22 +266,80 @@ module EBNF::LL1
     end
     ##
-    # Return the matched token
+    # Return the matched token.
+    #
+    # If the token was matched with a case-insensitive regexp,
+    # track this with the resulting {Token}, so that comparisons
+    # with that token are also case insensitive
     #
     # @return [Token]
     def match_token
-      @terminals.each do |(term, regexp)|
-        #STDERR.puts "match[#{term}] #{scanner.rest[0..100].inspect} against #{regexp.inspect}" #if term == :STRING_LITERAL_SINGLE_QUOTE
-        if matched = scanner.scan(regexp)
-          matched = unescape(matched) if @unescape_terms.include?(term)
-          #STDERR.puts "  unescape? #{@unescape_terms.include?(term).inspect}"
-          #STDERR.puts "  matched #{term.inspect}: #{matched.inspect}"
-          return token(term, matched)
+      @terminals.each do |term|
+        #STDERR.puts "match[#{term.type}] #{scanner.rest[0..100].inspect} against #{term.regexp.inspect}" #if term.type == :STRING_LITERAL_SINGLE_QUOTE
+        if matched = scanner.scan(term.regexp)
+          #STDERR.puts "  matched #{term.type.inspect}: #{matched.inspect}"
+          return token(term.type, term.canonicalize(matched))
         end
       end
       nil
     end
+    # Terminal class, representing the terminal identifier and
+    # matching regular expression. Optionally, a Terminal may include
+    # a map to turn case-insensitively matched terminals into their
+    # canonical form
+    class Terminal
+      attr_reader :type
+      attr_reader :regexp
+      # @param [Symbol, nil] type
+      # @param [Regexp] regexp
+      # @param [Hash{Symbol => Object}] options
+      # @option options [Hash{String => String}] :map ({})
+      #   A mapping from terminals, in lower-case form, to
+      #   their canonical value
+      # @option options [Boolean] :unescape
+      #   Cause strings and codepoints to be unescaped.
+      def initialize(type, regexp, options = {})
+        @type, @regexp, @options = type, regexp, options
+        @map = options.fetch(:map, {})
+      end
+      # Map a terminal to it's canonical form. If there is no
+      # map, `value` is returned. `value` is unescaped if there
+      # is no canonical mapping, and the `:unescape` option is set.
+      #
+      # @param [String] value
+      #   value to canonicalize
+      # @return [String]
+      def canonicalize(value)
+        @map.fetch(value.downcase, unescape(value))
+      end
+      def ==(other)
+        case other
+        when Array
+          @type == other.first && @regexp == other.last
+        when Terminal
+          @type == other.type && @regexp == other.regexp
+        end
+      end
+      protected
+      # Perform string and codepoint unescaping if defined for this terminal
+      # @param [String] string
+      # @return [String]
+      def unescape(string)
+        if @options[:unescape]
+          Lexer.unescape_string(Lexer.unescape_codepoints(string))
+        else
+          string
+        end
+      end
+    end
   protected
     ##
@@ -298,9 +352,10 @@ module EBNF::LL1
     # @param  [Symbol] type
     # @param  [String] value
     #   Scanner instance with access to matched groups
+    # @param  [Hash{Symbol => Object}] options
     # @return [Token]
-    def token(type, value)
-      Token.new(type, value, :lineno => lineno)
+    def token(type, value, options = {})
+      Token.new(type, value, options.merge(:lineno => lineno))
     end
     ##
@@ -313,19 +368,6 @@ module EBNF::LL1
     #
     # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
     class Token
-      ##
-      # Initializes a new token instance.
-      #
-      # @param  [Symbol]                 type
-      # @param  [String]                 value
-      # @param  [Hash{Symbol => Object}] options
-      # @option options [Integer]        :lineno (nil)
-      def initialize(type, value, options = {})
-        @type, @value = (type ? type.to_s.to_sym : nil), value
-        @options = options.dup
-        @lineno  = @options.delete(:lineno)
-      end
       ##
       # The token's symbol type.
       #
@@ -350,6 +392,20 @@ module EBNF::LL1
       # @return [Hash]
       attr_reader :options
+      ##
+      # Initializes a new token instance.
+      #
+      # @param  [Symbol]                 type
+      # @param  [String]                 value
+      # @param  [Hash{Symbol => Object}] options
+      # @option options [Integer]        :lineno (nil)
+      def initialize(type, value, options = {})
+        @type = type.to_s.to_sym if type
+        @value = value.to_s
+        @options = options.dup
+        @lineno  = @options.delete(:lineno)
+      end
       ##
       # Returns the attribute named by `key`.
       #
@@ -378,8 +434,10 @@ module EBNF::LL1
       # @return [Boolean]
       def ===(value)
         case value
-          when Symbol   then value == @type
-          when ::String then value.to_s == @value.to_s
+          when Symbol
+            value == @type
+          when ::String
+            @value == (@options[:case_insensitive] ? value.to_s.downcase : value.to_s)
           else value == @value
         end
       end