RubyGems - babel_bridge - Versions diffs - 0.3.1 → 0.4.0 - Mend

babel_bridge 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

data/babel_bridge.gemspec +18 -15
data/examples/turing/turing++.rb +154 -0
data/examples/turing/turing_demo.rb +19 -19
data/lib/nodes.rb +2 -1
data/lib/nodes/empty_node.rb +11 -1
data/lib/nodes/many_node.rb +37 -48
data/lib/nodes/node.rb +26 -32
data/lib/nodes/non_terminal_node.rb +23 -90
data/lib/nodes/rule_node.rb +102 -0
data/lib/nodes/terminal_node.rb +6 -18
data/lib/parser.rb +45 -31
data/lib/pattern_element.rb +34 -47
data/lib/rule.rb +3 -3
data/lib/shell.rb +35 -16
data/lib/version.rb +2 -2
data/spec/bb_spec.rb +164 -0
data/test/test_bb.rb +12 -17
metadata +44 -5

data/lib/nodes/non_terminal_node.rb CHANGED

@@ -5,113 +5,46 @@ http://babel-bridge.rubyforge.org/
 =end
 module BabelBridge
-# non-terminal node
+# rule node
 # subclassed automatically by parser.rule for each unique non-terminal
 class NonTerminalNode < Node
-  attr_accessor :matches,:match_names
-  def match_names
-    @match_names ||= []
-  end
-  def matches
-    @matches ||= []
-  end
-  # length returns the number of sub-nodes
-  def length
-    matches.length
-  end
-  def matches_by_name
-    @matches_by_name||= begin
-      raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
-      mbn={}
-      mn=match_names
-      matches.each_with_index do |match,i|
-        name=mn[i]
-        next unless name
-        if current=mbn[name] # name already used
-          # convert to MultiMatchesArray if not already
-          mbn[name]=MultiMatchesArray.new([current]) if !current.kind_of? MultiMatchesArray
-          # add to array
-          mbn[name]<<match
-        else
-          mbn[name]=match
-        end
-      end
-      mbn
-    end
-  end
-  def inspect(options={})
-    return "#{self.class}" if matches.length==0
-    matches_inspected=matches.collect{|a|a.inspect(options)}.compact
-    if matches_inspected.length==0 then nil
-    elsif matches_inspected.length==1
-      m=matches_inspected[0]
-      ret="#{self.class} > "+matches_inspected[0]
-      if options[:simple]
-        ret=if m["\n"] then m
-        else
-          # just show the first and last nodes in the chain
-          ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
-        end
-      end
-      ret
+  def trailing_whitespace_range
+    if matches.length == 0
+      preceding_whitespace_range || (0..-1)
     else
-      (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n  ")
+      matches[-1].trailing_whitespace_range
     end
   end
-  #********************
-  # alter methods
-  #********************
-  def reset_matches_by_name
-    @matches_by_name=nil
+  def update_match_length
+    m = matches[-1]
+    @match_length = m ? m.offset_after_match - offset : 0
   end
-  # defines where to forward missing methods to; override for custom behavior
-  def forward_to(method_name)
-    matches.each {|m| return m if m.respond_to?(method_name)}
-    nil
+  #*****************************
+  # Array interface implementation
+  #*****************************
+  def matches
+    @matches ||= []
   end
-  def respond_to?(method_name)
-    super ||
-    matches_by_name[method_name] ||
-    forward_to(method_name)
+  include Enumerable
+  def length
+    matches.length
   end
-  def method_missing(method_name, *args)  #method_name is a symbol
-    unless matches_by_name.has_key? method_name
-      if f=forward_to(method_name)
-        return f.send(method_name,*args)
-      end
-      raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
-    end
-    case ret=matches_by_name[method_name]
-    when EmptyNode then nil
-    else ret
-    end
+  def <<(node)
+    matches<<node
+    update_match_length
   end
-  # adds a match with name (optional)
-  # returns self so you can chain add_match or concat methods
-  def add_match(match,name=nil)
-    reset_matches_by_name
-    matches<<match
-    match_names<<name
-    self.match_length=match.next - offset
-    self
+  def [](i)
+    matches[i]
   end
-  # concatinate all matches from another node
-  # returns self so you can chain add_match or concat methods
-  def concat(node)
-    names=node.match_names
-    node.matches.each_with_index { |match,i| add_match(match,names[i])}
-    self
+  def each(&block)
+    matches.each(&block)
   end
 end
 end

data/lib/nodes/rule_node.rb ADDED

@@ -0,0 +1,102 @@
+=begin
+Copyright 2011 Shane Brinkman-Davis
+See README for licence information.
+http://babel-bridge.rubyforge.org/
+=end
+module BabelBridge
+# rule node
+# subclassed automatically by parser.rule for each unique non-terminal
+class RuleNode < NonTerminalNode
+  def match_names
+    @match_names ||= []
+  end
+  def matches_by_name
+    @matches_by_name||= begin
+      raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
+      mbn={}
+      mn=match_names
+      matches.each_with_index do |match,i|
+        name=mn[i]
+        next unless name
+        if current=mbn[name] # name already used
+          # convert to MultiMatchesArray if not already
+          mbn[name]=MultiMatchesArray.new([current]) if !current.kind_of? MultiMatchesArray
+          # add to array
+          mbn[name]<<match
+        else
+          mbn[name]=match
+        end
+      end
+      mbn
+    end
+  end
+  def inspect(options={})
+    return "#{self.class}" if matches.length==0
+    matches_inspected=matches.collect{|a|a.inspect(options)}.compact
+    if matches_inspected.length==0 then nil
+    elsif matches_inspected.length==1
+      m=matches_inspected[0]
+      ret="#{self.class} > "+matches_inspected[0]
+      if options[:simple]
+        ret=if m["\n"] then m
+        else
+          # just show the first and last nodes in the chain
+          ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
+        end
+      end
+      ret
+    else
+      (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n  ")
+    end
+  end
+  #********************
+  # alter methods
+  #********************
+  def reset_matches_by_name
+    @matches_by_name=nil
+  end
+  # defines where to forward missing methods to; override for custom behavior
+  def forward_to(method_name)
+    matches.each {|m| return m if m.respond_to?(method_name)}
+    nil
+  end
+  def respond_to?(method_name)
+    super ||
+    matches_by_name[method_name] ||
+    forward_to(method_name)
+  end
+  def method_missing(method_name, *args)  #method_name is a symbol
+    unless matches_by_name.has_key? method_name
+      if f=forward_to(method_name)
+        return f.send(method_name,*args)
+      end
+      match_path = [self]
+      while match_path[-1].matches.length==1
+        match_path<<match_path[-1].matches[0]
+      end
+      raise "#{match_path.collect{|m|m.class}.join(' > ')}: no methods or named pattern elements match: #{method_name.inspect}"
+    end
+    case ret=matches_by_name[method_name]
+    when EmptyNode then nil
+    else ret
+    end
+  end
+  # adds a match with name (optional)
+  def add_match(match,name=nil)
+    reset_matches_by_name
+    matches<<match
+    match_names<<name
+    update_match_length
+  end
+end
+end

data/lib/nodes/terminal_node.rb CHANGED

@@ -8,25 +8,12 @@ module BabelBridge
 # used for String and Regexp PatternElements
 # not subclassed
 class TerminalNode < Node
-  attr_accessor :pattern
-  def initialize(parent,match_length,pattern)
+  attr_accessor :pattern, :trailing_whitespace_offset
+  def initialize(parent,range,pattern)
     node_init(parent)
-    self.match_length=match_length
-    self.pattern=pattern
-    @ignore_whitespace = parser.ignore_whitespace?
-    consume_trailing_whitespace if @ignore_whitespace
-  end
-  def consume_trailing_whitespace
-    offset = self.next
-    if src[offset..-1].index(/\A\s*/)==0
-      range = $~.offset(0)
-      self.match_length += range[1]-range[0]
-    end
-  end
-  def to_s
-    @ignore_whitespace ? text.strip : text
+    self.offset = range.min
+    self.match_length = range.max-range.min
+    self.pattern = pattern
   end
   def inspect(options={})
@@ -34,5 +21,6 @@ class TerminalNode < Node
   end
   def matches; [self]; end
 end
 end

data/lib/parser.rb CHANGED

@@ -8,7 +8,7 @@ class Parser
   # These methods are used in the creation of a Parser Sub-Class to define
   # its grammar
   class <<self
-    attr_accessor :rules,:module_name,:root_rule
+    attr_accessor :rules, :module_name, :root_rule, :whitespace_regexp
     def rules
       @rules||={}
@@ -32,12 +32,12 @@ class Parser
     #   MyParser.rule :name, to_match1, to_match2, etc...
     #
     # The first rule added is the root-rule for the parser.
-    # You can override by:
+    # You can override by:
     #   class MyParser < BabelBridge::Parser
     #     root_rule = :new_root_rool
     #   end
     #
-    # The block is executed in the context of the rule-varient's node type, a subclass of: NonTerminalNode
+    # The block is executed in the context of the rule-varient's node type, a subclass of: RuleNode
     # This allows you to add whatever functionality you want to a your nodes in the final parse tree.
     # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
     def rule(name,*pattern,&block)
@@ -52,7 +52,7 @@ class Parser
     #       typical example is the "**" exponentiation operator which should be evaluated right-to-left.
     def binary_operators_rule(name,elements_pattern,operators,options={},&block)
       right_operators = options[:right_operators]
-      rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
+      rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
         self.class_eval &block if block
         class <<self
           attr_accessor :operators_from_rule, :right_operators
@@ -98,17 +98,13 @@ class Parser
       @root_rule=rule
     end
-    def ignore_whitespace
-      @ignore_whitespace = true
-    end
-    def ignore_whitespace?
-      @ignore_whitespace
+    def ignore_whitespace(regexp = /\s*/)
+      @whitespace_regexp = /\A(#{regexp})?/
     end
   end
-  def ignore_whitespace?
-    self.class.ignore_whitespace?
+  def whitespace_regexp
+    self.class.whitespace_regexp || /\A/
   end
   #*********************************************
@@ -136,14 +132,18 @@ class Parser
   #
   #*********************************************
   class <<self
-    def many(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
-    def many?(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
-    def many!(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
+    def many(m,delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter) end
+    def many?(m,delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter) end
+    def many!(m,delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter) end
     def match?(*args) PatternElementHash.new.optionally.match(*args) end
     def match(*args) PatternElementHash.new.match(*args) end
     def match!(*args) PatternElementHash.new.dont.match(*args) end
+    # if ignore_whitespace is used, after every TerminalNode, all whitespace is consumed. Wrapping include_whitespace around a pattern-element allows you to explicitly match the preceeding whitespace for that one element.
+    # NOTE: you can ALWAYS explicitly match any trailing whitespace
+    def include_whitespace(*args) PatternElementHash.new.include_whitespace.match(*args) end
     def dont; PatternElementHash.new.dont end
     def optionally; PatternElementHash.new.optionally end
     def could; PatternElementHash.new.could end
@@ -167,10 +167,24 @@ class Parser
   end
   def reset_parser_tracking
-    self.src=nil
-    self.failure_index=0
-    self.expecting_list={}
-    self.parse_cache={}
+    @parsing_did_not_match_entire_input = false
+    @src = nil
+    @failure_index = 0
+    @expecting_list = {}
+    @parse_cache = {}
+    @white_space_ranges = {}
+  end
+  # memoizing whitespace parser
+  def white_space_range(start)
+    @white_space_ranges[start]||=begin
+      # src should always be a string - unless this is called AFTER parsing is done. Currently this can happen with the way ManyNode handles .match_length and .next
+      # We should be able to just use:
+      #   src[start..-1].index whitespace_regexp
+      ((src||"")[start..-1]||"").index whitespace_regexp
+      r = $~.offset 0
+      start+r[0] .. start+r[1]-1
+    end
   end
   def cached(rule_class,offset)
@@ -187,12 +201,10 @@ class Parser
   def log_parsing_failure(index,expecting)
     if index>failure_index
-      key=expecting[:pattern]
-      @expecting_list={key=>expecting}
+      @expecting_list = {expecting[:pattern] => expecting}
       @failure_index = index
     elsif index == failure_index
-      key=expecting[:pattern]
-      self.expecting_list[key]=expecting
+      @expecting_list[expecting[:pattern]] = expecting
     else
       # ignored
     end
@@ -208,9 +220,11 @@ class Parser
     unless rule
       if ret
         if ret.next<src.length # parse only succeeds if the whole input is matched
-          @parsing_did_not_match_entire_input=true
-          @failure_index=ret.next
-          @failed_parse = ret
+          if ret.next >= @failure_index
+            @parsing_did_not_match_entire_input=true
+            @failure_index = ret.next
+            @failed_parse = ret
+          end
           ret=nil
         else
           reset_parser_tracking
@@ -240,7 +254,7 @@ class Parser
   def nodes_interesting_parse_path(node)
     path = node.parent_list
     path << node
-    path.pop while path[-1] && !path[-1].kind_of?(NonTerminalNode)
+    path.pop while path[-1] && !path[-1].kind_of?(RuleNode)
     path
   end
@@ -277,7 +291,7 @@ ENDTXT
   end
   #option: :verbose => true
-  def parser_failure_info(options={})
+  def parser_failure_info(options={})
     return unless src
     verbose = options[:verbose]
     bracketing_lines=5
@@ -295,10 +309,10 @@ ENDTXT
       ret+="\nParser did not match entire input.\n"
       if verbose
         ret+="\nParsed:\n#{Tools::indent failed_parse.inspect}\n"
-      end
+      end
     end
     ret+expecting_output
   end
 end
-end
+end

data/lib/pattern_element.rb CHANGED

@@ -21,7 +21,7 @@ end
 #   :optional
 class PatternElement
   attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
-  attr_accessor :match,:rule_variant
+  attr_accessor :match,:rule_variant,:include_whitespace
   #match can be:
   # true, Hash, Symbol, String, Regexp
@@ -56,7 +56,7 @@ class PatternElement
     if !match && terminal
       # log failures on Terminal patterns for debug output if overall parse fails
-      parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
+      parent_node.parser.log_parsing_failure(match_start_index(parent_node),:pattern=>self.match,:node=>parent_node)
     end
     # return match
@@ -83,24 +83,29 @@ class PatternElement
     self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
   end
+  def match_start_index(parent_node)
+    if include_whitespace
+      parent_node.trailing_whitespace_range.first
+    else
+      parent_node.next
+    end
+  end
   # initialize PatternElement as a parser that matches exactly the string specified
   def init_string(string)
-    self.parser=lambda do |parent_node|
-      if parent_node.src[parent_node.next,string.length]==string
-        TerminalNode.new(parent_node,string.length,string)
-      end
-    end
-    self.terminal=true
+    init_regex Regexp.escape(string)
   end
   # initialize PatternElement as a parser that matches the given regex
   def init_regex(regex)
     optimized_regex=/\A#{regex}/  # anchor the search
     self.parser=lambda do |parent_node|
-      offset=parent_node.next
+      offset = match_start_index(parent_node)
       if parent_node.src[offset..-1].index(optimized_regex)==0
         range=$~.offset(0)
-        TerminalNode.new(parent_node,range[1]-range[0],regex)
+        range = (range.min+offset)..(range.max+offset)
+        TerminalNode.new(parent_node,range,regex)
       end
     end
     self.terminal=true
@@ -138,66 +143,48 @@ class PatternElement
     self.optional ||= hash[:optional] || hash[:optionally]
     self.could_match ||= hash[:could]
     self.negative ||= hash[:dont]
+    self.include_whitespace ||= hash[:include_whitespace]
   end
   # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
   def init_many(hash)
     # generate single_parser
     init hash[:many]
-    single_parser=parser
+    single_parser = parser
     # generate delimiter_pattern_element
-    delimiter_pattern_element= hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
-    # generate post_delimiter_element
-    post_delimiter_element=hash[:post_delimiter] && case hash[:post_delimiter]
-    when TrueClass then delimiter_pattern_element
-    else PatternElement.new(hash[:post_delimiter],rule_variant)
-    end
+    delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
     # generate many-parser
-    self.parser= lambda do |parent_node|
-      last_match=single_parser.call(parent_node)
-      many_node=ManyNode.new(parent_node)
+    self.parser = lambda do |parent_node|
+      many_node = ManyNode.new parent_node
       if delimiter_pattern_element
         # delimited matching
-        while last_match
-          many_node<<last_match
+        while true
+          #match primary
+          match = single_parser.call many_node
+          break unless match
+          many_node << match
           #match delimiter
-          delimiter_match = delimiter_pattern_element.parse(many_node)
+          delimiter_match = delimiter_pattern_element.parse many_node
           break unless delimiter_match
-          many_node.delimiter_matches<<delimiter_match
-          #match next
-          last_match=single_parser.call(many_node)
+          many_node << delimiter_match
         end
+        many_node.separate_delimiter_matches
       else
         # not delimited matching
-        while last_match
-          many_node<<last_match
-          last_match=single_parser.call(many_node)
+        while true
+          match = single_parser.call many_node
+          break unless match
+          many_node << match
         end
       end
       # success only if we have at least one match
-      return nil unless many_node.length>0
-      # pop the post delimiter matched with delimiter_pattern_element
-      many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
-      # If post_delimiter is requested, many_node and delimiter_matches will be the same length
-      if post_delimiter_element
-        post_delimiter_match=post_delimiter_element.parse(many_node)
-        # fail if post_delimiter didn't match
-        return nil unless post_delimiter_match
-        many_node.delimiter_matches<<post_delimiter_match
-      end
-      many_node
+      many_node.length>0 && many_node
     end
   end
 end
-end
+end