RubyGems - kanocc - Versions diffs - 0.1.0 → 0.2.0 - Mend

kanocc 0.1.0 → 0.2.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (13)

data/examples/bind.rb +26 -0
data/examples/calculator.rb +19 -15
data/examples/doc_calc.rb +42 -0
data/examples/minipascalsyntax.html +371 -0
data/examples/ruby_quiz_78.rb +12 -11
data/lib/kanocc.rb +73 -102
data/lib/kanocc/earley.rb +262 -217
data/lib/kanocc/grammar_rule.rb +7 -21
data/lib/kanocc/nonterminal.rb +67 -65
data/lib/kanocc/scanner.rb +168 -85
data/lib/kanocc/token.rb +24 -0
data/lib/todo +2 -3
metadata +13 -9

data/lib/kanocc/grammar_rule.rb CHANGED

@@ -15,36 +15,22 @@
 #  You should have received a copy of the GNU General Public License,
 #  version 3 along with Kanocc.  If not, see <http://www.gnu.org/licenses/>.
 #
+require 'rubygems'
 module Kanocc
   class GrammarRule
-    attr_reader :lhs, :rhs, :method, :operator_prec
-    attr_accessor :prec
+    attr_reader :lhs, :rhs, :method
+    attr_accessor :precedence, :derives_right
     def initialize(lhs, rhs, method)
       @lhs = lhs
       @rhs = rhs
       @method = method
-      if (operator =rhs.find {|s| s.is_a?(String) or s.is_a?(Token)})
-        @operator_prec = Nonterminal.operator_precedence(operator)
-      end
-      @prec = 0
-      @logger.debug("#{lhs} --> #{rhs.map {|gs| gs.is_a?(Symbol) ? gs.to_s : gs}.join}, #prec = #{@prec}, method = #{method}") unless not @logger
-    end
-    def operator_prec
-      unless @operator_prec_calculated
-          operator = rhs.find {|s| s.is_a?(String) or s.is_a?(Token)}
-          if operator
-            @operator_prec = lhs.operator_precedence(operator)
-          end
-          @operator_prec_calculated = true
-      end
-      @operator_prec
-    end
+      @prededence = 0
+      @logger.debug("#{lhs} --> #{rhs.map {|gs| gs.is_a?(Symbol) ? gs.to_s : gs}.join}, #prec = #{@prec}, method = #{method}") if @logger
+    end
     def inspect
       return lhs.inspect + " ::= " + rhs.map{|gs| gs.inspect}.join(" ")
     end
   end
 end

data/lib/kanocc/nonterminal.rb CHANGED

@@ -1,4 +1,4 @@
-#
+#
 #  Copyright 2008 Christian Surlykke
 #
 #  This file is part of Kanocc.
@@ -20,46 +20,12 @@ module Kanocc
   class Nonterminal
     @@rules = Hash.new
     @@last_rule = Hash.new
-    @@derives_right = Hash.new
-    @@operator_precedence = Hash.new
+    @@bind_right = Hash.new
     @@method_names = Hash.new
     Left = 1
     Right = 2
-    def Nonterminal.derives_right
-      @@derives_right[self] = true
-    end
-    def Nonterminal.derives_right?
-      return @@derives_right[self]
-    end
-    def Nonterminal.set_operator_precedence(operator, precedence)
-      raise "Precedence must be an integer" unless precedence.class == Fixnum
-      @@operator_precedence[self] ||= Hash.new
-      if is_an_operator?(operator)
-        @@operator_precedence[self][operator] = precedence
-      elsif is_an_array_of_operators(operator)
-        operator.each {|o| @@operator_precedence[self][o] = precedence}
-      else
-        raise "Operator must be a string, a token or an array of those"
-      end
-    end
-    def Nonterminal.operator_precedence(operator)
-      (@@operator_precedence[self] and @@operator_precedence[self][operator]) or 0
-    end
-    def Nonterminal.is_an_array_of_operators(arr)
-       arr.is_a?(Array) and
-       arr.collect{|o| is_an_operator?(o)}.inject {|b1, b2| b1 and b2 }
-    end
-    def Nonterminal.is_an_operator?(operator)
-        operator.is_a?(String) or operator.is_a?(Token)
-    end
     def Nonterminal.rules
       rules = @@rules[self]
       return rules ? rules : []
@@ -97,36 +63,52 @@ module Kanocc
     def Nonterminal.zm(symbols, sep = nil)
       list_class = new_list_class
+      non_empty_list_class = new_list_class
       list_class.rule() {@elements = []}
-      list_class.rule(om(symbols, sep)) {@elements = @rhs[0].elements}
+      list_class.rule(non_empty_list_class) {@elements = @rhs[0].elements}
+      non_empty_list_class.rule(*symbols) {@elements = @rhs}
+      if sep
+        non_empty_list_class.rule(non_empty_list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
+      else
+	non_empty_list_class.rule(non_empty_list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
+      end
       return list_class
     end
     def Nonterminal.om(symbols, sep = nil)
       symbols = [symbols] unless symbols.is_a? Array
-      list_class = new_list_class
-      list_class.rule(*symbols) {@elements = @rhs}
+      non_empty_list_class = new_list_class
+      non_empty_list_class.rule(*symbols) {@elements = @rhs}
       if sep
-        list_class.rule(list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
+        non_empty_list_class.rule(non_empty_list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
       else
-        list_class.rule(list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
+        non_empty_list_class.rule(non_empty_list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
       end
-      return list_class
+      return non_empty_list_class
     end
+    def Nonterminal.zo(symbols)
+      zero_or_one_class = new_list_class
+      zero_or_one_class.rule(*symbols) {@elements = @rhs}
+      zero_or_one_class.rule() {@elements = []}
+    end
     @@listClassNumber = 0
     def Nonterminal.new_list_class
-      list_class = Class.new(List)
+      list_class = Class.new(AnonymousNonterminal)
       @@listClassNumber += 1
       def list_class.inspect
         return "anonList_#{@@listClassNumber}"
       end
       return list_class
     end
     def Nonterminal.generate_method_name(*args)
-      method_name = self.name + " --> " + args.map {|a| a.inspect}.join(' ')
+      class_name = self.name || ""
+      method_name = class_name + " --> " + args.map {|a| a.inspect}.join(' ')
       @@method_names[self] ||= []
       i = 1
       while @@method_names[self].member?(method_name) do
@@ -135,10 +117,16 @@ module Kanocc
       @@method_names[self].push(method_name)
       return method_name
     end
-    def Nonterminal.prec(p)
-      raise "Call to prec not preceded by rule" unless @@last_rule[self]
-      @@last_rule[self].prec = p
+    def Nonterminal.precedence(prec)
+      raise "Given rule precedence was not a Numeric" unless prec.is_a? Numeric
+      raise "Call to precedence must be preceded by a rule" unless @@last_rule[self]
+      @@last_rule[self].precedence = prec
+    end
+    def Nonterminal.derives_right
+      raise "Call to derives_right must be preceded by a rule" unless @@last_rule[self]
+      @@last_rule[self].derives_right = true
     end
     def Nonterminal.show_method_names
@@ -148,29 +136,43 @@ module Kanocc
     def inspect
       self.class.name
     end
+    def Nonterminal.show_rules
+      rules.each do |rule|
+	puts rule.inspect
+      end
+    end
+    def Nonterminal.show_all_rules
+       queue = [self]
+       done = {}
+       i = 0
+       while (i < queue.length)
+	 queue[i].show_rules
+	 done[queue[i]] = true
+         queue[i].rules.each do |rule|
+	   rule.rhs.each do |gs|
+	     if gs.respond_to?(:rules) and not done[gs]
+	       queue.push(gs)
+             end
+           end
+         end
+	 i += 1
+       end
+    end
   end
-  class List < Nonterminal
+  class AnonymousNonterminal < Nonterminal
     attr_reader :elements
-        protected
-    # Assumes @rhs[0] is a Kanocc::List and that rhs.length > 1
-    def collect(strip_separator = false)
-      @elements = @rhs[0].elements
-      if strip_separator
-        @elements = @elements + @rhs[2..@rhs.length]
-      else
-        @elements = @elements + @rhs[1..@rhs.length]
-      end
-    end
   end
   class Error < Nonterminal
-    attr_reader :text
-    def initialize
-      super
-      @text = "FIXME"
+    def str
+      "hey" # FIXME
     end
   end
+  class StartSymbol < Nonterminal
+  end
 end

data/lib/kanocc/scanner.rb CHANGED

@@ -2,8 +2,7 @@
 #  Copyright 2008 Christian Surlykke
 #
 #  This file is part of Kanocc.
-#require 'logger'
+#
 #  Kanocc is free software: you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License, version 3
 #  as published by the Free Software Foundation.
@@ -19,132 +18,216 @@
 require 'stringio'
 require 'strscan'
 require "logger"
+require 'rubygems'
 module Kanocc
   class Scanner
-    attr_accessor :logger
+    attr_accessor :logger, :current_match, :input
     def initialize(init = {})
-      if init[:logger]
-        @logger = init[:logger]
-      else
+      @logger = init[:logger]
+      unless @logger
         @logger = Logger.new(STDOUT)
         @logger.level = Logger::WARN
       end
       @ws_regs = [/\s/]
-      @recognizables = []
-      @regexps = []
+      @terminals = []
+      @string_patterns = {}
+      @input = ""
+      @stringScanner = StringScanner.new(@input)
+      @current_match = nil
     end
     def set_whitespace(*ws_regs)
-      @ws_regs = []
-      ws_regs.each do |ws_reg|
-        unless ws_reg.is_a?(Regexp)
-          raise "set_whitespace must be given a list of Regexp's"
-        end
-        @ws_regs << ws_reg
-      end
+      raise "set_whitespace must be given a list of Regexp's" \
+	if ws_regs.find {|ws_reg| not ws_reg.is_a?(RegExp)}
+      @ws_regs = ws_regs
     end
-    def set_recognized(*rec)
+    def set_recognized(*recognizables)
       @recognizables = []
-      rec.each do |r|
-        if r.class == Class and r.ancestors.include?(Token)
-	  @recognizables = @recognizables + r.patterns
-        elsif r.is_a? String
-          @recognizables << {:literal => r,
-	                     :regexp  => Regexp.new(Regexp.escape(r))}
-        else
-          raise "set_recognized must be given a list of Tokens classes and or strings"
-        end
+      @literals = []
+      @tokens = []
+      @string_patterns = {}
+      recognizables.each do |recognizable|
+	unless (recognizable.class == Class and recognizable.ancestors.include?(Token)) or
+	       recognizable.is_a?(String)
+          raise "set_recognized must be given a list of Tokens classes" +
+	        "and or strings, got #{recognizable.inspect}"
+	end
+	@recognizables << recognizable
+	if recognizable.is_a? String
+	  @string_patterns[recognizable] = Regexp.new(Regexp.escape(recognizable))
+	  @literals << recognizable
+	else
+	  @tokens << recognizable
+	end
       end
     end
-    def each_token(input)
-      if input.is_a?(IO)
-        @input = input.readlines.join("")
-      elsif input.is_a?(String)
-        @input = input
-      else
-        raise "Input must be a string or an IO object"
-      end
+    def input=(input)
+      @input = input
       @stringScanner = StringScanner.new(@input)
-      while match = do_match do
-        if match[:matches]
-          @logger.debug("Yielding #{match}")
-          yield(match)
-        end
-        @stringScanner.pos += match[:length]
-      end
+      @current_match = nil
     end
-    private
-    def do_match
-      if @stringScanner.pos >= @stringScanner.string.length
-        return nil;
-      end
-      token_match = match_token
-      whitespace_match = match_whitespace
-      if whitespace_match[:length] > token_match[:length]
-        return whitespace_match
-      elsif token_match[:length] > 0
-        return token_match
+    def next_match!
+      do_match!
+      return @current_match
+    end
+    private
+    def do_match!
+      if @stringScanner.pos >= @input.length
+	@current_match = nil
+      elsif match_token
+        @stringScanner.pos += @current_match.length
+      elsif (whitespace_len = match_whitespace) > 0
+        @stringScanner.pos += whitespace_len
+	do_match!
       else
 	# So we've not been able to match tokens nor whitespace.
         # We return the first character of the remaining input as a string
         # literal
-        string = @stringScanner.string.slice(@stringScanner.pos, 1)
-        matches = [{:literal => string,
-	            :regexp  => Regexp.new(Regexp.escape(string))}]
-	return {:matches => matches,
-	        :string => string,
-	        :start_pos => @stringScanner.pos,
-		:length => 1}
+	str = @stringScanner.string.slice(@stringScanner.pos, 1)
+	regexp = Regexp.new(Regexp.escape(str))
+	@current_match = LexicalMatch.new([str], {str=>regexp}, @stringScanner.pos, 1)
+	@stringScanner.pos += 1
       end
     end
+    private
     def match_token
-      matches = []
+      matching_terminals = []
+      regexps = {}
       max_length = 0
-      @recognizables.each do |rec|
-	if (len = @stringScanner.match?(rec[:regexp])) and len > 0
+      @recognizables.each do |recognizable|
+	len, regexp = match(recognizable)
+	if len > 0
 	  if len > max_length
             # Now, we have a match longer than whatever we had,
             # so we discharge what we had, and save the new one
-            matches = [rec]
-            max_length = len
+            matching_terminals = [recognizable]
+            regexps = {recognizable => regexp}
+	    max_length = len
           elsif len == max_length
             # This regular expression matches a string of same length
-            # as our previous match, so we prepare to return both
-            matches << rec
+            # as our previous match(es), so we prepare to return both/all
+            matching_terminals << recognizable
+	    regexps[recognizable] = regexp
           end
         end
       end
-      start_pos = @stringScanner.pos
-      string = @stringScanner.string.slice(start_pos, max_length)
-      return {:matches => matches,
-	      :string  => string,
-	      :start_pos => start_pos,
-	      :length => max_length}
+      if max_length == 0
+	return false
+      else
+	@current_match = LexicalMatch.new(matching_terminals, regexps, @stringScanner.pos, max_length)
+	return true
+      end
     end
+    def match(recognizable)
+      if recognizable.class == Class # It must be a token
+	return recognizable.match(@stringScanner)
+      elsif (len = @stringScanner.match?(@string_patterns[recognizable])) and len > 0
+	return len, @string_patterns[recognizable]
+      else
+	return 0, nil
+      end
+    end
     def match_whitespace
-      max_length = 0
+      max_len = 0
       for i in 0..@ws_regs.size - 1 do
         len = @stringScanner.match?(@ws_regs[i]) || 0
-        if len > max_length
-          max_length = len
+        if len > max_len
+          max_len = len
         end
       end
-      string = @stringScanner.string.slice(@stringScanner.pos, max_length)
-      result = {:string => string,
-	        :start_pos => @stringScanner.pos,
-	        :length => max_length}
-      return result
+      return max_len
+    end
+    def do_match2!
+      while @stringScanner.pos < @input.length do
+	look_for_token_match
+	look_for_whitespace_match
+	if @whitespace_match_length > @match_length
+	  @stringScanner.pos  += @whitespace_match_length
+	elsif @match_length > 0
+	  @current_match = LexicalMatch.new(@matching_recognizables, @regexps, @stringScanner.pos, @match_length)
+	  @stringScanner.pos += @match_length
+	  break
+	else
+          str = @stringScanner.string.slice(@stringScanner.pos, 1)
+          regexp = Regexp.new(Regexp.escape(str))
+          @current_match = LexicalMatch.new([str], {str=>regexp}, @stringScanner.pos, 1)
+          @stringScanner.pos += 1
+	  break
+	end
+      end
+    end
+    def look_for_token_match
+      @matching_recognizables = []
+      @regexps = {}
+      @match_length = 0
+      @tokens.each do |token|
+	new_match_length, regexp = token.match(@stringScanner)
+	if new_match_length > match_length
+	  @matching_recognizables = [token]
+	  @regexps = {token => regexp}
+	  @match_length = new_match_length
+	elsif new_match_length > 0 and new_match_length == match_length
+	  @matching_recognizables << token
+	  @regexps[token] = regexp
+	end
+      end
+      @literals.each do |literal|
+	new_match_length = @stringScanner.match?(@string_patterns[literal])
+	if new_match_length > match_length
+	  matching_recognizables = [literal]
+	  regexps = {literal => @string_patterns[literal]}
+	  match_length = new_match_length
+	elsif new_match_length > 0 and new_match_length == match_length
+	  matching_recognizables << literal
+	  regexps[literal] = @string_paterns[literal]
+	end
+      end
+    end
+    def look_for_whitespace_match
+      @whitespace_match_length = 0
+      for i in 0..@ws_regs.size - 1 do
+        len = @stringScanner.match?(@ws_regs[i]) || 0
+        if len > @whitespace_match_length
+          @whitespace_match_length = len
+        end
+      end
+    end
+  end
+  class LexicalMatch
+    attr_accessor :terminals, :start_pos, :length
+    def initialize(terminals, regexps, start_pos, length)
+      @terminals = terminals
+      @regexps = regexps
+      @start_pos = start_pos
+      @length = length
+    end
+    def regexp(terminal)
+      @regexps[terminal]
     end
   end
 end