RubyGems - llip - Versions diffs - 0.1.0 - Mend

llip 0.1.0

Files changed (42) hide show

data/History.txt +4 -0
data/MIT-LICENSE +21 -0
data/Manifest.txt +45 -0
data/README.txt +148 -0
data/Rakefile +66 -0
data/examples/ariteval/ariteval.rb +132 -0
data/examples/ariteval/evaluator.rb +61 -0
data/examples/ariteval/exp.rb +104 -0
data/lib/llip.rb +6 -0
data/lib/llip/abstract_parser.rb +170 -0
data/lib/llip/abstract_scanner.rb +83 -0
data/lib/llip/buffer.rb +35 -0
data/lib/llip/llip_error.rb +43 -0
data/lib/llip/parser.rb +93 -0
data/lib/llip/production_compiler.rb +168 -0
data/lib/llip/production_specification.rb +79 -0
data/lib/llip/recursive_production_compiler.rb +35 -0
data/lib/llip/regexp_abstract_scanner.rb +116 -0
data/lib/llip/regexp_parser.rb +197 -0
data/lib/llip/regexp_scanner.rb +33 -0
data/lib/llip/regexp_specification.rb +210 -0
data/lib/llip/token.rb +47 -0
data/lib/llip/visitable.rb +37 -0
data/spec/ariteval/ariteval_spec.rb +111 -0
data/spec/ariteval/evaluator_spec.rb +106 -0
data/spec/ariteval/exp_spec.rb +232 -0
data/spec/llip/abstract_parser_spec.rb +273 -0
data/spec/llip/abstract_scanner_spec.rb +152 -0
data/spec/llip/buffer_spec.rb +60 -0
data/spec/llip/llip_error_spec.rb +77 -0
data/spec/llip/parser_spec.rb +163 -0
data/spec/llip/production_compiler_spec.rb +271 -0
data/spec/llip/production_specification_spec.rb +75 -0
data/spec/llip/recursive_production_compiler_spec.rb +86 -0
data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
data/spec/llip/regexp_parser_spec.rb +265 -0
data/spec/llip/regexp_scanner_spec.rb +40 -0
data/spec/llip/regexp_specification_spec.rb +734 -0
data/spec/llip/token_spec.rb +70 -0
data/spec/llip/visitable_spec.rb +38 -0
data/spec/spec_helper.rb +10 -0
metadata +110 -0

data/lib/llip/production_compiler.rb ADDED

@@ -0,0 +1,168 @@
+module LLIP
+  # It's the main class which handles the generation of the source code dinamically.
+  class ProductionCompiler
+    # It contains the produced
+    attr_reader :code
+    def initialize
+      reset
+    end
+    # It initializes the compiler for a new generation.
+    def start(name)
+      reset
+      @name_str = name
+      @name = str_to_sym(name)
+      @code << <<-CODE
+			def parse_#{name}
+				result = productions[#{@name}].default.call(@scanner,self)
+		CODE
+      self
+    end
+    # :call-seq:
+    # 	token(Array)
+    # 	token(Symbol)
+    # 	token(String)
+    #
+    # If the argument is a Symbol or a String, the produced code will match them through ==.
+    # It the argument is an Array, lookaheads will be used, so the scanner must support lookaheads (or use a Buffer which supports them).
+    def token(tokens)
+      lookaheads = ""
+      name = nil
+      token_identifier = nil
+      if tokens.kind_of? Array
+        tokens_names = tokens.map { |tk| build_token_name(tk) }
+        token_identifier = "["
+        tokens_names.each { |tk| token_identifier << tk + "," }
+        token_identifier[-1] = "]"
+        name = build_token_name(tokens[0])
+        counter = 0
+        tokens[1..-1].each do |token|
+          lookaheads << " and "
+          counter += 1
+          token = build_token_name(token)
+          lookaheads << "@scanner.lookahead(#{counter}) == #{token}"
+        end
+      else
+        name = build_token_name(tokens)
+        token_identifier = name
+      end
+      @code << <<-CODE
+			#{@else}if @scanner.current == #{name}#{lookaheads}
+				result = productions[#{@name}].tokens[#{token_identifier}].call(result,@scanner,self)
+		CODE
+      @else = "els"
+      self
+    end
+    # It closes the method definition
+    def end(raise_on_error=true)
+    build_else(raise_on_error) if @else != ""
+    build_end
+  end
+  # It resets the compiler
+  def reset
+    @code = ""
+    @name = nil
+    @else = ""
+  end
+  # It takes a ProductionSpecification and then call its compiling methods by itself. It takes care to order all the productions the right way.
+  def compile(production)
+    start(production.name)
+    sort_production(production).each { |tk| token(tk)}
+    self.end(production.raise_on_error)
+  end
+  def sort_production(production) # :nodoc:
+    tokens = production.tokens
+    lk_tk = []
+    not_lk_tk = []
+    tokens.keys.each	do |tk|
+      if tk.kind_of? Array
+        lk_tk << tk
+        lk_tk << tk[0] if tokens.has_key? tk[0]
+      end
+    end
+    not_lk_tk = tokens.keys - lk_tk
+    lk_tk.uniq!
+    lk_tk.sort! do |a,b|
+      if a.kind_of? Array and b.kind_of? Array
+        if a.size > b.size
+          -1
+        else
+          1
+        end
+      elsif a.kind_of? Array and not b.kind_of? Array
+        -1
+      else
+        1
+      end
+    end
+    if not_lk_tk.include? :everything
+      ret_value = not_lk_tk + lk_tk
+      ret_value.delete(:everything)
+      ret_value << :everything
+      ret_value
+    else
+      not_lk_tk + lk_tk
+    end
+  end
+  protected
+  # :call-seq:
+  # 	str_to_sym(object) => ":#{object.to_s}"
+  #
+  def str_to_sym(string)
+    string = string.to_s
+    ":\"#{string}\""
+  end
+  # :call-seq:
+  # 	build_token_name(string) => "'#{string}'"
+  # 	build_token_name(symbol) => ":#{object.to_s}"
+  #
+  def build_token_name(string)
+    if string.kind_of? String
+      "'#{string.gsub("\\","\\\\\\")}'"
+    elsif string.kind_of? Symbol
+      str_to_sym(string)
+    end
+  end
+  # It builds the else clause in the method definition.
+  # It accepts a raise_on_error parameter to specify if it has to raise or not.
+  def build_else(raise_on_error=true)
+    if raise_on_error
+      @code << <<-CODE
+				else
+					raise NotAllowedTokenError.new(@scanner.current,#{@name})
+			CODE
+    end
+    @code << "\nend\n"
+    @else = ""
+  end
+  # It closes the method definition and sets the return value
+  def build_end
+    @code << <<-CODE
+	 			return result
+			end
+		CODE
+  end
+end
+end

data/lib/llip/production_specification.rb ADDED

@@ -0,0 +1,79 @@
+module LLIP
+  # A ProductionSpecification contains all it's needed to transform it into live code.
+  # This transformation is done by ProductionCompiler or RecursiveProductionCompiler.
+  #
+  # The flow of the execution of a production is:
+  # 1. The default block is called and it's result is stored in a +result+ var.
+  # 2. The current token is matched against every key of the ProductionSpecification#tokens
+  #    hash, and if this match is positive the associated block is executed.
+  #    The result is stored inside the +result+ var.
+  #    If nothing matches and the ProductionSpecification#mode is :single and ProductionSpecification#raise_on_error
+  #    is true an exception must be raised. if nothing matches and the ProductionSpecification#mode is
+  #    recursive the production must return the +result+ var.
+  # 3. If the ProductionSpecification#mode is :single, the production must return
+  #    the +result+ var. If the ProductionSpecification#mode is :recursive, the step
+  # 2  is going to be executed until it recognizes a Token.
+  class ProductionSpecification
+    NIL_BLOCK = lambda { nil }
+    # The production name.
+    attr_reader :name
+    # It's an hash which has as keys the token to recognize and as value the block to be executed with it.
+    # They are specified through ProductionSpecification#token.
+    attr_reader :tokens
+    # The mode of the production. It can be :single or :recursive.
+    attr_accessor :mode
+    # This attribute specifies if the production should raise an exception if the current token hasn't been recognized.
+    # It's important only for :single productions.
+    attr_accessor :raise_on_error
+    def initialize(name)
+      @name = name
+      @tokens = {}
+      @mode = :single
+      @default = NIL_BLOCK
+      @raise_on_error = true
+    end
+    # :call-seq:
+    #   token(*token_name) { |result, scanner, parser| ... }
+    #
+    # The block specified through this method will be executed when the token with the specified name is matched.
+    # If more than a name is given, the parser should automatically use lookahead and match all the tokens.
+    #
+    # This name is going to be matched for equality with a Token.
+    #
+    # The arguments of the block will be filled by:
+    # * The +result+ argument contains the result of a previous called block inside this production.
+    # * The +scanner+ is an instance of a class descending from AbstractScanner. It's the scanner used by the parser.
+    #   It's important to call +next+ on this scanner to make it build the next token.
+    # * The +parser+ is an instance of a class descending from AbstractParser. It's the caller of the production.
+    #   It's necessary to call other productions.
+    def token(*args,&block) # :yields: result,scanner,parser
+      args.flatten!
+      block = args.pop if args.last.respond_to? :call
+      args = args.first if args.size == 1
+      @tokens[args] = block || NIL_BLOCK
+      self
+    end
+    # :call-seq:
+    #   default() { |scanner, parser| ... }
+    #
+    # The specified block is going to be executed before any token is recognized.
+    # The default is NIL_BLOCK.
+    def default(block=nil,&b)
+      block ||= b
+      @default = block if block
+      @default
+    end
+  end
+end

data/lib/llip/recursive_production_compiler.rb ADDED

@@ -0,0 +1,35 @@
+require File.dirname(__FILE__) + '/production_compiler'
+module LLIP
+  #It modifies ProductionCompiler to add support to a recursive behaviour.
+  class RecursiveProductionCompiler < ProductionCompiler
+    def start(name)
+      super
+      @code << <<-CODE
+        while not @scanner.current.nil?
+      CODE
+    end
+    protected
+    def build_else(raise_on_error=true)
+      if raise_on_error
+        @code << <<-CODE
+          else
+            break
+	CODE
+      end
+      @code << "\nend\n"
+      @else = ""
+    end
+    def build_end
+      @code << <<-CODE
+	end
+      CODE
+      super
+    end
+  end
+end

data/lib/llip/regexp_abstract_scanner.rb ADDED

@@ -0,0 +1,116 @@
+require File.dirname(__FILE__) + '/regexp_specification'
+require File.dirname(__FILE__) + '/abstract_scanner'
+require File.dirname(__FILE__) + '/llip_error'
+module LLIP
+  # The RegexpAbstractScanner is the main abstract scanner of LLIP.
+  # To have a real scanner, just subclass it and add some regular expressions.
+  #
+  # See ClassMethods to know how.
+  class RegexpAbstractScanner < AbstractScanner
+    def self.inherited(other)
+      other.extend(ClassMethods)
+    end
+    def initialize(*args)
+      super
+      self.class.build unless self.class.built?
+    end
+    def next
+      return @current = Token.new(:nil,nil,@current_line,@current_char) unless @next_char
+      line = @current_line
+      char = @current_char
+      regexp = self.class.scanning_table[@next_char]
+      unless regexp
+        token = Token.new(:nil,@next_char,line,char)
+        raise LLIPError.new(token,"there isn't a regular expression which starts with #{@next_char}")
+      end
+      state = regexp.init
+      string = ""
+      while state[@next_char] != :error and @next_char
+        state = state[@next_char]
+        string << @next_char
+        read_next
+      end
+      token = Token.new(state.regexp.name,string,line,char)
+      if state.final?
+        @current = token
+      else
+        raise UnvalidTokenError.new(token)
+      end
+    end
+    module ClassMethods
+      # Its where all the regular expressions are stored. The keys are the starting_chars of the RegexpSpecification.
+      # While the table can be modified directly, it's reccomanded to use the add_regexp method.
+      def scanning_table
+        @scanning_table ||= Hash.new
+      end
+      # It allows to add a RegularExpression to the scanner and it makes sure that all the specified tokens don't collide.
+      #
+      # If a RegexpSpecification has starting_chars == :everything, it's set to the default value of the scanning_table.
+      def add_regexp(regexp)
+        starting_chars = regexp.starting_chars
+        if starting_chars.kind_of? Symbol
+          scanning_table.default = regexp
+        else
+          common_chars = starting_chars.select { |c| scanning_table.has_key? c }
+          starting_chars = starting_chars - common_chars
+          starting_chars.each { |c| scanning_table[c] = regexp }
+          colliding_states = common_chars.map { |c| scanning_table[c] }
+          colliding_states.uniq!
+          colliding_states.zip(common_chars).each { |r,c| scanning_table[c] = RegexpSpecification.mix(regexp,r) }
+        end
+        if @built
+          build
+        end
+        self
+      end
+      # It fix a problem with all the regexp that ends with ".*" or ".+".
+      # If such a regexp is given without calling this method,
+      # all the successive chars are going to be included by that regexp.
+      # This method add :error in the last state of that regexp for all
+      # starting chars in the scanner.
+      #
+      # This method is automatically called when a new scanner is istantiated.
+      def build
+        regexps = scanning_table.values.uniq
+        regexps << scanning_table.default if scanning_table.default
+        fixable = []
+        regexps.each do |regexp|
+          regexp.last.each do |state|
+            fixable << state if state.error == state
+          end
+        end
+        starting_chars = scanning_table.keys
+        fixable.each do |state|
+          starting_chars.each do |char|
+            state[char] = :error
+          end
+        end
+        @built = true
+        self
+      end
+      # It returns true if the build method has been called.
+      def built?
+        @built = false if @built.nil?
+        @built
+      end
+    end
+  end
+end

data/lib/llip/regexp_parser.rb ADDED

@@ -0,0 +1,197 @@
+require File.dirname(__FILE__) + '/abstract_parser'
+module LLIP
+  # It's a parser for regular expression. It correctly builds a RegexpSpecification given a valid regular expression string.
+  #
+  # === Grammar
+  #
+  # VN = { EXP , ELEMENT}
+  #
+  # char = every charachter
+  #
+  # symb = { ( , ) , . , * , + , \ , |}
+  #
+  # VT = char U symb
+  #
+  # In every production it has been used "or" instead of "|" to not make confusion.
+  #
+  # P = {
+  #   EXP -> META EXP
+  #   EXP -> META or EXP
+  #   EXP -> META
+  #   META -> ELEMENT*
+  #   META -> ELEMENT+
+  #   META -> ELEMENT
+  #   ELEMENT -> char or . or \symb
+  #   ELEMENT -> (EXP)
+  # }
+  #
+  # or in EBNF format
+  #
+  # P' = {
+  #   EXP ::= META{[|]EXP}
+  #   META ::= ELEMENT[* or  +]
+  #   ELEMENT ::= char or . or \symb or (EXP)
+  # }
+  #
+  class LLIP::RegexpParser < LLIP::AbstractParser
+    SPECIALS_TABLE = {
+      "n" => "\n",
+      "r" => "\r",
+      "t" => "\t"
+    }
+    SPECIALS_TABLE.default = lambda { |hash,key| raise 'Unknown special #{key}' }
+    scope(:scope)
+    production(:scope,:single) do |p|
+      p.default do |scanner,parser|
+        parser[:regexp] = RegexpSpecification.new
+        parser[:last] = [parser[:regexp].add_state]
+        parser.parse_exp
+        parser[:regexp].last.each { |s| s.final= true }
+        parser[:last].each { |s| s.final = true }
+        parser[:regexp]
+      end
+    end
+    production(:exp,:recursive) do |p|
+      p.default do |scanner,parser|
+        parser.parse_meta.last
+      end
+      p.token("|") do |result,scanner,parser|
+        result
+        scanner.next
+        parser[:last] = result
+        parser.parse_meta.last
+      end
+      p.token(:char) do |result,scanner,parser|
+        parser.parse_meta
+        result
+      end
+      p.token(".") do |result,scanner,parser|
+        parser.parse_meta
+        result
+      end
+      p.token("(") do |result,scanner,parser|
+        parser.parse_meta
+        result
+      end
+      p.token("\\") do |result,scanner,parser|
+        parser.parse_meta
+        result
+      end
+    end
+    production(:meta,:single) do |p|
+      p.raise_on_error = false
+      p.default do |scanner,parser|
+        MetaAccessor.new(parser[:last],parser.parse_element)
+      end
+      p.token("*") do |meta,scanner,parser|
+        if meta.results == :everything
+          parser[:last].last.error = parser[:last].last
+        else
+          if meta.results.kind_of? Array
+            meta.results.each do |c|
+              parser[:last].each { |s| s[c] = meta.last.last[c] }
+            end
+          else
+            parser[:last].last[meta.results] = parser[:last].last
+          end
+          parser[:last].concat(meta.last)
+        end
+        scanner.next
+        meta
+      end
+      p.token("+") do |meta,scanner,parser|
+        if meta.results == :everything
+          parser[:last].last.error = parser[:last].last
+          parser[:last] = [parser[:last].last]
+        else
+          if meta.results.kind_of? Array
+            meta.results.each do |c|
+              parser[:last].each { |s| s[c] = meta.last.last[c] }
+            end
+          else
+            parser[:last].last[meta.results] = parser[:last].last
+          end
+        end
+        scanner.next
+        meta
+      end
+    end
+    production(:element,:single) do |p|
+      p.token(:char) do |result, scanner, parser|
+        parser.add_char(parser,scanner)
+      end
+      p.token(".") do |result, scanner, parser|
+        r = parser[:regexp].add_state
+        parser[:last].last.error = r
+        parser[:last] << r
+        scanner.next
+        :everything
+      end
+      p.token("\\") do |result,scanner,parser|
+        if scanner.next == :symbol
+          parser.add_char(parser,scanner)
+        else
+          parser.add_char(parser,scanner,SPECIALS_TABLE[scanner.current.value])
+        end
+      end
+      p.token("(") do |result,scanner,parser|
+        scanner.next
+        first_state = parser[:last].last
+        parser.parse_exp
+        unless scanner.current == ")"
+          raise "Every '(' must be followed by a ')'"
+        end
+        scanner.next
+        parser[:last] = first_state.last
+        first_state.keys
+      end
+    end
+    def add_char(parser, scanner, char=scanner.current.value)
+      r = parser[:regexp].add_state
+      parser[:last].each { |s| s[char] = r }
+      parser[:regexp].add_state(r)
+      parser[:last] = [r]
+      scanner.next
+      char
+    end
+    class MetaAccessor
+      attr_accessor :results
+      attr_accessor :last
+      def initialize(last,results)
+        @results = results
+        @last = last
+      end
+    end
+  end
+end