RubyGems - heist - Versions diffs - 0.1.0 → 0.2.0 - Mend

heist 0.1.0 → 0.2.0

Files changed (41) hide show

data/History.txt +17 -0
data/Manifest.txt +23 -19
data/README.txt +84 -52
data/lib/builtin/library.scm +208 -10
data/lib/builtin/primitives.rb +154 -92
data/lib/builtin/syntax.scm +22 -5
data/lib/heist.rb +49 -17
data/lib/parser/nodes.rb +47 -24
data/lib/parser/ruby.rb +29 -0
data/lib/parser/scheme.rb +455 -143
data/lib/parser/scheme.tt +23 -5
data/lib/repl.rb +19 -16
data/lib/runtime/binding.rb +24 -2
data/lib/runtime/callable/continuation.rb +23 -2
data/lib/runtime/callable/function.rb +122 -21
data/lib/runtime/callable/macro.rb +169 -123
data/lib/runtime/callable/macro/expansion.rb +137 -2
data/lib/runtime/callable/macro/matches.rb +125 -41
data/lib/runtime/callable/macro/tree.rb +141 -0
data/lib/runtime/callable/syntax.rb +44 -0
data/lib/runtime/data/cons.rb +234 -0
data/lib/runtime/data/expression.rb +15 -6
data/lib/runtime/data/identifier.rb +19 -2
data/lib/runtime/frame.rb +102 -35
data/lib/runtime/runtime.rb +44 -19
data/lib/runtime/scope.rb +145 -30
data/lib/runtime/stack.rb +103 -1
data/lib/runtime/stackless.rb +48 -6
data/test/arithmetic.scm +11 -2
data/test/continuations.scm +16 -2
data/test/equivalence.scm +34 -0
data/test/functional.scm +4 -0
data/test/lists.scm +78 -0
data/test/macro-helpers.scm +1 -0
data/test/macros.scm +111 -24
data/test/numbers.scm +30 -8
data/test/test_heist.rb +67 -12
metadata +25 -21
data/lib/builtin/syntax.rb +0 -166
data/lib/runtime/callable/macro/splice.rb +0 -56
data/lib/runtime/data/list.rb +0 -36

data/lib/runtime/runtime.rb CHANGED Viewed

@@ -1,59 +1,84 @@
-require 'forwardable'
 module Heist
+  # +Runtime+ objects represent instances of the Heist runtime environment.
+  # Each +Runtime+ defines a top-level +Scope+, into which are injected
+  # the standard set of primitive functions and special forms as defined
+  # in <tt>lib/builtin</tt>.
+  #
+  # +Runtime+ exposes several methods from the top-level +Scope+ object,
+  # allowing runtime objects to be used as interfaces for defining
+  # functions, eval'ing code and running source files.
+  #
   class Runtime
-    %w[ data/expression     data/identifier   data/list
-        callable/function   callable/macro    callable/continuation
-        scope               binding           frame
-        stack               stackless
+    %w[ data/expression     data/identifier   data/cons
+        callable/function   callable/syntax   callable/macro  callable/continuation
+        frame               stack             stackless
+        scope               binding
     ].each do |file|
       require RUNTIME_PATH + file
     end
     extend Forwardable
-    def_delegators(:@top_level, :[], :eval, :define, :syntax, :call)
+    def_delegators(:@top_level, :[], :eval, :exec, :define, :syntax, :run)
-    attr_reader :order
     attr_accessor :stack, :top_level
+    # A +Runtime+ is initialized using a set of options. The available
+    # options include the following, all of which are +false+ unless
+    # you override them yourself:
+    #
+    # * <tt>:continuations</tt>: set to +true+ to enable <tt>call/cc</tt>
+    # * <tt>:lazy</tt>: set to +true+ to enable lazy evaluation
+    # * <tt>:unhygienic</tt>: set to +true+ to disable macro hygiene
+    #
     def initialize(options = {})
       @lazy          = !!options[:lazy]
       @continuations = !!options[:continuations]
       @hygienic      = !options[:unhygienic]
       @top_level = Scope.new(self)
-      @stack = create_stack
-      syntax_type = (lazy? or not @hygienic) ? 'rb' : 'scm'
+      @stack = stackless? ? Stackless.new : Stack.new
       run("#{ BUILTIN_PATH }primitives.rb")
-      run("#{ BUILTIN_PATH }syntax.#{syntax_type}")
+      run("#{ BUILTIN_PATH }syntax.scm")
       run("#{ BUILTIN_PATH }library.scm")
       @start_time = Time.now.to_f
     end
-    def run(path)
-      return instance_eval(File.read(path)) if File.extname(path) == '.rb'
-      @top_level.run(path)
-    end
+    # Returns the length of time the +Runtime+ has been alive for, as a
+    # number in microseconds.
     def elapsed_time
       (Time.now.to_f - @start_time) * 1000000
     end
+    # Returns +true+ iff the +Runtime+ is using lazy evaluation.
     def lazy?; @lazy; end
+    # Returns +true+ iff the +Runtime+ is using hygienic macros.
     def hygienic?; @hygienic; end
+    # Returns +true+ iff the +Runtime+ is using the faster +Stackless+
+    # evaluator, which does not support <tt>(call/cc)</tt>.
     def stackless?
       lazy? or not @continuations
     end
-    def create_stack
-      stackless? ? Stackless.new : Stack.new
+    def to_s
+      "#<runtime: #{ stackless? ? 'call/cc disabled' : 'call/cc enabled'
+               }, #{ hygienic? ? 'hygienic' : 'unhygienic'
+               }, #{ lazy? ? 'lazy' : 'eager' }>"
+    end
+    alias :inspect :to_s
+    def info
+      [ "Heist Scheme interpreter v. #{ VERSION }",
+        "Evaluation mode: #{ lazy? ? 'LAZY' : 'EAGER' }",
+        "Continuations enabled? #{ stackless? ? 'NO' : 'YES' }",
+        "Macros: #{ hygienic? ? 'HYGIENIC' : 'UNHYGIENIC' }\n\n"
+      ] * "\n"
     end
   end

data/lib/runtime/scope.rb CHANGED Viewed

@@ -1,9 +1,24 @@
 module Heist
   class Runtime
+    # +Scope+ is primarily used to represent symbol tables, though it also
+    # has a few other scope-related responsibilities such as defining
+    # functions (functions need to remember the scope they appear in) and
+    # loading files. Scheme uses lexical scope, which we model using a simple
+    # delegation system.
+    #
+    # Every +Scope+ has a hash (<tt>@symbols</tt>) in which it stores names
+    # of variables and their associated values, and a parent scope
+    # (<tt>@parent</tt>). If a variable cannot be found in one scope, the
+    # lookup is delegated to the parent until we get to the top level, at
+    # which point an exception is raised.
+    #
     class Scope
       attr_reader :runtime
+      # A +Scope+ is initialized using another +Scope+ to use as the parent.
+      # The parent may also be a +Runtime+ instance, indicating that the
+      # new +Scope+ is being used as the top level of a runtime environment.
       def initialize(parent = {})
         @symbols = {}
         is_runtime = (Runtime === parent)
@@ -11,48 +26,127 @@ module Heist
         @runtime = is_runtime ? parent : parent.runtime
       end
+      # Returns the value corresponding to the given variable name. If the
+      # name does not exist in the receiver, the call is delegated to its
+      # parent scope. If the name cannot be found in any scope an exception
+      # is raised.
+      #
+      # In lazy mode, +Binding+ objects are stored in the symbol table when
+      # functions are called; we do not evaluate the arguments to a function
+      # before calling it, but instead we force an argument's value if the
+      # function's body attempts to access it by name.
+      #
       def [](name)
         name = to_name(name)
-        bound = @symbols.has_key?(to_name(name))
+        bound = @symbols.has_key?(name)
         raise UndefinedVariable.new(
           "Variable '#{name}' is not defined") unless bound or Scope === @parent
         value = bound ? @symbols[name] : @parent[name]
-        value = value.extract if Binding === value
+        value = value.force! if value.respond_to?(:force!)
         value
       end
+      # Binds the given +value+ to the given +name+ in the receiving +Scope+.
+      # Note this always sets the variable in the receiver; see <tt>set!</tt>
+      # for a method corresponding to Scheme's <tt>(set!)</tt> function.
       def []=(name, value)
         @symbols[to_name(name)] = value
         value.name = name if Function === value
         value
       end
+      # Returns +true+ iff the given name is bound as a variable in the
+      # receiving scope or in any of its ancestor scopes.
       def defined?(name)
         @symbols.has_key?(to_name(name)) or
             (Scope === @parent and @parent.defined?(name))
       end
-      def set!(name, value)
+      # Returns a +Scope+ object representing the innermost scope in which
+      # the given name is bound. This is used to find out whether two or
+      # more identifiers have the same binding.
+      #
+      #   outer = Scope.new
+      #   outer['foo'] = "a value"
+      #
+      #   inner = Scope.new(outer)
+      #   inner['bar'] = "something"
+      #
+      #   inner.innermost_binding('foo') #=> outer
+      #   inner.innermost_binding('bar') #=> inner
+      #
+      def innermost_binding(name)
         name = to_name(name)
-        bound = @symbols.has_key?(name)
-        raise UndefinedVariable.new(
-          "Cannot set undefined variable '#{name}'") unless bound or Scope === @parent
-        return @parent.set!(name, value) unless bound
-        self[name] = value
+        @symbols.has_key?(name) ?
+            self :
+        Scope === @parent ?
+            @parent.innermost_binding(name) :
+            nil
+      end
+      # Analogous to Scheme's <tt>(set!)</tt> procedure. Assigns the given
+      # +value+ to the given variable +name+ in the innermost region in
+      # which +name+ is bound. If the +name+ does not exist in the receiving
+      # scope, the assignment is delegated to the parent. If no visible
+      # binding exists for the given +name+ an exception is raised.
+      def set!(name, value)
+        scope = innermost_binding(name)
+        raise UndefinedVariable.new("Cannot set undefined variable '#{name}'") if scope.nil?
+        scope[name] = value
       end
+      # +define+ is used to define functions using either Scheme or Ruby
+      # code. Takes either a name and a Ruby block to represent the function,
+      # or a name, a list of formal arguments and a list of body expressions.
+      # The <tt>(define)</tt> primitive exposes this method to the Scheme
+      # environment. This method allows easy extension using Ruby, for
+      # example:
+      #
+      #   scope.define('+') do |*args|
+      #     args.inject { |a,b| a + b }
+      #   end
+      #
+      # See +Function+ for more information.
+      #
       def define(name, *args, &block)
         self[name] = Function.new(self, *args, &block)
       end
-      def syntax(name, holes = [], &block)
-        self[name] = Syntax.new(self, holes,&block)
+      # +syntax+ is similar to +define+, but is used for defining syntactic
+      # forms. Heist's parser has no predefined syntax apart from generic
+      # Lisp paren syntax and Scheme data literals. All special forms are
+      # defined as special functions and stored in the symbol table, making
+      # them first-class objects that can be easily aliased and overridden.
+      #
+      # This method takes a name and a Ruby block. The block will be called
+      # with the calling +Scope+ object and a +Cons+ containing the section
+      # of the parse tree representing the parameters the form has been called
+      # with.
+      #
+      # It is not recommended that you write your own syntax using Ruby
+      # since it requires too much knowledge of the plumbing for features
+      # like tail calls and continuations. If you define new syntax using
+      # Scheme macros you get correct behaviour of these features for free.
+      #
+      # See +Syntax+ for more information.
+      #
+      def syntax(name, &block)
+        self[name] = Syntax.new(self, &block)
+      end
+      # Parses and executes the given string of source code in the receiving
+      # +Scope+. Accepts strings of Scheme source and arrays of Ruby data to
+      # be interpreted as Scheme lists.
+      def eval(source)
+        source = Heist.parse(source)
+        source.eval(self)
       end
+      alias :exec :eval
+      # Returns all the variable names visible in the receiving +Scope+ that
+      # match the given regex +pattern+. Used by the REPL for tab completion.
       def grep(pattern)
         base = (Scope === @parent) ? @parent.grep(pattern) : []
         @symbols.each do |key, value|
@@ -61,25 +155,23 @@ module Heist
         base.uniq
       end
-      # TODO: this isn't great, figure out a way for functions
-      # to transparently handle inter-primitive calls so Ruby can
-      # call Scheme code as well as other Ruby code
-      def call(name, *params)
-        self[name].body.call(*params)
-      end
-      def run(path)
-        path   = path + FILE_EXT unless File.file?(path)
-        source = Heist.parse(File.read(path))
-        scope  = FileScope.new(self, path)
-        source.eval(scope)
-      end
-      def eval(source)
-        source = Heist.parse(source) if String === source
-        source.eval(self)
+      # Runs the given Scheme or Ruby definition file in the receiving
+      # +Scope+. Note that local vars in this method can cause block vars
+      # to become delocalized when running Ruby files under 1.8, so make
+      # sure we use 'obscure' names here.
+      def run(_path)
+        return instance_eval(File.read(_path)) if File.extname(_path) == '.rb'
+        _path   = _path + FILE_EXT unless File.file?(_path)
+        _source = Heist.parse(File.read(_path))
+        _scope  = FileScope.new(self, _path)
+        _source.eval(_scope)
       end
+      # Loads the given Scheme file and executes it in the global scope.
+      # Paths are treated as relative to the current file. If no local file
+      # is found, the path is assumed to refer to a module from the Heist
+      # standard library. The <tt>(load)</tt> primitive is a wrapper
+      # around this method.
       def load(path)
         dir = load_path.find do |dir|
           File.file?("#{dir}/#{path}") or File.file?("#{dir}/#{path}#{FILE_EXT}")
@@ -89,16 +181,34 @@ module Heist
         true
       end
+      # Returns the path of the current file. The receiving scope must have
+      # a +FileScope+ as an ancestor, otherwise this method will return +nil+.
       def current_file
         @path || @parent.current_file rescue nil
       end
     private
+      # Calls the named primitive function with the given arguments, and
+      # returns the result of the call.
+      #
+      # TODO: this is currently hampered by the fact that Functions expect to
+      # be called with a +Scope+, but Ruby primitives are not given the
+      # current +scope+. Figure out something better.
+      def call(name, *params)
+        self[name].body.call(*params)
+      end
+      # Converts any Ruby object to a name string. All names are downcased
+      # as this Scheme is case-insensitive.
       def to_name(name)
         name.to_s.downcase
       end
+      # Returns the current set of directories in which to look for Scheme
+      # files to load. Includes the standard library path by default, and
+      # the directory of the current file if the receiving +Scope+ has a
+      # +FileScope+ as an ancestor.
       def load_path
         paths, file = [], current_file
         paths << File.dirname(file) if file
@@ -106,6 +216,11 @@ module Heist
       end
     end
+    # A +FileScope+ is a special kind of +Scope+ used to represent the region
+    # of a single file. It provides Scheme code with an awareness of its
+    # path so it can load local files. +FileScope+ instances delegate all
+    # variable assignments to their parent +Scope+ (this is typically the
+    # global scope) so that variables are visible across files.
     class FileScope < Scope
       extend Forwardable
       def_delegators(:@parent, :[]=)

data/lib/runtime/stack.rb CHANGED Viewed

@@ -1,29 +1,116 @@
 module Heist
   class Runtime
+    # +Stack+ is responsible for executing code by successively evaluating
+    # expressions. It provides fine-grained intermediate result inspection
+    # to support the Scheme notion of continuations, working with the +Frame+
+    # and +Body+ classes to evaluate expressions and function bodies piece
+    # by piece. Using the +Stack+ engine allows the creation of +Continuation+
+    # functions, which save the current state of the stack (i.e. the state
+    # of any unfinished expressions and function bodies) and allow it to be
+    # resumed at some later time.
+    #
+    # +Stack+ inherits from +Array+, and is a last-in-first-out structure:
+    # the next expression evaluated is always the last expression on the
+    # stack.
+    #
+    # You should think of the +Stack+ as an array of +Frame+ objects that
+    # hold expressions and track their progress. For example, take the
+    # expression:
+    #
+    #   (+ (- (* 8 9) (/ 21 7)) 4)
+    #
+    # Evaluating it involves evaluating each subexpression to fill in holes
+    # where we expect values; when all the holes in an expression have been
+    # filled, we can apply the resulting function to the arguments and get
+    # a value. Evaluating this expression causes the stack to evolve as
+    # follows, where STATE lists the expressions on the stack and <tt>[]</tt>
+    # represents a hole that is waiting for a value:
+    #
+    #   PUSH:  (+ (- (* 8 9) (/ 21 7)) 4)
+    #   STATE: ([] [] 4)
+    #
+    #   PUSH:  +
+    #   VALUE: #<procedure:+>
+    #   STATE: (#<procedure:+> [] 4)
+    #
+    #   PUSH:  (- (* 8 9) (/ 21 7))
+    #   STATE: (#<procedure:+> [] 4), ([] [] [])
+    #
+    #   PUSH:  -
+    #   VALUE: #<procedure:->
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> [] [])
+    #
+    #   PUSH:  (* 8 9)
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> [] []), ([] 8 9)
+    #
+    #   PUSH:  *
+    #   VALUE: #<procedure:*>
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> [] []), (#<procedure:*> 8 9)
+    #
+    #   VALUE: 72
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> 72 [])
+    #
+    #   PUSH:  (/ 21 7)
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> 72 []), ([] 21 7)
+    #
+    #   PUSH:  /
+    #   VALUE: #<procedure:/>
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> 72 []), (#<procedure:/> 21 7)
+    #
+    #   VALUE: 3
+    #   STATE: (#<procedure:+> [] 4), (#<procedure:-> 72 3)
+    #
+    #   VALUE: 69
+    #   STATE: (#<procedure:+> 69 4)
+    #
+    #   VALUE: 73
+    #
+    # So we find that <tt>(+ (- (* 8 9) (/ 21 7)) 4)</tt> gives the value 73.
+    # Whenever a value is returned by a subexpression we must inspect it to
+    # see if a +Continuation+ has been called. All this inspection of
+    # intermediate values takes time; if you don't need full +Continuation+
+    # support, use the faster +Stackless+ engine instead.
+    #
     class Stack < Array
       attr_reader :value
+      # Pushes a new +Frame+ or +Body+ onto the +Stack+ and then executes
+      # the resulting code until the pushed frame returns a value, which
+      # is then returned.
       def <<(frame)
         super
         clear!(size - 1)
       end
+      # Creates and returns a copy of the stack, which represents the current
+      # computational state: any unfinished expressions and function bodies
+      # are stored in the stack. Pass +false+ to discard the final frame,
+      # which will typically be a call to <tt>(call/cc)</tt> when creating
+      # a +Continuation+.
       def copy(keep_last = true)
         copy = self.class.new
         range = keep_last ? 0..-1 : 0...-1
         self[range].each do |frame|
-          copy[copy.size] = frame.dup
+          copy[copy.size] = frame.clone
         end
         copy
       end
+      # Fills a hole in the final +Frame+ on the +Stack+ by replacing the
+      # given expression +subexpr+ with the given +value+. If the +value+
+      # is a +Frame+, this frame is pushed onto the stack rather than filling
+      # a hole in the previous frame.
       def fill!(subexpr, value)
         return self[size] = value if Frame === value
         return @value = value if empty?
         last.fill!(subexpr, value)
       end
+      # Causes the stack to evaluate expressions in order to pop them off the
+      # stack, until it gets down to the size given by +limit+. The resulting
+      # value is returned after all necessary computations have been done,
+      # and if an error takes place at any point we empty the stack.
       def clear!(limit = 0)
         process! while size > limit
         @value
@@ -32,6 +119,14 @@ module Heist
         raise ex
       end
+      # Sets the +value+ on the +Stack+, which is always the value returned by
+      # the last completed expression or function body. If the given +value+
+      # is another +Stack+, this new stack replaces the state of the receiver;
+      # this takes place when a +Continuation+ is called. If the +value+ is
+      # a +Frame+, it is pushed onto the stack and we set a flag to indicate
+      # that a tail call is in effect and the replacement target of the call
+      # needs to be repointed: the expression that generated the tail call will
+      # have been removed from the stack by the time the call returns.
       def value=(value)
         @value  = value
         @unwind = (Stack === @value)
@@ -41,6 +136,11 @@ module Heist
     private
+      # Processes one piece of the final +Frame+ on the +Stack+ and inspects the
+      # return value. The value must be inspected to see if a +Continuation+ has
+      # been called (indicated by <tt>@unwind</tt>), or a tail call has taken
+      # place. Continuation calls replace the state of the stack, and tail calls
+      # need modifying so they fill the correct hole when they return.
       def process!
         self.value = last.process!
         return if empty? or @unwind or not last.complete?
@@ -48,6 +148,8 @@ module Heist
         fill!(pop.target, @value)
       end
+      # Replaces the state of the receiver with the state of the argument. We
+      # call this when calling a +Continuation+, or when recovering from errors.
       def restack!(stack = [])
         pop while not empty?
         stack.each_with_index { |frame, i| self[i] = frame }