heist 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,10 +2,145 @@ module Heist
2
2
  class Runtime
3
3
  class Macro
4
4
 
5
+ # +Expansion+ is responsible for expanding syntactic forms matched by
6
+ # successful +Macro+ calls. Any successful +Macro+ call returns an
7
+ # +Expansion+ object, which is used as a signal to the evaluator that
8
+ # the +Expression+ contained in the expansion should be inlined into
9
+ # the syntax tree.
5
10
  class Expansion
6
11
  attr_reader :expression
7
- def initialize(expression)
8
- @expression = expression
12
+
13
+ # An +Expansion+ is initialized using the lexical +Scope+ of the
14
+ # +Macro+ and the +Scope+ of the macro call site (both required
15
+ # for hygiene purposes), plus a +Cons+ representing the expansion
16
+ # template and a +Matches+ object containing the input expressions
17
+ # to be transcribed using the template. After initialization the
18
+ # expanded +Expression+ is available via the +Expansion+ object's
19
+ # +expression+ attribute.
20
+ def initialize(lexical_scope, calling_scope, template, matches)
21
+ @lexical_scope = lexical_scope
22
+ @calling_scope = calling_scope
23
+ @hygienic = lexical_scope.runtime.hygienic?
24
+ @expression = expand(template, matches)
25
+ end
26
+
27
+ # Accepts a template object (a +Cons+, +Identifier+ or similar) and a
28
+ # +Matches+ instance and returns the result of expanding the +template+
29
+ # using the +matches+. The +depth+ and +ignoring_ellipses+ arguments are
30
+ # for internal state maintenance as we recursively expand the template
31
+ # forms. +depth+ indicates the current repetition depth, i.e. how
32
+ # many ellipses follow the current subtemplate, and +ignoring_ellipses+
33
+ # is +true+ iff we're expanding a template in which ellipses should be
34
+ # transcribed verbatim. This is an R6RS feature; if a template opens
35
+ # with an ellipsis, we transcribe the rest of the template as normal
36
+ # except that any ellipses in the template are inserted as ellipses
37
+ # into the output, without causing their preceding forms to repeat.
38
+ #
39
+ # From the R5RS spec
40
+ # http://www.schemers.org/Documents/Standards/R5RS/HTML/r5rs-Z-H-7.html
41
+ #
42
+ # When a macro use is transcribed according to the template of the
43
+ # matching <syntax rule>, pattern variables that occur in the template
44
+ # are replaced by the subforms they match in the input. Pattern variables
45
+ # that occur in subpatterns followed by one or more instances of the
46
+ # identifier '...' are allowed only in subtemplates that are followed
47
+ # by as many instances of '...'. They are replaced in the output by all
48
+ # of the subforms they match in the input, distributed as indicated. It
49
+ # is an error if the output cannot be built up as specified.
50
+ #
51
+ # Identifiers that appear in the template but are not pattern variables
52
+ # or the identifier '...' are inserted into the output as literal
53
+ # identifiers. If a literal identifier is inserted as a free identifier
54
+ # then it refers to the binding of that identifier within whose scope
55
+ # the instance of 'syntax-rules' appears. If a literal identifier is
56
+ # inserted as a bound identifier then it is in effect renamed to prevent
57
+ # inadvertent captures of free identifiers.
58
+ #
59
+ def expand(template, matches, depth = 0, ignoring_ellipses = false)
60
+ case template
61
+
62
+ when Cons then
63
+ # If the template is a list opening with an ellipsis, expand
64
+ # the rest of the list, transcribing ellipses verbatim
65
+ return expand(template.cdr.car,
66
+ matches, depth, true) if template.car == ELLIPSIS
67
+
68
+ result, last, repeater, template_pair = nil, nil, nil, template
69
+
70
+ # Set up a closure to push forms onto the output. Needs to
71
+ # track both the head (+result+, for returning) and the tail
72
+ # (+last+, for appending new forms). Links each inserted form
73
+ # to its containing +Cons+ to enable further expansions to
74
+ # be inlined.
75
+ push = lambda do |value|
76
+ pair = Cons.new(value)
77
+ pair.hosts(value)
78
+ result ||= pair
79
+ last.cdr = pair if last
80
+ last = pair
81
+ end
82
+
83
+ # Iterate over the template, inserting matches as we go
84
+ while not template_pair.null?
85
+ cell = template_pair.car
86
+
87
+ # Increment the repetition depth if the current subtemplate
88
+ # is followed by an ellipsis and we are not treating ellipses
89
+ # as literals
90
+ followed_by_ellipsis = (template_pair.cdr.car == ELLIPSIS) && !ignoring_ellipses
91
+ dx = followed_by_ellipsis ? 1 : 0
92
+
93
+ repeater = cell if followed_by_ellipsis
94
+
95
+ # Once we reach an ellipsis, expand the preceding form
96
+ # the correct number of times depending on the +matches+
97
+ if cell == ELLIPSIS and not ignoring_ellipses
98
+ matches.expand!(repeater, depth + 1) do
99
+ push[expand(repeater, matches, depth + 1)]
100
+ end
101
+
102
+ # If the current subtemplate is not an ellipsis and is
103
+ # not followed by an ellipsis, expand it and push the
104
+ # result onto the output
105
+ else
106
+ push[expand(cell, matches, depth + dx,
107
+ ignoring_ellipses)] unless followed_by_ellipsis
108
+ end
109
+
110
+ template_pair = template_pair.cdr
111
+ end
112
+ result
113
+
114
+ when Identifier then
115
+ # If the template is a pattern variable, return the current
116
+ # match for that variable. See +Matches+ to see how repeated
117
+ # patterns are handled.
118
+ return matches.get(template) if matches.has?(template)
119
+
120
+ # Otherwise, if using unhygienic macros, return the template
121
+ # verbatim as a new symbol.
122
+ return Identifier.new(template) unless @hygienic
123
+
124
+ # If using hygienic macros: bind the identifier to the macro's
125
+ # lexical scope if it is defined there, otherwise rename it
126
+ # as appropriate to avoid clashes with variables in the
127
+ # calling scope.
128
+ @lexical_scope.defined?(template) ?
129
+ Binding.new(template, @lexical_scope, false) :
130
+ rename(template)
131
+
132
+ else
133
+ template
134
+ end
135
+ end
136
+
137
+ # Returns a new +Identifier+ that does not clash with any of the names
138
+ # visible in the <tt>Expansion</tt>'s calling scope.
139
+ def rename(id)
140
+ return id unless @calling_scope.defined?(id)
141
+ i = 1
142
+ i += 1 while @calling_scope.defined?("#{id}#{i}")
143
+ Identifier.new("#{id}#{i}")
9
144
  end
10
145
  end
11
146
 
@@ -2,71 +2,155 @@ module Heist
2
2
  class Runtime
3
3
  class Macro
4
4
 
5
+ # +Matches+ instances, with help from the +Tree+ class, are data structures
6
+ # that represent the way in which syntactic expressions match patterns found
7
+ # in +Macro+ rules. They provide an API for storing and retrieving such data,
8
+ # with the aim of removing some clutter from the macro parsing and expansion
9
+ # routines.
10
+ #
11
+ # At a pure Ruby level, a +Matches+ is a wrapper around a hash that maps
12
+ # pattern variables to +Tree+ objects, which themselves are wrappers around
13
+ # nested arrays that represent how repeated pattern matches are grouped
14
+ # together. For example, given the pattern
15
+ #
16
+ # (do ([variable init step ...] ...)
17
+ # (test expression ...)
18
+ # command ...)
19
+ #
20
+ # and the expression
21
+ #
22
+ # (do ([x 6 (- x 1)]
23
+ # [acc 1])
24
+ # ((zero? x) acc)
25
+ # (display x) (newline)
26
+ # (set! acc (* acc x)))
27
+ #
28
+ # the +Matches+ object would contain the following:
29
+ #
30
+ # @data = {
31
+ # "variable" => [ x,
32
+ # acc
33
+ # ],
34
+ #
35
+ # "init" => [ 6,
36
+ # 1
37
+ # ],
38
+ #
39
+ # "step" => [ [ (- x 1)
40
+ # ],
41
+ # []
42
+ # ],
43
+ #
44
+ # "test" => (zero? x),
45
+ #
46
+ # "expression" => [ acc
47
+ # ],
48
+ #
49
+ # "command" => [ (display x),
50
+ # (newline),
51
+ # (set! acc (* acc x))
52
+ # ]
53
+ # }
54
+ #
55
+ # Breaking this down, we see +test+ is not followed by an ellipsis in the
56
+ # pattern, and can thus only consume one item from the input expression.
57
+ # So, its match data is a single +Expression+. +variable+, +init+, +command+
58
+ # and +expression+ are all followed by a single ellipsis (+variable+ and
59
+ # +init+ appear in a list that is followed by an ellipsis), so can consume
60
+ # several values each; their match data are arrays of expressions.
61
+ #
62
+ # +step+, on the other hand, is followed by two ellipses: it itself is
63
+ # followed by an ellipsis, and <tt>step ...</tt> appears inside a list that
64
+ # is also followed by an ellipsis. If a pattern is followed by more than
65
+ # one ellipsis, the match data it generates is a tree of nested arrays
66
+ # that describe how the expressions are grouped. Here, we see that the
67
+ # expression <tt>[variable init step ...]</tt> appears twice in the input,
68
+ # so +step+'s root match element is an array of two elements. But, +step+
69
+ # does not match any data in the second appearance (<tt>[acc 1]</tt>), so
70
+ # the second element of this array is empty. The first element is an array
71
+ # containing the single match from the first appearance (<tt>(- x 1)</tt>
72
+ # in the expression <tt>[x 6 (- x 1)]</tt>).
73
+ #
74
+ # +Matches+ tries to hide many of these details so the macro routines can
75
+ # read and write to this data structure in the simplest possible terms.
76
+ #
5
77
  class Matches
6
- def initialize
7
- @data = {}
8
- @depth = 0
9
- @names = []
78
+
79
+ # A +Matches+ is initialized using a +Cons+ representing a macro pattern,
80
+ # and an array of formal keywords supplied by the +Macro+. Keywords do
81
+ # not need to store matches so they are ignored here.
82
+ def initialize(pattern, formals)
83
+ @data = {}
84
+ names = Macro.pattern_vars(pattern, formals)
85
+ names.each { |name| @data[name] = Tree.new(name) }
10
86
  end
11
87
 
12
- def depth=(depth)
13
- mark!(depth) if depth < @depth
14
- @names[depth] = [] if depth >= @depth
15
- @depth = depth
88
+ # Tells the +Matches+ object that the given pattern variables (the array
89
+ # +names+) have encountered a trailing ellipsis at the given repetition
90
+ # depth. This allows +Matches+ to group repeated patterns correctly.
91
+ def descend!(names, depth)
92
+ @data.each do |name, set|
93
+ set.descend!(depth) if names.include?(name)
94
+ end
16
95
  end
17
96
 
18
- def put(name, expression)
97
+ # Writes an expression to the +Matches+ object under the variable +name+.
98
+ # The receiver deals with storing it in the correct repetition group.
99
+ def put(name, value)
19
100
  name = name.to_s
20
- @names[@depth] << name
21
- @data[name] ||= Splice.new(name, @depth)
22
- @data[name] << expression unless expression.nil?
101
+ @data[name] << value if has?(name)
23
102
  end
24
103
 
25
- def inspecting(depth)
26
- @inspecting = true
27
- self.depth = depth
104
+ # Returns +true+ iff the receiver has a pattern variable named +name+.
105
+ def has?(name)
106
+ @data.has_key?(name.to_s)
28
107
  end
29
108
 
109
+ # Retrieves an expression from the +Matches+ under the given +name+. The
110
+ # receiver deals with pulling the expression from the right point in the
111
+ # tree; see the <tt>expand!</tt>, <tt>iterate!</tt>, <tt>Tree#read</tt>
112
+ # and <tt>Tree#shift!</tt> methods.
30
113
  def get(name)
31
- @inspecting ? @names[@depth] << name.to_s :
32
- @data[name.to_s].read
33
- end
34
-
35
- def defined?(name)
36
- @data.has_key?(name.to_s)
114
+ @data[name.to_s].read
37
115
  end
38
116
 
39
- def expand!
40
- @inspecting = false
41
- size.times { yield and iterate! }
117
+ # Takes a +template+ +Expression+, a repetition +depth+ and a block, and
118
+ # calls the block +n+ times, where +n+ is the number of matches for the
119
+ # pattern variables in the template at the given depth and the current
120
+ # iteration point in the tree. After each block call, the +Matches+ object
121
+ # moves the pointer for all the applicable pattern variables along one
122
+ # place at the given depth -- see <tt>iterate!</tt> and <tt>Tree#shift!</tt>.
123
+ def expand!(template, depth)
124
+ names = Macro.pattern_vars(template)
125
+ size(names, depth).times { yield() and iterate!(names, depth) }
42
126
  end
43
127
 
44
128
  private
45
129
 
46
- def mark!(depth)
47
- d = @depth
48
- while @names[d]
49
- @names[d].uniq.each { |name| @data[name].mark!(depth) }
50
- d += 1
130
+ # Returns the number of matched expressions available for the given set
131
+ # of pattern variables at the given depth, at the current iteration point.
132
+ # An exception is raised if the names do not all yield the same number
133
+ # of matches; this indicates a piece of mismatched syntax that cannot be
134
+ # expanded correctly.
135
+ def size(names, depth)
136
+ sizes = []
137
+ @data.each do |name, tree|
138
+ sizes << tree.size(depth) if names.include?(name)
51
139
  end
52
- end
53
-
54
- def size
55
- names = @names[@depth].uniq
56
- splices = @data.select { |k,v| names.include?(k.to_s) }
57
- sizes = splices.map { |pair| pair.last.size(@depth) }.uniq
58
140
 
59
- return 0 if sizes.empty?
141
+ sizes.uniq!
60
142
  return sizes.first if sizes.size == 1
61
143
 
62
- expressions = splices.map { |pair| '"' + pair.last.to_s(@depth) + '"' } * ', '
63
144
  raise MacroTemplateMismatch.new(
64
- "Macro could not be expanded: expressions #{expressions} are of different sizes")
145
+ "Macro could not be expanded: mismatched repetition patterns")
65
146
  end
66
147
 
67
- def iterate!
68
- @data.each do |name, splice|
69
- splice.shift!(@depth) if @names[@depth].include?(name)
148
+ # Shifts the tree pointer (see <tt>Tree#shift!</tt>) for all the given
149
+ # +names+ along one place at the given +depth+. This is used while
150
+ # expanding repeated patterns using <tt>expand!</tt>.
151
+ def iterate!(names, depth)
152
+ @data.each do |name, tree|
153
+ tree.shift!(depth) if names.include?(name)
70
154
  end
71
155
  end
72
156
  end
@@ -0,0 +1,141 @@
1
+ module Heist
2
+ class Runtime
3
+ class Macro
4
+
5
+ # <tt>Tree</tt>s are used by instances of +Matches+ to store expressions
6
+ # matched by macro patterns. Patterns may contain patterns that repeat
7
+ # (indicated by following the pattern with an ellipsis), and these repetitions
8
+ # may be nested. +Tree+ instances store expressions in a set of nested arrays
9
+ # that match the repetition structure of the macro pattern being matched.
10
+ # See +Matches+ for a fuller explanation.
11
+ #
12
+ # Every +Tree+ contains an array called <tt>@data</tt> that stores the
13
+ # expressions matched by a pattern variable, a variable <tt>@depth</tt> that
14
+ # stores the maximum repetition depth of the tree (a non-repeated pattern
15
+ # has depth zero), and an array <tt>@indexes</tt> which is used to maintain
16
+ # a list of array indexes that point to the current read position in the
17
+ # tree while a macro is being expanded. For example, taking the pattern from
18
+ # the +Matches+ example:
19
+ #
20
+ # (do ([variable init step ...] ...)
21
+ # (test expression ...)
22
+ # command ...)
23
+ #
24
+ # Say we had the following expression (not entirely valid (do) syntax, but
25
+ # compatible with the above pattern):
26
+ #
27
+ # (do ([x 6 (- x 1) (- acc 1)]
28
+ # [y 5]
29
+ # [acc 1 (* x acc)])
30
+ # ((zero? x) acc)
31
+ # (display x) (newline)
32
+ # (set! acc (* acc x)))
33
+ #
34
+ # The resulting +Matches+ object would contain the following data for the
35
+ # variable +step+:
36
+ #
37
+ # "step" => [ [ (- x 1),
38
+ # (- acc 1)
39
+ # ],
40
+ #
41
+ # [],
42
+ #
43
+ # [ (* x acc)
44
+ # ]
45
+ # ]
46
+ #
47
+ # That is, the outermost repetition <tt>[variable init step ...]</tt>
48
+ # occurs three times; the first appearance includes two matches for
49
+ # <tt>step ...</tt>, the second no matches and the third one match. With
50
+ # this data, an <tt>@indexes</tt> state of <tt>[0,0]</tt> would read
51
+ # <tt>(- x 1)</tt>, a state of <tt>[0,1]</tt> would read <tt>(- acc 1)</tt>,
52
+ # and <tt>[2,0]</tt> would read <tt>(* x acc)</tt>; the last instructing
53
+ # the +Tree+ to get the third element of the root array, then the first
54
+ # element of _that_ array to find the right value.
55
+ #
56
+ # In practice, all +Tree+ objects have an extra array around the data as
57
+ # presented above, to make the no-repetition case consistent with the
58
+ # representation for arbitrarily nested repetitions. That is, the methods
59
+ # in this class expect to read from an array in general, so the representation
60
+ # of a non-repeating pattern is just a single-element array to simplify
61
+ # the implementation of these methods in the general case. The first item
62
+ # in the <tt>@indexes</tt> array is always zero. We could remove this extra
63
+ # container and add a type check on <tt>@data</tt> when reading, but the
64
+ # current implementation seems more elegant for the moment.
65
+ #
66
+ class Tree
67
+
68
+ # A +Tree+ is initialized using the name of the pattern variable it is
69
+ # associated with (for debugging purposes).
70
+ def initialize(name)
71
+ @name = name
72
+ @data = []
73
+ @depth = 0
74
+ end
75
+
76
+ # Tells the receiving +Tree+ that its pattern variable has been visited
77
+ # at a repetition depth of +depth+ during pattern matching. This allocates
78
+ # a new empty array at an appropriate place in the tree to store matches
79
+ # (or groups of matches) if any are encountered. Calls to this method are
80
+ # also used to determine the tree's maximum depth.
81
+ def descend!(depth)
82
+ tail(depth-1) << []
83
+ @depth = depth if depth > @depth
84
+ end
85
+
86
+ # Pushes an expression onto the end of the final branch of the tree. All
87
+ # expressions should exist at the same depth (the tree's maximum depth),
88
+ # seeing as the pattern should be followed by the same number of ellipses
89
+ # every time it is encountered.
90
+ def <<(value)
91
+ return if Cons::NULL == value
92
+ tail(@depth) << value
93
+ end
94
+
95
+ # Returns the expression at the current read position as instructed by
96
+ # the <tt>@indexes</tt> list.
97
+ def read
98
+ current(@depth)[indexes[@depth]]
99
+ end
100
+
101
+ # Shifts the read position at the given +depth+ along by one, by adding 1
102
+ # to one of the values in <tt>@indexes</tt>. The macro expander calls this
103
+ # while walking a template to iterate over repetition branches.
104
+ def shift!(depth)
105
+ indexes[depth] += 1
106
+ indexes[depth] = 0 if indexes[depth] >= current(depth).size
107
+ end
108
+
109
+ # Returns the number of matches (or groups of matches) on the current
110
+ # read branch at the given +depth+. Returns zero if no branch exists at
111
+ # the given indexes.
112
+ def size(depth)
113
+ current(depth).size rescue 0
114
+ end
115
+
116
+ private
117
+
118
+ # Returns the rightmost branch of the tree at the given +depth+. Used
119
+ # when allocating new branches as repetition blocks are entered.
120
+ def tail(depth)
121
+ (0...depth).inject(@data) { |list, d| list.last }
122
+ end
123
+
124
+ # Returns the current read branch at the given +depth+, as instructed
125
+ # by the <tt>@indexes</tt> list.
126
+ def current(depth)
127
+ indexes[0...depth].inject(@data) { |list, i| list[i] }
128
+ end
129
+
130
+ # Initializes the <tt>@indexes</tt> list once the maximum depth is
131
+ # known, and returns the list thereafter.
132
+ def indexes
133
+ @indexes ||= (0..@depth).map { 0 }
134
+ @indexes
135
+ end
136
+ end
137
+
138
+ end
139
+ end
140
+ end
141
+