bud 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,367 @@
1
+ require 'bud/lattice-core'
2
+
3
+ class Bud::MaxLattice < Bud::Lattice
4
+ wrapper_name :lmax
5
+
6
+ def initialize(i=nil)
7
+ unless i.nil? || i.class <= Comparable
8
+ reject_input(i)
9
+ end
10
+ @v = i
11
+ end
12
+
13
+ def merge(i)
14
+ i_val = i.reveal
15
+ (@v.nil? || (i_val != nil && i_val > @v)) ? i : self
16
+ end
17
+
18
+ morph :gt do |k|
19
+ Bud::BoolLattice.new(!!(@v && @v > k))
20
+ end
21
+
22
+ morph :gt_eq do |k|
23
+ Bud::BoolLattice.new(!!(@v && @v >= k))
24
+ end
25
+
26
+ # XXX: support MaxLattice input?
27
+ morph :+ do |i|
28
+ # Since bottom of lmax is negative infinity, + is a no-op
29
+ return self if @v.nil?
30
+ reject_input(i, "+") unless i.class <= Numeric
31
+ self.class.new(@v + i)
32
+ end
33
+
34
+ morph :min_of do |i|
35
+ reject_input(i, "min_of") unless i.class <= Numeric
36
+ (@v.nil? || i < @v) ? self.class.new(i) : self
37
+ end
38
+
39
+ def lt_eq(k)
40
+ Bud::BoolLattice.new(!!(@v && @v <= k))
41
+ end
42
+ end
43
+
44
+ class Bud::MinLattice < Bud::Lattice
45
+ wrapper_name :lmin
46
+
47
+ def initialize(i=nil)
48
+ unless i.nil? || i.class <= Comparable
49
+ reject_input(i)
50
+ end
51
+ @v = i
52
+ end
53
+
54
+ def merge(i)
55
+ i_val = i.reveal
56
+ (@v.nil? || (i_val != nil && i_val < @v)) ? i : self
57
+ end
58
+
59
+ morph :lt do |k|
60
+ Bud::BoolLattice.new(!!(@v && @v < k))
61
+ end
62
+
63
+ # XXX: support MinLattice input
64
+ morph :+ do |i|
65
+ # Since bottom of lmin is infinity, + is a no-op
66
+ return self if @v.nil?
67
+ reject_input(i, "+") unless i.class <= Numeric
68
+ self.class.new(@v + i)
69
+ end
70
+ end
71
+
72
+ # XXX: consider creating two fixed ("interned") values for true and false.
73
+ class Bud::BoolLattice < Bud::Lattice
74
+ wrapper_name :lbool
75
+
76
+ def initialize(i=false)
77
+ reject_input(i) unless [true, false].include? i
78
+ @v = i
79
+ end
80
+
81
+ def merge(i)
82
+ self.class.new(@v || i.reveal)
83
+ end
84
+
85
+ # XXX: ugly syntax
86
+ morph :when_true do |&blk|
87
+ blk.call if @v
88
+ end
89
+ end
90
+
91
+ class Bud::MapLattice < Bud::Lattice
92
+ wrapper_name :lmap
93
+
94
+ def initialize(i={})
95
+ reject_input(i) unless i.class == Hash
96
+ i.each_pair do |k,val|
97
+ reject_input(i) if k.class <= Bud::Lattice
98
+ reject_input(i) unless val.class <= Bud::Lattice
99
+ end
100
+ @v = i
101
+ end
102
+
103
+ def merge(i)
104
+ rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
105
+ lhs_v.merge(rhs_v)
106
+ end
107
+ wrap_unsafe(rv)
108
+ end
109
+
110
+ def inspect
111
+ "<#{self.class.wrapper}: #{@v.inspect}>"
112
+ end
113
+
114
+ # XXX: If the key is not in the map, we would like to return some generic
115
+ # "bottom" value that is shared by all lattice types. Unfortunately, such a
116
+ # value does not exist, so we need the caller to tell us which class to use as
117
+ # an optional second argument (if omitted, fetching a non-existent key yields
118
+ # a runtime exception). Another alternative would be to specify the type of
119
+ # the map's values when the lmap is declared, but that hinders code reuse.
120
+ morph :at do |k, *args|
121
+ if @v.has_key? k
122
+ @v[k]
123
+ else
124
+ raise Bud::Error if args.empty?
125
+ args.first.new
126
+ end
127
+ end
128
+
129
+ morph :apply_morph do |sym, *args|
130
+ raise Bud::Error unless Bud::Lattice.global_morphs.include? sym
131
+ do_apply(sym, args)
132
+ end
133
+
134
+ monotone :apply_monotone do |sym, *args|
135
+ raise Bud::Error unless Bud::Lattice.global_mfuncs.include? sym
136
+ do_apply(sym, args)
137
+ end
138
+
139
+ def do_apply(sym, args)
140
+ rv = {}
141
+ @v.each_pair do |k, val|
142
+ res = val.send(sym, *args)
143
+ raise Bud::Error unless res.kind_of? Bud::Lattice
144
+ rv[k] = res
145
+ end
146
+ wrap_unsafe(rv)
147
+ end
148
+
149
+ morph :key? do |k|
150
+ Bud::BoolLattice.new(@v.has_key? k)
151
+ end
152
+
153
+ morph :key_set do
154
+ Bud::SetLattice.new(@v.keys)
155
+ end
156
+
157
+ monotone :size do
158
+ Bud::MaxLattice.new(@v.size)
159
+ end
160
+
161
+ morph :intersect do |i|
162
+ i_tbl = i.reveal
163
+ # Scan the smaller map, probe the larger one
164
+ scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
165
+ rv = {}
166
+ scan.each do |k,val|
167
+ rv[k] = val.merge(probe[k]) if probe.has_key? k
168
+ end
169
+ wrap_unsafe(rv)
170
+ end
171
+
172
+ # Produce a Bloom collection (array of tuples) from this lmap, optionally
173
+ # applying a user-provided code block to each (k,v) pair in turn. Note that
174
+ # this is slightly different from how projection over an lmap would work: we
175
+ # return an array, whereas projection would return an lmap.
176
+ morph :to_collection do |&blk|
177
+ @v.map(&blk)
178
+ end
179
+
180
+ # Return true if this map is smaller than or equal to the given
181
+ # map. "x" is smaller than or equal to "y" if:
182
+ # (a) every key in "x" also appears in "y"
183
+ # (b) for every key k in "x", x[k] <= y[k]
184
+ #
185
+ # NB: For this to be a morphism, we require that (a) "self" is deflationary
186
+ # (or fixed) (b) the input lattice value is inflationary (or fixed). We
187
+ # currently don't have a way to express (a) in the type system.
188
+ def lt_eq(i)
189
+ reject_input(i, "lt_eq") unless i.class <= self.class
190
+
191
+ @v.each do |k, v|
192
+ unless i.key?(k).reveal == true
193
+ return Bud::BoolLattice.new(false)
194
+ end
195
+ unless v.lt_eq(i.at(k).reveal).reveal == true
196
+ return Bud::BoolLattice.new(false)
197
+ end
198
+ end
199
+
200
+ return Bud::BoolLattice.new(true)
201
+ end
202
+ end
203
+
204
+ # A set lattice contains zero or more primitive (non-lattice) values.
205
+ class Bud::SetLattice < Bud::Lattice
206
+ wrapper_name :lset
207
+
208
+ def initialize(i=[])
209
+ reject_input(i) if i.any? {|e| e.kind_of? Bud::Lattice}
210
+
211
+ i = Set.new(i) unless i.kind_of? Set
212
+ @v = i
213
+ end
214
+
215
+ def merge(i)
216
+ wrap_unsafe(@v | i.reveal)
217
+ end
218
+
219
+ morph :intersect do |i|
220
+ wrap_unsafe(@v & i.reveal)
221
+ end
222
+
223
+ morph :product do |i, &blk|
224
+ rv = Set.new
225
+ @v.each do |a|
226
+ if blk.nil?
227
+ t = i.pro {|b| [a,b]}
228
+ else
229
+ t = i.pro {|b| blk.call(a, b)}
230
+ end
231
+ rv.merge(t.reveal)
232
+ end
233
+ wrap_unsafe(rv)
234
+ end
235
+
236
+ morph :contains? do |i|
237
+ Bud::BoolLattice.new(@v.member? i)
238
+ end
239
+
240
+ morph :pro do |&blk|
241
+ # We don't use Set#map, since it returns an Array (ugh).
242
+ rv = Set.new
243
+ @v.each do |t|
244
+ val = blk.call(t)
245
+ rv << val unless val.nil?
246
+ end
247
+ wrap_unsafe(rv)
248
+ end
249
+
250
+ monotone :size do
251
+ Bud::MaxLattice.new(@v.size)
252
+ end
253
+
254
+ # Assuming that this set contains tuples (arrays) as elements, this performs
255
+ # an equijoin between the current lattice and i. The join predicate is
256
+ # "self_t[lhs_idx] == i_t[rhs_idx]", for all tuples self_t and i_t in self and
257
+ # i, respectively. The return value is the result of passing pairs of join
258
+ # tuples to the user-supplied block.
259
+ morph :eqjoin do |i, lhs_idx, rhs_idx, &blk|
260
+ rv = Set.new
261
+ @v.each do |a|
262
+ i.probe(rhs_idx, a[lhs_idx]).each do |b|
263
+ rv << blk.call(a, b)
264
+ end
265
+ end
266
+ wrap_unsafe(rv)
267
+ end
268
+
269
+ # Assuming that this set contains tuples (arrays), this returns a list of
270
+ # tuples (possibly empty) whose idx'th column has the value "v".
271
+ # XXX: we assume probe(idx, v) will only be called for a single value of idx!
272
+ def probe(idx, v)
273
+ @ht ||= build_ht(idx)
274
+ return @ht[v] || []
275
+ end
276
+
277
+ private
278
+ def build_ht(idx)
279
+ rv = {}
280
+ @v.each do |i|
281
+ field = i[idx]
282
+ rv[field] ||= []
283
+ rv[field] << i
284
+ end
285
+ rv
286
+ end
287
+ end
288
+
289
+ # A set that admits only non-negative numbers. This allows "sum" to be an
290
+ # order-preserving map. Note that this does duplicate elimination on its input,
291
+ # so it actually computes "SUM(DISTINCT ...)" in SQL.
292
+ #
293
+ # XXX: for methods that take a user-provided code block, we need to ensure that
294
+ # the set continues to contain only non-negative numbers.
295
+ class Bud::PositiveSetLattice < Bud::SetLattice
296
+ wrapper_name :lpset
297
+
298
+ def initialize(i=[])
299
+ super
300
+ @v.each do |n|
301
+ reject_input(i) unless n.class <= Numeric
302
+ reject_input(i) if n < 0
303
+ end
304
+ end
305
+
306
+ monotone :pos_sum do
307
+ @sum = @v.reduce(:+) if @sum.nil?
308
+ Bud::MaxLattice.new(@sum)
309
+ end
310
+ end
311
+
312
+ # XXX: Should this be just syntax sugar for a map lattice instead?
313
+ class Bud::BagLattice < Bud::Lattice
314
+ wrapper_name :lbag
315
+
316
+ def initialize(i={})
317
+ reject_input(i) unless i.class <= Hash
318
+ i.each do |k, mult|
319
+ reject_input(i) if k.class <= Bud::Lattice
320
+ reject_input(i) unless (mult.class <= Integer && mult > 0)
321
+ end
322
+ @v = i
323
+ end
324
+
325
+ # Note that for merge to be idempotent, we need to use the traditional
326
+ # definition of multiset union (per-element max of multiplicities, rather than
327
+ # sum of multiplicities).
328
+ def merge(i)
329
+ rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
330
+ [lhs_v, rhs_v].max
331
+ end
332
+ wrap_unsafe(rv)
333
+ end
334
+
335
+ morph :intersect do |i|
336
+ i_tbl = i.reveal
337
+ # Scan the smaller one, probe the larger one
338
+ scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
339
+ rv = {}
340
+ scan.each do |k,val|
341
+ rv[k] = [val, probe[k]].min if probe.has_key? k
342
+ end
343
+ wrap_unsafe(rv)
344
+ end
345
+
346
+ morph :multiplicity do |k|
347
+ rv = @v[k]
348
+ rv ||= 0
349
+ Bud::MaxLattice.new(rv)
350
+ end
351
+
352
+ morph :+ do |i|
353
+ rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
354
+ lhs_v + rhs_v
355
+ end
356
+ self.class.new(rv)
357
+ end
358
+
359
+ morph :contains? do |i|
360
+ Bud::BoolLattice.new(@v.has_key? i)
361
+ end
362
+
363
+ monotone :size do
364
+ @size = @v.values.reduce(:+) if @size.nil?
365
+ Bud::MaxLattice.new(@size)
366
+ end
367
+ end
@@ -11,6 +11,7 @@ class Class
11
11
  end
12
12
 
13
13
  # FIXME: Use a subclass of Struct.
14
+ # FIXME: Should likely override eql? as well
14
15
  class Struct
15
16
  def <=>(o)
16
17
  if o.class == self.class
@@ -139,7 +140,7 @@ class Module
139
140
  end
140
141
 
141
142
  # bloom statements to be registered with Bud runtime. optional +block_name+
142
- # allows for multiple bloom blocks per module, and overriding
143
+ # allows for multiple bloom blocks per module and method overriding
143
144
  def bloom(block_name=nil, &block)
144
145
  # If no block name was specified, generate a unique name
145
146
  if block_name.nil?
@@ -148,7 +149,7 @@ class Module
148
149
  @block_id += 1
149
150
  else
150
151
  unless block_name.class <= Symbol
151
- raise Bud::CompileError, "bloom block names must be a symbol: #{block_name}"
152
+ raise Bud::CompileError, "block name must be a symbol: #{block_name}"
152
153
  end
153
154
  end
154
155
 
@@ -161,15 +162,24 @@ class Module
161
162
  # module; this indicates a likely programmer error.
162
163
  if instance_methods(false).include?(meth_name) ||
163
164
  instance_methods(false).include?(meth_name.to_sym)
164
- raise Bud::CompileError, "duplicate named bloom block: '#{block_name}' in #{self}"
165
+ raise Bud::CompileError, "duplicate block name: '#{block_name}' in #{self}"
165
166
  end
166
167
  ast = Source.read_block(caller[0]) # pass in caller's location via backtrace
168
+
167
169
  # ast corresponds only to the statements of the block. Wrap it in a method
168
170
  # definition for backward compatibility for now.
169
- # First wrap ast in a block if it is only a single statement
170
- ast = s(:block) if ast.nil?
171
- ast = s(:block, ast) unless ast.sexp_type == :block
172
- ast = s(:defn, meth_name.to_sym, s(:args), s(:scope, ast))
171
+
172
+ # If the block contained multiple statements, the AST will have a top-level
173
+ # :block node. Since ruby_parser ASTs for method definitions don't contain
174
+ # such a node, remove it.
175
+ if ast.nil?
176
+ ast = []
177
+ elsif ast.sexp_type == :block
178
+ ast = ast.sexp_body
179
+ else
180
+ ast = [ast]
181
+ end
182
+ ast = s(:defn, meth_name.to_sym, s(:args), *ast)
173
183
  unless self.respond_to? :__bloom_asts__
174
184
  def self.__bloom_asts__
175
185
  @__bloom_asts__ ||= {}
@@ -180,11 +190,11 @@ class Module
180
190
  define_method(meth_name.to_sym, &block)
181
191
  end
182
192
 
183
- private
184
193
  # Return a string with a version of the class name appropriate for embedding
185
194
  # into a method name. Annoyingly, if you define class X nested inside
186
195
  # class/module Y, X's class name is the string "Y::X". We don't want to define
187
196
  # method names with colons in them, so just return "X" instead.
197
+ private
188
198
  def self.get_class_name(klass)
189
199
  (klass.name.nil? or klass.name == "") \
190
200
  ? "Anon#{klass.object_id}" \
data/lib/bud/rewrite.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  require 'rubygems'
2
- require 'ruby2ruby'
3
- require 'set'
4
2
 
5
3
  class RuleRewriter < Ruby2Ruby # :nodoc: all
6
4
  attr_accessor :rule_indx, :rules, :depends
@@ -8,10 +6,11 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
8
6
  OP_LIST = Set.new([:<<, :<, :<=])
9
7
  TEMP_OP_LIST = Set.new([:-@, :~, :+@])
10
8
  MONOTONE_WHITELIST = Set.new([:==, :+, :<=, :-, :<, :>, :*, :~,
11
- :pairs, :matches, :combos, :flatten,
12
- :lefts, :rights, :map, :flat_map, :pro,
9
+ :pairs, :matches, :combos, :flatten, :new,
10
+ :lefts, :rights, :map, :flat_map, :pro, :merge,
13
11
  :cols, :key_cols, :val_cols, :payloads, :lambda,
14
- :tabname, :ip_port, :port, :ip, :int_ip_port])
12
+ :tabname, :ip_port, :port, :ip, :int_ip_port,
13
+ :current_value])
15
14
 
16
15
  def initialize(seed, bud_instance)
17
16
  @bud_instance = bud_instance
@@ -21,7 +20,8 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
21
20
  @collect = false
22
21
  @rules = []
23
22
  @depends = []
24
- @nm_funcs_called = false
23
+ @iter_stack = []
24
+ @refs_in_body = Set.new
25
25
  super()
26
26
  end
27
27
 
@@ -29,6 +29,7 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
29
29
  def resolve(obj, prefix, name)
30
30
  qn = prefix ? prefix + "." + name.to_s : name.to_s
31
31
  return [:collection, qn, obj.tables[name]] if obj.tables.has_key? name
32
+ return [:lattice, qn, obj.lattices[name]] if obj.lattices.has_key? name
32
33
 
33
34
  # does name refer to an import name?
34
35
  iobj = obj.import_instance name
@@ -38,12 +39,13 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
38
39
  end
39
40
 
40
41
  def exp_id_type(recv, name, args) # call only if sexp type is :call
41
- return $not_id unless args.size == 1
42
+ return $not_id unless args.empty?
42
43
  ty = $not_id
43
44
  if recv
44
45
  if recv.first == :call
45
- # possibly nested reference.
46
- rty, rqn, robj = exp_id_type(recv[1], recv[2], recv[3]) # rty, rqn, .. = receiver's type, qual name etc.
46
+ # possibly nested reference
47
+ # rty, rqn, .. = receiver's type, qual name etc.
48
+ rty, rqn, robj = exp_id_type(recv[1], recv[2], recv[3..-1])
47
49
  ty = resolve(robj, rqn, name) if rty == :import
48
50
  end
49
51
  else
@@ -56,26 +58,88 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
56
58
  def call_to_id(exp)
57
59
  # convert a series of nested calls, a sexp of the form
58
60
  # s(:call,
59
- # s(:call, s(:call, nil, :a, s(:arglist)), :b, s(:arglist)),
60
- # :bar ,
61
- # s(:arglist)))
61
+ # s(:call, s(:call, nil, :a), :b),
62
+ # :bar))
62
63
  # to the string "a.b.bar"
63
- raise "Malformed exp: #{exp}" unless (exp[0] == :call)
64
- _, recv, op, args = exp
64
+ raise Bud::CompileError, "malformed exp: #{exp}" unless exp.sexp_type == :call
65
+ _, recv, op = exp
65
66
  return recv.nil? ? op.to_s : call_to_id(recv) + "." + op.to_s
66
67
  end
67
68
 
69
+ # We want to distinguish between collection dependencies that occur in
70
+ # top-level expressions versus collections that are referenced inside rule
71
+ # bodies. We just want to set a flag when processing the :iter body, but
72
+ # annoyingly, it seems that this is hard to do without duplicating the
73
+ # implementation of process_iter().
74
+ #
75
+ # XXX: the whole RuleRewriter approach is wrong because it conflates
76
+ # converting ASTs to strings with doing analysis on ASTs. Those should be
77
+ # split into two separate passes.
78
+ def process_iter(exp)
79
+ iter = process exp.shift
80
+ args = exp.shift
81
+
82
+ @iter_stack.push(true)
83
+ body = exp.empty? ? nil : process(exp.shift)
84
+ @iter_stack.pop
85
+
86
+ do_process_iter(iter, args, body)
87
+ end
88
+
89
+ def do_process_iter(iter, args, body)
90
+ args = case args
91
+ when 0 then
92
+ " ||"
93
+ else
94
+ a = process(args)[1..-2]
95
+ a = " |#{a}|" unless a.empty?
96
+ a
97
+ end
98
+
99
+ b, e = if iter == "END" then
100
+ [ "{", "}" ]
101
+ else
102
+ [ "do", "end" ]
103
+ end
104
+
105
+ iter.sub!(/\(\)$/, '')
106
+
107
+ # REFACTOR: ugh
108
+ result = []
109
+ result << "#{iter} {"
110
+ result << args
111
+ if body then
112
+ result << " #{body.strip} "
113
+ else
114
+ result << ' '
115
+ end
116
+ result << "}"
117
+ result = result.join
118
+ return result if result !~ /\n/ and result.size < LINE_LENGTH
119
+
120
+ result = []
121
+ result << "#{iter} #{b}"
122
+ result << args
123
+ result << "\n"
124
+ if body then
125
+ result << indent(body.strip)
126
+ result << "\n"
127
+ end
128
+ result << e
129
+ result.join
130
+ end
131
+
68
132
  def process_call(exp)
69
- recv, op, args = exp
70
- if OP_LIST.include?(op) and @context[1] == :block and @context.length == 4
71
- # NB: context.length is 4 when see a method call at the top-level of a
133
+ recv, op, *args = exp
134
+ if OP_LIST.include?(op) and @context[1] == :defn and @context.length == 2
135
+ # NB: context.length is 2 when we see a method call at the top-level of a
72
136
  # :defn block -- this is where we expect Bloom statements to appear
73
137
  do_rule(exp)
74
138
  elsif op == :notin
75
139
  # Special case. In the rule "z <= x.notin(y)", z depends positively on x,
76
140
  # but negatively on y. See further explanation in the "else" section for
77
141
  # why this is a special case.
78
- notintab = call_to_id(args[1]) # args expected to be of the form (:arglist (:call nil :y ...))
142
+ notintab = call_to_id(args[0]) # args expected to be of the form (:call nil :y ...)
79
143
  @tables[notintab.to_s] = true # "true" denotes non-monotonic dependency
80
144
  super
81
145
  else
@@ -88,15 +152,16 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
88
152
  # a.b.c.notin(d.e.f), we register a non-monotonic dependency of lhs on
89
153
  # "d.e.f", not with "a.b.c"
90
154
  ty, qn, _ = exp_id_type(recv, op, args) # qn = qualified name
91
- if ty == :collection
155
+ if ty == :collection or ty == :lattice
92
156
  (@tables[qn] = @nm if @collect) unless @tables[qn]
157
+ @refs_in_body << qn unless @iter_stack.empty?
93
158
  #elsif ty == :import .. do nothing
94
159
  elsif ty == :not_coll_id
95
160
  # Check if receiver is a collection, and further if the current exp
96
161
  # represents a field lookup
97
162
  op_is_field_name = false
98
163
  if recv and recv.first == :call
99
- rty, _, robj = exp_id_type(recv[1], recv[2], recv[3])
164
+ rty, _, robj = exp_id_type(recv[1], recv[2], recv[3..-1])
100
165
  if rty == :collection
101
166
  cols = robj.cols
102
167
  op_is_field_name = true if cols and cols.include?(op)
@@ -104,18 +169,14 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
104
169
  end
105
170
  # For CALM analysis, mark deletion rules as non-monotonic
106
171
  @nm = true if op == :-@
172
+
173
+ # Don't worry about monotone ops, table names, table.attr calls, or
174
+ # accessors of iterator variables
107
175
  if recv
108
- # Don't worry about monotone ops, table names, table.attr calls, or
109
- # accessors of iterator variables
110
176
  unless RuleRewriter.is_monotone(op) or op_is_field_name or
111
177
  recv.first == :lvar or op.to_s.start_with?("__")
112
178
  @nm = true
113
179
  end
114
- else
115
- # Function called (implicit receiver = Bud instance) in a user-defined
116
- # code block. Check if it is non-monotonic (like budtime, that
117
- # produces a new value every time it is called)
118
- @nm_funcs_called = true unless RuleRewriter.is_monotone(op)
119
180
  end
120
181
  end
121
182
  if TEMP_OP_LIST.include? op
@@ -126,25 +187,35 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
126
187
  end
127
188
 
128
189
  def self.is_monotone(op)
129
- MONOTONE_WHITELIST.include?(op)
190
+ MONOTONE_WHITELIST.include?(op) ||
191
+ is_morphism(op) ||
192
+ Bud::Lattice.global_mfuncs.include?(op)
130
193
  end
131
194
 
132
- # Rewrite top-level rhs array literals to lambdas. During wiring, these are
133
- # turned into coll_expr collections.
195
+ def self.is_morphism(op)
196
+ Bud::Lattice.global_morphs.include?(op)
197
+ end
198
+
199
+ # Rewrite top-level rhs array and hash literals to lambdas. During wiring,
200
+ # these are turned into coll_expr collections.
134
201
  def lambda_rewrite(rhs)
135
202
  # the <= case
136
- if rhs[0] == :array
137
- return s(:iter, s(:call, nil, :lambda, s(:arglist)), nil, rhs)
203
+ if is_coll_literal(rhs[0])
204
+ return s(:iter, s(:call, nil, :lambda), s(:args), rhs)
138
205
  # the superator case
139
206
  elsif rhs[0] == :call \
140
- and rhs[1] and rhs[1][0] and rhs[1][0] == :array \
207
+ and rhs[1] and rhs[1][0] and is_coll_literal(rhs[1][0]) \
141
208
  and rhs[2] and (rhs[2] == :+@ or rhs[2] == :-@ or rhs[2] == :~@)
142
- return s(rhs[0], s(:iter, s(:call, nil, :lambda, s(:arglist)), nil, rhs[1]), rhs[2], rhs[3])
209
+ return s(rhs[0], s(:iter, s(:call, nil, :lambda), s(:args), rhs[1]), rhs[2], *rhs[3..-1])
143
210
  else
144
211
  return rhs
145
212
  end
146
213
  end
147
214
 
215
+ def is_coll_literal(e)
216
+ [:array, :hash].include? e
217
+ end
218
+
148
219
  def collect_rhs(exp)
149
220
  exp = lambda_rewrite(exp)
150
221
 
@@ -155,13 +226,13 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
155
226
  end
156
227
 
157
228
  def reset_instance_vars
229
+ @refs_in_body = Set.new
158
230
  @tables = {}
159
231
  @nm = false
160
- @nm_funcs_called = false
161
232
  @temp_op = nil
162
233
  end
163
234
 
164
- def record_rule(lhs, op, rhs_pos, rhs)
235
+ def record_rule(lhs, op, rhs_pos, rhs, unsafe_funcs_called)
165
236
  rule_txt_orig = "#{lhs} #{op} (#{rhs})"
166
237
  rule_txt = "#{lhs} #{op} (#{rhs_pos})"
167
238
  if op == :<
@@ -170,9 +241,11 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
170
241
  op = op.to_s
171
242
  end
172
243
 
173
- @rules << [@bud_instance, @rule_indx, lhs, op, rule_txt, rule_txt_orig, @nm_funcs_called]
244
+ @rules << [@bud_instance, @rule_indx, lhs, op, rule_txt,
245
+ rule_txt_orig, unsafe_funcs_called]
174
246
  @tables.each_pair do |t, nm|
175
- @depends << [@bud_instance, @rule_indx, lhs, op, t, nm]
247
+ in_rule_body = @refs_in_body.include? t
248
+ @depends << [@bud_instance, @rule_indx, lhs, op, t, nm, in_rule_body]
176
249
  end
177
250
 
178
251
  reset_instance_vars
@@ -180,17 +253,14 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
180
253
  end
181
254
 
182
255
  def do_rule(exp)
183
- lhs = process exp[0]
184
- op = exp[1]
185
- rhs_ast = map2pro(exp[2])
186
-
187
- # Remove the outer s(:arglist) from the rhs AST. An AST subtree rooted with
188
- # s(:arglist) is not really sensible and it causes Ruby2Ruby < 1.3.1 to
189
- # misbehave (for example, s(:arglist, s(:hash, ...)) is misparsed.
190
- raise Bud::CompileError unless rhs_ast.sexp_type == :arglist
191
- rhs_ast = rhs_ast[1]
256
+ lhs, op, rhs_ast = exp
257
+ lhs = process(lhs)
192
258
 
259
+ rhs_ast = MapRewriter.new.process(rhs_ast)
193
260
  rhs_ast = RenameRewriter.new(@bud_instance).process(rhs_ast)
261
+ rhs_ast = LatticeRefRewriter.new(@bud_instance).process(rhs_ast)
262
+ ufr = UnsafeFuncRewriter.new
263
+ rhs_ast = ufr.process(rhs_ast)
194
264
 
195
265
  if @bud_instance.options[:no_attr_rewrite]
196
266
  rhs = collect_rhs(rhs_ast)
@@ -203,35 +273,37 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
203
273
  reset_instance_vars
204
274
  rhs_pos = collect_rhs(AttrNameRewriter.new(@bud_instance).process(rhs_ast_dup))
205
275
  end
206
- record_rule(lhs, op, rhs_pos, rhs)
276
+ record_rule(lhs, op, rhs_pos, rhs, ufr.unsafe_func_called)
207
277
  drain(exp)
208
278
  end
209
279
 
210
- # We want to rewrite "map" calls on BudCollections to "pro" calls. It is hard
211
- # to do this precisely (issue #225), so we just replace map calls liberally
212
- # and define Enumerable#pro as an alias for "map".
213
- def map2pro(exp)
214
- # the non-superator case
215
- if exp[1] and exp[1][0] and exp[1][0] == :iter \
216
- and exp[1][1] and exp[1][1][1] and exp[1][1][1][0] == :call
217
- if exp[1][1][2] == :map
218
- exp[1][1][2] = :pro
219
- end
220
- # the superator case
221
- elsif exp[1] and exp[1][0] == :call and (exp[1][2] == :~@ or exp[1][2] == :+@ or exp[1][2] == :-@)
222
- if exp[1][1] and exp[1][1][1] and exp[1][1][1][2] == :map
223
- exp[1][1][1][2] = :pro
224
- end
225
- end
226
- exp
227
- end
228
-
229
280
  def drain(exp)
230
281
  exp.shift until exp.empty?
231
282
  return ""
232
283
  end
233
284
  end
234
285
 
286
+ # We want to rewrite "map" calls on BudCollections to "pro" calls. It is hard
287
+ # to do this precisely (issue #225), so we just replace map calls liberally
288
+ # and define Enumerable#pro as an alias for "map".
289
+ class MapRewriter < SexpProcessor
290
+ def initialize
291
+ super
292
+ self.require_empty = false
293
+ self.expected = Sexp
294
+ end
295
+
296
+ def process_call(exp)
297
+ tag, recv, op, *args = exp
298
+
299
+ if op == :map and args.empty?
300
+ op = :pro
301
+ end
302
+
303
+ s(tag, process(recv), op, *(args.map{|a| process(a)}))
304
+ end
305
+ end
306
+
235
307
  # Look for rename statements and define the necessary scratch collections
236
308
  class RenameRewriter < SexpProcessor
237
309
  def initialize(bud_instance)
@@ -252,14 +324,110 @@ class RenameRewriter < SexpProcessor
252
324
  end
253
325
 
254
326
  def process_call(exp)
255
- call, recv, op, args = exp
327
+ tag, recv, op, *args = exp
256
328
 
257
329
  if op == :rename
258
- arglist, namelit, schemahash = args
330
+ raise Bud::CompileError, "reduce takes two arguments" unless args.size == 2
331
+ namelit, schemahash = args
259
332
  register_scratch(namelit[1], schemahash)
260
333
  end
261
334
 
262
- return s(call, process(recv), op, process(args))
335
+ return s(tag, process(recv), op, *(args.map{|a| process(a)}))
336
+ end
337
+ end
338
+
339
+ # Check for whether the rule invokes any "unsafe" functions (funcs that might
340
+ # return a different value every time they are called, e.g., budtime). Note that
341
+ # although we call this a rewriter, it doesn't modify the input AST.
342
+ class UnsafeFuncRewriter < SexpProcessor
343
+ attr_reader :unsafe_func_called
344
+
345
+ def initialize
346
+ super()
347
+ self.require_empty = false
348
+ self.expected = Sexp
349
+ @unsafe_func_called = false
350
+ @elem_stack = []
351
+ end
352
+
353
+ def process_call(exp)
354
+ tag, recv, op, *args = exp
355
+
356
+ # We assume that unsafe funcs have a nil receiver (Bud instance is implicit
357
+ # receiver).
358
+ if recv.nil? and @elem_stack.size > 0
359
+ @unsafe_func_called = true unless RuleRewriter.is_monotone(op)
360
+ end
361
+
362
+ return s(tag, process(recv), op, *(args.map{|a| process(a)}))
363
+ end
364
+
365
+ def process_iter(exp)
366
+ tag, recv, iter_args, body = exp
367
+ new_body = push_and_process(body)
368
+ return s(tag, process(recv), process(iter_args), new_body)
369
+ end
370
+
371
+ def push_and_process(exp)
372
+ obj_id = exp.object_id
373
+ @elem_stack.push(obj_id)
374
+ rv = process(exp)
375
+ raise Bud::Error unless @elem_stack.pop == obj_id
376
+ return rv
377
+ end
378
+ end
379
+
380
+ # Rewrite references to lattice identifiers that appear in rule bodies. A
381
+ # reference to a lattice identifier returns the associated lattice wrapper. When
382
+ # the identifier appears at the top-level of the rule RHS, that is fine (since
383
+ # we want the wrapper to do wiring). But for references that appear inside rule
384
+ # bodies, we want to instead fetch the current value associated with the lattice
385
+ # wrapper.
386
+ class LatticeRefRewriter < SexpProcessor
387
+ def initialize(bud_instance)
388
+ super()
389
+ self.require_empty = false
390
+ self.expected = Sexp
391
+ @bud_instance = bud_instance
392
+ @elem_stack = []
393
+ end
394
+
395
+ def process_iter(exp)
396
+ tag, recv, iter_args, body = exp
397
+ new_body = push_and_process(body)
398
+ return s(tag, process(recv), process(iter_args), new_body)
399
+ end
400
+
401
+ def process_array(exp)
402
+ new_body = exp.sexp_body.map {|t| push_and_process(t)}
403
+ return s(:array, *new_body)
404
+ end
405
+
406
+ def process_hash(exp)
407
+ new_body = exp.sexp_body.map {|t| push_and_process(t)}
408
+ return s(:hash, *new_body)
409
+ end
410
+
411
+ def process_call(exp)
412
+ tag, recv, op, *args = exp
413
+
414
+ if recv.nil? and args.empty? and is_lattice?(op) and @elem_stack.size > 0
415
+ return s(:call, exp, :current_value)
416
+ else
417
+ return s(tag, process(recv), op, *(args.map{|a| process(a)}))
418
+ end
419
+ end
420
+
421
+ def push_and_process(exp)
422
+ obj_id = exp.object_id
423
+ @elem_stack.push(obj_id)
424
+ rv = process(exp)
425
+ raise Bud::Error unless @elem_stack.pop == obj_id
426
+ return rv
427
+ end
428
+
429
+ def is_lattice?(op)
430
+ @bud_instance.lattices.has_key? op.to_sym
263
431
  end
264
432
  end
265
433
 
@@ -278,32 +446,43 @@ class AttrNameRewriter < SexpProcessor # :nodoc: all
278
446
  # iter vars
279
447
  def process_iter(exp)
280
448
  if exp[1] and exp[1][0] == :call
449
+ return exp unless exp[2]
281
450
  gather_collection_names(exp[1])
451
+ meth_name = exp[1][2]
282
452
 
283
453
  # now find iter vars and match up
284
- if exp[2] and exp[2][0] == :lasgn and @collnames.size == 1 #single-table iter
285
- raise Bud::CompileError, "nested redefinition of block variable \"#{exp[2][1]}\" not allowed" if @iterhash[exp[2][1]]
286
- @iterhash[exp[2][1]] = @collnames[0]
287
- elsif exp[2] and exp[2][0] == :lasgn and @collnames.size > 1 and exp[1] # join iter with lefts/rights
288
- case exp[1][2]
454
+ if exp[2][0] == :args and @collnames.size == 1 # single-table iter
455
+ if @iterhash[exp[2][1]]
456
+ raise Bud::CompileError, "redefinition of block variable \"#{exp[2][1]}\" not allowed"
457
+ end
458
+
459
+ # XXX: The BudChannel#payloads method assigns the correct schema to
460
+ # tuples that pass through it (i.e., it omits the location specifier);
461
+ # hence we don't want to apply the location rewrite to the code block
462
+ # that is passed to payloads(). This is a dirty hack.
463
+ unless meth_name == :payloads
464
+ @iterhash[exp[2][1]] = @collnames[0]
465
+ end
466
+ elsif exp[2][0] == :args and not @collnames.empty? # join iter with lefts/rights
467
+ case meth_name
289
468
  when :lefts
290
469
  @iterhash[exp[2][1]] = @collnames[0]
291
470
  when :rights
292
471
  @iterhash[exp[2][1]] = @collnames[1]
293
- else
294
- raise Bud::CompileError, "nested redefinition of block variable \"#{exp[2][1]}\" not allowed" if @iterhash[exp[2][1]]
295
- end
296
- elsif exp[2] and exp[2][0] == :masgn and not @collnames.empty? # join or reduce iter
297
- return unless exp[2][1] and exp[2][1][0] == :array
298
- if exp[1][2] == :reduce
472
+ when :reduce
299
473
  unless @collnames.length == 1
300
- raise Bud::Error, "reduce should only have one associated collection, but has #{@collnames.inspect}"
474
+ raise Bud::CompileError, "reduce should only have one associated collection, but has #{@collnames.inspect}"
301
475
  end
302
- @iterhash[exp[2][1][2][1]] = @collnames.first
303
- else #join
304
- @collnames.each_with_index do |c, i|
305
- next unless exp[2][1][i+1] and exp[2][1][i+1][0] == :lasgn
306
- @iterhash[exp[2][1][i+1][1]] = c
476
+ @iterhash[exp[2][1]] = @collnames[0]
477
+ else
478
+ # join
479
+ if @iterhash[exp[2][1]]
480
+ raise Bud::CompileError, "redefinition of block variable \"#{exp[2][1]}\" not allowed"
481
+ end
482
+
483
+ @collnames.each_with_index do |c,i|
484
+ next unless exp[2][i+1]
485
+ @iterhash[exp[2][i+1]] = c
307
486
  end
308
487
  end
309
488
  end
@@ -313,36 +492,43 @@ class AttrNameRewriter < SexpProcessor # :nodoc: all
313
492
  end
314
493
 
315
494
  def gather_collection_names(exp)
316
- if exp[0] == :call and exp[1].nil?
495
+ # We expect a reference to a collection name to look like a function call
496
+ # (nil receiver) with no arguments.
497
+ if exp.sexp_type == :call and exp[1].nil? and exp.length == 3
317
498
  @collnames << exp[2]
318
- elsif exp[2] and exp[2] == :rename
319
- arglist, namelit, schemahash = exp[3]
499
+ elsif exp.sexp_type == :call and exp[2] == :rename
500
+ namelit = exp[3]
320
501
  @collnames << namelit[1]
502
+ elsif exp.sexp_type == :call and [:group, :argagg].include?(exp[2])
503
+ # For grouping and argagg expressions, only look at the receiver (the
504
+ # collection we're grouping on); otherwise, we might mistakenly think some
505
+ # of the arguments to the grouping operation are collection names.
506
+ gather_collection_names(exp[1])
321
507
  else
322
- exp.each { |e| gather_collection_names(e) if e and e.class <= Sexp }
508
+ exp.each { |e| gather_collection_names(e) if e.class <= Sexp }
323
509
  end
324
510
  end
325
511
 
326
512
  def process_call(exp)
327
- call, recv, op, args = exp
513
+ call, recv, op, *args = exp
328
514
 
329
- if recv and recv.class == Sexp and recv.first == :lvar and recv[1] and @iterhash[recv[1]]
515
+ if recv.class == Sexp and recv.sexp_type == :lvar and @iterhash[recv[1]]
330
516
  if @bud_instance.respond_to?(@iterhash[recv[1]])
331
517
  if @bud_instance.send(@iterhash[recv[1]]).class <= Bud::BudCollection
332
518
  cols = @bud_instance.send(@iterhash[recv[1]]).cols
333
519
  if op != :[] and @bud_instance.send(@iterhash[recv[1]]).respond_to?(op)
334
- # if the op is an attribute name in the schema, col is its index
335
- col = cols.index(op) unless cols.nil?
336
- unless col.nil?
520
+ # if the op is an attribute name in the schema, col_idx is its index
521
+ col_idx = cols.index(op) unless cols.nil?
522
+ unless col_idx.nil?
337
523
  op = :[]
338
- args = s(:arglist, s(:lit, col))
524
+ args = [s(:lit, col_idx)]
339
525
  end
340
526
  end
341
527
  end
342
- return s(call, recv, op, args)
528
+ return s(call, recv, op, *args)
343
529
  end
344
530
  end
345
- return s(call, process(recv), op, process(args))
531
+ return s(call, process(recv), op, *(args.map{|a| process(a)}))
346
532
  end
347
533
  end
348
534
 
@@ -365,52 +551,41 @@ class TempExpander < SexpProcessor # :nodoc: all
365
551
  end
366
552
 
367
553
  def process_defn(exp)
368
- tag, name, args, scope = exp
369
- if name.to_s =~ /^__bloom__.+/
370
- block = scope[1]
371
-
372
- block.each_with_index do |n,i|
373
- if i == 0
374
- raise Bud::CompileError if n != :block
375
- next
376
- end
377
-
378
- # temp declarations are misparsed if the RHS contains certain constructs
379
- # (e.g., group, "do |f| ... end" rather than "{|f| ... }"). Rewrite to
380
- # correct the misparsing.
381
- if n.sexp_type == :iter
382
- iter_body = n.sexp_body
383
- new_n = fix_temp_decl(iter_body)
384
- unless new_n.nil?
385
- block[i] = n = new_n
386
- @did_work = true
387
- end
388
- end
389
-
390
- _, recv, meth, meth_args = n
391
- if meth == KEYWORD and recv.nil?
392
- block[i] = rewrite_me(n)
554
+ tag, name, args, *body = exp
555
+ return exp unless name.to_s =~ /^__bloom__.+/
556
+
557
+ body.each_with_index do |n,i|
558
+ # temp declarations are misparsed if the RHS contains certain constructs
559
+ # (e.g., group, "do |f| ... end" rather than "{|f| ... }"). Rewrite to
560
+ # correct the misparsing.
561
+ if n.sexp_type == :iter
562
+ iter_body = n.sexp_body
563
+ new_n = fix_temp_decl(iter_body)
564
+ unless new_n.nil?
565
+ body[i] = n = new_n
393
566
  @did_work = true
394
567
  end
395
568
  end
569
+
570
+ _, recv, meth, meth_args = n
571
+ if meth == KEYWORD and recv.nil?
572
+ body[i] = rewrite_me(n)
573
+ @did_work = true
574
+ end
396
575
  end
397
- s(tag, name, args, scope)
576
+ s(tag, name, args, *body)
398
577
  end
399
578
 
400
579
  private
401
580
  def fix_temp_decl(iter_body)
402
581
  if iter_body.first.sexp_type == :call
403
582
  call_node = iter_body.first
583
+ _, recv, meth, *meth_args = call_node
404
584
 
405
- _, recv, meth, meth_args = call_node
406
585
  if meth == KEYWORD and recv.nil?
407
- _, lhs, op, rhs = meth_args.sexp_body.first
408
-
409
- old_rhs_body = rhs.sexp_body
410
- new_rhs_body = [:iter]
411
- new_rhs_body += old_rhs_body
412
- new_rhs_body += iter_body[1..-1]
413
- rhs[1] = Sexp.from_array(new_rhs_body)
586
+ _, lhs, op, rhs = meth_args.first
587
+ new_rhs = s(:iter, rhs, *(iter_body[1..-1]))
588
+ meth_args.first[3] = new_rhs
414
589
  return call_node
415
590
  end
416
591
  end
@@ -418,18 +593,18 @@ class TempExpander < SexpProcessor # :nodoc: all
418
593
  end
419
594
 
420
595
  def rewrite_me(exp)
421
- _, recv, meth, args = exp
596
+ _, recv, meth, *args = exp
422
597
 
423
- raise Bud::CompileError unless recv == nil
424
- nest_call = args.sexp_body.first
598
+ raise Bud::CompileError unless recv.nil?
599
+ nest_call = args.first
425
600
  raise Bud::CompileError unless nest_call.sexp_type == :call
426
601
 
427
- nest_recv, nest_op, nest_args = nest_call.sexp_body
602
+ nest_recv, nest_op, *nest_args = nest_call.sexp_body
428
603
  raise Bud::CompileError unless nest_recv.sexp_type == :lit
429
604
 
430
605
  tmp_name = nest_recv.sexp_body.first
431
606
  @tmp_tables << tmp_name
432
- new_recv = s(:call, nil, tmp_name, s(:arglist))
433
- return s(:call, new_recv, nest_op, nest_args)
607
+ new_recv = s(:call, nil, tmp_name)
608
+ return s(:call, new_recv, nest_op, *nest_args)
434
609
  end
435
610
  end