bud 0.9.4 → 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,367 @@
1
+ require 'bud/lattice-core'
2
+
3
# A lattice that keeps the largest value it has been merged with. A nil value
# is the bottom element and is treated as negative infinity.
class Bud::MaxLattice < Bud::Lattice
  wrapper_name :lmax

  # i:: initial value; anything other than nil must be an instance of a
  #     Comparable class, otherwise it is handed to reject_input.
  def initialize(i=nil)
    reject_input(i) unless i.nil? || i.class <= Comparable
    @v = i
  end

  # Return whichever of self and +i+ holds the larger value. +i+ is returned
  # when self is bottom; self is returned when +i+ is bottom or on a tie.
  def merge(i)
    other = i.reveal
    return i if @v.nil?
    return i if !other.nil? && other > @v
    self
  end

  # lbool that is true iff this value is non-bottom and strictly exceeds +k+.
  morph :gt do |k|
    exceeds = @v && @v > k
    Bud::BoolLattice.new(exceeds ? true : false)
  end

  # lbool that is true iff this value is non-bottom and is at least +k+.
  morph :gt_eq do |k|
    at_least = @v && @v >= k
    Bud::BoolLattice.new(at_least ? true : false)
  end

  # XXX: support MaxLattice input?
  morph :+ do |i|
    if @v.nil?
      # Since bottom of lmax is negative infinity, + is a no-op
      self
    else
      reject_input(i, "+") unless i.class <= Numeric
      self.class.new(@v + i)
    end
  end

  # lmax holding the smaller of this value and the Numeric +i+; a bottom
  # receiver yields +i+ itself.
  morph :min_of do |i|
    reject_input(i, "min_of") unless i.class <= Numeric
    if @v.nil? || i < @v
      self.class.new(i)
    else
      self
    end
  end

  # lbool that is true iff this value is non-bottom and does not exceed +k+.
  def lt_eq(k)
    within = @v && @v <= k
    Bud::BoolLattice.new(within ? true : false)
  end
end
43
+
44
# A lattice that keeps the smallest value it has been merged with. A nil value
# is the bottom element and is treated as positive infinity.
class Bud::MinLattice < Bud::Lattice
  wrapper_name :lmin

  # i:: initial value; anything other than nil must be an instance of a
  #     Comparable class, otherwise it is handed to reject_input.
  def initialize(i=nil)
    reject_input(i) unless i.nil? || i.class <= Comparable
    @v = i
  end

  # Return whichever of self and +i+ holds the smaller value. +i+ is returned
  # when self is bottom; self is returned when +i+ is bottom or on a tie.
  def merge(i)
    other = i.reveal
    return i if @v.nil?
    return i if !other.nil? && other < @v
    self
  end

  # lbool that is true iff this value is non-bottom and strictly below +k+.
  morph :lt do |k|
    below = @v && @v < k
    Bud::BoolLattice.new(below ? true : false)
  end

  # XXX: support MinLattice input
  morph :+ do |i|
    if @v.nil?
      # Since bottom of lmin is infinity, + is a no-op
      self
    else
      reject_input(i, "+") unless i.class <= Numeric
      self.class.new(@v + i)
    end
  end
end
71
+
72
+ # XXX: consider creating two fixed ("interned") values for true and false.
73
class Bud::BoolLattice < Bud::Lattice
  wrapper_name :lbool

  # i:: initial value; must be exactly true or false (false is the default).
  def initialize(i=false)
    reject_input(i) unless true.equal?(i) || false.equal?(i)
    @v = i
  end

  # Logical OR of the two values, wrapped in a fresh lattice instance.
  def merge(i)
    joined = @v || i.reveal
    self.class.new(joined)
  end

  # XXX: ugly syntax
  # Invoke the given block only when this value is true.
  morph :when_true do |&blk|
    if @v
      blk.call
    end
  end
end
90
+
91
# A lattice wrapping a Hash whose keys are non-lattice values and whose values
# are lattice values. Merging two maps merges the values stored under keys
# that appear in both.
class Bud::MapLattice < Bud::Lattice
  wrapper_name :lmap

  # i:: a Hash (exactly; subclasses are rejected) with non-lattice keys and
  #     lattice values. Anything else is handed to reject_input.
  def initialize(i={})
    reject_input(i) unless i.class == Hash
    i.each_pair do |k,val|
      reject_input(i) if k.class <= Bud::Lattice
      reject_input(i) unless val.class <= Bud::Lattice
    end
    @v = i
  end

  # Union of the two maps; values under a shared key are merged pairwise.
  def merge(i)
    rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
      lhs_v.merge(rhs_v)
    end
    wrap_unsafe(rv)
  end

  def inspect
    "<#{self.class.wrapper}: #{@v.inspect}>"
  end

  # XXX: If the key is not in the map, we would like to return some generic
  # "bottom" value that is shared by all lattice types. Unfortunately, such a
  # value does not exist, so we need the caller to tell us which class to use as
  # an optional second argument (if omitted, fetching a non-existent key yields
  # a runtime exception). Another alternative would be to specify the type of
  # the map's values when the lmap is declared, but that hinders code reuse.
  morph :at do |k, *args|
    if @v.has_key? k
      @v[k]
    else
      if args.empty?
        raise Bud::Error, "lmap has no key #{k.inspect} and no default lattice class was given"
      end
      args.first.new
    end
  end

  # Apply the morphism +sym+ (with +args+) to every value in the map.
  morph :apply_morph do |sym, *args|
    unless Bud::Lattice.global_morphs.include? sym
      raise Bud::Error, "#{sym.inspect} is not in Bud::Lattice.global_morphs"
    end
    do_apply(sym, args)
  end

  # Apply the monotone function +sym+ (with +args+) to every value in the map.
  monotone :apply_monotone do |sym, *args|
    unless Bud::Lattice.global_mfuncs.include? sym
      raise Bud::Error, "#{sym.inspect} is not in Bud::Lattice.global_mfuncs"
    end
    do_apply(sym, args)
  end

  # Send +sym+ with +args+ to each value and collect the results, which must
  # themselves be lattice values, into a new lmap with the same keys.
  def do_apply(sym, args)
    rv = {}
    @v.each_pair do |k, val|
      res = val.send(sym, *args)
      unless res.kind_of? Bud::Lattice
        raise Bud::Error, "#{sym.inspect} returned a non-lattice value: #{res.inspect}"
      end
      rv[k] = res
    end
    wrap_unsafe(rv)
  end

  morph :key? do |k|
    Bud::BoolLattice.new(@v.has_key? k)
  end

  morph :key_set do
    Bud::SetLattice.new(@v.keys)
  end

  monotone :size do
    Bud::MaxLattice.new(@v.size)
  end

  # Keep only the keys present in both maps, merging their values.
  morph :intersect do |i|
    i_tbl = i.reveal
    # Scan the smaller map, probe the larger one
    scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
    rv = {}
    scan.each do |k,val|
      rv[k] = val.merge(probe[k]) if probe.has_key? k
    end
    wrap_unsafe(rv)
  end

  # Produce a Bloom collection (array of tuples) from this lmap, optionally
  # applying a user-provided code block to each (k,v) pair in turn. Note that
  # this is slightly different from how projection over an lmap would work: we
  # return an array, whereas projection would return an lmap.
  morph :to_collection do |&blk|
    @v.map(&blk)
  end

  # Return true if this map is less than or equal to the given map. "x" is
  # less than or equal to "y" if:
  #   (a) every key in "x" also appears in "y"
  #   (b) for every key k in "x", x[k] <= y[k]
  #
  # NB: For this to be a morphism, we require that (a) "self" is deflationary
  # (or fixed) (b) the input lattice value is inflationary (or fixed). We
  # currently don't have a way to express (a) in the type system.
  def lt_eq(i)
    reject_input(i, "lt_eq") unless i.class <= self.class

    @v.each do |k, v|
      unless i.key?(k).reveal == true
        return Bud::BoolLattice.new(false)
      end
      unless v.lt_eq(i.at(k).reveal).reveal == true
        return Bud::BoolLattice.new(false)
      end
    end

    return Bud::BoolLattice.new(true)
  end
end
203
+
204
+ # A set lattice contains zero or more primitive (non-lattice) values.
205
class Bud::SetLattice < Bud::Lattice
  wrapper_name :lset

  # i:: an enumerable of non-lattice values; it is converted to a Set unless
  #     it already is one. Input containing a lattice value is handed to
  #     reject_input.
  def initialize(i=[])
    reject_input(i) if i.any? {|e| e.kind_of? Bud::Lattice}

    i = Set.new(i) unless i.kind_of? Set
    @v = i
  end

  # Set union.
  def merge(i)
    wrap_unsafe(@v | i.reveal)
  end

  morph :intersect do |i|
    wrap_unsafe(@v & i.reveal)
  end

  # Cross product of this set with +i+. Each pair (a, b) is passed to the
  # block when one is given; otherwise the pair is emitted as [a, b].
  morph :product do |i, &blk|
    rv = Set.new
    @v.each do |a|
      if blk.nil?
        t = i.pro {|b| [a,b]}
      else
        t = i.pro {|b| blk.call(a, b)}
      end
      rv.merge(t.reveal)
    end
    wrap_unsafe(rv)
  end

  morph :contains? do |i|
    Bud::BoolLattice.new(@v.member? i)
  end

  # Projection: apply the block to every element, dropping nil results.
  morph :pro do |&blk|
    # We don't use Set#map, since it returns an Array (ugh).
    rv = Set.new
    @v.each do |t|
      val = blk.call(t)
      rv << val unless val.nil?
    end
    wrap_unsafe(rv)
  end

  monotone :size do
    Bud::MaxLattice.new(@v.size)
  end

  # Assuming that this set contains tuples (arrays) as elements, this performs
  # an equijoin between the current lattice and i. The join predicate is
  # "self_t[lhs_idx] == i_t[rhs_idx]", for all tuples self_t and i_t in self and
  # i, respectively. The return value is the result of passing pairs of join
  # tuples to the user-supplied block.
  morph :eqjoin do |i, lhs_idx, rhs_idx, &blk|
    rv = Set.new
    @v.each do |a|
      i.probe(rhs_idx, a[lhs_idx]).each do |b|
        rv << blk.call(a, b)
      end
    end
    wrap_unsafe(rv)
  end

  # Assuming that this set contains tuples (arrays), this returns a list of
  # tuples (possibly empty) whose idx'th column has the value "v".
  #
  # One hash table is built lazily and cached per column index, so probing
  # the same set on different columns returns correct results for each.
  def probe(idx, v)
    @ht ||= {}
    @ht[idx] ||= build_ht(idx)
    return @ht[idx][v] || []
  end

  private
  # Build a Hash from the value found in column +idx+ to the list of tuples
  # holding that value in that column.
  def build_ht(idx)
    rv = {}
    @v.each do |i|
      field = i[idx]
      rv[field] ||= []
      rv[field] << i
    end
    rv
  end
end
288
+
289
+ # A set that admits only non-negative numbers. This allows "sum" to be an
290
+ # order-preserving map. Note that this does duplicate elimination on its input,
291
+ # so it actually computes "SUM(DISTINCT ...)" in SQL.
292
+ #
293
+ # XXX: for methods that take a user-provided code block, we need to ensure that
294
+ # the set continues to contain only positive numbers.
295
class Bud::PositiveSetLattice < Bud::SetLattice
  wrapper_name :lpset

  # i:: as for the parent class, with the extra requirement that every
  #     element is a non-negative Numeric; otherwise the input is handed to
  #     reject_input.
  def initialize(i=[])
    super
    @v.each do |n|
      reject_input(i) unless n.class <= Numeric
      reject_input(i) if n < 0
    end
  end

  # Sum of the (distinct) elements as an lmax. The sum is computed once and
  # cached. An empty set sums to 0 rather than to a bottom lmax, matching
  # what "size" reports for an empty set.
  monotone :pos_sum do
    @sum = @v.reduce(0, :+) if @sum.nil?
    Bud::MaxLattice.new(@sum)
  end
end
311
+
312
+ # XXX: Should this be just syntax sugar for a map lattice instead?
313
class Bud::BagLattice < Bud::Lattice
  wrapper_name :lbag

  # i:: a Hash from non-lattice elements to their multiplicity, which must be
  #     a positive Integer. Anything else is handed to reject_input.
  def initialize(i={})
    reject_input(i) unless i.class <= Hash
    i.each do |k, mult|
      reject_input(i) if k.class <= Bud::Lattice
      reject_input(i) unless (mult.class <= Integer && mult > 0)
    end
    @v = i
  end

  # Note that for merge to be idempotent, we need to use the traditional
  # definition of multiset union (per-element max of multiplicities, rather than
  # sum of multiplicities).
  def merge(i)
    rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
      [lhs_v, rhs_v].max
    end
    wrap_unsafe(rv)
  end

  # Multiset intersection: per-element min of multiplicities over the
  # elements present in both bags.
  morph :intersect do |i|
    i_tbl = i.reveal
    # Scan the smaller one, probe the larger one
    scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
    rv = {}
    scan.each do |k,val|
      rv[k] = [val, probe[k]].min if probe.has_key? k
    end
    wrap_unsafe(rv)
  end

  # Multiplicity of +k+ as an lmax; 0 when +k+ is not in the bag.
  morph :multiplicity do |k|
    rv = @v[k]
    rv ||= 0
    Bud::MaxLattice.new(rv)
  end

  # Multiset sum: multiplicities of shared elements are added together.
  morph :+ do |i|
    rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
      lhs_v + rhs_v
    end
    self.class.new(rv)
  end

  morph :contains? do |i|
    Bud::BoolLattice.new(@v.has_key? i)
  end

  # Total number of elements counting multiplicity, as an lmax. Computed once
  # and cached. An empty bag has size 0 (not a bottom lmax), consistent with
  # the "size" of the set and map lattices.
  monotone :size do
    @size = @v.values.reduce(0, :+) if @size.nil?
    Bud::MaxLattice.new(@size)
  end
end
@@ -11,6 +11,7 @@ class Class
11
11
  end
12
12
 
13
13
  # FIXME: Use a subclass of Struct.
14
+ # FIXME: Should likely override eql? as well
14
15
  class Struct
15
16
  def <=>(o)
16
17
  if o.class == self.class
@@ -139,7 +140,7 @@ class Module
139
140
  end
140
141
 
141
142
  # bloom statements to be registered with Bud runtime. optional +block_name+
142
- # allows for multiple bloom blocks per module, and overriding
143
+ # allows for multiple bloom blocks per module and method overriding
143
144
  def bloom(block_name=nil, &block)
144
145
  # If no block name was specified, generate a unique name
145
146
  if block_name.nil?
@@ -148,7 +149,7 @@ class Module
148
149
  @block_id += 1
149
150
  else
150
151
  unless block_name.class <= Symbol
151
- raise Bud::CompileError, "bloom block names must be a symbol: #{block_name}"
152
+ raise Bud::CompileError, "block name must be a symbol: #{block_name}"
152
153
  end
153
154
  end
154
155
 
@@ -161,15 +162,24 @@ class Module
161
162
  # module; this indicates a likely programmer error.
162
163
  if instance_methods(false).include?(meth_name) ||
163
164
  instance_methods(false).include?(meth_name.to_sym)
164
- raise Bud::CompileError, "duplicate named bloom block: '#{block_name}' in #{self}"
165
+ raise Bud::CompileError, "duplicate block name: '#{block_name}' in #{self}"
165
166
  end
166
167
  ast = Source.read_block(caller[0]) # pass in caller's location via backtrace
168
+
167
169
  # ast corresponds only to the statements of the block. Wrap it in a method
168
170
  # definition for backward compatibility for now.
169
- # First wrap ast in a block if it is only a single statement
170
- ast = s(:block) if ast.nil?
171
- ast = s(:block, ast) unless ast.sexp_type == :block
172
- ast = s(:defn, meth_name.to_sym, s(:args), s(:scope, ast))
171
+
172
+ # If the block contained multiple statements, the AST will have a top-level
173
+ # :block node. Since ruby_parser ASTs for method definitions don't contain
174
+ # such a node, remove it.
175
+ if ast.nil?
176
+ ast = []
177
+ elsif ast.sexp_type == :block
178
+ ast = ast.sexp_body
179
+ else
180
+ ast = [ast]
181
+ end
182
+ ast = s(:defn, meth_name.to_sym, s(:args), *ast)
173
183
  unless self.respond_to? :__bloom_asts__
174
184
  def self.__bloom_asts__
175
185
  @__bloom_asts__ ||= {}
@@ -180,11 +190,11 @@ class Module
180
190
  define_method(meth_name.to_sym, &block)
181
191
  end
182
192
 
183
- private
184
193
  # Return a string with a version of the class name appropriate for embedding
185
194
  # into a method name. Annoyingly, if you define class X nested inside
186
195
  # class/module Y, X's class name is the string "Y::X". We don't want to define
187
196
  # method names with colons in them, so just return "X" instead.
197
+ private
188
198
  def self.get_class_name(klass)
189
199
  (klass.name.nil? or klass.name == "") \
190
200
  ? "Anon#{klass.object_id}" \
data/lib/bud/rewrite.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  require 'rubygems'
2
- require 'ruby2ruby'
3
- require 'set'
4
2
 
5
3
  class RuleRewriter < Ruby2Ruby # :nodoc: all
6
4
  attr_accessor :rule_indx, :rules, :depends
@@ -8,10 +6,11 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
8
6
  OP_LIST = Set.new([:<<, :<, :<=])
9
7
  TEMP_OP_LIST = Set.new([:-@, :~, :+@])
10
8
  MONOTONE_WHITELIST = Set.new([:==, :+, :<=, :-, :<, :>, :*, :~,
11
- :pairs, :matches, :combos, :flatten,
12
- :lefts, :rights, :map, :flat_map, :pro,
9
+ :pairs, :matches, :combos, :flatten, :new,
10
+ :lefts, :rights, :map, :flat_map, :pro, :merge,
13
11
  :cols, :key_cols, :val_cols, :payloads, :lambda,
14
- :tabname, :ip_port, :port, :ip, :int_ip_port])
12
+ :tabname, :ip_port, :port, :ip, :int_ip_port,
13
+ :current_value])
15
14
 
16
15
  def initialize(seed, bud_instance)
17
16
  @bud_instance = bud_instance
@@ -21,7 +20,8 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
21
20
  @collect = false
22
21
  @rules = []
23
22
  @depends = []
24
- @nm_funcs_called = false
23
+ @iter_stack = []
24
+ @refs_in_body = Set.new
25
25
  super()
26
26
  end
27
27
 
@@ -29,6 +29,7 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
29
29
  def resolve(obj, prefix, name)
30
30
  qn = prefix ? prefix + "." + name.to_s : name.to_s
31
31
  return [:collection, qn, obj.tables[name]] if obj.tables.has_key? name
32
+ return [:lattice, qn, obj.lattices[name]] if obj.lattices.has_key? name
32
33
 
33
34
  # does name refer to an import name?
34
35
  iobj = obj.import_instance name
@@ -38,12 +39,13 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
38
39
  end
39
40
 
40
41
  def exp_id_type(recv, name, args) # call only if sexp type is :call
41
- return $not_id unless args.size == 1
42
+ return $not_id unless args.empty?
42
43
  ty = $not_id
43
44
  if recv
44
45
  if recv.first == :call
45
- # possibly nested reference.
46
- rty, rqn, robj = exp_id_type(recv[1], recv[2], recv[3]) # rty, rqn, .. = receiver's type, qual name etc.
46
+ # possibly nested reference
47
+ # rty, rqn, .. = receiver's type, qual name etc.
48
+ rty, rqn, robj = exp_id_type(recv[1], recv[2], recv[3..-1])
47
49
  ty = resolve(robj, rqn, name) if rty == :import
48
50
  end
49
51
  else
@@ -56,26 +58,88 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
56
58
  def call_to_id(exp)
57
59
  # convert a series of nested calls, a sexp of the form
58
60
  # s(:call,
59
- # s(:call, s(:call, nil, :a, s(:arglist)), :b, s(:arglist)),
60
- # :bar ,
61
- # s(:arglist)))
61
+ # s(:call, s(:call, nil, :a), :b),
62
+ # :bar))
62
63
  # to the string "a.b.bar"
63
- raise "Malformed exp: #{exp}" unless (exp[0] == :call)
64
- _, recv, op, args = exp
64
+ raise Bud::CompileError, "malformed exp: #{exp}" unless exp.sexp_type == :call
65
+ _, recv, op = exp
65
66
  return recv.nil? ? op.to_s : call_to_id(recv) + "." + op.to_s
66
67
  end
67
68
 
69
+ # We want to distinguish between collection dependencies that occur in
70
+ # top-level expressions versus collections that are referenced inside rule
71
+ # bodies. We just want to set a flag when processing the :iter body, but
72
+ # annoyingly it seems that this is hard to do without duplicating the
73
+ # implementation of process_iter().
74
+ #
75
+ # XXX: the whole RuleRewriter approach is wrong because it conflates
76
+ # converting ASTs to strings with doing analysis on ASTs. Those should be
77
+ # split into two separate passes.
78
# Process an :iter node, recording on @iter_stack that we are inside a block
# body while that body is being processed, then hand the pieces to
# do_process_iter to produce the output text.
def process_iter(exp)
  iter = process(exp.shift)
  args = exp.shift

  # The stack is non-empty only for the duration of the body's processing.
  @iter_stack.push(true)
  body = process(exp.shift) unless exp.empty?
  @iter_stack.pop

  do_process_iter(iter, args, body)
end
88
+
89
# Render an iterator call with its block as source text.
#
# iter:: already-processed text of the call that receives the block; a
#        trailing "()" is stripped from it in place
# args:: the block-argument node, or 0 for an explicitly empty "||" list
# body:: already-processed text of the block body, or nil for an empty block
#
# The single-line brace form is returned when it contains no newline and is
# shorter than LINE_LENGTH; otherwise a multi-line form is produced.
def do_process_iter(iter, args, body)
  arg_txt = case args
            when 0
              " ||"
            else
              inner = process(args)[1..-2]
              inner.empty? ? inner : " |#{inner}|"
            end

  # Choose the multi-line delimiters before "()" is stripped from iter.
  opener, closer = (iter == "END") ? ["{", "}"] : ["do", "end"]

  iter.sub!(/\(\)$/, '')
  stripped = body ? body.strip : nil

  one_liner = "#{iter} {" + arg_txt + (stripped ? " #{stripped} " : ' ') + "}"
  return one_liner if !one_liner.include?("\n") && one_liner.size < LINE_LENGTH

  multi = "#{iter} #{opener}" + arg_txt + "\n"
  multi << indent(stripped) << "\n" if stripped
  multi << closer
end
131
+
68
132
  def process_call(exp)
69
- recv, op, args = exp
70
- if OP_LIST.include?(op) and @context[1] == :block and @context.length == 4
71
- # NB: context.length is 4 when see a method call at the top-level of a
133
+ recv, op, *args = exp
134
+ if OP_LIST.include?(op) and @context[1] == :defn and @context.length == 2
135
+ # NB: context.length is 2 when see a method call at the top-level of a
72
136
  # :defn block -- this is where we expect Bloom statements to appear
73
137
  do_rule(exp)
74
138
  elsif op == :notin
75
139
  # Special case. In the rule "z <= x.notin(y)", z depends positively on x,
76
140
  # but negatively on y. See further explanation in the "else" section for
77
141
  # why this is a special case.
78
- notintab = call_to_id(args[1]) # args expected to be of the form (:arglist (:call nil :y ...))
142
+ notintab = call_to_id(args[0]) # args expected to be of the form (:call nil :y ...)
79
143
  @tables[notintab.to_s] = true # "true" denotes non-monotonic dependency
80
144
  super
81
145
  else
@@ -88,15 +152,16 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
88
152
  # a.b.c.notin(d.e.f), we register a non-monotonic dependency of lhs on
89
153
  # "d.e.f", not with "a.b.c"
90
154
  ty, qn, _ = exp_id_type(recv, op, args) # qn = qualified name
91
- if ty == :collection
155
+ if ty == :collection or ty == :lattice
92
156
  (@tables[qn] = @nm if @collect) unless @tables[qn]
157
+ @refs_in_body << qn unless @iter_stack.empty?
93
158
  #elsif ty == :import .. do nothing
94
159
  elsif ty == :not_coll_id
95
160
  # Check if receiver is a collection, and further if the current exp
96
161
  # represents a field lookup
97
162
  op_is_field_name = false
98
163
  if recv and recv.first == :call
99
- rty, _, robj = exp_id_type(recv[1], recv[2], recv[3])
164
+ rty, _, robj = exp_id_type(recv[1], recv[2], recv[3..-1])
100
165
  if rty == :collection
101
166
  cols = robj.cols
102
167
  op_is_field_name = true if cols and cols.include?(op)
@@ -104,18 +169,14 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
104
169
  end
105
170
  # For CALM analysis, mark deletion rules as non-monotonic
106
171
  @nm = true if op == :-@
172
+
173
+ # Don't worry about monotone ops, table names, table.attr calls, or
174
+ # accessors of iterator variables
107
175
  if recv
108
- # Don't worry about monotone ops, table names, table.attr calls, or
109
- # accessors of iterator variables
110
176
  unless RuleRewriter.is_monotone(op) or op_is_field_name or
111
177
  recv.first == :lvar or op.to_s.start_with?("__")
112
178
  @nm = true
113
179
  end
114
- else
115
- # Function called (implicit receiver = Bud instance) in a user-defined
116
- # code block. Check if it is non-monotonic (like budtime, that
117
- # produces a new value every time it is called)
118
- @nm_funcs_called = true unless RuleRewriter.is_monotone(op)
119
180
  end
120
181
  end
121
182
  if TEMP_OP_LIST.include? op
@@ -126,25 +187,35 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
126
187
  end
127
188
 
128
189
  def self.is_monotone(op)
129
- MONOTONE_WHITELIST.include?(op)
190
+ MONOTONE_WHITELIST.include?(op) ||
191
+ is_morphism(op) ||
192
+ Bud::Lattice.global_mfuncs.include?(op)
130
193
  end
131
194
 
132
- # Rewrite top-level rhs array literals to lambdas. During wiring, these are
133
- # turned into coll_expr collections.
195
+ def self.is_morphism(op)
196
+ Bud::Lattice.global_morphs.include?(op)
197
+ end
198
+
199
+ # Rewrite top-level rhs array and hash literals to lambdas. During wiring,
200
+ # these are turned into coll_expr collections.
134
201
  def lambda_rewrite(rhs)
135
202
  # the <= case
136
- if rhs[0] == :array
137
- return s(:iter, s(:call, nil, :lambda, s(:arglist)), nil, rhs)
203
+ if is_coll_literal(rhs[0])
204
+ return s(:iter, s(:call, nil, :lambda), s(:args), rhs)
138
205
  # the superator case
139
206
  elsif rhs[0] == :call \
140
- and rhs[1] and rhs[1][0] and rhs[1][0] == :array \
207
+ and rhs[1] and rhs[1][0] and is_coll_literal(rhs[1][0]) \
141
208
  and rhs[2] and (rhs[2] == :+@ or rhs[2] == :-@ or rhs[2] == :~@)
142
- return s(rhs[0], s(:iter, s(:call, nil, :lambda, s(:arglist)), nil, rhs[1]), rhs[2], rhs[3])
209
+ return s(rhs[0], s(:iter, s(:call, nil, :lambda), s(:args), rhs[1]), rhs[2], *rhs[3..-1])
143
210
  else
144
211
  return rhs
145
212
  end
146
213
  end
147
214
 
215
+ def is_coll_literal(e)
216
+ [:array, :hash].include? e
217
+ end
218
+
148
219
  def collect_rhs(exp)
149
220
  exp = lambda_rewrite(exp)
150
221
 
@@ -155,13 +226,13 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
155
226
  end
156
227
 
157
228
  def reset_instance_vars
229
+ @refs_in_body = Set.new
158
230
  @tables = {}
159
231
  @nm = false
160
- @nm_funcs_called = false
161
232
  @temp_op = nil
162
233
  end
163
234
 
164
- def record_rule(lhs, op, rhs_pos, rhs)
235
+ def record_rule(lhs, op, rhs_pos, rhs, unsafe_funcs_called)
165
236
  rule_txt_orig = "#{lhs} #{op} (#{rhs})"
166
237
  rule_txt = "#{lhs} #{op} (#{rhs_pos})"
167
238
  if op == :<
@@ -170,9 +241,11 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
170
241
  op = op.to_s
171
242
  end
172
243
 
173
- @rules << [@bud_instance, @rule_indx, lhs, op, rule_txt, rule_txt_orig, @nm_funcs_called]
244
+ @rules << [@bud_instance, @rule_indx, lhs, op, rule_txt,
245
+ rule_txt_orig, unsafe_funcs_called]
174
246
  @tables.each_pair do |t, nm|
175
- @depends << [@bud_instance, @rule_indx, lhs, op, t, nm]
247
+ in_rule_body = @refs_in_body.include? t
248
+ @depends << [@bud_instance, @rule_indx, lhs, op, t, nm, in_rule_body]
176
249
  end
177
250
 
178
251
  reset_instance_vars
@@ -180,17 +253,14 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
180
253
  end
181
254
 
182
255
  def do_rule(exp)
183
- lhs = process exp[0]
184
- op = exp[1]
185
- rhs_ast = map2pro(exp[2])
186
-
187
- # Remove the outer s(:arglist) from the rhs AST. An AST subtree rooted with
188
- # s(:arglist) is not really sensible and it causes Ruby2Ruby < 1.3.1 to
189
- # misbehave (for example, s(:arglist, s(:hash, ...)) is misparsed.
190
- raise Bud::CompileError unless rhs_ast.sexp_type == :arglist
191
- rhs_ast = rhs_ast[1]
256
+ lhs, op, rhs_ast = exp
257
+ lhs = process(lhs)
192
258
 
259
+ rhs_ast = MapRewriter.new.process(rhs_ast)
193
260
  rhs_ast = RenameRewriter.new(@bud_instance).process(rhs_ast)
261
+ rhs_ast = LatticeRefRewriter.new(@bud_instance).process(rhs_ast)
262
+ ufr = UnsafeFuncRewriter.new
263
+ rhs_ast = ufr.process(rhs_ast)
194
264
 
195
265
  if @bud_instance.options[:no_attr_rewrite]
196
266
  rhs = collect_rhs(rhs_ast)
@@ -203,35 +273,37 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
203
273
  reset_instance_vars
204
274
  rhs_pos = collect_rhs(AttrNameRewriter.new(@bud_instance).process(rhs_ast_dup))
205
275
  end
206
- record_rule(lhs, op, rhs_pos, rhs)
276
+ record_rule(lhs, op, rhs_pos, rhs, ufr.unsafe_func_called)
207
277
  drain(exp)
208
278
  end
209
279
 
210
- # We want to rewrite "map" calls on BudCollections to "pro" calls. It is hard
211
- # to do this precisely (issue #225), so we just replace map calls liberally
212
- # and define Enumerable#pro as an alias for "map".
213
- def map2pro(exp)
214
- # the non-superator case
215
- if exp[1] and exp[1][0] and exp[1][0] == :iter \
216
- and exp[1][1] and exp[1][1][1] and exp[1][1][1][0] == :call
217
- if exp[1][1][2] == :map
218
- exp[1][1][2] = :pro
219
- end
220
- # the superator case
221
- elsif exp[1] and exp[1][0] == :call and (exp[1][2] == :~@ or exp[1][2] == :+@ or exp[1][2] == :-@)
222
- if exp[1][1] and exp[1][1][1] and exp[1][1][1][2] == :map
223
- exp[1][1][1][2] = :pro
224
- end
225
- end
226
- exp
227
- end
228
-
229
280
  def drain(exp)
230
281
  exp.shift until exp.empty?
231
282
  return ""
232
283
  end
233
284
  end
234
285
 
286
+ # We want to rewrite "map" calls on BudCollections to "pro" calls. It is hard
287
+ # to do this precisely (issue #225), so we just replace map calls liberally
288
+ # and define Enumerable#pro as an alias for "map".
289
class MapRewriter < SexpProcessor
  def initialize
    super()
    self.require_empty = false
    self.expected = Sexp
  end

  # Rebuild a :call node, renaming an argument-less "map" to "pro" and
  # recursively processing the receiver and then each argument.
  def process_call(exp)
    tag, recv, op, *args = exp
    op = :pro if op == :map && args.empty?

    new_recv = process(recv)
    new_args = args.map { |a| process(a) }
    s(tag, new_recv, op, *new_args)
  end
end
306
+
235
307
  # Look for rename statements and define the necessary scratch collections
236
308
  class RenameRewriter < SexpProcessor
237
309
  def initialize(bud_instance)
@@ -252,14 +324,110 @@ class RenameRewriter < SexpProcessor
252
324
  end
253
325
 
254
326
  def process_call(exp)
255
- call, recv, op, args = exp
327
+ tag, recv, op, *args = exp
256
328
 
257
329
  if op == :rename
258
- arglist, namelit, schemahash = args
330
+ raise Bud::CompileError, "reduce takes two arguments" unless args.size == 2
331
+ namelit, schemahash = args
259
332
  register_scratch(namelit[1], schemahash)
260
333
  end
261
334
 
262
- return s(call, process(recv), op, process(args))
335
+ return s(tag, process(recv), op, *(args.map{|a| process(a)}))
336
+ end
337
+ end
338
+
339
+ # Check for whether the rule invokes any "unsafe" functions (funcs that might
340
+ # return a different value every time they are called, e.g., budtime). Note that
341
+ # although we call this a rewriter, it doesn't modify the input AST.
342
class UnsafeFuncRewriter < SexpProcessor
  attr_reader :unsafe_func_called

  def initialize
    super()
    self.require_empty = false
    self.expected = Sexp
    @unsafe_func_called = false
    @elem_stack = []
  end

  # Flag receiver-less calls that occur inside a block body and are not
  # considered monotone, then rebuild the node from its processed children.
  def process_call(exp)
    tag, recv, op, *args = exp

    # We assume that unsafe funcs have a nil receiver (Bud instance is implicit
    # receiver).
    if recv.nil? && !@elem_stack.empty? && !RuleRewriter.is_monotone(op)
      @unsafe_func_called = true
    end

    new_recv = process(recv)
    new_args = args.map { |a| process(a) }
    s(tag, new_recv, op, *new_args)
  end

  # Process the block body with the element stack marked, then the receiver
  # and the block arguments, and rebuild the :iter node.
  def process_iter(exp)
    tag, recv, iter_args, body = exp
    new_body = push_and_process(body)
    new_recv = process(recv)
    new_args = process(iter_args)
    s(tag, new_recv, new_args, new_body)
  end

  # Process +exp+ while its object id sits on @elem_stack; raises Bud::Error
  # if the stack is found unbalanced afterwards.
  def push_and_process(exp)
    marker = exp.object_id
    @elem_stack.push(marker)
    result = process(exp)
    popped = @elem_stack.pop
    raise Bud::Error unless popped == marker
    result
  end
end
379
+
380
+ # Rewrite references to lattice identifiers that appear in rule bodies. A
381
+ # reference to a lattice identifier returns the associated lattice wrapper. When
382
+ # the identifier appears at the top-level of the rule RHS, that is fine (since
383
+ # we want the wrapper to do wiring). But for references that appear inside rule
384
+ # bodies, we want to instead fetch the current value associated with the lattice
385
+ # wrapper.
386
class LatticeRefRewriter < SexpProcessor
  def initialize(bud_instance)
    super()
    self.require_empty = false
    self.expected = Sexp
    @bud_instance = bud_instance
    @elem_stack = []
  end

  # Process the block body with the element stack marked, then the receiver
  # and the block arguments, and rebuild the :iter node.
  def process_iter(exp)
    tag, recv, iter_args, body = exp
    new_body = push_and_process(body)
    new_recv = process(recv)
    new_args = process(iter_args)
    s(tag, new_recv, new_args, new_body)
  end

  # Every element of an array literal is processed with the stack marked.
  def process_array(exp)
    s(:array, *exp.sexp_body.map { |elem| push_and_process(elem) })
  end

  # Every key and value of a hash literal is processed with the stack marked.
  def process_hash(exp)
    s(:hash, *exp.sexp_body.map { |elem| push_and_process(elem) })
  end

  # A receiver-less, argument-less call naming a lattice, seen while the
  # element stack is non-empty, is wrapped in a call to current_value. Any
  # other call is rebuilt from its processed receiver and arguments.
  def process_call(exp)
    tag, recv, op, *args = exp

    bare_lattice_ref = recv.nil? && args.empty? && is_lattice?(op) &&
                       !@elem_stack.empty?
    return s(:call, exp, :current_value) if bare_lattice_ref

    new_recv = process(recv)
    new_args = args.map { |a| process(a) }
    s(tag, new_recv, op, *new_args)
  end

  # Process +exp+ while its object id sits on @elem_stack; raises Bud::Error
  # if the stack is found unbalanced afterwards.
  def push_and_process(exp)
    marker = exp.object_id
    @elem_stack.push(marker)
    result = process(exp)
    popped = @elem_stack.pop
    raise Bud::Error unless popped == marker
    result
  end

  # True when +op+ names a lattice declared on the Bud instance.
  def is_lattice?(op)
    @bud_instance.lattices.has_key? op.to_sym
  end
end
265
433
 
@@ -278,32 +446,43 @@ class AttrNameRewriter < SexpProcessor # :nodoc: all
278
446
  # iter vars
279
447
  def process_iter(exp)
280
448
  if exp[1] and exp[1][0] == :call
449
+ return exp unless exp[2]
281
450
  gather_collection_names(exp[1])
451
+ meth_name = exp[1][2]
282
452
 
283
453
  # now find iter vars and match up
284
- if exp[2] and exp[2][0] == :lasgn and @collnames.size == 1 #single-table iter
285
- raise Bud::CompileError, "nested redefinition of block variable \"#{exp[2][1]}\" not allowed" if @iterhash[exp[2][1]]
286
- @iterhash[exp[2][1]] = @collnames[0]
287
- elsif exp[2] and exp[2][0] == :lasgn and @collnames.size > 1 and exp[1] # join iter with lefts/rights
288
- case exp[1][2]
454
+ if exp[2][0] == :args and @collnames.size == 1 # single-table iter
455
+ if @iterhash[exp[2][1]]
456
+ raise Bud::CompileError, "redefinition of block variable \"#{exp[2][1]}\" not allowed"
457
+ end
458
+
459
+ # XXX: The BudChannel#payloads method assigns the correct schema to
460
+ # tuples that pass through it (i.e., it omits the location specifier);
461
+ # hence we don't want to apply the location rewrite to the code block
462
+ # that is passed to payloads(). This is a dirty hack.
463
+ unless meth_name == :payloads
464
+ @iterhash[exp[2][1]] = @collnames[0]
465
+ end
466
+ elsif exp[2][0] == :args and not @collnames.empty? # join iter with lefts/rights
467
+ case meth_name
289
468
  when :lefts
290
469
  @iterhash[exp[2][1]] = @collnames[0]
291
470
  when :rights
292
471
  @iterhash[exp[2][1]] = @collnames[1]
293
- else
294
- raise Bud::CompileError, "nested redefinition of block variable \"#{exp[2][1]}\" not allowed" if @iterhash[exp[2][1]]
295
- end
296
- elsif exp[2] and exp[2][0] == :masgn and not @collnames.empty? # join or reduce iter
297
- return unless exp[2][1] and exp[2][1][0] == :array
298
- if exp[1][2] == :reduce
472
+ when :reduce
299
473
  unless @collnames.length == 1
300
- raise Bud::Error, "reduce should only have one associated collection, but has #{@collnames.inspect}"
474
+ raise Bud::CompileError, "reduce should only have one associated collection, but has #{@collnames.inspect}"
301
475
  end
302
- @iterhash[exp[2][1][2][1]] = @collnames.first
303
- else #join
304
- @collnames.each_with_index do |c, i|
305
- next unless exp[2][1][i+1] and exp[2][1][i+1][0] == :lasgn
306
- @iterhash[exp[2][1][i+1][1]] = c
476
+ @iterhash[exp[2][1]] = @collnames[0]
477
+ else
478
+ # join
479
+ if @iterhash[exp[2][1]]
480
+ raise Bud::CompileError, "redefinition of block variable \"#{exp[2][1]}\" not allowed"
481
+ end
482
+
483
+ @collnames.each_with_index do |c,i|
484
+ next unless exp[2][i+1]
485
+ @iterhash[exp[2][i+1]] = c
307
486
  end
308
487
  end
309
488
  end
@@ -313,36 +492,43 @@ class AttrNameRewriter < SexpProcessor # :nodoc: all
313
492
  end
314
493
 
315
494
  def gather_collection_names(exp)
316
- if exp[0] == :call and exp[1].nil?
495
+ # We expect a reference to a collection name to look like a function call
496
+ # (nil receiver) with no arguments.
497
+ if exp.sexp_type == :call and exp[1].nil? and exp.length == 3
317
498
  @collnames << exp[2]
318
- elsif exp[2] and exp[2] == :rename
319
- arglist, namelit, schemahash = exp[3]
499
+ elsif exp.sexp_type == :call and exp[2] == :rename
500
+ namelit = exp[3]
320
501
  @collnames << namelit[1]
502
+ elsif exp.sexp_type == :call and [:group, :argagg].include?(exp[2])
503
+ # For grouping and argagg expressions, only look at the receiver (the
504
+ # collection we're grouping on); otherwise, we might mistakenly think some
505
+ # of the arguments to the grouping operation are collection names.
506
+ gather_collection_names(exp[1])
321
507
  else
322
- exp.each { |e| gather_collection_names(e) if e and e.class <= Sexp }
508
+ exp.each { |e| gather_collection_names(e) if e.class <= Sexp }
323
509
  end
324
510
  end
325
511
 
326
512
  def process_call(exp)
327
- call, recv, op, args = exp
513
+ call, recv, op, *args = exp
328
514
 
329
- if recv and recv.class == Sexp and recv.first == :lvar and recv[1] and @iterhash[recv[1]]
515
+ if recv.class == Sexp and recv.sexp_type == :lvar and @iterhash[recv[1]]
330
516
  if @bud_instance.respond_to?(@iterhash[recv[1]])
331
517
  if @bud_instance.send(@iterhash[recv[1]]).class <= Bud::BudCollection
332
518
  cols = @bud_instance.send(@iterhash[recv[1]]).cols
333
519
  if op != :[] and @bud_instance.send(@iterhash[recv[1]]).respond_to?(op)
334
- # if the op is an attribute name in the schema, col is its index
335
- col = cols.index(op) unless cols.nil?
336
- unless col.nil?
520
+ # if the op is an attribute name in the schema, col_idx is its index
521
+ col_idx = cols.index(op) unless cols.nil?
522
+ unless col_idx.nil?
337
523
  op = :[]
338
- args = s(:arglist, s(:lit, col))
524
+ args = [s(:lit, col_idx)]
339
525
  end
340
526
  end
341
527
  end
342
- return s(call, recv, op, args)
528
+ return s(call, recv, op, *args)
343
529
  end
344
530
  end
345
- return s(call, process(recv), op, process(args))
531
+ return s(call, process(recv), op, *(args.map{|a| process(a)}))
346
532
  end
347
533
  end
348
534
 
@@ -365,52 +551,41 @@ class TempExpander < SexpProcessor # :nodoc: all
365
551
  end
366
552
 
367
553
  def process_defn(exp)
368
- tag, name, args, scope = exp
369
- if name.to_s =~ /^__bloom__.+/
370
- block = scope[1]
371
-
372
- block.each_with_index do |n,i|
373
- if i == 0
374
- raise Bud::CompileError if n != :block
375
- next
376
- end
377
-
378
- # temp declarations are misparsed if the RHS contains certain constructs
379
- # (e.g., group, "do |f| ... end" rather than "{|f| ... }"). Rewrite to
380
- # correct the misparsing.
381
- if n.sexp_type == :iter
382
- iter_body = n.sexp_body
383
- new_n = fix_temp_decl(iter_body)
384
- unless new_n.nil?
385
- block[i] = n = new_n
386
- @did_work = true
387
- end
388
- end
389
-
390
- _, recv, meth, meth_args = n
391
- if meth == KEYWORD and recv.nil?
392
- block[i] = rewrite_me(n)
554
+ tag, name, args, *body = exp
555
+ return exp unless name.to_s =~ /^__bloom__.+/
556
+
557
+ body.each_with_index do |n,i|
558
+ # temp declarations are misparsed if the RHS contains certain constructs
559
+ # (e.g., group, "do |f| ... end" rather than "{|f| ... }"). Rewrite to
560
+ # correct the misparsing.
561
+ if n.sexp_type == :iter
562
+ iter_body = n.sexp_body
563
+ new_n = fix_temp_decl(iter_body)
564
+ unless new_n.nil?
565
+ body[i] = n = new_n
393
566
  @did_work = true
394
567
  end
395
568
  end
569
+
570
+ _, recv, meth, meth_args = n
571
+ if meth == KEYWORD and recv.nil?
572
+ body[i] = rewrite_me(n)
573
+ @did_work = true
574
+ end
396
575
  end
397
- s(tag, name, args, scope)
576
+ s(tag, name, args, *body)
398
577
  end
399
578
 
400
579
  private
401
580
  def fix_temp_decl(iter_body)
402
581
  if iter_body.first.sexp_type == :call
403
582
  call_node = iter_body.first
583
+ _, recv, meth, *meth_args = call_node
404
584
 
405
- _, recv, meth, meth_args = call_node
406
585
  if meth == KEYWORD and recv.nil?
407
- _, lhs, op, rhs = meth_args.sexp_body.first
408
-
409
- old_rhs_body = rhs.sexp_body
410
- new_rhs_body = [:iter]
411
- new_rhs_body += old_rhs_body
412
- new_rhs_body += iter_body[1..-1]
413
- rhs[1] = Sexp.from_array(new_rhs_body)
586
+ _, lhs, op, rhs = meth_args.first
587
+ new_rhs = s(:iter, rhs, *(iter_body[1..-1]))
588
+ meth_args.first[3] = new_rhs
414
589
  return call_node
415
590
  end
416
591
  end
@@ -418,18 +593,18 @@ class TempExpander < SexpProcessor # :nodoc: all
418
593
  end
419
594
 
420
595
  def rewrite_me(exp)
421
- _, recv, meth, args = exp
596
+ _, recv, meth, *args = exp
422
597
 
423
- raise Bud::CompileError unless recv == nil
424
- nest_call = args.sexp_body.first
598
+ raise Bud::CompileError unless recv.nil?
599
+ nest_call = args.first
425
600
  raise Bud::CompileError unless nest_call.sexp_type == :call
426
601
 
427
- nest_recv, nest_op, nest_args = nest_call.sexp_body
602
+ nest_recv, nest_op, *nest_args = nest_call.sexp_body
428
603
  raise Bud::CompileError unless nest_recv.sexp_type == :lit
429
604
 
430
605
  tmp_name = nest_recv.sexp_body.first
431
606
  @tmp_tables << tmp_name
432
- new_recv = s(:call, nil, tmp_name, s(:arglist))
433
- return s(:call, new_recv, nest_op, nest_args)
607
+ new_recv = s(:call, nil, tmp_name)
608
+ return s(:call, new_recv, nest_op, *nest_args)
434
609
  end
435
610
  end