bud 0.9.4 → 0.9.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,422 @@
1
+ require 'bud/lattice-core'
2
+
3
+ # Float::INFINITY only defined in MRI 1.9.2+
4
+ unless defined? Float::INFINITY
5
+ Float::INFINITY = 1.0/0.0
6
+ end
7
+
8
+ class Bud::MaxLattice < Bud::Lattice
9
+ wrapper_name :lmax
10
+
11
+ def initialize(i=-Float::INFINITY)
12
+ reject_input(i) unless i.class <= Comparable
13
+ @v = i
14
+ end
15
+
16
+ def merge(i)
17
+ i.reveal > @v ? i : self
18
+ end
19
+
20
+ morph :gt do |k|
21
+ Bud::BoolLattice.new(!!(@v > k))
22
+ end
23
+
24
+ morph :gt_eq do |k|
25
+ Bud::BoolLattice.new(!!(@v >= k))
26
+ end
27
+
28
+ # XXX: support MaxLattice input?
29
+ morph :+ do |i|
30
+ # NB: since bottom of lmax is negative infinity, + is a no-op
31
+ reject_input(i, "+") unless i.class <= Numeric
32
+ self.class.new(@v + i)
33
+ end
34
+
35
+ morph :min_of do |i|
36
+ reject_input(i, "min_of") unless i.class <= Numeric
37
+ i < @v ? self.class.new(i) : self
38
+ end
39
+
40
+ def lt_eq(k)
41
+ Bud::BoolLattice.new(!!(@v <= k))
42
+ end
43
+ end
44
+
45
+ class Bud::MinLattice < Bud::Lattice
46
+ wrapper_name :lmin
47
+
48
+ def initialize(i=Float::INFINITY)
49
+ reject_input(i) unless i.class <= Comparable
50
+ @v = i
51
+ end
52
+
53
+ def merge(i)
54
+ i.reveal < @v ? i : self
55
+ end
56
+
57
+ morph :lt do |k|
58
+ Bud::BoolLattice.new(!!(@v < k))
59
+ end
60
+
61
+ # XXX: support MinLattice input
62
+ morph :+ do |i|
63
+ # Since bottom of lmin is infinity, + is a no-op
64
+ reject_input(i, "+") unless i.class <= Numeric
65
+ self.class.new(@v + i)
66
+ end
67
+ end
68
+
69
+ # XXX: consider creating two fixed ("interned") values for true and false.
70
+ class Bud::BoolLattice < Bud::Lattice
71
+ wrapper_name :lbool
72
+
73
+ def initialize(i=false)
74
+ reject_input(i) unless [true, false].include? i
75
+ @v = i
76
+ end
77
+
78
+ def merge(i)
79
+ self.class.new(@v || i.reveal)
80
+ end
81
+
82
+ # XXX: ugly syntax
83
+ morph :when_true do |&blk|
84
+ blk.call if @v
85
+ end
86
+ end
87
+
88
+ class Bud::MapLattice < Bud::Lattice
89
+ wrapper_name :lmap
90
+
91
+ def initialize(i={})
92
+ reject_input(i) unless i.class == Hash
93
+ i.each_pair do |k,val|
94
+ reject_input(i) if k.class <= Bud::Lattice
95
+ reject_input(i) unless val.class <= Bud::Lattice
96
+ end
97
+ @v = i
98
+ end
99
+
100
+ def merge(i)
101
+ rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
102
+ lhs_v.merge(rhs_v)
103
+ end
104
+ wrap_unsafe(rv)
105
+ end
106
+
107
+ def inspect
108
+ "<#{self.class.wrapper}: #{@v.inspect}>"
109
+ end
110
+
111
+ # XXX: If the key is not in the map, we would like to return some generic
112
+ # "bottom" value that is shared by all lattice types. Unfortunately, such a
113
+ # value does not exist, so we need the caller to tell us which class to use as
114
+ # an optional second argument (if omitted, fetching a non-existent key yields
115
+ # a runtime exception). Another alternative would be to specify the type of
116
+ # the map's values when the lmap is declared, but that hinders code reuse.
117
+ morph :at do |k, *args|
118
+ if @v.has_key? k
119
+ @v[k]
120
+ else
121
+ if args.empty?
122
+ raise Bud::Error, "missing key for lmap#at(#{k}) but no bottom type given"
123
+ end
124
+ args.first.new
125
+ end
126
+ end
127
+
128
+ morph :filter do
129
+ rv = {}
130
+ @v.each_pair do |k, val|
131
+ unless val.class <= Bud::BoolLattice
132
+ raise Bud::Error, "filter invoked on non-boolean map value: #{val}"
133
+ end
134
+ rv[k] = val if val.reveal == true
135
+ end
136
+ wrap_unsafe(rv)
137
+ end
138
+
139
+ morph :apply_morph do |sym, *args|
140
+ unless Bud::Lattice.global_morphs.include? sym
141
+ raise Bud::Error, "apply_morph called with non-morphism: #{sym}"
142
+ end
143
+ do_apply(sym, args)
144
+ end
145
+
146
+ # NB: "apply" can be used with both monotone functions and morphisms. We also
147
+ # provide apply_morph, which is slightly faster when the programmer knows they
148
+ # are applying a morphism.
149
+ monotone :apply do |sym, *args|
150
+ unless Bud::Lattice.global_mfuncs.include?(sym) ||
151
+ Bud::Lattice.global_morphs.include?(sym)
152
+ raise Bud::Error, "apply called with non-monotone function: #{sym}"
153
+ end
154
+ do_apply(sym, args)
155
+ end
156
+
157
+ def do_apply(sym, args)
158
+ rv = {}
159
+ @v.each_pair do |k, val|
160
+ res = val.send(sym, *args)
161
+ raise Bud::Error unless res.kind_of? Bud::Lattice
162
+ rv[k] = res
163
+ end
164
+ wrap_unsafe(rv)
165
+ end
166
+
167
+ morph :key? do |k|
168
+ Bud::BoolLattice.new(@v.has_key? k)
169
+ end
170
+
171
+ morph :key_set do
172
+ Bud::SetLattice.new(@v.keys)
173
+ end
174
+
175
+ monotone :size do
176
+ Bud::MaxLattice.new(@v.size)
177
+ end
178
+
179
+ morph :intersect do |i|
180
+ i_tbl = i.reveal
181
+ # Scan the smaller map, probe the larger one
182
+ scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
183
+ rv = {}
184
+ scan.each do |k,val|
185
+ rv[k] = val.merge(probe[k]) if probe.has_key? k
186
+ end
187
+ wrap_unsafe(rv)
188
+ end
189
+
190
+ # Produce a Bloom collection (array of tuples) from this lmap, optionally
191
+ # applying a user-provided code block to each (k,v) pair in turn. Note that
192
+ # this is slightly different from how projection over an lmap would work: we
193
+ # return an array, whereas projection would return an lmap.
194
+ morph :to_collection do |&blk|
195
+ @v.map(&blk)
196
+ end
197
+
198
+ # Return true if this map is smaller than or equal to the given
199
+ # map. "x" is smaller than or equal to "y" if:
200
+ # (a) every key in "x" also appears in "y"
201
+ # (b) for every key k in "x", x[k] <= y[k]
202
+ #
203
+ # NB: For this to be a morphism, we require that (a) "self" is deflationary
204
+ # (or fixed) (b) the input lattice value is inflationary (or fixed). We
205
+ # currently don't have a way to express (a) in the type system.
206
+ def lt_eq(i)
207
+ reject_input(i, "lt_eq") unless i.class <= self.class
208
+
209
+ @v.each do |k, v|
210
+ unless i.key?(k).reveal == true
211
+ return Bud::BoolLattice.new(false)
212
+ end
213
+ unless v.lt_eq(i.at(k).reveal).reveal == true
214
+ return Bud::BoolLattice.new(false)
215
+ end
216
+ end
217
+
218
+ return Bud::BoolLattice.new(true)
219
+ end
220
+ end
221
+
222
+ # A set lattice contains zero or more primitive (non-lattice) values.
223
+ class Bud::SetLattice < Bud::Lattice
224
+ wrapper_name :lset
225
+
226
+ def initialize(i=Set.new)
227
+ reject_input(i) unless i.kind_of? Enumerable
228
+ reject_input(i) if i.any? {|e| e.kind_of? Bud::Lattice}
229
+
230
+ i = Set.new(i) unless i.kind_of? Set
231
+ @v = i
232
+ end
233
+
234
+ def merge(i)
235
+ wrap_unsafe(@v | i.reveal)
236
+ end
237
+
238
+ # Override default "inspect" implementation to produce slightly nicer output
239
+ def inspect
240
+ "<#{self.class.wrapper}: #{reveal.to_a.sort.inspect}>"
241
+ end
242
+
243
+ morph :intersect do |i|
244
+ wrap_unsafe(@v & i.reveal)
245
+ end
246
+
247
+ morph :contains? do |i|
248
+ Bud::BoolLattice.new(@v.member? i)
249
+ end
250
+
251
+ monotone :group_count do |key_cols|
252
+ # Assume key_cols for now gives indices
253
+ rv = Hash.new(Bud::MaxLattice.new(0))
254
+ @v.each do |t|
255
+ unless t.class == Array
256
+ raise Bud::TypeError, "group_count only works if lset elements are type Array"
257
+ end
258
+
259
+ key = []
260
+ key_cols.each do |ind|
261
+ if ind >= t.length
262
+ raise Bud::Error, "lset element in group_count does not have column index #{ind}"
263
+ end
264
+ key << t[ind]
265
+ end
266
+ rv[key] += 1
267
+ end
268
+ Bud::MapLattice.new(rv)
269
+ end
270
+
271
+ morph :pro do |&blk|
272
+ # We don't use Set#map, since it returns an Array (ugh).
273
+ rv = Set.new
274
+ @v.each do |t|
275
+ val = blk.call(t)
276
+ rv << val unless val.nil?
277
+ end
278
+ wrap_unsafe(rv)
279
+ end
280
+
281
+ monotone :size do
282
+ Bud::MaxLattice.new(@v.size)
283
+ end
284
+
285
+ # Assuming that the elements of this set are Structs (tuples with named field
286
+ # accessors), this performs an equijoin between the current lattice and
287
+ # i. `preds` is a hash of join predicates; each k/v pair in the hash is an
288
+ # equality predicate that self_tup[k] == i_tup[v]. The return value is the
289
+ # result of passing pairs of join tuples to the user-supplied code block
290
+ # (values for which the code block returns nil are omitted from the
291
+ # result). Note that if no predicates are passed, this computes the Cartesian
292
+ # product (in which case the input elements do not need to be Structs).
293
+ morph :eqjoin do |*args, &blk|
294
+ # Need to emulate default block arguments for MRI 1.8
295
+ i, preds = args
296
+ preds ||= {}
297
+ rv = Set.new
298
+ @v.each do |a|
299
+ i.probe(a, preds).each do |b|
300
+ if blk.nil?
301
+ rv << [a,b]
302
+ else
303
+ val = blk.call(a, b)
304
+ rv << val unless val.nil?
305
+ end
306
+ end
307
+ end
308
+ wrap_unsafe(rv)
309
+ end
310
+
311
+ # Assuming that this set contains Structs, this method takes a value "val" and
312
+ # a hash of predicates "preds". It returns all the structs t where val[k] =
313
+ # t[v] for all k,v in preds; an empty array is returned if no matches found.
314
+ def probe(val, preds)
315
+ return @v if preds.empty?
316
+
317
+ probe_val = schema_fetch(val, preds.keys)
318
+ build_index(preds.values)
319
+ index = @join_indexes[preds.values]
320
+ return index[probe_val] || []
321
+ end
322
+
323
+ private
324
+ def schema_fetch(val, cols)
325
+ cols.map {|s| val[s]}
326
+ end
327
+
328
+ def build_index(cols)
329
+ @join_indexes ||= {}
330
+ return @join_indexes[cols] if @join_indexes.has_key? cols
331
+
332
+ idx = {}
333
+ @v.each do |val|
334
+ index_val = schema_fetch(val, cols)
335
+ idx[index_val] ||= []
336
+ idx[index_val] << val
337
+ end
338
+
339
+ @join_indexes[cols] = idx
340
+ return idx
341
+ end
342
+ end
343
+
344
+ # A set that admits only non-negative numbers. This allows "sum" to be a
345
+ # monotone function. Note that this does duplicate elimination on its input, so
346
+ # it actually computes "SUM(DISTINCT ...)" in SQL.
347
+ #
348
+ # XXX: for methods that take a user-provided code block, we need to ensure that
349
+ # the set continues to contain only non-negative numbers.
350
+ class Bud::PositiveSetLattice < Bud::SetLattice
351
+ wrapper_name :lpset
352
+
353
+ def initialize(i=[])
354
+ super
355
+ @v.each do |n|
356
+ reject_input(i) unless n.class <= Numeric
357
+ reject_input(i) if n < 0
358
+ end
359
+ end
360
+
361
+ monotone :pos_sum do
362
+ @sum = @v.reduce(Bud::MaxLattice.new(0), :+) if @sum.nil?
363
+ @sum
364
+ end
365
+ end
366
+
367
+ # XXX: Should this be just syntax sugar for a map lattice instead?
368
+ class Bud::BagLattice < Bud::Lattice
369
+ wrapper_name :lbag
370
+
371
+ def initialize(i={})
372
+ reject_input(i) unless i.class <= Hash
373
+ i.each do |k, mult|
374
+ reject_input(i) if k.class <= Bud::Lattice
375
+ reject_input(i) unless (mult.class <= Integer && mult > 0)
376
+ end
377
+ @v = i
378
+ end
379
+
380
+ # Note that for merge to be idempotent, we need to use the traditional
381
+ # definition of multiset union (per-element max of multiplicities, rather than
382
+ # sum of multiplicities).
383
+ def merge(i)
384
+ rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
385
+ [lhs_v, rhs_v].max
386
+ end
387
+ wrap_unsafe(rv)
388
+ end
389
+
390
+ morph :intersect do |i|
391
+ i_tbl = i.reveal
392
+ # Scan the smaller one, probe the larger one
393
+ scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
394
+ rv = {}
395
+ scan.each do |k,val|
396
+ rv[k] = [val, probe[k]].min if probe.has_key? k
397
+ end
398
+ wrap_unsafe(rv)
399
+ end
400
+
401
+ morph :multiplicity do |k|
402
+ rv = @v[k]
403
+ rv ||= 0
404
+ Bud::MaxLattice.new(rv)
405
+ end
406
+
407
+ morph :+ do |i|
408
+ rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
409
+ lhs_v + rhs_v
410
+ end
411
+ self.class.new(rv)
412
+ end
413
+
414
+ morph :contains? do |i|
415
+ Bud::BoolLattice.new(@v.has_key? i)
416
+ end
417
+
418
+ monotone :size do
419
+ @size = @v.values.reduce(Bud::MaxLattice.new(0), :+) if @size.nil?
420
+ @size
421
+ end
422
+ end
@@ -10,42 +10,63 @@ class Class
10
10
  end
11
11
  end
12
12
 
13
- # FIXME: Use a subclass of Struct.
14
- class Struct
13
+ $struct_classes = {}
14
+ $struct_lock = Mutex.new
15
+
16
+ # FIXME: Should likely override #hash and #eql? as well.
17
+ class Bud::TupleStruct < Struct
18
+ include Comparable
19
+
20
+ def self.new_struct(cols)
21
+ $struct_lock.synchronize {
22
+ ($struct_classes[cols] ||= Bud::TupleStruct.new(*cols))
23
+ }
24
+ end
25
+
26
+ # XXX: This only considers two TupleStruct instances to be equal if they have
27
+ # the same schema (column names) AND the same contents; unclear if structural
28
+ # equality (consider only values, not column names) would be better.
15
29
  def <=>(o)
16
30
  if o.class == self.class
17
31
  self.each_with_index do |e, i|
18
- cmp = e <=> o[i]
19
- return cmp if cmp != 0
32
+ other = o[i]
33
+ next if e == other
34
+ return e <=> other
20
35
  end
21
36
  return 0
22
37
  elsif o.nil?
23
- return -1
38
+ return nil
24
39
  else
25
40
  raise "Comparison (<=>) between #{o.class} and #{self.class} not implemented"
26
41
  end
27
42
  end
28
43
 
29
- alias oldeq :==
30
44
  def ==(o)
31
45
  if o.class == self.class
32
- return oldeq(o)
46
+ return super
33
47
  elsif o.class == Array
34
- begin
35
- self.each_with_index do |el, i|
36
- if el != o[i]
37
- return false
38
- end
39
- end
40
- return true
41
- rescue StandardError
42
- return false
48
+ return false if self.length != o.length
49
+ self.each_with_index do |el, i|
50
+ return false if el != o[i]
43
51
  end
52
+ return true
44
53
  end
45
54
  false
46
55
  end
47
56
 
48
- def to_msgpack(out='')
57
+ def hash
58
+ self.values.hash
59
+ end
60
+
61
+ def eql?(o)
62
+ self == o
63
+ end
64
+
65
+ def +(o)
66
+ self.to_ary + o.to_ary
67
+ end
68
+
69
+ def to_msgpack(out=nil)
49
70
  self.to_a.to_msgpack(out)
50
71
  end
51
72
 
@@ -54,17 +75,23 @@ class Struct
54
75
  end
55
76
 
56
77
  alias :to_s :inspect
78
+ alias :to_ary :to_a
57
79
  end
58
80
 
59
81
  # XXX: TEMPORARY/UGLY hack to ensure that arrays and structs compare. This can be
60
82
  # removed once tests are rewritten.
61
83
  class Array
62
- alias :oldeq :==
84
+ alias :old_eq :==
85
+ alias :old_eql? :eql?
86
+
63
87
  def ==(o)
64
- if o.kind_of? Struct
65
- o = o.to_a
66
- end
67
- self.oldeq(o)
88
+ o = o.to_a if o.kind_of? Bud::TupleStruct
89
+ self.old_eq(o)
90
+ end
91
+
92
+ def eql?(o)
93
+ o = o.to_a if o.kind_of? Bud::TupleStruct
94
+ self.old_eql?(o)
68
95
  end
69
96
  end
70
97
 
@@ -125,7 +152,6 @@ class Module
125
152
  @bud_import_tbl
126
153
  end
127
154
 
128
-
129
155
  # the block of Bloom collection declarations. one per module.
130
156
  def state(&block)
131
157
  meth_name = Module.make_state_meth_name(self)
@@ -138,8 +164,9 @@ class Module
138
164
  define_method(meth_name, &block)
139
165
  end
140
166
 
141
- # bloom statements to be registered with Bud runtime. optional +block_name+
142
- # allows for multiple bloom blocks per module, and overriding
167
+ # bloom statements to be registered with Bud runtime. optional +block_name+
168
+ # assigns a name for the block; this is useful documentation, and also allows
169
+ # the block to be overridden in a child class.
143
170
  def bloom(block_name=nil, &block)
144
171
  # If no block name was specified, generate a unique name
145
172
  if block_name.nil?
@@ -148,7 +175,7 @@ class Module
148
175
  @block_id += 1
149
176
  else
150
177
  unless block_name.class <= Symbol
151
- raise Bud::CompileError, "bloom block names must be a symbol: #{block_name}"
178
+ raise Bud::CompileError, "block name must be a symbol: #{block_name}"
152
179
  end
153
180
  end
154
181
 
@@ -161,15 +188,24 @@ class Module
161
188
  # module; this indicates a likely programmer error.
162
189
  if instance_methods(false).include?(meth_name) ||
163
190
  instance_methods(false).include?(meth_name.to_sym)
164
- raise Bud::CompileError, "duplicate named bloom block: '#{block_name}' in #{self}"
191
+ raise Bud::CompileError, "duplicate block name: '#{block_name}' in #{self}"
165
192
  end
166
193
  ast = Source.read_block(caller[0]) # pass in caller's location via backtrace
194
+
167
195
  # ast corresponds only to the statements of the block. Wrap it in a method
168
196
  # definition for backward compatibility for now.
169
- # First wrap ast in a block if it is only a single statement
170
- ast = s(:block) if ast.nil?
171
- ast = s(:block, ast) unless ast.sexp_type == :block
172
- ast = s(:defn, meth_name.to_sym, s(:args), s(:scope, ast))
197
+
198
+ # If the block contained multiple statements, the AST will have a top-level
199
+ # :block node. Since ruby_parser ASTs for method definitions don't contain
200
+ # such a node, remove it.
201
+ if ast.nil?
202
+ ast = []
203
+ elsif ast.sexp_type == :block
204
+ ast = ast.sexp_body
205
+ else
206
+ ast = [ast]
207
+ end
208
+ ast = s(:defn, meth_name.to_sym, s(:args), *ast)
173
209
  unless self.respond_to? :__bloom_asts__
174
210
  def self.__bloom_asts__
175
211
  @__bloom_asts__ ||= {}
@@ -180,11 +216,11 @@ class Module
180
216
  define_method(meth_name.to_sym, &block)
181
217
  end
182
218
 
183
- private
184
219
  # Return a string with a version of the class name appropriate for embedding
185
220
  # into a method name. Annoyingly, if you define class X nested inside
186
221
  # class/module Y, X's class name is the string "Y::X". We don't want to define
187
222
  # method names with colons in them, so just return "X" instead.
223
+ private
188
224
  def self.get_class_name(klass)
189
225
  (klass.name.nil? or klass.name == "") \
190
226
  ? "Anon#{klass.object_id}" \