bud 0.9.4 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,422 @@
1
+ require 'bud/lattice-core'
2
+
3
# Interpreters older than MRI 1.9.2 do not ship Float::INFINITY; supply it
# there and leave newer interpreters untouched.
Float.const_set(:INFINITY, 1.0 / 0.0) unless Float.const_defined?(:INFINITY)
7
+
8
# Lattice whose merge keeps the greater of two wrapped values (wrapper name:
# lmax). An instance built without arguments wraps negative infinity.
class Bud::MaxLattice < Bud::Lattice
  wrapper_name :lmax

  # Wrap +i+. A value whose class does not include Comparable is handed to
  # reject_input.
  def initialize(i=-Float::INFINITY)
    reject_input(i) unless i.class <= Comparable
    @v = i
  end

  # Return whichever operand wraps the strictly greater value; on a tie the
  # receiver is kept.
  def merge(i)
    if i.reveal > @v
      i
    else
      self
    end
  end

  # lbool that is true when the wrapped value is strictly greater than +bound+.
  morph :gt do |bound|
    exceeds = @v > bound
    Bud::BoolLattice.new(exceeds ? true : false)
  end

  # lbool that is true when the wrapped value is at least +bound+.
  morph :gt_eq do |bound|
    reaches = @v >= bound
    Bud::BoolLattice.new(reaches ? true : false)
  end

  # Shift the wrapped value by the plain number +delta+.
  # XXX: support MaxLattice input?
  morph :+ do |delta|
    # NB: the bottom of lmax is negative infinity, so adding to bottom is a no-op
    reject_input(delta, "+") unless delta.class <= Numeric
    self.class.new(@v + delta)
  end

  # Cap the wrapped value at the plain number +cap+: a new lmax wrapping +cap+
  # when it is strictly smaller than the current value, otherwise the receiver.
  morph :min_of do |cap|
    reject_input(cap, "min_of") unless cap.class <= Numeric
    if cap < @v
      self.class.new(cap)
    else
      self
    end
  end

  # lbool that is true when the wrapped value is at most the raw value +k+.
  def lt_eq(k)
    within = @v <= k
    Bud::BoolLattice.new(within ? true : false)
  end
end
44
+
45
# Lattice whose merge keeps the lesser of two wrapped values (wrapper name:
# lmin). An instance built without arguments wraps positive infinity.
class Bud::MinLattice < Bud::Lattice
  wrapper_name :lmin

  # Wrap +i+. A value whose class does not include Comparable is handed to
  # reject_input.
  def initialize(i=Float::INFINITY)
    reject_input(i) unless i.class <= Comparable
    @v = i
  end

  # Return whichever operand wraps the strictly smaller value; on a tie the
  # receiver is kept.
  def merge(i)
    if i.reveal < @v
      i
    else
      self
    end
  end

  # lbool that is true when the wrapped value is strictly less than +bound+.
  morph :lt do |bound|
    below = @v < bound
    Bud::BoolLattice.new(below ? true : false)
  end

  # Shift the wrapped value by the plain number +delta+.
  # XXX: support MinLattice input
  morph :+ do |delta|
    # The bottom of lmin is infinity, so adding to bottom is a no-op
    reject_input(delta, "+") unless delta.class <= Numeric
    self.class.new(@v + delta)
  end
end
68
+
69
# Boolean lattice (wrapper name: lbool): merge is logical OR, and an instance
# built without arguments wraps false.
#
# XXX: consider creating two fixed ("interned") values for true and false.
class Bud::BoolLattice < Bud::Lattice
  wrapper_name :lbool

  # Wrap +i+; anything other than the literal true or false is handed to
  # reject_input.
  def initialize(i=false)
    reject_input(i) unless true.equal?(i) || false.equal?(i)
    @v = i
  end

  # OR the wrapped boolean with the one wrapped by +i+, producing a new lbool.
  def merge(i)
    combined = @v || i.reveal
    self.class.new(combined)
  end

  # Invoke the given block only when the wrapped value is true; otherwise the
  # result is nil.
  # XXX: ugly syntax
  morph :when_true do |&action|
    @v ? action.call : nil
  end
end
87
+
88
# A map lattice (wrapper name: lmap) associates primitive (non-lattice) keys
# with lattice values. Merging two maps keeps every key from either side and
# merges the values of keys that appear on both sides.
class Bud::MapLattice < Bud::Lattice
  wrapper_name :lmap

  # Wrap the Hash +i+. The input is handed to reject_input unless it is exactly
  # a Hash whose keys are not lattices and whose values all are lattices.
  def initialize(i={})
    reject_input(i) unless i.class == Hash
    i.each_pair do |k, val|
      reject_input(i) if k.class <= Bud::Lattice
      reject_input(i) unless val.class <= Bud::Lattice
    end
    @v = i
  end

  # Key-wise merge: keys present on only one side are copied over, keys present
  # on both sides have their lattice values merged.
  def merge(i)
    rv = @v.merge(i.reveal) do |_k, lhs_v, rhs_v|
      lhs_v.merge(rhs_v)
    end
    wrap_unsafe(rv)
  end

  def inspect
    "<#{self.class.wrapper}: #{@v.inspect}>"
  end

  # Fetch the lattice value stored under +k+.
  #
  # XXX: If the key is not in the map, we would like to return some generic
  # "bottom" value that is shared by all lattice types. Unfortunately, such a
  # value does not exist, so we need the caller to tell us which class to use as
  # an optional second argument (if omitted, fetching a non-existent key yields
  # a runtime exception). Another alternative would be to specify the type of
  # the map's values when the lmap is declared, but that hinders code reuse.
  morph :at do |k, *args|
    if @v.key? k
      @v[k]
    else
      if args.empty?
        raise Bud::Error, "missing key for lmap#at(#{k}) but no bottom type given"
      end
      # The optional argument is a lattice class; its no-argument constructor
      # supplies the value for the missing key.
      args.first.new
    end
  end

  # Keep only the entries whose (lbool) value is true. Raises Bud::Error if any
  # value is not an lbool.
  morph :filter do
    rv = {}
    @v.each_pair do |k, val|
      unless val.class <= Bud::BoolLattice
        raise Bud::Error, "filter invoked on non-boolean map value: #{val}"
      end
      rv[k] = val if val.reveal == true
    end
    wrap_unsafe(rv)
  end

  # Apply the morphism named +sym+ (with +args+) to every value in the map.
  # Raises Bud::Error if +sym+ is not a registered morphism.
  morph :apply_morph do |sym, *args|
    unless Bud::Lattice.global_morphs.include? sym
      raise Bud::Error, "apply_morph called with non-morphism: #{sym}"
    end
    do_apply(sym, args)
  end

  # NB: "apply" can be used with both monotone functions and morphisms. We also
  # provide apply_morph, which is slightly faster when the programmer knows they
  # are applying a morphism.
  monotone :apply do |sym, *args|
    unless Bud::Lattice.global_mfuncs.include?(sym) ||
           Bud::Lattice.global_morphs.include?(sym)
      raise Bud::Error, "apply called with non-monotone function: #{sym}"
    end
    do_apply(sym, args)
  end

  # Shared implementation of apply/apply_morph: send +sym+ with +args+ to each
  # value and collect the results under the same keys. Every result must itself
  # be a lattice; otherwise Bud::Error is raised.
  def do_apply(sym, args)
    rv = {}
    @v.each_pair do |k, val|
      res = val.send(sym, *args)
      unless res.kind_of? Bud::Lattice
        raise Bud::Error, "#{sym} returned a non-lattice value for key #{k.inspect}: #{res.inspect}"
      end
      rv[k] = res
    end
    wrap_unsafe(rv)
  end

  # lbool that is true when +k+ is a key of this map.
  morph :key? do |k|
    Bud::BoolLattice.new(@v.key? k)
  end

  # The keys of this map, as an lset.
  morph :key_set do
    Bud::SetLattice.new(@v.keys)
  end

  # The number of entries in this map, as an lmax.
  monotone :size do
    Bud::MaxLattice.new(@v.size)
  end

  # Keep only the keys present in both maps; the value kept for each such key
  # is the merge of the two sides' values.
  morph :intersect do |i|
    i_tbl = i.reveal
    # Scan the smaller map, probe the larger one
    scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
    rv = {}
    scan.each do |k, val|
      rv[k] = val.merge(probe[k]) if probe.key? k
    end
    wrap_unsafe(rv)
  end

  # Produce a Bloom collection (array of tuples) from this lmap, optionally
  # applying a user-provided code block to each (k,v) pair in turn. Note that
  # this is slightly different from how projection over an lmap would work: we
  # return an array, whereas projection would return an lmap.
  morph :to_collection do |&blk|
    # Hash#map without a block yields an Enumerator rather than an array, so
    # the block-less case is handled explicitly.
    blk.nil? ? @v.to_a : @v.map(&blk)
  end

  # Return (as an lbool) whether this map is less than or equal to the given
  # map. "x" is less than or equal to "y" if:
  #   (a) every key in "x" also appears in "y"
  #   (b) for every key k in "x", x[k] <= y[k]
  #
  # NB: For this to be a morphism, we require that (a) "self" is deflationary
  # (or fixed) (b) the input lattice value is inflationary (or fixed). We
  # currently don't have a way to express (a) in the type system.
  def lt_eq(i)
    reject_input(i, "lt_eq") unless i.class <= self.class

    # The value-level lt_eq is handed the revealed (raw) contents of the
    # other map's entry.
    dominated = @v.all? do |k, v|
      i.key?(k).reveal == true && v.lt_eq(i.at(k).reveal).reveal == true
    end
    Bud::BoolLattice.new(dominated)
  end
end
221
+
222
# A set lattice (wrapper name: lset) contains zero or more primitive
# (non-lattice) values. Merge is set union.
class Bud::SetLattice < Bud::Lattice
  wrapper_name :lset

  # Wrap the elements of +i+. The input is handed to reject_input unless it is
  # Enumerable and contains no lattice values; anything that is not already a
  # Set is converted to one.
  def initialize(i=Set.new)
    reject_input(i) unless i.kind_of? Enumerable
    reject_input(i) if i.any? {|e| e.kind_of? Bud::Lattice}

    i = Set.new(i) unless i.kind_of? Set
    @v = i
  end

  # Set union of the two operands.
  def merge(i)
    wrap_unsafe(@v | i.reveal)
  end

  # Override default "inspect" implementation to produce slightly nicer output:
  # elements are shown sorted whenever they can be sorted.
  def inspect
    elems = reveal.to_a
    begin
      elems = elems.sort
    rescue StandardError
      # The elements are not mutually comparable (e.g. mixed types); inspect
      # must not fail because of that, so show them in the set's own order.
    end
    "<#{self.class.wrapper}: #{elems.inspect}>"
  end

  # Set intersection of the two operands.
  morph :intersect do |i|
    wrap_unsafe(@v & i.reveal)
  end

  # lbool that is true when +i+ is a member of this set.
  morph :contains? do |i|
    Bud::BoolLattice.new(@v.member? i)
  end

  # Group the (Array) elements of this set by the columns listed in +key_cols+
  # and count the elements in each group. The result is an lmap from the array
  # of key column values to an lmax holding the group's count.
  #
  # Raises Bud::TypeError if an element is not an Array, and Bud::Error if an
  # element is too short for one of the requested columns.
  monotone :group_count do |key_cols|
    # Assume key_cols for now gives indices
    counts = Hash.new(0)
    @v.each do |t|
      unless t.class == Array
        raise Bud::TypeError, "group_count only works if lset elements are type Array"
      end

      key = key_cols.map do |ind|
        if ind >= t.length
          raise Bud::Error, "lset element in group_count does not have column index #{ind}"
        end
        t[ind]
      end
      counts[key] += 1
    end

    # Wrap each count only once, in a plain Hash, so that no Hash default value
    # leaks into the resulting lmap.
    rv = {}
    counts.each_pair {|key, n| rv[key] = Bud::MaxLattice.new(n)}
    Bud::MapLattice.new(rv)
  end

  # Projection: apply the block to every element and collect the non-nil
  # results into a new set.
  morph :pro do |&blk|
    # We don't use Set#map, since it returns an Array (ugh).
    rv = Set.new
    @v.each do |t|
      val = blk.call(t)
      rv << val unless val.nil?
    end
    wrap_unsafe(rv)
  end

  # The number of elements in this set, as an lmax.
  monotone :size do
    Bud::MaxLattice.new(@v.size)
  end

  # Assuming that the elements of this set are Structs (tuples with named field
  # accessors), this performs an equijoin between the current lattice and
  # i. `preds` is a hash of join predicates; each k/v pair in the hash is an
  # equality predicate that self_tup[k] == i_tup[v]. The return value is the
  # result of passing pairs of join tuples to the user-supplied code block
  # (values for which the code block returns nil are omitted from the
  # result). Note that if no predicates are passed, this computes the Cartesian
  # product (in which case the input elements do not need to be Structs).
  morph :eqjoin do |*args, &blk|
    # Need to emulate default block arguments for MRI 1.8
    i, preds = args
    preds ||= {}
    rv = Set.new
    @v.each do |a|
      i.probe(a, preds).each do |b|
        if blk.nil?
          rv << [a, b]
        else
          val = blk.call(a, b)
          rv << val unless val.nil?
        end
      end
    end
    wrap_unsafe(rv)
  end

  # Assuming that this set contains Structs, this method takes a value "val" and
  # a hash of predicates "preds". It returns all the structs t where val[k] =
  # t[v] for all k,v in preds; an empty array is returned if no matches found.
  # With no predicates, every element of the set matches.
  def probe(val, preds)
    return @v if preds.empty?

    probe_val = schema_fetch(val, preds.keys)
    index = build_index(preds.values)
    index[probe_val] || []
  end

  private

  # Project +val+ onto the columns +cols+, in order.
  def schema_fetch(val, cols)
    cols.map {|s| val[s]}
  end

  # Return a hash from column values to the list of elements having those
  # values in +cols+. Indexes are built on first use and cached per column list.
  def build_index(cols)
    @join_indexes ||= {}
    return @join_indexes[cols] if @join_indexes.key? cols

    idx = {}
    @v.each do |val|
      (idx[schema_fetch(val, cols)] ||= []) << val
    end

    @join_indexes[cols] = idx
  end
end
343
+
344
# An lset variant (wrapper name: lpset) restricted to non-negative numbers,
# which is what lets "sum" be a monotone function. Because the input is a set,
# duplicates are eliminated first, so the result corresponds to
# "SUM(DISTINCT ...)" in SQL.
#
# XXX: for methods that take a user-provided code block, we need to ensure that
# the set continues to contain only positive numbers.
class Bud::PositiveSetLattice < Bud::SetLattice
  wrapper_name :lpset

  # Perform the parent's construction, then hand the input to reject_input if
  # any element is not Numeric or is negative.
  def initialize(i=[])
    super
    @v.each do |elem|
      reject_input(i) unless elem.class <= Numeric
      reject_input(i) if elem < 0
    end
  end

  # Sum of the elements as an lmax; computed on first use and then cached.
  monotone :pos_sum do
    @sum ||= @v.inject(Bud::MaxLattice.new(0)) {|total, elem| total + elem}
  end
end
366
+
367
# Multiset lattice (wrapper name: lbag), represented as a Hash from element to
# multiplicity.
#
# XXX: Should this be just syntax sugar for a map lattice instead?
class Bud::BagLattice < Bud::Lattice
  wrapper_name :lbag

  # Wrap the Hash +i+. The input is handed to reject_input unless it is a Hash
  # with non-lattice keys and strictly positive Integer multiplicities.
  def initialize(i={})
    reject_input(i) unless i.class <= Hash
    i.each do |elem, mult|
      reject_input(i) if elem.class <= Bud::Lattice
      reject_input(i) if !(mult.class <= Integer) || !(mult > 0)
    end
    @v = i
  end

  # Note that for merge to be idempotent, we need to use the traditional
  # definition of multiset union (per-element max of multiplicities, rather than
  # sum of multiplicities).
  def merge(i)
    unioned = @v.merge(i.reveal) {|_elem, ours, theirs| [ours, theirs].max}
    wrap_unsafe(unioned)
  end

  # Multiset intersection: elements present in both bags, each with the smaller
  # of its two multiplicities.
  morph :intersect do |i|
    other = i.reveal
    # Scan the smaller one, probe the larger one
    if @v.size < other.size
      scan = @v
      probe = other
    else
      scan = other
      probe = @v
    end

    common = {}
    scan.each do |elem, mult|
      next unless probe.has_key? elem
      common[elem] = [mult, probe[elem]].min
    end
    wrap_unsafe(common)
  end

  # Multiplicity of +k+ as an lmax; zero when nothing is stored for +k+.
  morph :multiplicity do |k|
    Bud::MaxLattice.new(@v[k] || 0)
  end

  # Multiset sum: multiplicities of elements present in both bags are added.
  morph :+ do |i|
    summed = @v.merge(i.reveal) {|_elem, ours, theirs| ours + theirs}
    self.class.new(summed)
  end

  # lbool that is true when +i+ is an element of this bag.
  morph :contains? do |i|
    Bud::BoolLattice.new(@v.has_key? i)
  end

  # Total of all multiplicities as an lmax; computed on first use and cached.
  monotone :size do
    @size ||= @v.values.inject(Bud::MaxLattice.new(0)) {|total, mult| total + mult}
  end
end
@@ -10,42 +10,63 @@ class Class
10
10
  end
11
11
  end
12
12
 
13
- # FIXME: Use a subclass of Struct.
14
- class Struct
13
+ $struct_classes = {}
14
+ $struct_lock = Mutex.new
15
+
16
+ # NB: #hash and #eql? are overridden below to match the #== defined here.
17
+ class Bud::TupleStruct < Struct
18
+ include Comparable
19
+
20
+ def self.new_struct(cols)
21
+ $struct_lock.synchronize {
22
+ ($struct_classes[cols] ||= Bud::TupleStruct.new(*cols))
23
+ }
24
+ end
25
+
26
+ # XXX: This only considers two TupleStruct instances to be equal if they have
27
+ # the same schema (column names) AND the same contents; unclear if structural
28
+ # equality (consider only values, not column names) would be better.
15
29
  def <=>(o)
16
30
  if o.class == self.class
17
31
  self.each_with_index do |e, i|
18
- cmp = e <=> o[i]
19
- return cmp if cmp != 0
32
+ other = o[i]
33
+ next if e == other
34
+ return e <=> other
20
35
  end
21
36
  return 0
22
37
  elsif o.nil?
23
- return -1
38
+ return nil
24
39
  else
25
40
  raise "Comparison (<=>) between #{o.class} and #{self.class} not implemented"
26
41
  end
27
42
  end
28
43
 
29
- alias oldeq :==
30
44
  def ==(o)
31
45
  if o.class == self.class
32
- return oldeq(o)
46
+ return super
33
47
  elsif o.class == Array
34
- begin
35
- self.each_with_index do |el, i|
36
- if el != o[i]
37
- return false
38
- end
39
- end
40
- return true
41
- rescue StandardError
42
- return false
48
+ return false if self.length != o.length
49
+ self.each_with_index do |el, i|
50
+ return false if el != o[i]
43
51
  end
52
+ return true
44
53
  end
45
54
  false
46
55
  end
47
56
 
48
- def to_msgpack(out='')
57
+ def hash
58
+ self.values.hash
59
+ end
60
+
61
+ def eql?(o)
62
+ self == o
63
+ end
64
+
65
+ def +(o)
66
+ self.to_ary + o.to_ary
67
+ end
68
+
69
+ def to_msgpack(out=nil)
49
70
  self.to_a.to_msgpack(out)
50
71
  end
51
72
 
@@ -54,17 +75,23 @@ class Struct
54
75
  end
55
76
 
56
77
  alias :to_s :inspect
78
+ alias :to_ary :to_a
57
79
  end
58
80
 
59
81
  # XXX: TEMPORARY/UGLY hack to ensure that arrays and structs compare. This can be
60
82
  # removed once tests are rewritten.
61
83
  class Array
62
- alias :oldeq :==
84
+ alias :old_eq :==
85
+ alias :old_eql? :eql?
86
+
63
87
  def ==(o)
64
- if o.kind_of? Struct
65
- o = o.to_a
66
- end
67
- self.oldeq(o)
88
+ o = o.to_a if o.kind_of? Bud::TupleStruct
89
+ self.old_eq(o)
90
+ end
91
+
92
+ def eql?(o)
93
+ o = o.to_a if o.kind_of? Bud::TupleStruct
94
+ self.old_eql?(o)
68
95
  end
69
96
  end
70
97
 
@@ -125,7 +152,6 @@ class Module
125
152
  @bud_import_tbl
126
153
  end
127
154
 
128
-
129
155
  # the block of Bloom collection declarations. one per module.
130
156
  def state(&block)
131
157
  meth_name = Module.make_state_meth_name(self)
@@ -138,8 +164,9 @@ class Module
138
164
  define_method(meth_name, &block)
139
165
  end
140
166
 
141
- # bloom statements to be registered with Bud runtime. optional +block_name+
142
- # allows for multiple bloom blocks per module, and overriding
167
+ # bloom statements to be registered with Bud runtime. optional +block_name+
168
+ # assigns a name for the block; this is useful documentation, and also allows
169
+ # the block to be overridden in a child class.
143
170
  def bloom(block_name=nil, &block)
144
171
  # If no block name was specified, generate a unique name
145
172
  if block_name.nil?
@@ -148,7 +175,7 @@ class Module
148
175
  @block_id += 1
149
176
  else
150
177
  unless block_name.class <= Symbol
151
- raise Bud::CompileError, "bloom block names must be a symbol: #{block_name}"
178
+ raise Bud::CompileError, "block name must be a symbol: #{block_name}"
152
179
  end
153
180
  end
154
181
 
@@ -161,15 +188,24 @@ class Module
161
188
  # module; this indicates a likely programmer error.
162
189
  if instance_methods(false).include?(meth_name) ||
163
190
  instance_methods(false).include?(meth_name.to_sym)
164
- raise Bud::CompileError, "duplicate named bloom block: '#{block_name}' in #{self}"
191
+ raise Bud::CompileError, "duplicate block name: '#{block_name}' in #{self}"
165
192
  end
166
193
  ast = Source.read_block(caller[0]) # pass in caller's location via backtrace
194
+
167
195
  # ast corresponds only to the statements of the block. Wrap it in a method
168
196
  # definition for backward compatibility for now.
169
- # First wrap ast in a block if it is only a single statement
170
- ast = s(:block) if ast.nil?
171
- ast = s(:block, ast) unless ast.sexp_type == :block
172
- ast = s(:defn, meth_name.to_sym, s(:args), s(:scope, ast))
197
+
198
+ # If the block contained multiple statements, the AST will have a top-level
199
+ # :block node. Since ruby_parser ASTs for method definitions don't contain
200
+ # such a node, remove it.
201
+ if ast.nil?
202
+ ast = []
203
+ elsif ast.sexp_type == :block
204
+ ast = ast.sexp_body
205
+ else
206
+ ast = [ast]
207
+ end
208
+ ast = s(:defn, meth_name.to_sym, s(:args), *ast)
173
209
  unless self.respond_to? :__bloom_asts__
174
210
  def self.__bloom_asts__
175
211
  @__bloom_asts__ ||= {}
@@ -180,11 +216,11 @@ class Module
180
216
  define_method(meth_name.to_sym, &block)
181
217
  end
182
218
 
183
- private
184
219
  # Return a string with a version of the class name appropriate for embedding
185
220
  # into a method name. Annoyingly, if you define class X nested inside
186
221
  # class/module Y, X's class name is the string "Y::X". We don't want to define
187
222
  # method names with colons in them, so just return "X" instead.
223
+ private
188
224
  def self.get_class_name(klass)
189
225
  (klass.name.nil? or klass.name == "") \
190
226
  ? "Anon#{klass.object_id}" \