RubyGems - bud - Versions diffs - 0.9.4 → 0.9.9 - Mend

bud 0.9.4 → 0.9.9

Files changed (43) hide show

checksums.yaml +7 -0
data/History.txt +106 -0
data/README.md +6 -4
data/Rakefile +91 -0
data/bin/budlabel +63 -0
data/bin/budplot +18 -8
data/bin/budtimelines +2 -2
data/bin/budvis +7 -1
data/docs/README.md +8 -17
data/docs/cheat.md +112 -13
data/docs/getstarted.md +97 -84
data/docs/operational.md +3 -3
data/examples/basics/paths.rb +2 -2
data/examples/chat/README.md +2 -0
data/examples/chat/chat.rb +3 -2
data/examples/chat/chat_protocol.rb +2 -2
data/examples/chat/chat_server.rb +3 -2
data/lib/bud.rb +229 -114
data/lib/bud/aggs.rb +20 -4
data/lib/bud/bud_meta.rb +83 -73
data/lib/bud/collections.rb +306 -120
data/lib/bud/depanalysis.rb +3 -4
data/lib/bud/executor/README.rescan +2 -1
data/lib/bud/executor/elements.rb +96 -95
data/lib/bud/executor/group.rb +35 -32
data/lib/bud/executor/join.rb +164 -183
data/lib/bud/graphs.rb +3 -3
data/lib/bud/labeling/bloomgraph.rb +47 -0
data/lib/bud/labeling/budplot_style.rb +53 -0
data/lib/bud/labeling/labeling.rb +288 -0
data/lib/bud/lattice-core.rb +595 -0
data/lib/bud/lattice-lib.rb +422 -0
data/lib/bud/monkeypatch.rb +68 -32
data/lib/bud/rebl.rb +28 -10
data/lib/bud/rewrite.rb +361 -152
data/lib/bud/server.rb +16 -8
data/lib/bud/source.rb +21 -18
data/lib/bud/state.rb +93 -4
data/lib/bud/storage/zookeeper.rb +45 -33
data/lib/bud/version.rb +3 -0
data/lib/bud/viz.rb +10 -12
data/lib/bud/viz_util.rb +8 -3
metadata +107 -108

@@ -0,0 +1,422 @@
+require 'bud/lattice-core'
+# Float::INFINITY only defined in MRI 1.9.2+
+unless defined? Float::INFINITY
+  Float::INFINITY = 1.0/0.0
+end
+class Bud::MaxLattice < Bud::Lattice
+  wrapper_name :lmax
+  def initialize(i=-Float::INFINITY)
+    reject_input(i) unless i.class <= Comparable
+    @v = i
+  end
+  def merge(i)
+    i.reveal > @v ? i : self
+  end
+  morph :gt do |k|
+    Bud::BoolLattice.new(!!(@v > k))
+  end
+  morph :gt_eq do |k|
+    Bud::BoolLattice.new(!!(@v >= k))
+  end
+  # XXX: support MaxLattice input?
+  morph :+ do |i|
+    # NB: since bottom of lmax is negative infinity, applying + to bottom is a
+    # no-op: for any finite i, -infinity + i is still -infinity.
+    reject_input(i, "+") unless i.class <= Numeric
+    self.class.new(@v + i)
+  end
+  morph :min_of do |i|
+    reject_input(i, "min_of") unless i.class <= Numeric
+    i < @v ? self.class.new(i) : self
+  end
+  def lt_eq(k)
+    Bud::BoolLattice.new(!!(@v <= k))
+  end
+end
+class Bud::MinLattice < Bud::Lattice
+  wrapper_name :lmin
+  def initialize(i=Float::INFINITY)
+    reject_input(i) unless i.class <= Comparable
+    @v = i
+  end
+  def merge(i)
+    i.reveal < @v ? i : self
+  end
+  morph :lt do |k|
+    Bud::BoolLattice.new(!!(@v < k))
+  end
+  # XXX: support MinLattice input
+  morph :+ do |i|
+    # Since bottom of lmin is infinity, applying + to bottom is a no-op: for
+    # any finite i, infinity + i is still infinity.
+    reject_input(i, "+") unless i.class <= Numeric
+    self.class.new(@v + i)
+  end
+end
+# XXX: consider creating two fixed ("interned") values for true and false.
+class Bud::BoolLattice < Bud::Lattice
+  wrapper_name :lbool
+  def initialize(i=false)
+    reject_input(i) unless [true, false].include? i
+    @v = i
+  end
+  def merge(i)
+    self.class.new(@v || i.reveal)
+  end
+  # XXX: ugly syntax
+  morph :when_true do |&blk|
+    blk.call if @v
+  end
+end
+class Bud::MapLattice < Bud::Lattice
+  wrapper_name :lmap
+  def initialize(i={})
+    reject_input(i) unless i.class == Hash
+    i.each_pair do |k,val|
+      reject_input(i) if k.class <= Bud::Lattice
+      reject_input(i) unless val.class <= Bud::Lattice
+    end
+    @v = i
+  end
+  def merge(i)
+    rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
+      lhs_v.merge(rhs_v)
+    end
+    wrap_unsafe(rv)
+  end
+  def inspect
+    "<#{self.class.wrapper}: #{@v.inspect}>"
+  end
+  # XXX: If the key is not in the map, we would like to return some generic
+  # "bottom" value that is shared by all lattice types. Unfortunately, such a
+  # value does not exist, so we need the caller to tell us which class to use as
+  # an optional second argument (if omitted, fetching a non-existent key yields
+  # a runtime exception). Another alternative would be to specify the type of
+  # the map's values when the lmap is declared, but that hinders code reuse.
+  morph :at do |k, *args|
+    if @v.has_key? k
+      @v[k]
+    else
+      if args.empty?
+        raise Bud::Error, "missing key for lmap#at(#{k}) but no bottom type given"
+      end
+      args.first.new
+    end
+  end
+  morph :filter do
+    rv = {}
+    @v.each_pair do |k, val|
+      unless val.class <= Bud::BoolLattice
+        raise Bud::Error, "filter invoked on non-boolean map value: #{val}"
+      end
+      rv[k] = val if val.reveal == true
+    end
+    wrap_unsafe(rv)
+  end
+  morph :apply_morph do |sym, *args|
+    unless Bud::Lattice.global_morphs.include? sym
+      raise Bud::Error, "apply_morph called with non-morphism: #{sym}"
+    end
+    do_apply(sym, args)
+  end
+  # NB: "apply" can be used with both monotone functions and morphisms. We also
+  # provide apply_morph, which is slightly faster when the programmer knows they
+  # are applying a morphism.
+  monotone :apply do |sym, *args|
+    unless Bud::Lattice.global_mfuncs.include?(sym) ||
+           Bud::Lattice.global_morphs.include?(sym)
+      raise Bud::Error, "apply called with non-monotone function: #{sym}"
+    end
+    do_apply(sym, args)
+  end
+  def do_apply(sym, args)
+    rv = {}
+    @v.each_pair do |k, val|
+      res = val.send(sym, *args)
+      raise Bud::Error unless res.kind_of? Bud::Lattice
+      rv[k] = res
+    end
+    wrap_unsafe(rv)
+  end
+  morph :key? do |k|
+    Bud::BoolLattice.new(@v.has_key? k)
+  end
+  morph :key_set do
+    Bud::SetLattice.new(@v.keys)
+  end
+  monotone :size do
+    Bud::MaxLattice.new(@v.size)
+  end
+  morph :intersect do |i|
+    i_tbl = i.reveal
+    # Scan the smaller map, probe the larger one
+    scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
+    rv = {}
+    scan.each do |k,val|
+      rv[k] = val.merge(probe[k]) if probe.has_key? k
+    end
+    wrap_unsafe(rv)
+  end
+  # Produce a Bloom collection (array of tuples) from this lmap, optionally
+  # applying a user-provided code block to each (k,v) pair in turn. Note that
+  # this is slightly different from how projection over an lmap would work: we
+  # return an array, whereas projection would return an lmap.
+  morph :to_collection do |&blk|
+    @v.map(&blk)
+  end
+  # Return an lbool that is true if this map is less than or equal to the given
+  # map. "x" is less than or equal to "y" if:
+  #     (a) every key in "x" also appears in "y"
+  #     (b) for every key k in "x", x[k] <= y[k]
+  #
+  # NB: For this to be a morphism, we require that (a) "self" is deflationary
+  # (or fixed) (b) the input lattice value is inflationary (or fixed). We
+  # currently don't have a way to express (a) in the type system.
+  def lt_eq(i)
+    reject_input(i, "lt_eq") unless i.class <= self.class
+    @v.each do |k, v|
+      unless i.key?(k).reveal == true
+        return Bud::BoolLattice.new(false)
+      end
+      unless v.lt_eq(i.at(k).reveal).reveal == true
+        return Bud::BoolLattice.new(false)
+      end
+    end
+    return Bud::BoolLattice.new(true)
+  end
+end
+# A set lattice contains zero or more primitive (non-lattice) values.
+class Bud::SetLattice < Bud::Lattice
+  wrapper_name :lset
+  def initialize(i=Set.new)
+    reject_input(i) unless i.kind_of? Enumerable
+    reject_input(i) if i.any? {|e| e.kind_of? Bud::Lattice}
+    i = Set.new(i) unless i.kind_of? Set
+    @v = i
+  end
+  def merge(i)
+    wrap_unsafe(@v | i.reveal)
+  end
+  # Override default "inspect" implementation to produce slightly nicer output
+  def inspect
+    "<#{self.class.wrapper}: #{reveal.to_a.sort.inspect}>"
+  end
+  morph :intersect do |i|
+    wrap_unsafe(@v & i.reveal)
+  end
+  morph :contains? do |i|
+    Bud::BoolLattice.new(@v.member? i)
+  end
+  monotone :group_count do |key_cols|
+    # Assume key_cols for now gives indices
+    rv = Hash.new(Bud::MaxLattice.new(0))
+    @v.each do |t|
+      unless t.class == Array
+        raise Bud::TypeError, "group_count only works if lset elements are type Array"
+      end
+      key = []
+      key_cols.each do |ind|
+        if ind >= t.length
+          raise Bud::Error, "lset element in group_count does not have column index #{ind}"
+        end
+        key << t[ind]
+      end
+      rv[key] += 1
+    end
+    Bud::MapLattice.new(rv)
+  end
+  morph :pro do |&blk|
+    # We don't use Set#map, since it returns an Array (ugh).
+    rv = Set.new
+    @v.each do |t|
+      val = blk.call(t)
+      rv << val unless val.nil?
+    end
+    wrap_unsafe(rv)
+  end
+  monotone :size do
+    Bud::MaxLattice.new(@v.size)
+  end
+  # Assuming that the elements of this set are Structs (tuples with named field
+  # accessors), this performs an equijoin between the current lattice and
+  # i. `preds` is a hash of join predicates; each k/v pair in the hash is an
+  # equality predicate that self_tup[k] == i_tup[v]. The return value is the
+  # result of passing pairs of join tuples to the user-supplied code block
+  # (values for which the code block returns nil are omitted from the
+  # result). Note that if no predicates are passed, this computes the Cartesian
+  # product (in which case the input elements do not need to be Structs).
+  morph :eqjoin do |*args, &blk|
+    # Need to emulate default block arguments for MRI 1.8
+    i, preds = args
+    preds ||= {}
+    rv = Set.new
+    @v.each do |a|
+      i.probe(a, preds).each do |b|
+        if blk.nil?
+          rv << [a,b]
+        else
+          val = blk.call(a, b)
+          rv << val unless val.nil?
+        end
+      end
+    end
+    wrap_unsafe(rv)
+  end
+  # Assuming that this set contains Structs, this method takes a value "val" and
+  # a hash of predicates "preds". It returns all the structs t where val[k] =
+  # t[v] for all k,v in preds; an empty array is returned if no matches found.
+  def probe(val, preds)
+    return @v if preds.empty?
+    probe_val = schema_fetch(val, preds.keys)
+    build_index(preds.values)
+    index = @join_indexes[preds.values]
+    return index[probe_val] || []
+  end
+  private
+  def schema_fetch(val, cols)
+    cols.map {|s| val[s]}
+  end
+  def build_index(cols)
+    @join_indexes ||= {}
+    return @join_indexes[cols] if @join_indexes.has_key? cols
+    idx = {}
+    @v.each do |val|
+      index_val = schema_fetch(val, cols)
+      idx[index_val] ||= []
+      idx[index_val] << val
+    end
+    @join_indexes[cols] = idx
+    return idx
+  end
+end
+# A set that admits only non-negative numbers. This allows "sum" to be a
+# monotone function.  Note that this does duplicate elimination on its input, so
+# it actually computes "SUM(DISTINCT ...)" in SQL.
+#
+# XXX: for methods that take a user-provided code block, we need to ensure that
+# the set continues to contain only non-negative numbers.
+class Bud::PositiveSetLattice < Bud::SetLattice
+  wrapper_name :lpset
+  def initialize(i=[])
+    super
+    @v.each do |n|
+      reject_input(i) unless n.class <= Numeric
+      reject_input(i) if n < 0
+    end
+  end
+  monotone :pos_sum do
+    @sum = @v.reduce(Bud::MaxLattice.new(0), :+) if @sum.nil?
+    @sum
+  end
+end
+# XXX: Should this be just syntax sugar for a map lattice instead?
+class Bud::BagLattice < Bud::Lattice
+  wrapper_name :lbag
+  def initialize(i={})
+    reject_input(i) unless i.class <= Hash
+    i.each do |k, mult|
+      reject_input(i) if k.class <= Bud::Lattice
+      reject_input(i) unless (mult.class <= Integer && mult > 0)
+    end
+    @v = i
+  end
+  # Note that for merge to be idempotent, we need to use the traditional
+  # definition of multiset union (per-element max of multiplicities, rather than
+  # sum of multiplicities).
+  def merge(i)
+    rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
+      [lhs_v, rhs_v].max
+    end
+    wrap_unsafe(rv)
+  end
+  morph :intersect do |i|
+    i_tbl = i.reveal
+    # Scan the smaller one, probe the larger one
+    scan, probe = (@v.size < i_tbl.size ? [@v, i_tbl] : [i_tbl, @v])
+    rv = {}
+    scan.each do |k,val|
+      rv[k] = [val, probe[k]].min if probe.has_key? k
+    end
+    wrap_unsafe(rv)
+  end
+  morph :multiplicity do |k|
+    rv = @v[k]
+    rv ||= 0
+    Bud::MaxLattice.new(rv)
+  end
+  morph :+ do |i|
+    rv = @v.merge(i.reveal) do |k, lhs_v, rhs_v|
+      lhs_v + rhs_v
+    end
+    self.class.new(rv)
+  end
+  morph :contains? do |i|
+    Bud::BoolLattice.new(@v.has_key? i)
+  end
+  monotone :size do
+    @size = @v.values.reduce(Bud::MaxLattice.new(0), :+) if @size.nil?
+    @size
+  end
+end

data/lib/bud/monkeypatch.rb CHANGED

@@ -10,42 +10,63 @@ class Class
   end
 end
-# FIXME: Use a subclass of Struct.
-class Struct
+$struct_classes = {}
+$struct_lock = Mutex.new
+# NB: #hash and #eql? are overridden below so that they stay consistent with
+# the custom #== defined here.
+class Bud::TupleStruct < Struct
+  include Comparable
+  def self.new_struct(cols)
+    $struct_lock.synchronize {
+      ($struct_classes[cols] ||= Bud::TupleStruct.new(*cols))
+    }
+  end
+  # XXX: This only considers two TupleStruct instances to be equal if they have
+  # the same schema (column names) AND the same contents; unclear if structural
+  # equality (consider only values, not column names) would be better.
   def <=>(o)
     if o.class == self.class
       self.each_with_index do |e, i|
-        cmp = e <=> o[i]
-        return cmp if cmp != 0
+        other = o[i]
+        next if e == other
+        return e <=> other
       end
       return 0
     elsif o.nil?
-      return -1
+      return nil
     else
       raise "Comparison (<=>) between #{o.class} and #{self.class} not implemented"
     end
   end
-  alias oldeq :==
   def ==(o)
     if o.class == self.class
-      return oldeq(o)
+      return super
     elsif o.class == Array
-      begin
-        self.each_with_index do |el, i|
-          if el != o[i]
-            return false
-          end
-        end
-        return true
-      rescue StandardError
-        return false
+      return false if self.length != o.length
+      self.each_with_index do |el, i|
+        return false if el != o[i]
       end
+      return true
     end
     false
   end
-  def to_msgpack(out='')
+  def hash
+    self.values.hash
+  end
+  def eql?(o)
+    self == o
+  end
+  def +(o)
+    self.to_ary + o.to_ary
+  end
+  def to_msgpack(out=nil)
     self.to_a.to_msgpack(out)
   end
@@ -54,17 +75,23 @@ class Struct
   end
   alias :to_s :inspect
+  alias :to_ary :to_a
 end
 # XXX: TEMPORARY/UGLY hack to ensure that arrays and structs compare. This can be
 # removed once tests are rewritten.
 class Array
-  alias :oldeq :==
+  alias :old_eq :==
+  alias :old_eql? :eql?
   def ==(o)
-    if o.kind_of? Struct
-      o = o.to_a
-    end
-    self.oldeq(o)
+    o = o.to_a if o.kind_of? Bud::TupleStruct
+    self.old_eq(o)
+  end
+  def eql?(o)
+    o = o.to_a if o.kind_of? Bud::TupleStruct
+    self.old_eql?(o)
   end
 end
@@ -125,7 +152,6 @@ class Module
     @bud_import_tbl
   end
   # the block of Bloom collection declarations.  one per module.
   def state(&block)
     meth_name = Module.make_state_meth_name(self)
@@ -138,8 +164,9 @@ class Module
     define_method(meth_name, &block)
   end
-  # bloom statements to be registered with Bud runtime.  optional +block_name+
-  # allows for multiple bloom blocks per module, and overriding
+  # bloom statements to be registered with Bud runtime.  optional +block_name+
+  # assigns a name for the block; this is useful documentation, and also allows
+  # the block to be overridden in a child class.
   def bloom(block_name=nil, &block)
     # If no block name was specified, generate a unique name
     if block_name.nil?
@@ -148,7 +175,7 @@ class Module
       @block_id += 1
     else
       unless block_name.class <= Symbol
-        raise Bud::CompileError, "bloom block names must be a symbol: #{block_name}"
+        raise Bud::CompileError, "block name must be a symbol: #{block_name}"
       end
     end
@@ -161,15 +188,24 @@ class Module
     # module; this indicates a likely programmer error.
     if instance_methods(false).include?(meth_name) ||
        instance_methods(false).include?(meth_name.to_sym)
-      raise Bud::CompileError, "duplicate named bloom block: '#{block_name}' in #{self}"
+      raise Bud::CompileError, "duplicate block name: '#{block_name}' in #{self}"
     end
     ast = Source.read_block(caller[0]) # pass in caller's location via backtrace
     # ast corresponds only to the statements of the block. Wrap it in a method
     # definition for backward compatibility for now.
-    # First wrap ast in a block if it is only a single statement
-    ast = s(:block) if ast.nil?
-    ast = s(:block, ast) unless ast.sexp_type == :block
-    ast = s(:defn, meth_name.to_sym, s(:args), s(:scope, ast))
+    # If the block contained multiple statements, the AST will have a top-level
+    # :block node. Since ruby_parser ASTs for method definitions don't contain
+    # such a node, remove it.
+    if ast.nil?
+      ast = []
+    elsif ast.sexp_type == :block
+      ast = ast.sexp_body
+    else
+      ast = [ast]
+    end
+    ast = s(:defn, meth_name.to_sym, s(:args), *ast)
     unless self.respond_to? :__bloom_asts__
       def self.__bloom_asts__
         @__bloom_asts__ ||= {}
@@ -180,11 +216,11 @@ class Module
     define_method(meth_name.to_sym, &block)
   end
-  private
   # Return a string with a version of the class name appropriate for embedding
   # into a method name. Annoyingly, if you define class X nested inside
   # class/module Y, X's class name is the string "Y::X". We don't want to define
   # method names with colons in them, so just return "X" instead.
+  private
   def self.get_class_name(klass)
     (klass.name.nil? or klass.name == "") \
       ? "Anon#{klass.object_id}" \