RubyGems - bud - Versions diffs - 0.9.4 → 0.9.5 - Mend

bud 0.9.4 → 0.9.5

Files changed (31)

data/History.txt +23 -0
data/bin/budlabel +63 -0
data/bin/budtimelines +1 -1
data/docs/cheat.md +1 -1
data/docs/getstarted.md +8 -8
data/examples/chat/README.md +2 -0
data/examples/chat/chat.rb +3 -2
data/examples/chat/chat_protocol.rb +1 -1
data/examples/chat/chat_server.rb +3 -2
data/lib/bud/aggs.rb +16 -2
data/lib/bud/bud_meta.rb +19 -28
data/lib/bud/collections.rb +157 -39
data/lib/bud/depanalysis.rb +3 -4
data/lib/bud/executor/elements.rb +62 -57
data/lib/bud/executor/group.rb +35 -32
data/lib/bud/executor/join.rb +0 -11
data/lib/bud/graphs.rb +1 -1
data/lib/bud/labeling/bloomgraph.rb +47 -0
data/lib/bud/labeling/budplot_style.rb +53 -0
data/lib/bud/labeling/labeling.rb +288 -0
data/lib/bud/lattice-core.rb +563 -0
data/lib/bud/lattice-lib.rb +367 -0
data/lib/bud/monkeypatch.rb +18 -8
data/lib/bud/rewrite.rb +314 -139
data/lib/bud/server.rb +13 -2
data/lib/bud/source.rb +34 -18
data/lib/bud/state.rb +90 -1
data/lib/bud/storage/zookeeper.rb +38 -33
data/lib/bud/viz.rb +0 -1
data/lib/bud.rb +55 -15
metadata +15 -8

data/lib/bud/depanalysis.rb CHANGED

@@ -6,7 +6,7 @@ class DepAnalysis #:nodoc: all
   state do
     # Data inserted by client, usually from t_depends and t_provides
-    scratch :depends, [:lhs, :op, :body, :neg]
+    scratch :depends, [:lhs, :op, :body, :neg, :in_body]
     scratch :providing, [:pred, :input]
     # Intermediate state
@@ -36,7 +36,7 @@ class DepAnalysis #:nodoc: all
     cycle <= depends_tc do |d|
       if d.lhs == d.body
-        unless  d.neg and !d.temporal
+        unless d.neg and !d.temporal
           [d.lhs, d.via, d.neg, d.temporal]
         end
       end
@@ -60,11 +60,10 @@ class DepAnalysis #:nodoc: all
           [p.pred, true]
         end
       else
-        unless depends_tc.map{|d| d.lhs if d.lhs != d.body}.include? p.pred
+        unless depends_tc.map{|dt| dt.lhs if dt.lhs != dt.body}.include? p.pred
           [p.pred, false]
         end
       end
     end
   end
 end

data/lib/bud/executor/elements.rb CHANGED

@@ -1,4 +1,3 @@
-require 'set'
 require 'bud/collections'
 module Bud
@@ -13,7 +12,7 @@ module Bud
   class PushElement < BudCollection
     attr_accessor :rescan, :invalidated
     attr_accessor :elem_name
-    attr_reader :found_delta, :refcount, :wired_by, :outputs
+    attr_reader :found_delta, :wired_by, :outputs
     def initialize(name_in, bud_instance, collection_name=nil, given_schema=nil, defer_schema=false, &blk)
       super(name_in, bud_instance, given_schema, defer_schema)
@@ -25,7 +24,6 @@ module Bud
       @wired_by = []
       @elem_name = name_in
       @found_delta = false
-      @refcount = 1
       @collection_name = collection_name
       @invalidated = true
       @rescan = true
@@ -58,6 +56,8 @@ module Bud
             print "#{next_accum} "
             if o.class <= Bud::BudCollection
               puts "#{(o.object_id*2).to_s(16)}: #{o.qualified_tabname} (#{o.class})"
+            elsif o.class <= Bud::LatticeWrapper
+              puts "#{o.inspect}"
             else
               puts "#{(o.object_id*2).to_s(16)}: (#{o.class.name})"
             end
@@ -83,17 +83,15 @@ module Bud
       case kind
       when :output
-        raise Bud::Error unless element.respond_to? :insert
         @outputs << element
       when :pending
-        raise Bud::Error unless element.respond_to? :pending_merge
         @pendings << element
       when :delete
-        raise Bud::Error unless element.respond_to? :pending_delete
         @deletes << element
       when :delete_by_key
-        raise Bud::Error unless element.respond_to? :pending_delete_keys
         @delete_keys << element
+      else
+        raise Bud::Error, "unrecognized wiring kind: #{kind}"
       end
       element.wired_by << self if element.respond_to? :wired_by
@@ -116,27 +114,36 @@ module Bud
     end
     def push_out(item, do_block=true)
-      if item
-        if do_block && @blk
-          item = item.to_a if @blk.arity > 1
-          item = @blk.call item
+      return if item.nil?
+      if do_block && @blk
+        item = item.to_a if @blk.arity > 1
+        item = @blk.call item
+        return if item.nil?
+      end
+      @outputs.each do |ou|
+        if ou.class <= Bud::PushElement
+          ou.insert(item, self)
+        elsif ou.class <= Bud::BudCollection
+          ou.do_insert(item, ou.new_delta)
+        elsif ou.class <= Bud::LatticeWrapper
+          ou.insert(item, self)
+        else
+          raise Bud::Error, "expected output target: #{ou.class}"
         end
+      end
-        unless item.nil?
-          @outputs.each do |ou|
-            if ou.class <= Bud::PushElement
-              ou.insert(item, self)
-            elsif ou.class <= Bud::BudCollection
-              ou.do_insert(item, ou.new_delta)
-            else
-              raise Bud::Error, "expected either a PushElement or a BudCollection"
-            end
-          end
+      # for the following, o is a BudCollection
+      @deletes.each{|o| o.pending_delete([item])}
+      @delete_keys.each{|o| o.pending_delete_keys([item])}
-          # for all the following, o is a BudCollection
-          @deletes.each{|o| o.pending_delete([item])}
-          @delete_keys.each{|o| o.pending_delete_keys([item])}
-          @pendings.each{|o| o.pending_merge([item])}
+      # o is a LatticeWrapper or a BudCollection
+      @pendings.each do |o|
+        if o.class <= Bud::LatticeWrapper
+          o <+ item
+        else
+          o.pending_merge([item])
         end
       end
     end
@@ -213,34 +220,30 @@ module Bud
                                         @collection_name)
       elem.set_block(&blk)
       self.wire_to(elem)
-      toplevel.push_elems[[self.object_id, :each, blk]] = elem
+      toplevel.push_elems[[self.object_id, :each_with_index, blk]] = elem
     end
     def join(elem2, &blk)
-      # cached = @bud_instance.push_elems[[self.object_id,:join,[self,elem2], @bud_instance, blk]]
-      # if cached.nil?
-        elem2 = elem2.to_push_elem unless elem2.class <= PushElement
-        toplevel = @bud_instance.toplevel
-        join = Bud::PushSHJoin.new([self, elem2], toplevel.this_rule_context, [])
-        self.wire_to(join)
-        elem2.wire_to(join)
-        toplevel.push_elems[[self.object_id, :join, [self, elem2], toplevel, blk]] = join
-        toplevel.push_joins[toplevel.this_stratum] << join
-      # else
-      #   cached.refcount += 1
-      # end
-      return toplevel.push_elems[[self.object_id, :join, [self, elem2], toplevel, blk]]
+      elem2 = elem2.to_push_elem unless elem2.kind_of? PushElement
+      toplevel = @bud_instance.toplevel
+      join = Bud::PushSHJoin.new([self, elem2], toplevel.this_rule_context, [])
+      self.wire_to(join)
+      elem2.wire_to(join)
+      toplevel.push_elems[[self.object_id, :join, [self, elem2], toplevel, blk]] = join
+      toplevel.push_joins[toplevel.this_stratum] << join
+      return join
     end
     def *(elem2, &blk)
       join(elem2, &blk)
     end
-    def notin(elem2, preds=nil, &blk)
+    def notin(elem2, *preds, &blk)
+      elem2 = elem2.to_push_elem unless elem2.kind_of? PushElement
       toplevel = @bud_instance.toplevel
       notin_elem = Bud::PushNotIn.new([self, elem2], toplevel.this_rule_context, preds, &blk)
       self.wire_to(notin_elem)
       elem2.wire_to(notin_elem)
-      toplevel.push_elems[[self.object_id, :notin, collection, toplevel, blk]] = notin_elem
+      toplevel.push_elems[[self.object_id, :notin, [self, elem2], toplevel, blk]] = notin_elem
       return notin_elem
     end
@@ -280,14 +283,12 @@ module Bud
         the_schema = { keynames => aggcols }
       end
-      aggpairs = aggpairs.map{|ap| ap[1].nil? ? [ap[0]] : [ap[0], canonicalize_col(ap[1])]}
+      aggpairs = prep_aggpairs(aggpairs)
       toplevel = @bud_instance.toplevel
-      # if @bud_instance.push_elems[[self.object_id, :group, keycols, aggpairs, blk]].nil?
-        g = Bud::PushGroup.new('grp'+Time.new.tv_usec.to_s, toplevel.this_rule_context, @collection_name, keycols, aggpairs, the_schema, &blk)
-        self.wire_to(g)
-        toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
-      # end
-      # toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]]
+      g = Bud::PushGroup.new('grp'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
+                             @collection_name, keycols, aggpairs, the_schema, &blk)
+      self.wire_to(g)
+      toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
       return g
     end
@@ -305,19 +306,17 @@ module Bud
         end
       end
       aggpairs = [[agg, collection]]
-      # if toplevel.push_elems[[self.object_id,:argagg, gbkey_cols, aggpairs, blk]].nil?
-        aa = Bud::PushArgAgg.new('argagg'+Time.new.tv_usec.to_s, toplevel.this_rule_context, @collection_name, gbkey_cols, aggpairs, schema, &blk)
-        self.wire_to(aa)
-        toplevel.push_elems[[self.object_id,:argagg, gbkey_cols, aggpairs, blk]] = aa
-      # end
-      # return toplevel.push_elems[[self.object_id,:argagg, gbkey_cols, aggpairs, blk]]
+      aa = Bud::PushArgAgg.new('argagg'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
+                               @collection_name, gbkey_cols, aggpairs, schema, &blk)
+      self.wire_to(aa)
+      toplevel.push_elems[[self.object_id, :argagg, gbkey_cols, aggpairs, blk]] = aa
       return aa
     end
     def argmax(gbcols, col, &blk)
-      argagg(gbcols, Bud::max(col), blk)
+      argagg(:max, gbcols, col, &blk)
     end
     def argmin(gbcols, col, &blk)
-      argagg(gbcols, Bud::min(col), blk)
+      argagg(:min, gbcols, col, &blk)
     end
     def sort(name=nil, bud_instance=nil, the_schema=nil, &blk)
       elem = Bud::PushSort.new(name, bud_instance, the_schema, &blk)
@@ -447,6 +446,7 @@ module Bud
   class ScannerElement < PushElement
     attr_reader :collection
     attr_reader :rescan_set, :invalidate_set
+    attr_accessor :force_rescan
     def initialize(elem_name, bud_instance, collection_in,
                    the_schema=collection_in.schema, &blk)
@@ -454,6 +454,7 @@ module Bud
       @collection = collection_in
       @rescan_set = []
       @invalidate_set = []
+      @force_rescan = false
     end
     def rescan
@@ -485,7 +486,11 @@ module Bud
     end
     def scan(first_iter)
-      if first_iter
+      if @force_rescan
+        # Scan entire storage
+        @collection.each_raw {|item| push_out(item)}
+        @force_rescan = false
+      elsif first_iter
         if rescan
           # Scan entire storage
           @collection.each_raw {|item| push_out(item)}

data/lib/bud/executor/group.rb CHANGED

@@ -1,5 +1,4 @@
 require 'bud/executor/elements'
-require 'set'
 module Bud
   class PushGroup < PushStatefulElement
@@ -10,21 +9,25 @@ module Bud
       else
         @keys = keys_in.map{|k| k[1]}
       end
-      # An aggpair is an array: [agg class instance, index of input field].
-      # ap[1] is nil for Count.
-      @aggpairs = aggpairs_in.map{|ap| [ap[0], ap[1].nil? ? nil : ap[1][1]]}
+      # An aggpair is an array: [agg class instance, array of indexes of input
+      # agg input columns].  The second field is nil for Count.
+      @aggpairs = aggpairs_in.map do |ap|
+        agg, *rest = ap
+        if rest.empty?
+          [agg, nil]
+        else
+          [agg, rest.map {|r| r[1]}]
+        end
+      end
       @groups = {}
       # Check whether we need to eliminate duplicates from our input (we might
       # see duplicates because of the rescan/invalidation logic, as well as
       # because we don't do duplicate elimination on the output of a projection
       # operator). We don't need to dupelim if all the args are exemplary.
-      @elim_dups = @aggpairs.any? {|a| not a[0].kind_of? ArgExemplary}
-      if @elim_dups
-        @input_cache = Set.new
-      end
+      @elim_dups = @aggpairs.any? {|ap| not ap[0].kind_of? ArgExemplary}
+      @input_cache = Set.new if @elim_dups
-      @seen_new_data = false
       super(elem_name, bud_instance, collection_name, schema_in, &blk)
     end
@@ -34,19 +37,25 @@ module Bud
         @input_cache << item
       end
-      @seen_new_data = true
       key = item.values_at(*@keys)
       group_state = @groups[key]
       if group_state.nil?
         @groups[key] = @aggpairs.map do |ap|
-          input_val = ap[1].nil? ? item : item[ap[1]]
-          ap[0].init(input_val)
+          if ap[1].nil?
+            ap[0].init(item)
+          else
+            ap[0].init(*item.values_at(*ap[1]))
+          end
         end
       else
         @aggpairs.each_with_index do |ap, agg_ix|
-          input_val = ap[1].nil? ? item : item[ap[1]]
-          state_val = ap[0].trans(group_state[agg_ix], input_val)[0]
-          group_state[agg_ix] = state_val
+          state_val = group_state[agg_ix]
+          if ap[1].nil?
+            trans_rv = ap[0].trans(state_val, item)
+          else
+            trans_rv = ap[0].trans(state_val, *item.values_at(*ap[1]))
+          end
+          group_state[agg_ix] = trans_rv[0]
         end
       end
     end
@@ -62,14 +71,12 @@ module Bud
       puts "#{self.class}/#{self.tabname} invalidated" if $BUD_DEBUG
       @groups.clear
       @input_cache.clear if @elim_dups
-      @seen_new_data = false
     end
     def flush
-      # If we haven't seen any input since the last call to flush(), we're done:
-      # our output would be the same as before.
-      return unless @seen_new_data
-      @seen_new_data = false
+      # Don't emit fresh output unless a rescan is needed
+      return unless @rescan
+      @rescan = false
       @groups.each do |key, group_state|
         rv = key.clone
@@ -87,7 +94,6 @@ module Bud
         raise Bud::Error, "multiple aggpairs #{aggpairs_in.map{|a| a.class.name}} in ArgAgg; only one allowed"
       end
       super(elem_name, bud_instance, collection_name, keys_in, aggpairs_in, schema_in, &blk)
-      @agg, @aggcol = @aggpairs[0]
       @winners = {}
     end
@@ -101,18 +107,16 @@ module Bud
       key = @keys.map{|k| item[k]}
       group_state = @groups[key]
       if group_state.nil?
-        @seen_new_data = true
         @groups[key] = @aggpairs.map do |ap|
           @winners[key] = [item]
-          input_val = item[ap[1]]
-          ap[0].init(input_val)
+          input_vals = item.values_at(*ap[1])
+          ap[0].init(*input_vals)
         end
       else
         @aggpairs.each_with_index do |ap, agg_ix|
-          input_val = item[ap[1]]
-          state_val, flag, *rest = ap[0].trans(group_state[agg_ix], input_val)
+          input_vals = item.values_at(*ap[1])
+          state_val, flag, *rest = ap[0].trans(group_state[agg_ix], *input_vals)
           group_state[agg_ix] = state_val
-          @seen_new_data = true unless flag == :ignore
           case flag
           when :ignore
@@ -133,14 +137,13 @@ module Bud
     end
     def flush
-      # If we haven't seen any input since the last call to flush(), we're done:
-      # our output would be the same as before.
-      return unless @seen_new_data
-      @seen_new_data = false
+      # Don't emit fresh output unless a rescan is needed
+      return unless @rescan
+      @rescan = false
       @groups.each_key do |g|
         @winners[g].each do |t|
-          push_out(t, false)
+          push_out(t)
         end
       end
     end

data/lib/bud/executor/join.rb CHANGED

@@ -1,5 +1,4 @@
 require 'bud/executor/elements'
-require 'set'
 $EMPTY = []
 module Bud
@@ -58,12 +57,6 @@ module Bud
       super(@tabname, @bud_instance, nil, @cols)
     end
-    public
-    def copy_on_write
-      @refcount -= 1
-      return Bud::PushSHJoin.new(@all_rels_below, @bud_instance, [])
-    end
     public
     def state_id # :nodoc: all
       object_id
@@ -366,10 +359,6 @@ module Bud
     #          :col1 => :col2  (same as  lefttable.col1 => righttable.col2)
     public
     def pairs(*preds, &blk)
-      ## XXX Need to do this for all the join modifiers
-      unless @refcount == 1
-        return self.copy_on_write.pairs(preds, blk)
-      end
       @origpreds = preds
       setup_preds(preds) unless preds.empty?
       # given new preds, the state for the join will be different.  set it up again.

data/lib/bud/graphs.rb CHANGED

@@ -75,7 +75,7 @@ class GraphGen #:nodoc: all
     # its name is "CYC" + concat(sort(predicate names))
     depends.each do |d|
       # b/c bud_obj was pruned before serialization...
-      (bud_obj, rule_id, lhs, op, body, nm) = d.to_a
+      bud_obj, rule_id, lhs, op, body, nm, in_body = d.to_a
       head = lhs
       body = body

data/lib/bud/labeling/bloomgraph.rb ADDED

@@ -0,0 +1,47 @@
+require 'rubygems'
+require 'bud'
+require 'graphviz'
+# A simple interface between graphviz and bud
+module BudGraph
+  state do
+    interface input, :bnode, [:name] => [:meta]
+    interface input, :bedge, [:from, :to, :meta]
+  end
+end
+module BloomGraph
+  include BudGraph
+  state do
+    table :nodes, bnode.schema
+    table :edges, bedge.schema
+  end
+  bloom do
+    nodes <= bnode
+    edges <= bedge
+  end
+  def finish(ignore, name, fmt=:pdf)
+    it = ignore.to_set
+    tick
+    nodes.to_a.each do |n|
+      unless it.include? n.name.to_sym
+        @graph.add_nodes(n.name, n.meta)
+      end
+    end
+    edges.to_a.each do |e|
+      unless it.include? e.from.to_sym or it.include? e.to.to_sym
+        @graph.add_edges(e.from, e.to, e.meta)
+      end
+    end
+    @graph.output(fmt => name)
+  end
+  def initialize(opts={:type => :digraph})
+    @graph = GraphViz.new(:G, opts)
+    super
+  end
+end