RubyGems - bud - Versions diffs - 0.9.4 → 0.9.9 - Mend

bud 0.9.4 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

checksums.yaml +7 -0
data/History.txt +106 -0
data/README.md +6 -4
data/Rakefile +91 -0
data/bin/budlabel +63 -0
data/bin/budplot +18 -8
data/bin/budtimelines +2 -2
data/bin/budvis +7 -1
data/docs/README.md +8 -17
data/docs/cheat.md +112 -13
data/docs/getstarted.md +97 -84
data/docs/operational.md +3 -3
data/examples/basics/paths.rb +2 -2
data/examples/chat/README.md +2 -0
data/examples/chat/chat.rb +3 -2
data/examples/chat/chat_protocol.rb +2 -2
data/examples/chat/chat_server.rb +3 -2
data/lib/bud.rb +229 -114
data/lib/bud/aggs.rb +20 -4
data/lib/bud/bud_meta.rb +83 -73
data/lib/bud/collections.rb +306 -120
data/lib/bud/depanalysis.rb +3 -4
data/lib/bud/executor/README.rescan +2 -1
data/lib/bud/executor/elements.rb +96 -95
data/lib/bud/executor/group.rb +35 -32
data/lib/bud/executor/join.rb +164 -183
data/lib/bud/graphs.rb +3 -3
data/lib/bud/labeling/bloomgraph.rb +47 -0
data/lib/bud/labeling/budplot_style.rb +53 -0
data/lib/bud/labeling/labeling.rb +288 -0
data/lib/bud/lattice-core.rb +595 -0
data/lib/bud/lattice-lib.rb +422 -0
data/lib/bud/monkeypatch.rb +68 -32
data/lib/bud/rebl.rb +28 -10
data/lib/bud/rewrite.rb +361 -152
data/lib/bud/server.rb +16 -8
data/lib/bud/source.rb +21 -18
data/lib/bud/state.rb +93 -4
data/lib/bud/storage/zookeeper.rb +45 -33
data/lib/bud/version.rb +3 -0
data/lib/bud/viz.rb +10 -12
data/lib/bud/viz_util.rb +8 -3
metadata +107 -108

data/lib/bud/depanalysis.rb CHANGED

@@ -6,7 +6,7 @@ class DepAnalysis #:nodoc: all
   state do
     # Data inserted by client, usually from t_depends and t_provides
-    scratch :depends, [:lhs, :op, :body, :neg]
+    scratch :depends, [:lhs, :op, :body, :neg, :in_body]
     scratch :providing, [:pred, :input]
     # Intermediate state
@@ -36,7 +36,7 @@ class DepAnalysis #:nodoc: all
     cycle <= depends_tc do |d|
       if d.lhs == d.body
-        unless  d.neg and !d.temporal
+        unless d.neg and !d.temporal
           [d.lhs, d.via, d.neg, d.temporal]
         end
       end
@@ -60,11 +60,10 @@ class DepAnalysis #:nodoc: all
           [p.pred, true]
         end
       else
-        unless depends_tc.map{|d| d.lhs if d.lhs != d.body}.include? p.pred
+        unless depends_tc.map{|dt| dt.lhs if dt.lhs != dt.body}.include? p.pred
           [p.pred, false]
         end
       end
     end
   end
 end

data/lib/bud/executor/README.rescan CHANGED

@@ -3,7 +3,8 @@ Notes on Invalidate and Rescan in Bud
 (I'll use 'downstream' to mean rhs to lhs (like in budplot). In every stratum,
 data originates at scanned sources at the "top", winds its way through various
-PushElements and ends up in a collection at the "bottom". I'll also the term
+PushElements and ends up in a collection at the "bottom". That is, data flows
+from "upstream" producers to "downstream" consumers. I'll also use the term
 "elements" to mean both dataflow nodes (PushElements) and collections).
 Invalidation strategy works through two flags/signals, rescan and

data/lib/bud/executor/elements.rb CHANGED

@@ -1,4 +1,3 @@
-require 'set'
 require 'bud/collections'
 module Bud
@@ -13,19 +12,18 @@ module Bud
   class PushElement < BudCollection
     attr_accessor :rescan, :invalidated
     attr_accessor :elem_name
-    attr_reader :found_delta, :refcount, :wired_by, :outputs
+    attr_reader :found_delta, :wired_by, :outputs, :pendings
     def initialize(name_in, bud_instance, collection_name=nil, given_schema=nil, defer_schema=false, &blk)
       super(name_in, bud_instance, given_schema, defer_schema)
       @blk = blk
-      @outputs = []
-      @pendings = []
-      @deletes = []
-      @delete_keys = []
+      @outputs = Set.new
+      @pendings = Set.new
+      @deletes = Set.new
+      @delete_keys = Set.new
       @wired_by = []
       @elem_name = name_in
       @found_delta = false
-      @refcount = 1
       @collection_name = collection_name
       @invalidated = true
       @rescan = true
@@ -58,6 +56,8 @@ module Bud
             print "#{next_accum} "
             if o.class <= Bud::BudCollection
               puts "#{(o.object_id*2).to_s(16)}: #{o.qualified_tabname} (#{o.class})"
+            elsif o.class <= Bud::LatticeWrapper
+              puts "#{o.inspect}"
             else
               puts "#{(o.object_id*2).to_s(16)}: (#{o.class.name})"
             end
@@ -83,17 +83,15 @@ module Bud
       case kind
       when :output
-        raise Bud::Error unless element.respond_to? :insert
         @outputs << element
       when :pending
-        raise Bud::Error unless element.respond_to? :pending_merge
         @pendings << element
       when :delete
-        raise Bud::Error unless element.respond_to? :pending_delete
         @deletes << element
       when :delete_by_key
-        raise Bud::Error unless element.respond_to? :pending_delete_keys
         @delete_keys << element
+      else
+        raise Bud::Error, "unrecognized wiring kind: #{kind}"
       end
       element.wired_by << self if element.respond_to? :wired_by
@@ -116,27 +114,34 @@ module Bud
     end
     def push_out(item, do_block=true)
-      if item
-        if do_block && @blk
-          item = item.to_a if @blk.arity > 1
-          item = @blk.call item
+      if do_block && @blk
+        item = item.to_a if @blk.arity > 1
+        item = @blk.call item
+        return if item.nil?
+      end
+      @outputs.each do |ou|
+        if ou.class <= Bud::PushElement
+          ou.insert(item, self)
+        elsif ou.class <= Bud::BudCollection
+          ou.do_insert(item, ou.new_delta)
+        elsif ou.class <= Bud::LatticeWrapper
+          ou.insert(item, self)
+        else
+          raise Bud::Error, "unexpected output target: #{ou.class}"
         end
+      end
-        unless item.nil?
-          @outputs.each do |ou|
-            if ou.class <= Bud::PushElement
-              ou.insert(item, self)
-            elsif ou.class <= Bud::BudCollection
-              ou.do_insert(item, ou.new_delta)
-            else
-              raise Bud::Error, "expected either a PushElement or a BudCollection"
-            end
-          end
+      # for the following, o is a BudCollection
+      @deletes.each{|o| o.pending_delete([item])}
+      @delete_keys.each{|o| o.pending_delete_keys([item])}
-          # for all the following, o is a BudCollection
-          @deletes.each{|o| o.pending_delete([item])}
-          @delete_keys.each{|o| o.pending_delete_keys([item])}
-          @pendings.each{|o| o.pending_merge([item])}
+      # o is a LatticeWrapper or a BudCollection
+      @pendings.each do |o|
+        if o.class <= Bud::LatticeWrapper
+          o <+ item
+        else
+          o.pending_merge([item])
         end
       end
     end
@@ -144,18 +149,17 @@ module Bud
     # default for stateless elements
     public
     def add_rescan_invalidate(rescan, invalidate)
-      # if any of the source elements are in rescan mode, then put this node in
-      # rescan.
+      # If any sources are in rescan mode, then put this node in rescan
       srcs = non_temporal_predecessors
       if srcs.any?{|p| rescan.member? p}
         rescan << self
       end
-      # pass the current state to the non-element outputs, and see if they end
-      # up marking this node for rescan
+      # Pass the current state to each output collection and see if they end up
+      # marking this node for rescan
       invalidate_tables(rescan, invalidate)
-      # finally, if this node is in rescan, pass the request on to all source
+      # Finally, if this node is in rescan, pass the request on to all source
       # elements
       if rescan.member? self
         rescan.merge(srcs)
@@ -163,14 +167,16 @@ module Bud
     end
     def invalidate_tables(rescan, invalidate)
-      # exchange rescan and invalidate information with tables. If this node is
-      # in rescan, it may invalidate a target table (if it is a scratch). And if
-      # the target node is invalidated, this node marks itself for rescan to
-      # enable a refill of that table at run-time
-      @outputs.each do |o|
-        unless o.class <= PushElement
-          o.add_rescan_invalidate(rescan, invalidate)
-          rescan << self if invalidate.member? o
+      # Exchange rescan and invalidate information with tables. If this node is
+      # in rescan, it may invalidate an output table (if it is a scratch). And
+      # if the output table is going to be invalidated, this node marks itself
+      # for rescan to enable a refill of that table at run-time.
+      [@outputs, @pendings].each do |v|
+        v.each do |o|
+          unless o.class <= PushElement
+            o.add_rescan_invalidate(rescan, invalidate)
+            rescan << self if invalidate.member? o
+          end
         end
       end
     end
@@ -193,7 +199,7 @@ module Bud
     public
     def pro(the_name=elem_name, the_schema=schema, &blk)
       toplevel = @bud_instance.toplevel
-      elem = Bud::PushElement.new("project#{object_id}",
+      elem = Bud::PushElement.new("project#{object_id}".to_sym,
                                   toplevel.this_rule_context,
                                   @collection_name, the_schema)
       self.wire_to(elem)
@@ -204,43 +210,38 @@ module Bud
     alias each pro
-    # XXX: "the_name" & "the_schema" parameters are unused
     public
-    def each_with_index(the_name=elem_name, the_schema=schema, &blk)
+    def each_with_index(&blk)
       toplevel = @bud_instance.toplevel
-      elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}",
+      elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}".to_sym,
                                         toplevel.this_rule_context,
                                         @collection_name)
       elem.set_block(&blk)
       self.wire_to(elem)
-      toplevel.push_elems[[self.object_id, :each, blk]] = elem
+      toplevel.push_elems[[self.object_id, :each_with_index, blk]] = elem
     end
     def join(elem2, &blk)
-      # cached = @bud_instance.push_elems[[self.object_id,:join,[self,elem2], @bud_instance, blk]]
-      # if cached.nil?
-        elem2 = elem2.to_push_elem unless elem2.class <= PushElement
-        toplevel = @bud_instance.toplevel
-        join = Bud::PushSHJoin.new([self, elem2], toplevel.this_rule_context, [])
-        self.wire_to(join)
-        elem2.wire_to(join)
-        toplevel.push_elems[[self.object_id, :join, [self, elem2], toplevel, blk]] = join
-        toplevel.push_joins[toplevel.this_stratum] << join
-      # else
-      #   cached.refcount += 1
-      # end
-      return toplevel.push_elems[[self.object_id, :join, [self, elem2], toplevel, blk]]
+      elem2 = elem2.to_push_elem unless elem2.kind_of? PushElement
+      toplevel = @bud_instance.toplevel
+      join = Bud::PushSHJoin.new([self, elem2], toplevel.this_rule_context, [])
+      self.wire_to(join)
+      elem2.wire_to(join)
+      toplevel.push_elems[[self.object_id, :join, [self, elem2], toplevel, blk]] = join
+      toplevel.push_joins[toplevel.this_stratum] << join
+      return join
     end
     def *(elem2, &blk)
       join(elem2, &blk)
     end
-    def notin(elem2, preds=nil, &blk)
+    def notin(elem2, *preds, &blk)
+      elem2 = elem2.to_push_elem unless elem2.kind_of? PushElement
       toplevel = @bud_instance.toplevel
       notin_elem = Bud::PushNotIn.new([self, elem2], toplevel.this_rule_context, preds, &blk)
       self.wire_to(notin_elem)
       elem2.wire_to(notin_elem)
-      toplevel.push_elems[[self.object_id, :notin, collection, toplevel, blk]] = notin_elem
+      toplevel.push_elems[[self.object_id, :notin, [self, elem2], toplevel, blk]] = notin_elem
       return notin_elem
     end
@@ -252,6 +253,7 @@ module Bud
       end
     end
     alias <= merge
     superator "<~" do |o|
       raise Bud::Error, "illegal use of <~ with pusher '#{tabname}' on left"
     end
@@ -266,7 +268,7 @@ module Bud
     def group(keycols, *aggpairs, &blk)
       # establish schema
-      keycols = [] if keycols.nil?
+      keycols ||= []
       keycols = keycols.map{|c| canonicalize_col(c)}
       keynames = keycols.map{|k| k[2]}
       aggcolsdups = aggpairs.map{|ap| ap[0].class.name.split("::").last}
@@ -280,44 +282,37 @@ module Bud
         the_schema = { keynames => aggcols }
       end
-      aggpairs = aggpairs.map{|ap| ap[1].nil? ? [ap[0]] : [ap[0], canonicalize_col(ap[1])]}
+      aggpairs = prep_aggpairs(aggpairs)
       toplevel = @bud_instance.toplevel
-      # if @bud_instance.push_elems[[self.object_id, :group, keycols, aggpairs, blk]].nil?
-        g = Bud::PushGroup.new('grp'+Time.new.tv_usec.to_s, toplevel.this_rule_context, @collection_name, keycols, aggpairs, the_schema, &blk)
-        self.wire_to(g)
-        toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
-      # end
-      # toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]]
+      g = Bud::PushGroup.new("grp#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
+                             @collection_name, keycols, aggpairs, the_schema, &blk)
+      self.wire_to(g)
+      toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
       return g
     end
     def argagg(aggname, gbkey_cols, collection, &blk)
+      gbkey_cols ||= []
       gbkey_cols = gbkey_cols.map{|c| canonicalize_col(c)}
       collection = canonicalize_col(collection)
       toplevel = @bud_instance.toplevel
       agg = toplevel.send(aggname, collection)[0]
-      raise Bud::Error, "#{aggname} not declared exemplary" unless agg.class <= Bud::ArgExemplary
-      keynames = gbkey_cols.map do |k|
-        if k.class == Symbol
-          k.to_s
-        else
-          k[2]
-        end
+      unless agg.class <= Bud::ArgExemplary
+        raise Bud::Error, "#{aggname} not declared exemplary"
       end
       aggpairs = [[agg, collection]]
-      # if toplevel.push_elems[[self.object_id,:argagg, gbkey_cols, aggpairs, blk]].nil?
-        aa = Bud::PushArgAgg.new('argagg'+Time.new.tv_usec.to_s, toplevel.this_rule_context, @collection_name, gbkey_cols, aggpairs, schema, &blk)
-        self.wire_to(aa)
-        toplevel.push_elems[[self.object_id,:argagg, gbkey_cols, aggpairs, blk]] = aa
-      # end
-      # return toplevel.push_elems[[self.object_id,:argagg, gbkey_cols, aggpairs, blk]]
+      aa = Bud::PushArgAgg.new("argagg#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
+                               @collection_name, gbkey_cols, aggpairs, schema, &blk)
+      self.wire_to(aa)
+      toplevel.push_elems[[self.object_id, :argagg, gbkey_cols, aggpairs, blk]] = aa
       return aa
     end
     def argmax(gbcols, col, &blk)
-      argagg(gbcols, Bud::max(col), blk)
+      argagg(:max, gbcols, col, &blk)
     end
     def argmin(gbcols, col, &blk)
-      argagg(gbcols, Bud::min(col), blk)
+      argagg(:min, gbcols, col, &blk)
     end
     def sort(name=nil, bud_instance=nil, the_schema=nil, &blk)
       elem = Bud::PushSort.new(name, bud_instance, the_schema, &blk)
@@ -351,7 +346,7 @@ module Bud
     end
     def reduce(initial, &blk)
-      retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}",
+      retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}".to_sym,
                                    @bud_instance, @collection_name,
                                    schema, initial, &blk)
       self.wire_to(retval)
@@ -394,7 +389,8 @@ module Bud
   end
   class PushPredicate < PushStatefulElement
-    def initialize(pred_symbol, elem_name=nil, collection_name=nil, bud_instance=nil, schema_in=nil, &blk)
+    def initialize(pred_symbol, elem_name=nil, collection_name=nil,
+                   bud_instance=nil, schema_in=nil, &blk)
       @pred_symbol = pred_symbol
       @in_buf = []
       super(elem_name, bud_instance, collection_name, schema_in, &blk)
@@ -447,6 +443,7 @@ module Bud
   class ScannerElement < PushElement
     attr_reader :collection
     attr_reader :rescan_set, :invalidate_set
+    attr_accessor :force_rescan
     def initialize(elem_name, bud_instance, collection_in,
                    the_schema=collection_in.schema, &blk)
@@ -454,6 +451,7 @@ module Bud
       @collection = collection_in
       @rescan_set = []
       @invalidate_set = []
+      @force_rescan = false
     end
     def rescan
@@ -464,20 +462,21 @@ module Bud
       @collection.invalidate_at_tick # need to scan afresh if collection invalidated.
     end
-    # collection of others to rescan/invalidate if this scanner's collection
-    # were to be invalidated.
+    # What should be rescanned/invalidated if this scanner's collection were to
+    # be invalidated.
     def invalidate_at_tick(rescan, invalidate)
       @rescan_set = rescan
       @invalidate_set = invalidate
     end
     def add_rescan_invalidate(rescan, invalidate)
-      # if the collection is to be invalidated, the scanner needs to be in
+      # If the collection is to be invalidated, the scanner needs to be in
       # rescan mode
       rescan << self if invalidate.member? @collection
-      # in addition, default PushElement rescan/invalidate logic applies
-      super
+      # Pass the current state to each output collection and see if they end up
+      # marking this node for rescan
+      invalidate_tables(rescan, invalidate)
       # Note also that this node can be nominated for rescan by a target node;
       # in other words, a scanner element can be set to rescan even if the
@@ -485,9 +484,11 @@ module Bud
     end
     def scan(first_iter)
-      if first_iter
+      if @force_rescan
+        @collection.each_raw {|item| push_out(item)}
+        @force_rescan = false
+      elsif first_iter
         if rescan
-          # Scan entire storage
           @collection.each_raw {|item| push_out(item)}
         else
           # In the first iteration, tick_delta would be non-null IFF the
@@ -497,7 +498,7 @@ module Bud
       end
       # send deltas out in all cases
-      @collection.delta.each_value {|item| push_out(item)}
+      @collection.each_delta {|item| push_out(item)}
     end
   end

data/lib/bud/executor/group.rb CHANGED

@@ -1,5 +1,4 @@
 require 'bud/executor/elements'
-require 'set'
 module Bud
   class PushGroup < PushStatefulElement
@@ -10,21 +9,25 @@ module Bud
       else
         @keys = keys_in.map{|k| k[1]}
       end
-      # An aggpair is an array: [agg class instance, index of input field].
-      # ap[1] is nil for Count.
-      @aggpairs = aggpairs_in.map{|ap| [ap[0], ap[1].nil? ? nil : ap[1][1]]}
+      # An aggpair is an array: [agg class instance, array of indexes of input
+      # agg input columns].  The second field is nil for Count.
+      @aggpairs = aggpairs_in.map do |ap|
+        agg, *rest = ap
+        if rest.empty?
+          [agg, nil]
+        else
+          [agg, rest.map {|r| r[1]}]
+        end
+      end
       @groups = {}
       # Check whether we need to eliminate duplicates from our input (we might
       # see duplicates because of the rescan/invalidation logic, as well as
       # because we don't do duplicate elimination on the output of a projection
       # operator). We don't need to dupelim if all the args are exemplary.
-      @elim_dups = @aggpairs.any? {|a| not a[0].kind_of? ArgExemplary}
-      if @elim_dups
-        @input_cache = Set.new
-      end
+      @elim_dups = @aggpairs.any? {|ap| not ap[0].kind_of? ArgExemplary}
+      @input_cache = Set.new if @elim_dups
-      @seen_new_data = false
       super(elem_name, bud_instance, collection_name, schema_in, &blk)
     end
@@ -34,19 +37,25 @@ module Bud
         @input_cache << item
       end
-      @seen_new_data = true
       key = item.values_at(*@keys)
       group_state = @groups[key]
       if group_state.nil?
         @groups[key] = @aggpairs.map do |ap|
-          input_val = ap[1].nil? ? item : item[ap[1]]
-          ap[0].init(input_val)
+          if ap[1].nil?
+            ap[0].init(item)
+          else
+            ap[0].init(*item.values_at(*ap[1]))
+          end
         end
       else
         @aggpairs.each_with_index do |ap, agg_ix|
-          input_val = ap[1].nil? ? item : item[ap[1]]
-          state_val = ap[0].trans(group_state[agg_ix], input_val)[0]
-          group_state[agg_ix] = state_val
+          state_val = group_state[agg_ix]
+          if ap[1].nil?
+            trans_rv = ap[0].trans(state_val, item)
+          else
+            trans_rv = ap[0].trans(state_val, *item.values_at(*ap[1]))
+          end
+          group_state[agg_ix] = trans_rv[0]
         end
       end
     end
@@ -62,14 +71,12 @@ module Bud
       puts "#{self.class}/#{self.tabname} invalidated" if $BUD_DEBUG
       @groups.clear
       @input_cache.clear if @elim_dups
-      @seen_new_data = false
     end
     def flush
-      # If we haven't seen any input since the last call to flush(), we're done:
-      # our output would be the same as before.
-      return unless @seen_new_data
-      @seen_new_data = false
+      # Don't emit fresh output unless a rescan is needed
+      return unless @rescan
+      @rescan = false
       @groups.each do |key, group_state|
         rv = key.clone
@@ -87,7 +94,6 @@ module Bud
         raise Bud::Error, "multiple aggpairs #{aggpairs_in.map{|a| a.class.name}} in ArgAgg; only one allowed"
       end
       super(elem_name, bud_instance, collection_name, keys_in, aggpairs_in, schema_in, &blk)
-      @agg, @aggcol = @aggpairs[0]
       @winners = {}
     end
@@ -101,18 +107,16 @@ module Bud
       key = @keys.map{|k| item[k]}
       group_state = @groups[key]
       if group_state.nil?
-        @seen_new_data = true
         @groups[key] = @aggpairs.map do |ap|
           @winners[key] = [item]
-          input_val = item[ap[1]]
-          ap[0].init(input_val)
+          input_vals = item.values_at(*ap[1])
+          ap[0].init(*input_vals)
         end
       else
         @aggpairs.each_with_index do |ap, agg_ix|
-          input_val = item[ap[1]]
-          state_val, flag, *rest = ap[0].trans(group_state[agg_ix], input_val)
+          input_vals = item.values_at(*ap[1])
+          state_val, flag, *rest = ap[0].trans(group_state[agg_ix], *input_vals)
           group_state[agg_ix] = state_val
-          @seen_new_data = true unless flag == :ignore
           case flag
           when :ignore
@@ -133,14 +137,13 @@ module Bud
     end
     def flush
-      # If we haven't seen any input since the last call to flush(), we're done:
-      # our output would be the same as before.
-      return unless @seen_new_data
-      @seen_new_data = false
+      # Don't emit fresh output unless a rescan is needed
+      return unless @rescan
+      @rescan = false
       @groups.each_key do |g|
         @winners[g].each do |t|
-          push_out(t, false)
+          push_out(t)
         end
       end
     end