RubyGems - bud - Versions diffs - 0.0.8 → 0.1.0.pre1 - Mend

bud 0.0.8 → 0.1.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

data/README +4 -10
data/bin/budplot +1 -2
data/docs/cheat.md +2 -15
data/examples/basics/paths.rb +7 -7
data/lib/bud/aggs.rb +15 -19
data/lib/bud/bud_meta.rb +165 -77
data/lib/bud/bust/bust.rb +11 -4
data/lib/bud/collections.rb +643 -280
data/lib/bud/depanalysis.rb +50 -25
data/lib/bud/executor/elements.rb +592 -0
data/lib/bud/executor/group.rb +104 -0
data/lib/bud/executor/join.rb +638 -0
data/lib/bud/graphs.rb +12 -11
data/lib/bud/joins.rb +2 -1
data/lib/bud/meta_algebra.rb +5 -4
data/lib/bud/metrics.rb +9 -3
data/lib/bud/monkeypatch.rb +131 -23
data/lib/bud/rebl.rb +41 -28
data/lib/bud/rewrite.rb +112 -440
data/lib/bud/server.rb +3 -2
data/lib/bud/source.rb +109 -0
data/lib/bud/state.rb +16 -9
data/lib/bud/storage/dbm.rb +62 -16
data/lib/bud/storage/zookeeper.rb +2 -2
data/lib/bud/viz.rb +8 -4
data/lib/bud/viz_util.rb +10 -9
data/lib/bud.rb +413 -199
metadata +40 -55
data/examples/deploy/tokenring-ec2.rb +0 -26
data/examples/deploy/tokenring-fork.rb +0 -15
data/examples/deploy/tokenring-thread.rb +0 -15
data/examples/deploy/tokenring.rb +0 -47
data/lib/bud/deploy/deployer.rb +0 -67
data/lib/bud/deploy/ec2deploy.rb +0 -199
data/lib/bud/deploy/forkdeploy.rb +0 -90
data/lib/bud/deploy/threaddeploy.rb +0 -38
data/lib/bud/storage/tokyocabinet.rb +0 -190
data/lib/bud/stratify.rb +0 -85

data/lib/bud/collections.rb CHANGED Viewed

@@ -1,28 +1,37 @@
 require 'msgpack'
+$struct_classes = {}
+$EMPTY_HASH = {}
 module Bud
   ########
   #--
-  # the collection types
+  # the collection types                                                                                                ``
   # each collection is partitioned into 4:
   # - pending holds tuples deferred til the next tick
   # - storage holds the "normal" tuples
   # - delta holds the delta for rhs's of rules during semi-naive
   # - new_delta will hold the lhs tuples currently being produced during s-n
+  # - tick_delta holds \Union(delta_i) for each delta_i processed in fixpoint iteration i.
   #++
   class BudCollection
     include Enumerable
-    # This needs to be an accessor to allow REBL to update it after cloning a
-    # Bud instance.
-    attr_accessor :bud_instance # :nodoc: all
-    attr_reader :cols, :key_cols, :tabname # :nodoc: all
-    attr_reader :storage, :delta, :new_delta, :pending # :nodoc: all
+    attr_accessor :bud_instance, :locspec_idx, :tabname  # :nodoc: all
+    attr_reader :cols, :key_cols # :nodoc: all
+    attr_reader :struct
+    attr_reader :storage, :delta, :new_delta, :pending, :tick_delta # :nodoc: all
+    attr_accessor :qualified_tabname
+    attr_accessor :invalidated, :to_delete, :rescan
+    attr_accessor :is_source
+    attr_accessor :wired_by
     def initialize(name, bud_instance, given_schema=nil, defer_schema=false) # :nodoc: all
       @tabname = name
       @bud_instance = bud_instance
+      @invalidated = true
+      @is_source = true # unless it shows up on the lhs of some rule
+      @wired_by = []
       init_schema(given_schema) unless given_schema.nil? and defer_schema
       init_buffers
     end
@@ -34,31 +43,42 @@ module Bud
       init_deltas
     end
-    private
+    public
     def init_schema(given_schema)
       given_schema ||= {[:key]=>[:val]}
+      @given_schema = given_schema
+      @cols, @key_cols = BudCollection.parse_schema(given_schema)
       # Check that no location specifiers appear in the schema. In the case of
       # channels, the location specifier has already been stripped from the
       # user-specified schema.
-      given_schema.each do |s|
+      @cols.each do |s|
         if s.to_s.start_with? "@"
           raise Bud::Error, "illegal use of location specifier (@) in column #{s} of non-channel collection #{tabname}"
         end
       end
-      @given_schema = given_schema
-      @cols, @key_cols = parse_schema(given_schema)
+      if @cols.size == 0
+        @cols = nil
+      else
+        @struct = ($struct_classes[@cols] ||= Struct.new(*@cols))
+        @structlen = @struct.members.length
+      end
       @key_colnums = key_cols.map {|k| @cols.index(k)}
       setup_accessors
     end
+    def qualified_tabname
+      @qualified_tabname ||= @bud_instance.toplevel?  ? tabname : (@bud_instance.qualified_name + "." + tabname.to_s).to_sym
+    end
     # The user-specified schema might come in two forms: a hash of Array =>
     # Array (key_cols => remaining columns), or simply an Array of columns (if
     # no key_cols were specified). Return a pair: [list of (all) columns, list
     # of key columns]
     private
-    def parse_schema(given_schema)
+    def self.parse_schema(given_schema)
       if given_schema.respond_to? :keys
         raise Bud::Error, "invalid schema for #{tabname}" if given_schema.length != 1
         key_cols = given_schema.keys.first
@@ -71,16 +91,20 @@ module Bud
       cols = key_cols + val_cols
       cols.each do |c|
         if c.class != Symbol
-          raise Bud::Error, "invalid schema element \"#{c}\", type \"#{c.class}\""
+          raise Bud::Error, "Invalid column name \"#{c}\", type \"#{c.class}\""
         end
       end
       if cols.uniq.length < cols.length
-        raise Bud::Error, "schema for #{tabname} contains duplicate names"
+        raise Bud::Error, "schema #{given_schema.inspect} contains duplicate names"
       end
       return [cols, key_cols]
     end
+    def inspect
+      "#{self.class}:#{self.object_id.to_s(16)} [#{qualified_tabname}]"
+    end
     public
     def clone_empty #:nodoc: all
       self.class.new(tabname, bud_instance, @given_schema)
@@ -105,28 +129,27 @@ module Bud
     #    j = join link, path, {link.to => path.from}
     private
     def setup_accessors
-      s = @cols
-      s.each do |colname|
-        reserved = eval "defined?(#{colname})"
-        unless (reserved.nil? or
-          (reserved == "method" and method(colname).arity == -1 and (eval(colname))[0] == self.tabname))
+      sc = @cols
+      return if sc.nil?
+      sc.each do |colname|
+        if name_reserved? colname
           raise Bud::Error, "symbol :#{colname} reserved, cannot be used as column name for #{tabname}"
         end
       end
       # set up schema accessors, which are class methods
-      m = Module.new do
-        s.each_with_index do |c, i|
-          define_method c do
+      @cols_access = Module.new do
+        sc.each_with_index do |c, i|
+          m = define_method c do
             [@tabname, i, c]
           end
         end
       end
-      self.extend m
+      self.extend @cols_access
       # now set up a Module for tuple accessors, which are instance methods
       @tupaccess = Module.new do
-        s.each_with_index do |colname, offset|
+        sc.each_with_index do |colname, offset|
           define_method colname do
             self[offset]
           end
@@ -134,51 +157,120 @@ module Bud
       end
     end
-    # define methods to access tuple attributes by column name
     private
+    def name_reserved?(colname)
+      reserved = eval "defined?(#{colname})"
+      return false if reserved.nil?
+      if reserved == "method" and (method(colname).arity == 0 or method(colname).arity == -1)
+        begin
+          ret = eval("#{colname}")
+          if ret.kind_of? Array and ret.size == 3 and ret[0] == tabname
+            return false # schema redefinition (see tupaccess above), so name is not considered reserved
+          end
+        rescue # in case calling method throws an error
+        end
+      end
+      return true
+    end
+    # define methods to access tuple attributes by column name
+    public
     def tuple_accessors(tup)
-      tup.extend @tupaccess
+      tup #  XXX remove tuple_accessors everywhere.
     end
     # generate a tuple with the schema of this collection and nil values in each attribute
     public
     def null_tuple
-      tuple_accessors(Array.new(@cols.length))
+      @struct.new
     end
     # project the collection to its key attributes
     public
     def keys
-      self.map{|t| get_key_vals(t)}
+      self.pro{|t| @key_colnums.map {|i| t[i]}}
     end
     # project the collection to its non-key attributes
     public
     def values
-      self.map{|t| (self.key_cols.length..self.cols.length-1).map{|i| t[i]}}
+      self.pro{|t| (self.key_cols.length..self.cols.length-1).map{|i| t[i]}}
     end
     # map each item in the collection into a string, suitable for placement in stdio
     public
     def inspected
-      [["#{@tabname}: [#{self.map{|t| "\n  (#{t.map{|v| v.inspect}.join ", "})"}}]"]]
+      self.pro{|t| [t.inspect]}
+      # how about when this is called outside wiring?
+      # [["#{@tabname}: [#{self.map{|t| "\n  (#{t.map{|v| v.inspect}.join ", "})"}}]"]]
+    end
+    # projection
+    public
+    def pro(the_name = tabname, the_schema = schema, &blk)
+      pusher = to_push_elem(the_name, the_schema)
+      pusher_pro = pusher.pro(&blk)
+      pusher_pro.elem_name = the_name
+      pusher_pro.tabname = the_name
+      pusher_pro
     end
-    # akin to map, but modified for efficiency in Bloom statements
     public
-    def pro(&blk)
-      if @bud_instance.stratum_first_iter
-        return map(&blk)
+    def each_with_index(the_name = tabname, the_schema = schema, &blk)
+      toplevel = @bud_instance.toplevel
+      if not toplevel.done_wiring
+        proj = pro(the_name, the_schema)
+        elem = Bud::PushEachWithIndex.new('each_with_index' + object_id.to_s, toplevel.this_rule_context, tabname)
+        elem.set_block(&blk)
+        proj.wire_to(elem)
+        toplevel.push_elems[[self.object_id,:each,blk]] = elem
+        elem
       else
-        retval = []
-        each_from([@delta]) do |t|
-          newitem = blk.call(t)
-          retval << newitem unless newitem.nil?
+         storage.each_with_index
+      end
+    end
+    # ruby 1.9 defines flat_map to return "a new array with the concatenated results of running
+    # <em>block</em> once for every element". So we wire the input to a pro(&blk), and wire the output
+    # of that pro to a group that does accum.
+    public
+    def flat_map(&blk)
+      pusher = self.pro(&blk)
+      toplevel = @bud_instance.toplevel
+      elem = Bud::PushElement.new(tabname, toplevel.this_rule_context, tabname)
+      pusher.wire_to(elem)
+      f = Proc.new do |t|
+        t.each do |i|
+          elem.push_out(i,false)
         end
-        return retval
+        nil
       end
+      elem.set_block(&f)
+      toplevel.push_elems[[self.object_id,:flatten]] = elem
+      return elem
+    end
+    public
+    def sort(&blk)
+      pusher = self.pro
+      pusher.sort(@name, @bud_instance, @cols, &blk)
     end
+    def rename(the_name, the_schema=nil)
+      # a scratch with this name should have been defined during rewriting
+      raise(Bud::Error, "rename failed to define a scratch named #{the_name}") unless @bud_instance.respond_to? the_name
+      retval = pro(the_name, the_schema)
+      #retval.init_schema(the_schema)
+      retval
+    end
+    # def to_enum
+    #   pusher = self.pro
+    #   pusher.to_enum
+    # end
     # By default, all tuples in any rhs are in storage or delta. Tuples in
     # new_delta will get transitioned to delta in the next iteration of the
     # evaluator (but within the current time tick).
@@ -187,6 +279,21 @@ module Bud
       each_from([@storage, @delta], &block)
     end
+    public
+    def each_raw(&block)
+      @storage.each_value(&block)
+    end
+    public
+    def invalidate_at_tick
+      true # being conservative here as a default.
+    end
+    public
+    def non_temporal_predecessors
+      @wired_by.map {|elem| elem if elem.outputs.include? self}
+    end
     public
     def tick_metrics
       strat_num = bud_instance.this_stratum
@@ -195,8 +302,8 @@ module Bud
       addr = bud_instance.ip_port unless bud_instance.port.nil?
       rule_txt = nil
       bud_instance.metrics[:collections] ||= {}
-      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
-      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
+      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
+      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
     end
     private
@@ -204,7 +311,7 @@ module Bud
       bufs.each do |b|
         b.each_value do |v|
           tick_metrics if bud_instance and bud_instance.options[:metrics]
-          yield v
+          yield tuple_accessors(v)
         end
       end
     end
@@ -236,6 +343,7 @@ module Bud
     def init_deltas
       @delta = {}
       @new_delta = {}
+      @tick_delta = []
     end
     public
@@ -257,18 +365,26 @@ module Bud
       # is this enforced in do_insert?
       check_enumerable(k)
       t = @storage[k]
-      return t.nil? ? @delta[k] : t
+      return t.nil? ? @delta[k] : tuple_accessors(t)
     end
     # checks for +item+ in the collection
     public
     def include?(item)
       return true if key_cols.nil? or (key_cols.empty? and length > 0)
-      return false if item.nil? or item.empty?
+      return false if item.nil?
       key = get_key_vals(item)
       return (item == self[key])
     end
+    def length
+      @storage.length + @delta.length
+    end
+    def empty?
+      length == 0
+    end
     # checks for an item for which +block+ produces a match
     public
     def exists?(&block)
@@ -289,24 +405,20 @@ module Bud
     private
     def prep_tuple(o)
-      unless o.respond_to?(:length) and o.respond_to?(:[])
-        raise Bud::TypeError, "non-indexable type inserted into \"#{tabname}\": #{o.inspect}"
-      end
-      if o.class <= String
-        raise Bud::TypeError, "String value used as a fact inserted into \"#{tabname}\": #{o.inspect}"
-      end
-      if o.length < cols.length then
-        # if this tuple has too few fields, pad with nil's
-        old = o.clone
-        (o.length..cols.length-1).each{|i| o << nil}
-        # puts "in #{@tabname}, converted #{old.inspect} to #{o.inspect}"
-      elsif o.length > cols.length then
-        # if this tuple has more fields than usual, bundle up the
-        # extras into an array
-        o = (0..(cols.length - 1)).map{|c| o[c]} << (cols.length..(o.length - 1)).map{|c| o[c]}
+      return o if o.class == @struct
+      if o.class == Array
+        if @struct.nil?
+          sch =  (1 .. o.length).map{|i| ("c"+i.to_s).to_sym}
+          init_schema(sch)
+        end
+        o = o.take(@structlen) if o.length > @structlen
+      elsif o.kind_of? Struct
+        init_schema(o.members.map{|m| m.to_sym}) if @struct.nil?
+        o = o.take(@structlen)
+      else
+        raise TypeError, "Array or struct type expected in \"#{qualified_tabname}\": #{o.inspect}"
       end
-      return o
+      return @struct.new(*o)
     end
     private
@@ -316,8 +428,17 @@ module Bud
       end
     end
-    private
+    public
     def do_insert(o, store)
+      if $BUD_DEBUG
+        storetype = case store.object_id
+                      when @storage.object_id; "storage"
+                      when @pending.object_id; "pending"
+                      when @delta.object_id; "delta"
+                      when @new_delta.object_id; "new_delta"
+                    end
+        puts "#{qualified_tabname}.#{storetype} ==> #{o}"
+      end
       return if o.nil? # silently ignore nils resulting from map predicates failing
       o = prep_tuple(o)
       key = get_key_vals(o)
@@ -331,8 +452,8 @@ module Bud
     end
     public
-    def insert(o) # :nodoc: all
-      # puts "insert: #{o.inspect} into #{tabname}"
+    def insert(o, source=nil) # :nodoc: all
+      # puts "insert: #{o} into #{qualified_tabname}"
       do_insert(o, @storage)
     end
@@ -343,8 +464,8 @@ module Bud
     private
     def check_enumerable(o)
-      unless o.nil? or o.class < Enumerable
-        raise Bud::TypeError, "collection #{tabname} expected Enumerable value, not #{o.inspect} (class = #{o.class})"
+      unless o.nil? or o.class < Enumerable or o.class <= Proc
+        raise TypeError, "Collection #{qualified_tabname} expected Enumerable value, not #{o.inspect} (class = #{o.class})"
       end
     end
@@ -397,22 +518,47 @@ module Bud
     end
     public
-    def merge(o, buf=@new_delta) # :nodoc: all
-      unless o.nil?
-        check_enumerable(o)
-        establish_schema(o) if @cols.nil?
-        # it's a pity that we are massaging tuples that may be dups
-        o.each do |t|
-          next if t.nil? or t == []
-          t = prep_tuple(t)
-          key = get_key_vals(t)
-          buf[key] = tuple_accessors(t) unless include_any_buf?(t, key)
+    def merge(o, buf=@delta) # :nodoc: all
+      toplevel = @bud_instance.toplevel
+      if o.class <= Bud::PushElement
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        deduce_schema(o) if @cols.nil?
+        o.wire_to self
+      elsif o.class <= Bud::BudCollection
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        deduce_schema(o) if @cols.nil?
+        o.pro.wire_to self
+      elsif o.class <= Proc and toplevel.done_bootstrap and not toplevel.done_wiring and not o.nil?
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        tbl = register_coll_expr(o)
+        tbl.pro.wire_to self
+      else
+        unless o.nil?
+          o = o.uniq.compact if o.respond_to?(:uniq)
+          check_enumerable(o)
+          establish_schema(o) if @cols.nil?
+          o.each {|i| do_insert(i, buf)}
         end
       end
       return self
     end
+    # def prep_coll_expr(o)
+    #   o = o.uniq.compact if o.respond_to?(:uniq)
+    #   check_enumerable(o)
+    #   establish_schema(o) if @cols.nil?
+    #   o
+    # end
+    def register_coll_expr(expr)
+      # require 'ruby-debug'; debugger
+      coll_name = ("expr_"+expr.object_id.to_s)
+      cols = (1..@cols.length).map{|i| ("c"+i.to_s).to_sym} unless @cols.nil?
+      @bud_instance.coll_expr(coll_name.to_sym, expr, cols)
+      coll = @bud_instance.send(coll_name)
+      coll
+    end
     public
     # instantaneously merge items from collection +o+ into +buf+
     def <=(collection)
@@ -422,60 +568,121 @@ module Bud
     # buffer items to be merged atomically at end of this timestep
     public
     def pending_merge(o) # :nodoc: all
-      check_enumerable(o)
-      establish_schema(o) if @cols.nil?
-      o.each {|i| do_insert(i, @pending)}
+      toplevel = @bud_instance.toplevel
+      if o.class <= Bud::PushElement
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        o.wire_to_pending self
+      elsif o.class <= Bud::BudCollection
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        o.pro.wire_to_pending self
+      elsif o.class <= Proc and toplevel.done_bootstrap and not toplevel.done_wiring
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        tbl = register_coll_expr(o) unless o.nil?
+        tbl.pro.wire_to_pending self
+      else
+        unless o.nil?
+          o = o.uniq.compact if o.respond_to?(:uniq)
+          check_enumerable(o)
+          establish_schema(o) if @cols.nil?
+          o.each{|i| self.do_insert(i, @pending)}
+        end
+      end
       return self
     end
+    public
+    def flush ; end
     public
     superator "<+" do |o|
       pending_merge o
     end
+    def tick
+      raise "tick must be overriden in #{self.class}"
+    end
+    # move deltas to storage, and new_deltas to deltas.
+    # return true if new deltas were found
     public
-    superator "<+-" do |o|
-      self <+ o
-      self <- o.map do |t|
-        unless t.nil?
-          self[get_key_vals(t)]
+    def tick_deltas # :nodoc: all
+      unless @delta.empty?
+        puts "#{qualified_tabname}.tick_delta delta --> storage (#{@delta.size} elems)" if $BUD_DEBUG
+        @storage.merge!(@delta)
+        @tick_delta += @delta.values
+        @delta.clear
+      end
+      unless @new_delta.empty?
+        puts "#{qualified_tabname}.tick_delta new_delta --> delta (#{@new_delta.size} elems)" if $BUD_DEBUG
+        @new_delta.each_pair do |k, v|
+          sv = @storage[k]
+          if sv.nil?
+            @delta[k] = v
+          else
+            raise_pk_error(v, sv) unless v == sv
+          end
         end
+        @new_delta.clear
+        return !(@delta.empty?)
       end
+      return false # delta empty; another fixpoint iter not required.
     end
     public
-    superator "<-+" do |o|
-      self <+- o
+    def add_rescan_invalidate(rescan, invalidate)
+      # No change. Most collections don't need to rescan on every tick (only do so on negate). Also, there's no cache
+      # to invalidate by default. Scratches and PushElements override this method.
     end
-    # Called at the end of each timestep: prepare the collection for the next
-    # timestep.
-    public
-    def tick  # :nodoc: all
-      @storage = @pending
-      @pending = {}
-      raise Bud::Error, "orphaned tuples in @delta for #{@tabname}" unless @delta.empty?
-      raise Bud::Error, "orphaned tuples in @new_delta for #{@tabname}" unless @new_delta.empty?
+    def bootstrap
+      unless @pending.empty?
+        @delta = @pending
+        @pending = {}
+      end
     end
-    # move deltas to storage, and new_deltas to deltas.
     public
-    def tick_deltas # :nodoc: all
-      # assertion: intersect(@storage, @delta) == nil
-      @storage.merge!(@delta)
-      @delta = @new_delta
-      @new_delta = {}
+    def flush_deltas
+      if $BUD_DEBUG
+        puts "#{qualified_tabname}.flush delta --> storage" unless @delta.empty?
+        puts "#{qualified_tabname}.flush new_delta --> storage" unless @new_delta.empty?
+      end
+      unless (@delta.empty?)
+        @storage.merge!(@delta)
+        @tick_delta += @delta.values
+        @delta.clear
+      end
+      unless @new_delta.empty?
+        @storage.merge!(@new_delta)
+        @new_delta.clear
+      end
+      # @tick_delta kept around for higher strata.
     end
     public
-    def length
-      @storage.length
+    def to_push_elem(the_name=tabname, the_schema=schema)
+      # if no push source yet, set one up
+      toplevel = @bud_instance.toplevel
+      #rule_context = toplevel.this_rule_context
+      this_stratum = toplevel.this_stratum
+      oid = self.object_id
+      unless toplevel.scanners[this_stratum][[oid, the_name]]
+        toplevel.scanners[this_stratum][[oid, the_name]] = Bud::ScannerElement.new(the_name, self.bud_instance, self, the_schema)
+        toplevel.push_sources[this_stratum][[oid, the_name]] = toplevel.scanners[this_stratum][[oid, the_name]]
+      end
+      return toplevel.scanners[this_stratum][[oid, the_name]]
     end
-    public
-    def empty?
-      @storage.empty?
+    private
+    def method_missing(sym, *args, &block)
+      begin
+        @storage.send sym, *args, &block
+      rescue Exception => e
+        err = NoMethodError.new("no method :#{sym} in class #{self.class.name}")
+        err.set_backtrace(e.backtrace)
+        raise err
+      end
     end
     ######## aggs
@@ -485,73 +692,26 @@ module Bud
     # never deal with deltas.  This assumes that stratification is done right, and it will
     # be sensitive to bugs in the stratification!
     def agg_in
-      if not respond_to?(:bud_instance) or bud_instance.nil? or bud_instance.stratum_first_iter
+      if not respond_to?(:bud_instance) or bud_instance.nil?
         return self
       else
         return []
       end
     end
     # a generalization of argmin/argmax to arbitrary exemplary aggregates.
     # for each distinct value of the grouping key columns, return the items in that group
     # that have the value of the exemplary aggregate +aggname+
     public
     def argagg(aggname, gbkey_cols, collection)
-      agg = bud_instance.send(aggname, nil)[0]
-      raise Bud::Error, "#{aggname} not declared exemplary" unless agg.class <= Bud::ArgExemplary
-      keynames = gbkey_cols.map do |k|
-        if k.class == Symbol
-          k.to_s
-        else
-          k[2]
-        end
-      end
-      if collection.class == Symbol
-        colnum = self.send(collection.to_s)[1]
-      else
-        colnum = collection[1]
-      end
-      tups = agg_in.inject({}) do |memo,p|
-        pkey_cols = keynames.map{|n| p.send(n.to_sym)}
-        if memo[pkey_cols].nil?
-          memo[pkey_cols] = {:agg=>agg.send(:init, p[colnum]), :tups => [p]}
-        else
-          memo[pkey_cols][:agg], argflag = \
-             agg.send(:trans, memo[pkey_cols][:agg], p[colnum])
-          if argflag == :keep or agg.send(:tie, memo[pkey_cols][:agg], p[colnum])
-            memo[pkey_cols][:tups] << p
-          elsif argflag == :replace
-            memo[pkey_cols][:tups] = [p]
-          elsif argflag.class <= Array and argflag[0] == :delete
-            memo[pkey_cols][:tups] -= argflag[1..-1]
-          end
-        end
-        memo
-      end
-      # now we need to finalize the agg per group
-      finalaggs = {}
-      finals = []
-      tups.each do |k,v|
-        finalaggs[k] = agg.send(:final, v[:agg])
-      end
-      # and winnow the tups to match
-      finalaggs.each do |k,v|
-        tups[k][:tups].each do |t|
-          finals << t if (t[colnum] == v)
-        end
-      end
-      if block_given?
-        finals.map{|r| yield r}
-      else
-        # merge directly into retval.storage, so that the temp tuples get picked up
-        # by the lhs of the rule
-        retval = BudScratch.new('argagg_temp', bud_instance, @given_schema)
-        retval.uniquify_tabname
-        retval.merge(finals, retval.storage)
-      end
+      elem = to_push_elem
+      elem.schema
+      gbkey_cols = gbkey_cols.map{|k| canonicalize_col(k)} unless gbkey_cols.nil?
+      retval = elem.argagg(aggname,gbkey_cols,canonicalize_col(collection))
+      # PushElement inherits the schema accessors from this Collection
+      retval.extend @cols_access
+      retval
     end
     # for each distinct value of the grouping key columns, return the items in
@@ -579,92 +739,54 @@ module Bud
       end
     end
-    def join(collections, *preds, &blk)
-      # since joins are stateful, we want to allocate them once and store in this Bud instance
-      # we ID them on their tablenames, preds, and block
-      return wrap_map(BudJoin.new(collections, @bud_instance, preds), &blk)
-    end
+    # def join(collections, *preds, &blk)
+    #   # since joins are stateful, we want to allocate them once and store in this Bud instance
+    #   # we ID them on their tablenames, preds, and block
+    #   return wrap_map(BudJoin.new(collections, @bud_instance, preds), &blk)
+    # end
     # form a collection containing all pairs of items in +self+ and items in
     # +collection+
     public
     def *(collection)
-      join([self, collection])
+      elem1  = to_push_elem
+      j = elem1.join(collection)
+      return j
+      # join([self, collection])
     end
-    # AntiJoin
-    public
-    def notin(coll, *preds, &blk)
-      return BudJoin.new([self, coll], @bud_instance).anti(*preds, &blk)
+    def group(key_cols, *aggpairs, &blk)
+      elem = to_push_elem
+      key_cols = key_cols.map{|k| canonicalize_col(k)} unless key_cols.nil?
+      aggpairs = aggpairs.map{|ap| [ap[0], canonicalize_col(ap[1])].compact} unless aggpairs.nil?
+      g = elem.group(key_cols, *aggpairs, &blk)
+      return g
     end
-    # SQL-style grouping.  first argument is an array of attributes to group by.
-    # Followed by a variable-length list of aggregates over attributes (e.g. +min(:x)+)
-    # Attributes can be referenced as symbols, or as +collection_name.attribute_name+
-    public
-    def group(key_cols, *aggpairs)
-      key_cols ||= []
-      keynames = key_cols.map do |k|
-        if k.class == Symbol
-          k
-        elsif k[2] and k[2].class == Symbol
-          k[2]
-        else
-          raise Bud::CompileError, "invalid grouping key"
-        end
-      end
-      aggcolsdups = aggpairs.map{|ap| ap[0].class.name.split("::").last}
-      aggcols = []
-      aggcolsdups.each_with_index do |n, i|
-        aggcols << "#{n.downcase}_#{i}".to_sym
-      end
-      aggpairs = aggpairs.map do |ap|
-        if ap[1].class == Symbol
-          colnum = ap[1].nil? ? nil : self.send(ap[1].to_s)[1]
-        else
-          colnum = ap[1].nil? ? nil : ap[1][1]
-        end
-        [ap[0], colnum]
-      end
-      tups = agg_in.inject({}) do |memo, p|
-        pkey_cols = keynames.map{|n| p.send(n)}
-        memo[pkey_cols] = [] if memo[pkey_cols].nil?
-        aggpairs.each_with_index do |ap, i|
-          agg = ap[0]
-          colval = ap[1].nil? ? nil : p[ap[1]]
-          if memo[pkey_cols][i].nil?
-            memo[pkey_cols][i] = agg.send(:init, colval)
-          else
-            memo[pkey_cols][i], ignore = agg.send(:trans, memo[pkey_cols][i], colval)
-          end
-        end
-        memo
-      end
+    def notin(collection, *preds, &blk)
+      elem1 = to_push_elem
+      elem2 = collection.to_push_elem
+      return elem1.notin(elem2, preds, &blk)
+    end
-      result = tups.inject([]) do |memo, t|
-        finals = []
-        aggpairs.each_with_index do |ap, i|
-          finals << ap[0].send(:final, t[1][i])
-        end
-        memo << t[0] + finals
-      end
-      if block_given?
-        result.map{|r| yield r}
-      else
-        # merge directly into retval.storage, so that the temp tuples get picked up
-        # by the lhs of the rule
-        if aggcols.empty?
-          schema = keynames
-        else
-          schema = { keynames => aggcols }
-        end
-        retval = BudScratch.new('temp_group', bud_instance, schema)
-        retval.uniquify_tabname
-        retval.merge(result, retval.storage)
-      end
+    def canonicalize_col(col)
+      col.class <= Symbol ? self.send(col) : col
     end
-    alias reduce inject
+    # alias reduce inject
+    def reduce(initial, &blk)
+      elem1 = to_push_elem
+      red_elem = elem1.reduce(initial, &blk)
+      return red_elem
+    end
+    public
+    def pretty_print_instance_variables
+      # list of attributes (in order) to print when pretty_print is called.
+      important = ["@tabname", "@storage", "@delta", "@new_delta", "@pending"]
+      # everything except bud_instance
+      important + (self.instance_variables - important - ["@bud_instance"])
+    end
     public
     def uniquify_tabname # :nodoc: all
@@ -674,11 +796,56 @@ module Bud
   end
   class BudScratch < BudCollection # :nodoc: all
+    public
+    def tick  # :nodoc: all
+      @tick_delta.clear # every tick starts with an empty tick-delta and delta
+      @delta.clear
+      if not @pending.empty?
+        invalidate_cache # new input arrived: drop the cached storage first
+        @delta = @pending # pending tuples become this tick's delta
+        @pending = {} # fresh Hash, so @delta keeps the tuples
+      elsif is_source
+        invalidate_cache # nothing pending, but a source scratch is still emptied
+      end
+      raise Bud::Error, "orphaned tuples in @new_delta for #{qualified_tabname}" unless @new_delta.empty?
+    end
+    public
+    def invalidate_at_tick
+      is_source      # rescan always only if this scratch is a source.
+    end
+    public
+    def add_rescan_invalidate(rescan, invalidate) # both arguments are appended to via <<
+      srcs = non_temporal_predecessors
+      if srcs.any? {|e| rescan.member? e} # at least one predecessor is already in rescan
+        invalidate << self # ...so this scratch is added to the invalidate set
+        srcs.each{|e| rescan << e} # ...and every predecessor is added to rescan
+      end
+    end
+    public
+    def invalidate_cache
+      puts "#{qualified_tabname} invalidated" if $BUD_DEBUG
+      # For scratches, storage is a cached value, so invalidating just flags and empties it.
+      @invalidated = true
+      @storage.clear
+    end
+  end
+  class BudInputInterface < BudScratch
   end
-  class BudTemp < BudCollection # :nodoc: all
+  class BudOutputInterface < BudScratch
   end
+  class BudTemp < BudScratch # :nodoc: all
+  end
+  # Channels are a different type of collection in that they represent two distinct collections, one each for
+  # incoming and outgoing.  The incoming side makes use of @storage and @delta, whereas the outgoing side only deals
+  # with @pending. XXX Maybe we should be using aliases instead.
   class BudChannel < BudCollection
     attr_reader :locspec_idx # :nodoc: all
@@ -694,7 +861,7 @@ module Bud
       given_schema = Marshal.load(Marshal.dump(given_schema))
       unless @is_loopback
-        the_cols, the_key_cols = parse_schema(given_schema)
+        the_cols, the_key_cols = BudCollection.parse_schema(given_schema)
         spec_count = the_cols.count {|c| c.to_s.start_with? "@"}
         if spec_count == 0
           raise Bud::Error, "missing location specifier for channel '#{name}'"
@@ -720,6 +887,11 @@ module Bud
       super(name, bud_instance, given_schema)
     end
+    def bootstrap
+      # Intentionally empty: overrides BudCollection's bootstrap so that pending tuples are not moved into delta.
+    end
     private
     def remove_at_sign!(cols)
       i = cols.find_index {|c| c.to_s.start_with? "@"}
@@ -736,7 +908,7 @@ module Bud
         lsplit[1] = lsplit[1].to_i
         return lsplit
       rescue Exception => e
-        raise Bud::Error, "illegal location specifier in tuple #{t.inspect} for channel \"#{tabname}\": #{e.to_s}"
+        raise Bud::Error, "Illegal location specifier in tuple #{t.inspect} for channel \"#{qualified_tabname}\": #{e.to_s}"
       end
     end
@@ -747,24 +919,31 @@ module Bud
     public
     def tick # :nodoc: all
-      @storage = {}
+      @storage.clear # empties the existing Hash in place (the removed line allocated a new one)
+      @invalidated = true # set unconditionally on every tick
       # Note that we do not clear @pending here: if the user inserted into the
       # channel manually (e.g., via <~ from inside a sync_do block), we send the
       # message at the end of the current tick.
     end
+    public
+    def invalidate_cache
+    end
     public
     def flush # :nodoc: all
-      ip = @bud_instance.ip
-      port = @bud_instance.port
-      each_from([@pending]) do |t|
+      toplevel = @bud_instance.toplevel # ip, port and the datagram socket are all read from the toplevel instance
+      ip = toplevel.ip
+      port = toplevel.port
+      @pending.each_value do |t|
         if @is_loopback
           the_locspec = [ip, port] # loopback: address the message to this instance's own ip/port
         else
           the_locspec = split_locspec(t, @locspec_idx)
           raise Bud::Error, "'#{t[@locspec_idx]}', channel '#{@tabname}'" if the_locspec[0].nil? or the_locspec[1].nil? or the_locspec[0] == '' or the_locspec[1] == ''
         end
-        @bud_instance.dsock.send_datagram([@tabname, t].to_msgpack, the_locspec[0], the_locspec[1])
+        puts "channel #{qualified_tabname}.send: #{t}" if $BUD_DEBUG
+        toplevel.dsock.send_datagram([qualified_tabname.to_s, t].to_msgpack, the_locspec[0], the_locspec[1]) # payload is msgpack of [qualified channel name, tuple]
       end
       @pending.clear # every queued tuple has been handed to the socket
     end
@@ -777,9 +956,9 @@ module Bud
       if cols.size > 2
         # bundle up each tuple's non-locspec fields into an array
         retval = case @locspec_idx
-          when 0 then self.pro{|t| t[1..(t.size-1)]}
-          when (cols.size - 1) then self.pro{|t| t[0..(t.size-2)]}
-          else self.pro{|t| t[0..(@locspec_idx-1)] + t[@locspec_idx+1..(t.size-1)]}
+          when 0 then self.pro{|t| t.values_at(1..(t.size-1))}
+          when (schema.size - 1) then self.pro{|t| t.values_at(0..(t.size-2))}
+          else self.pro{|t| t.values_at(0..(@locspec_idx-1), @locspec_idx+1..(t.size-1))}
         end
       else
         # just return each tuple's non-locspec field value
@@ -789,7 +968,11 @@ module Bud
     end
     superator "<~" do |o|
-      pending_merge o
+      if o.class <= PushElement
+        o.wire_to_pending self # push elements are wired to this channel rather than merged eagerly
+      else
+        pending_merge(o) # any other right-hand side goes through pending_merge as before
+      end
     end
     superator "<+" do |o|
@@ -803,7 +986,7 @@ module Bud
     end
   end
-  class BudTerminal < BudCollection # :nodoc: all
+  class BudTerminal < BudScratch # :nodoc: all
     def initialize(name, given_schema, bud_instance, prompt=false) # :nodoc: all
       super(name, bud_instance, given_schema)
       @prompt = prompt
@@ -815,18 +998,19 @@ module Bud
       # we should add the terminal file descriptor to the EM event loop.
       @reader = Thread.new do
         begin
+          toplevel = @bud_instance.toplevel
           while true
             out_io = get_out_io
             out_io.print("#{tabname} > ") if @prompt
-            in_io = @bud_instance.options[:stdin]
+            in_io = toplevel.options[:stdin]
             s = in_io.gets
             break if s.nil? # Hit EOF
             s = s.chomp if s
             tup = [s]
-            ip = @bud_instance.ip
-            port = @bud_instance.port
+            ip = toplevel.ip
+            port = toplevel.port
             EventMachine::schedule do
               socket = EventMachine::open_datagram_socket("127.0.0.1", 0)
               socket.send_datagram([tabname, tup].to_msgpack, ip, port)
@@ -843,19 +1027,37 @@ module Bud
     public
     def flush #:nodoc: all
       out_io = get_out_io
-      @pending.each do |p|
+      @pending.each_value do |p|
         out_io.puts p[0] # only the first field of each pending tuple is printed
         out_io.flush
       end
-      @pending = {}
+      @pending.clear # empties the existing Hash in place (the removed line allocated a new one)
+    end
+    public
+    def invalidate_at_tick
+      true
     end
     public
     def tick #:nodoc: all
-      @storage = {}
+      unless @pending.empty?
+        @delta = @pending # pending is used for input tuples in this case.
+        @tick_delta = @pending.values # a separate Array, unaffected by the clear below
+        @pending.clear # NOTE(review): @delta is the same Hash as @pending here, so this empties @delta too; only @tick_delta keeps the tuples -- confirm intended
+      else
+        @storage.clear
+        @delta.clear
+        @tick_delta.clear
+      end
+      @invalidated = true # channels and terminals are always invalidated.
       raise Bud::Error, "orphaned pending tuples in terminal" unless @pending.empty? # @pending is empty on both paths above
     end
+    public
+    def invalidate_cache
+    end
     undef merge
     public
@@ -864,19 +1066,23 @@ module Bud
     end
     superator "<~" do |o|
-      pending_merge(o)
+      if o.class <= PushElement
+        o.wire_to_pending self # push elements are wired to this terminal rather than merged eagerly
+      else
+        pending_merge(o) # any other right-hand side goes through pending_merge as before
+      end
     end
     private
     def get_out_io
-      rv = @bud_instance.options[:stdout]
+      rv = @bud_instance.toplevel.options[:stdout] # the :stdout option is now read from the toplevel instance
       rv ||= $stdout # fall back to the process-wide stdout when the option is unset
       raise Bud::Error, "attempting to write to terminal #{tabname} that was already closed" if rv.closed?
       rv
     end
   end
-  class BudPeriodic < BudCollection # :nodoc: all
+  class BudPeriodic < BudScratch # :nodoc: all
     def <=(o)
       raise Bud::Error, "illegal use of <= with periodic '#{tabname}' on left"
     end
@@ -892,43 +1098,142 @@ module Bud
     superator "<+" do |o|
       raise Bud::Error, "illegal use of <+ with periodic '#{tabname}' on left"
     end
+    def tick
+      @tick_delta.clear
+      @delta.clear
+      @invalidated = true
+      unless pending.empty?
+        @delta = @pending
+        @pending = {}
+      end
+    end
+  end
+  class BudPersistentCollection < BudCollection
+    public
+    def invalidate_at_tick
+      false # rescan required only when negated.
+    end
+    public
+    def invalidate_cache # abstract here: this base implementation always raises
+      raise "Abstract method not implemented by derived class #{self.class}"
+    end
   end
-  class BudTable < BudCollection # :nodoc: all
+  class BudTable < BudPersistentCollection # :nodoc: all
     def initialize(name, bud_instance, given_schema) # :nodoc: all
       super(name, bud_instance, given_schema)
       @to_delete = []
+      @to_delete_by_key = []
     end
     public
     def tick #:nodoc: all
+      if $BUD_DEBUG
+        puts "#{tabname}. storage -= pending deletes" unless @to_delete.empty? and @to_delete_by_key.empty?
+        puts "#{tabname}. delta += pending" unless @pending.empty?
+      end
+      @tick_delta.clear
+      deleted = nil # becomes non-nil once some delete below actually removes a stored tuple
       @to_delete.each do |tuple|
-        key = get_key_vals(tuple)
-        if @storage[key] == tuple
-          @storage.delete key
+        keycols = @key_colnums.map{|k| tuple[k]} # key = the tuple's values at the key column positions
+        if @storage[keycols] == tuple # delete-by-value: remove only when the stored tuple matches exactly
+          v = @storage.delete keycols
+          deleted ||= v
         end
       end
-      @pending.each do |key, tuple|
-        old = @storage[key]
+      @to_delete_by_key.each do |tuple|
+        v = @storage.delete @key_colnums.map{|k| tuple[k]} # delete-by-key: remove whatever is stored under this key
+        deleted ||= v
+      end
+      @invalidated =  (not deleted.nil?) # invalidated exactly when something was removed above
+      puts "table #{qualified_tabname} invalidated" if $BUD_DEBUG and @invalidated
+      @pending.each do |keycols, tuple|
+        old = @storage[keycols]
         if old.nil?
-          @storage[key] = tuple
+          @delta[keycols] = tuple # new key: staged in @delta (the removed line wrote @storage directly)
         else
           raise_pk_error(tuple, old) unless tuple == old # same key with a different tuple is reported via raise_pk_error
         end
       end
       @to_delete = []
+      @to_delete_by_key = []
       @pending = {}
     end
+    def invalidated=(val)
+      raise "Internal error: nust not set invalidate on tables"
+    end
+    def pending_delete(o) # queues or wires a deferred delete of o from this table, depending on o's class
+      toplevel = @bud_instance.toplevel
+      if o.class <= Bud::PushElement
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap # record self in merge_targets for the current stratum
+        o.wire_to_delete self
+      elsif o.class <= Bud::BudCollection
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        o.pro.wire_to_delete self # go through the collection's pro result and wire that
+      elsif o.class <= Proc and @bud_instance.toplevel.done_bootstrap and not toplevel.done_wiring
+        toplevel.merge_targets[toplevel.this_stratum][self] = true if toplevel.done_bootstrap
+        tbl = register_coll_expr(o) # the Proc is registered via register_coll_expr and its result is wired
+        tbl.pro.wire_to_delete self
+      else
+        unless o.nil? # a nil right-hand side is silently ignored
+          o = o.uniq.compact if o.respond_to?(:uniq) # drop duplicates and nils before queueing
+          check_enumerable(o)
+          establish_schema(o) if @cols.nil?
+          o.each{|i| @to_delete << prep_tuple(i)} # queued in @to_delete; tick later deletes these by exact tuple match
+        end
+      end
+    end
     superator "<-" do |o|
-      o.each do |t|
-        next if t.nil?
-        @to_delete << prep_tuple(t)
+      pending_delete(o)
+    end
+    public
+    def pending_delete_keys(o) # like pending_delete, but targets @to_delete_by_key / wire_to_delete_by_key; returns o
+      toplevel = @bud_instance.toplevel # NOTE(review): unused below (the Proc branch re-reads @bud_instance.toplevel), and unlike pending_delete no merge_targets entry is recorded -- confirm intended
+      if o.class <= Bud::PushElement
+        o.wire_to_delete_by_key self
+      elsif o.class <= Bud::BudCollection
+        o.pro.wire_to_delete_by_key self
+      elsif o.class <= Proc and @bud_instance.toplevel.done_bootstrap and not @bud_instance.toplevel.done_wiring
+        tbl = register_coll_expr(o)
+        tbl.pro.wire_to_delete_by_key self
+      else
+        unless o.nil? # a nil right-hand side is silently ignored
+          o = o.uniq.compact if o.respond_to?(:uniq) # drop duplicates and nils; note this rebinds o, which is also the return value
+          check_enumerable(o)
+          establish_schema(o) if @cols.nil?
+          o.each{|i| @to_delete_by_key << prep_tuple(i)} # queued in @to_delete_by_key; tick later deletes whatever is stored under each tuple's key
+        end
       end
+      o
+    end
+    public
+    def invalidate_cache
+      # no cache to invalidate. Also, tables do not invalidate dependents, because their own state is not considered
+      # invalidated; that happens only if there were pending deletes at the beginning of a tick (see tick())
+      puts "******** invalidate_cache called on BudTable"
+    end
+    public
+    superator "<+-" do |o|
+      pending_delete_keys(o) # first queue a delete-by-key for o...
+      self <+ o # ...then apply the <+ operator with the same o
+    end
+    public
+    superator "<-+" do |o|
+      self <+- o # same operation as <+- with the operator characters swapped
     end
   end
-  class BudReadOnly < BudScratch # :nodoc: all
+  class BudReadOnly < BudCollection # :nodoc: all
     superator "<+" do |o|
       raise CompileError, "illegal use of <+ with read-only collection '#{@tabname}' on left"
     end
@@ -936,6 +1241,50 @@ module Bud
     def merge(o)  #:nodoc: all
       raise CompileError, "illegal use of <= with read-only collection '#{@tabname}' on left"
     end
+    public
+    def invalidate_cache
+    end
+    public
+    def invalidate_at_tick
+      true
+    end
+  end
+  class BudSignal < BudReadOnly
+    def invalidate_at_tick
+      true
+    end
+    def tick
+      @invalidated = true
+      @storage.clear
+      unless @pending.empty?
+        @delta = @pending
+        @pending = {}
+      end
+    end
+  end
+  class BudCollExpr < BudReadOnly # :nodoc: all
+    def initialize(name, bud_instance, expr, given_schema=nil, defer_schema=false)
+      super(name, bud_instance, given_schema, defer_schema)
+      @expr = expr # a callable; each invokes it via @expr.call
+      @invalidated = true
+    end
+    def tick
+      @invalidated = true # flagged invalidated on every tick
+    end
+    public
+    def each(&block)
+      @expr.call.each {|i| yield i} # re-evaluates the expression on every call and yields each of its items
+    end
+    public
+    def each_raw(&block)
+      each(&block) # no separate raw form: simply forwards to each
+    end
   end
   class BudFileReader < BudReadOnly # :nodoc: all
@@ -949,22 +1298,36 @@ module Bud
     end
     public
-    def pro(&blk)
-      if @bud_instance.stratum_first_iter
-        return map(&blk)
-      else
-        return []
-      end
-    end
-    public
-    def each(&block) # :nodoc: all
+    def each_raw(&block) # :nodoc: all
       while (l = @fd.gets) # reads @fd line by line until gets returns nil
-        t = tuple_accessors([@linenum, l.strip])
+        t = [@linenum, l.strip] # plain [line number, stripped line] Array; no tuple_accessors wrapping here
         @linenum += 1
         tick_metrics if bud_instance.options[:metrics]
         yield t
       end
     end
+    public
+    def each(&blk)
+      each_raw {|l| tuple_accessors(blk.call(l))} # NOTE(review): blk receives the raw Array and tuple_accessors wraps blk's return value; the replaced each wrapped the tuple before yielding it -- confirm this order is intended
+    end
+  end
+end
+module Enumerable
+  # public
+  # # monkeypatch to Enumerable to rename collections and their schemas
+  # def rename(new_tabname, new_schema=nil)
+  #   scr = Bud::BudScratch.new(new_tabname.to_s, nil, new_schema)
+  #   scr.merge(self, scr.storage)
+  #   scr
+  # end
+  public
+  # We rewrite "map" calls in Bloom blocks to invoke the "pro" method
+  # instead. This is fine when applied to a BudCollection; when applied to a
+  # normal Enumerable, just treat pro as an alias for map.
+  def pro(&blk)
+    map(&blk)
   end
 end