RubyGems - bud - Versions diffs - 0.0.4 → 0.0.5 - Mend

bud 0.0.4 → 0.0.5

Files changed (32) hide show

data/README +2 -3
data/bin/budvis +0 -66
data/docs/README.md +27 -15
data/docs/bust.md +1 -1
data/docs/cheat.md +79 -30
data/docs/operational.md +8 -4
data/examples/basics/paths.rb +5 -3
data/lib/bud/aggs.rb +1 -1
data/lib/bud/bud_meta.rb +11 -2
data/lib/bud/bust/bust.rb +1 -1
data/lib/bud/collections.rb +78 -20
data/lib/bud/deploy/threaddeploy.rb +1 -1
data/lib/bud/errors.rb +3 -0
data/lib/bud/graphs.rb +25 -26
data/lib/bud/joins.rb +78 -33
data/lib/bud/metrics.rb +43 -0
data/lib/bud/monkeypatch.rb +1 -1
data/lib/bud/rebl.rb +20 -13
data/lib/bud/rewrite.rb +217 -39
data/lib/bud/server.rb +16 -13
data/lib/bud/state.rb +39 -25
data/lib/bud/storage/dbm.rb +6 -1
data/lib/bud/storage/tokyocabinet.rb +6 -0
data/lib/bud/storage/zookeeper.rb +6 -6
data/lib/bud/viz.rb +5 -1
data/lib/bud/viz_util.rb +70 -0
data/lib/bud.rb +227 -99
metadata +33 -24
data/docs/c.html +0 -251
data/examples/deploy/deploy_ip_port +0 -1
data/examples/deploy/keys.rb +0 -5
data/lib/bud.rb.orig +0 -806

data/README CHANGED Viewed

@@ -41,7 +41,6 @@ To run the unit tests:
 The bud gem has a handful of mandatory dependencies. It also has two optional
 dependencies: if you wish to use Bud collections backed by Zookeeper or Tokyo
-Cabinet (the "zktable" and "tctable" collection types, respectively), the
-"zookeeper" and/or "tokyocabinet" gems must be installed. Note that before
-installing the "tokyocabinet" gem, the Tokyo Cabinet libraries should be
+Cabinet, the "zookeeper" and/or "tokyocabinet" gems must be installed. Note that
+before installing the "tokyocabinet" gem, the Tokyo Cabinet libraries should be
 installed first.

data/bin/budvis CHANGED Viewed

@@ -9,72 +9,6 @@ include VizUtil
 BUD_DBM_DIR = "#{ARGV[0]}/bud_"
-class VizHelper
-  include Bud
-  include TraceCardinality
-  def initialize(tabinf, cycle, depends, rules, dir)
-    @t_tabinf = tabinf
-    @t_cycle = cycle
-    @t_depends = depends
-    @t_rules = rules
-    @dir = dir
-    super()
-  end
-  def summarize(dir, schema)
-    table_io = {}
-    cardinalities.sort{|a, b| a[0] <=> b[0]}.each do |card|
-      table_io["#{card.table}_#{card.bud_time}"] = start_table(dir, card.table, card.bud_time, schema[card.table])
-    end
-    full_info.each do |info|
-      write_table_content(table_io["#{info.table}_#{info.bud_time}"], info.row)
-    end
-    table_io.each_value do |tab|
-      end_table(tab)
-    end
-    # fix: nested loops
-    times.sort.each do |time|
-      card_info = {}
-      cardinalities.each do |card|
-        if card.bud_time == time.bud_time
-          card_info[card.table] = card.cnt
-        end
-      end
-      d = "#{@dir}/tm_#{time.bud_time}"
-      write_graphs(@t_tabinf, @t_cycle, @t_depends, @t_rules, d, @dir, nil, false, nil, time.bud_time, card_info)
-    end
-  end
-  def start_table(dir, tab, time, schema)
-    str = "#{dir}/#{tab}_#{time}.html"
-    fout = File.new(str, "w")
-    fout.puts "<html><title>#{tab} @ #{time}</title>"
-    fout.puts "<table border=1>"
-    fout.puts "<tr>" + schema.map{|s| "<th> #{s} </th>"}.join(" ") + "<tr>" unless schema.nil?
-    fout.close
-    return str
-  end
-  def end_table(stream)
-    fp = File.open(stream, "a")
-    fp.puts "</table>"
-    fp.close
-  end
-  def write_table_content(fn, row)
-    stream = File.open(fn, "a")
-    stream.puts "<tr>"
-    stream.puts row.map{|c| "<td>#{c.to_s}</td>"}.join(" ")
-    stream.puts "</tr>"
-    stream.close
-  end
-end
 def usage
   puts "Usage:"

data/docs/README.md CHANGED Viewed

@@ -1,26 +1,38 @@
-#Bud: Bloom under development#
+Bud: Bloom under development
+============================
 Welcome to the documentation for *Bud*, a prototype of Bloom under development.
-The documents here are organized to be read in any order, but you might like to try the following:
+The documents here are organized to be read in any order, but you might like to
+try the following:
-* **[intro.md](intro.md)**: A brief introduction to Bud and Bloom.
-* **[getstarted.md](getstarted.md)**: A quickstart to teach you basic Bloom
+* [intro.md][intro]: A brief introduction to Bud and Bloom.
+* [getstarted.md][getstarted]: A quickstart to teach you basic Bloom
   concepts, the use of `rebl` interactive terminal, and the embedding of Bloom
   code in Ruby via the `Bud` module.
-* **[operational.md](operational.md)**: An operational view of Bloom, to provide
+* [operational.md][operational]: An operational view of Bloom, to provide
   a more detailed model of how Bloom code is evaluated by Bud.
-* **[cheat.md](cheat.md)**: A concise "cheat sheet" to remind you about Bloom syntax.
-* **[modules.md](modules.md)**: An overview of Bloom's modularity features.
-* **[ruby_hooks.md](ruby_hooks.md)**: Bud module methods that allow you to
+* [cheat.md][cheat]: A concise "cheat sheet" to remind you about Bloom syntax.
+* [modules.md][modules]: An overview of Bloom's modularity features.
+* [ruby\_hooks.md][ruby_hooks]: Bud module methods that allow you to
   interact with the Bud evaluator from other Ruby threads.
-* **[visualizations.md](visualizations.md)**: Overview of the `budvis` and
+* [visualizations.md][visualizations]: Overview of the `budvis` and
   `budplot` tools for visualizing Bloom program analyses.
-* **[bfs.md](bfs.md)**: A walkthrough of the Bloom distributed filesystem.
+* [bfs.md][bfs]: A walkthrough of the Bloom distributed filesystem.
+[intro]:          /bloom-lang/bud/blob/master/docs/intro.md
+[getstarted]:     /bloom-lang/bud/blob/master/docs/getstarted.md
+[operational]:    /bloom-lang/bud/blob/master/docs/operational.md
+[cheat]:          /bloom-lang/bud/blob/master/docs/cheat.md
+[modules]:        /bloom-lang/bud/blob/master/docs/modules.md
+[ruby_hooks]:     /bloom-lang/bud/blob/master/docs/ruby_hooks.md
+[visualizations]: /bloom-lang/bud/blob/master/docs/visualizations.md
+[bfs]:            /bloom-lang/bud/blob/master/docs/bfs.md
-In addition, the **[bud-sandbox](http://github.com/bloom-lang/bud-sandbox)**
-GitHub repository contains lots of useful libraries and example programs built
-using Bloom.
+In addition, the [bud-sandbox](http://github.com/bloom-lang/bud-sandbox) GitHub
+repository contains lots of useful libraries and example programs built using
+Bloom.
 Finally, the Bud gem ships with RubyDoc on the language constructs and runtime
-hooks provided by the Bud module.  (To see rdoc, run `gem server` from a command
-line and open [http://0.0.0.0:8808/](http://0.0.0.0:8808/))
+hooks provided by the Bud module. To see rdoc, run `gem server` from a command
+line and open [http://0.0.0.0:8808/](http://0.0.0.0:8808/)

data/docs/bust.md CHANGED Viewed

@@ -66,7 +66,7 @@ and the include line:
     include RestClient
-To make requests, insert into the rest_req interface, whose defintion is reproduced below:
+To make requests, insert into the rest_req interface, whose definition is reproduced below:
     interface input, :rest_req, [:rid, :verb, :form, :url, :params]

data/docs/cheat.md CHANGED Viewed

@@ -2,7 +2,7 @@
 ## General Bloom Syntax Rules ##
 Bloom programs are unordered sets of statements.<br>
-Statements are delimited by semicolons (;) or newlines. <br>
+Statements are delimited by semicolons (;) or newlines.<br>
 As in Ruby, backslash is used to escape a newline.<br>
 ## Simple embedding of Bud in a Ruby Class ##
@@ -25,7 +25,7 @@ A `state` block contains Bud collection definitions. A Bud collection is a *set*
 of *facts*; each fact is an array of Ruby values. Note that collections do not
 contain duplicates (inserting a duplicate fact into a collection is ignored).
-Like a table in a relational databas, a subset of the columns in a collection
+Like a table in a relational database, a subset of the columns in a collection
 make up the collection's _key_. Attempting to insert two facts into a collection
 that agree on the key columns (but are not duplicates) results in a runtime
 exception.
@@ -90,35 +90,51 @@ Statements with stdio on lhs must use async merge (`<~`).<br>
 Using `stdio` on the lhs of an async merge results in writing to the `IO` object specified by the `:stdout` Bud option (`$stdout` by default).<br>
 To use `stdio` on rhs, instantiate Bud with `:stdin` option set to an `IO` object (e.g., `$stdin`).<br>
-### dbm_table ###
-Table collection mapped to a [DBM] (http://en.wikipedia.org/wiki/Dbm) store.<br>
-Default attributes: `[:key] => [:val]`
+Statements with stdio on lhs must use async merge (`<~`).<br>
+Using `stdio` on the lhs of an async merge results in writing to the `IO` object specified by the `:stdout` Bud option (`$stdout` by default).<br>
+To use `stdio` on rhs, instantiate Bud with `:stdin` option set to an `IO` object (e.g., `$stdin`).<br>
-    dbm_table :t1
-    dbm_table :t2, [:k1, :k2] => [:v1, :v2]
+### signals ###
+Built-in read-only scratch collection for receiving OS signals.<br>
+System-provided attributes: `[:key] => []`
-### tctable ###
-Table collection mapped to a [Tokyo Cabinet](http://fallabs.com/tokyocabinet/) store.<br>
-Default attributes: `[:key] => [:val]`
+Currently catches only SIGINT ("INT") and SIGTERM ("TERM").  If Bud option `:signal_handling=>:bloom` is set, the signal is trapped and Bloom rules
+are responsible for dealing with the content of `signals`.
-    tctable :t1
-    tctable :t2, [:k1, :k2] => [:v1, :v2]
+### halt ###
+Built-in scratch collection to be used on the lhs of a rule; permanently halts the Bud instance upon first insertion.
-### zktable ###
-Table collection mapped to an [Apache Zookeeper](http://hadoop.apache.org/zookeeper/) store.<br>
-System-provided attributes: `[:key] => [:val]`<br>
-State declaration includes Zookeeper path and optional TCP string (default: "localhost:2181")<br>
+If the item `[:kill]` is inserted, the Bud OS process (including all Bud instances) is also halted.
+### sync ###
+Persistent collection mapped to an external storage engine, with synchronous write-flushing each timestep.  Supported storage engines: `:dbm` and `:tokyo`.<br>
+Default attributes: `[:key] => [:val]`.
+    sync :s1, :dbm
+    sync :s2, :tokyo, [:k1, :k2] => [:v1, :v2]
+Further info: [DBM](http://en.wikipedia.org/wiki/Dbm), [Tokyo Cabinet](http://fallabs.com/tokyocabinet/).
+### store ###
+Persistent collection mapped to an external storage engine, with asynchronous write-flushing.  Supported storage engines: `:zookeeper`.<br>
+Default attributes: `[:key] => [:val]`.
+Statements with a store on lhs must use async merge (`<~`).<br>
+Zookeeper is a special case: it does not take attributes as its trailing arguments.  Instead it requires a `:path` and can optionally take an `:addr` specification (default: `addr => 'localhost:2181'`).
-    zktable :foo, "/bat"
-    zktable :bar, "/dat", "localhost:2182"
+    store :s3, :zookeeper, :path=>"/foo/bar", :addr => 'localhost:2181'
+Further info: [Apache Zookeeper](http://hadoop.apache.org/zookeeper/).
 ## Bloom Statements ##
-*lhs BloomOp rhs*
+### Statement Syntax ###
+*lhs bloom_op rhs*
-Left-hand-side (lhs) is a named `BudCollection` object.<br>
-Right-hand-side (rhs) is a Ruby expression producing a `BudCollection` or `Array` of `Arrays`.<br>
-BloomOp is one of the 4 operators listed below.
+Left-hand-side (*lhs*) is a named `BudCollection` object.<br>
+Right-hand-side (*rhs*) is a Ruby expression producing a `BudCollection` or `Array` of `Arrays`.<br>
+The operator (*bloom_op*) is one of the 5 operators listed below.
 ### Bloom Operators ###
 merges:
@@ -131,13 +147,24 @@ delete:
 * `left <- right` &nbsp;&nbsp;&nbsp;&nbsp; (*deferred*)
+update/upsert:
+* `left <+- right` &nbsp;&nbsp;&nbsp; (*deferred*)<br>
+deferred insert of items on rhs and deferred deletion of items with matching
+keys on lhs.
+That is, for each fact produced by the rhs, the upsert operator removes any
+existing tuples that match on the lhs collection's key columns before inserting
+the corresponding rhs fact. Note that both the removal and insertion operators
+happen atomically in the next timestep.
 ### Collection Methods ###
 Standard Ruby methods used on a BudCollection `bc`:
 implicit map:
     t1 <= bc {|t| [t.col1 + 4, t.col2.chomp]} # formatting/projection
-    t2 <= bc {|t| t if t.col = 5}             # selection
+    t2 <= bc {|t| t if t.col == 5}            # selection
 `flat_map`:
@@ -183,15 +210,27 @@ implicit map:
     stdio <~ requests do |r|
       [r.inspect] if msgs.exists?{|m| r.ident == m.ident}
     end
+`bc.notin(bc2, `*optional hash pairs*`)` *optional ruby block*:<br>
+Output each item of `bc` such that (a) it has no match in `bc2` on the hash-pairs attributes, or (b) there is no matching item in `bc2` that leads to a non-nil return value from the block.
+Hash pairs can be fully qualified (`bc.attr1 => bc2.attr2`)
+or shorthand (`:attr1 => :attr2`).
+    # output items from foo if (a) there is no matching key in bar, or
+    # (b) all matching keys in bar have a smaller value
+    stdio <~ foo.notin(bar, :key=>:key) {|f, b| true if f.val <= b.val}
 ## SQL-style grouping/aggregation (and then some) ##
-* `bc.group([:col1, :col2], min(:col3))`.  *akin to min(col3) GROUP BY (col1,col2)*
+* `bc.group([:col1, :col2], min(:col3))`.  *akin to min(col3) GROUP BY col1,col2*
   * exemplary aggs: `min`, `max`, `choose`
   * summary aggs: `sum`, `avg`, `count`
   * structural aggs: `accum`
-* `bc.argmax([:col1], :col2)` &nbsp;&nbsp;&nbsp;&nbsp; *returns the bc tuple per col1 that has highest col2*
-* `bc.argmin([:col1], :col2)`
+* `bc.argmax([:attr1], :attr2)` &nbsp;&nbsp;&nbsp;&nbsp; *returns the bc items per attr1 that have highest attr2*
+* `bc.argmin([:attr1], :attr2)`
+* `bc.argagg(:exemplary_agg_name, [:attr1], :attr2)`.  *generalizes argmin/max: returns the bc items per attr1 that are chosen by the exemplary
+aggregate named*
 ### Built-in Aggregates: ###
@@ -244,14 +283,14 @@ Like `pairs`, but implicitly includes a block that projects down to the left ite
 Like `pairs`, but implicitly includes a block that projects down to the right item in each pair.
 `flatten`:<br>
-`flatten` is a bit like SQL's `SELECT *`: it produces a collection of concatenated objects, with a schema that is the concatenation of the schemas in tablelist (with duplicate names disambiguated.) Useful for chaining to operators that expect input collections with schemas, e.g. group:
+`flatten` is a bit like SQL's `SELECT *`: it produces a collection of concatenated objects, with a schema that is the concatenation of the schemas in tablelist (with duplicate names disambiguated). Useful for chaining to operators that expect input collections with schemas, e.g., `group`:
     out <= (r * s).matches.flatten.group([:a], max(:b))
 `outer(`*hash pairs*`)`:<br>
-Left Outer Join.  Like `pairs`, but objects in the first collection will be produced nil-padded if they have no match in the second collection.
+Left Outer Join.  Like `pairs`, but items in the first collection will be produced nil-padded if they have no match in the second collection.
-## Temp Collections ##
+## Temp Collections and With Blocks ##
 `temp`<br>
 Temp collections are scratches defined within a `bloom` block:
@@ -261,10 +300,20 @@ The schema of a temp collection in inherited from the rhs; if the rhs has no
 schema, a simple one is manufactured to suit the data found in the rhs at
 runtime: `[c0, c1, ...]`.
+`with`<br>
+With statements define a temp collection that can be referenced only within the scope of the associated block.  They are useful when you "fork" a dataflow into two lhs destinations:
+    with :biggies <= request {|r| r if r.quantity > 100}, begin
+      to_process <= (biggies * known_good).lefts(:key=>:key)
+      denied <= (biggies * known_good).nopairs(:key=>:key)
+    end
+The advantage of using `with` over `temp` is modularity: all the rules referencing `biggies` have to be bundled together, making it easier to see that the contents of `request` with quantity > 100 are handled properly.
 ## Bud Modules ##
 A Bud module combines state (collections) and logic (Bloom rules). Using modules allows your program to be decomposed into a collection of smaller units.
-Definining a Bud module is identical to defining a Ruby module, except that the module can use the `bloom`, `bootstrap`, and `state` blocks described above.
+Defining a Bud module is identical to defining a Ruby module, except that the module can use the `bloom`, `bootstrap`, and `state` blocks described above.
 There are two ways to use a module *B* in another Bloom module *A*:

data/docs/operational.md CHANGED Viewed

@@ -19,13 +19,13 @@ Each iteration of this loop is a *timestep* for that node; each timestep is asso
 A Bloom timestep has 3 main phases (from left to right):
 1. *setup*: All scratch collections are set to empty.  Network messages and periodic timer events are received from the runtime and placed into their designated `channel` and `periodic` scratches, respectively, to be read in the rhs of statements.  Note that a batch of multiple messages/events may be received at once.
-2. *logic*: All Bloom statements for the program are evaluated.  In programs with recursion through instantaneous merges (`<=`), the statements are repeatedly evaluated until a *fixpoint* is reached: i.e. no new lhs items are derived from any rhs.
-3. *transition*: Items derived on the lhs of deferred operators (`<+`, `<-`) are placed into/deleted from their corresponding collections, and items derived on the lhs of asynchronous merge (`<~`) are handed off to external code (i.e. the local operating system) for processing.
+2. *logic*: All Bloom statements for the program are evaluated.  In programs with recursion through instantaneous merges (`<=`), the statements are repeatedly evaluated until a *fixpoint* is reached: i.e., no new lhs items are derived from any rhs.
+3. *transition*: Items derived on the lhs of deferred operators (`<+`, `<-`, `<+-`) are placed into/deleted from their corresponding collections, and items derived on the lhs of asynchronous merge (`<~`) are handed off to external code (i.e., the local operating system) for processing.
 It is important to understand how the Bloom collection operators fit into these timesteps:
 * *Instantaneous* merge (`<=`) occurs within the fixpoint of phase 2.
-* *Deferred* operations include merge (`<+`) and delete (`<-`), and are handled in phase 3.  Their effects become visible atomically to Bloom statements in phase 2 of the next timestep.
+* *Deferred* operations include merge (`<+`), update (`<+-`), and delete (`<-`), and are handled in phase 3.  Their effects become visible atomically to Bloom statements in phase 2 of the next timestep.
 * *Asynchronous* merge (`<~`) is initiated during phase 3, so it cannot affect the current timestep.  When multiple items are on the rhs of an async merge, they may "appear" independently spread across multiple different future local timesteps.
@@ -43,7 +43,11 @@ State "update" is achieved in Bloom via a pair of deferred statements, one posit
     buffer <+ [[1, "newval"]]
     buffer <- buffer {|b| b if b.key == 1}
-This atomically replaces the entry for key 1 with the value "newval" at the start of the next timestep.
+This atomically replaces the entry for key 1 with the value "newval" at the start of the next timestep. As syntax sugar for this common pattern, the deferred update operator can be used:
+    buffer <+- [[1, "newval"]]
+This update statement removes (from the following timestep) any fact in `buffer` with the key `1`, and inserts (in the following timestep) a fact with the value `[1, "newval"]`. Note that "key" here refers to the key column(s) of the lhs relation: this example assumes `buffer` has a single key column.
 Any reasoning about atomicity in Bloom programs is built on this simple foundation.  It's really all you need.  In the bud-sandbox we show how to build more powerful atomicity constructs using it, including things like enforcing [ordering of items across timesteps](https://github.com/bloom-lang/bud-sandbox/tree/master/ordering), and protocols for [agreeing on ordering of distributed updates](https://github.com/bloom-lang/bud-sandbox/tree/master/paxos) across all nodes.

data/examples/basics/paths.rb CHANGED Viewed

@@ -18,9 +18,11 @@ class ShortestPaths
     # base case: every link is a path
     path <= link {|e| [e.from, e.to, e.to, e.cost]}
-    # inductive case: make path of length n+1 by connecting a link to a path of length n
-    temp :j <= (link*path).pairs(:to => :from)
-    path <= j { |l,p| [l.from, p.to, p.from, l.cost+p.cost] }
+    # inductive case: make path of length n+1 by connecting a link to a path of
+    # length n
+    path <= (link*path).pairs(:to => :from) do |l,p|
+      [l.from, p.to, p.from, l.cost+p.cost]
+    end
   end
   # find the shortest path between each connected pair of nodes

data/lib/bud/aggs.rb CHANGED Viewed

@@ -120,7 +120,7 @@ module Bud
   # exemplary aggregate method to be used in Bud::BudCollection.group.
   # randomly chooses among x entries being aggregated.
   def choose_rand(x=nil)
-    [ChooseRand.new]
+    [ChooseRand.new, x]
   end
   class Sum < Agg #:nodoc: all

data/lib/bud/bud_meta.rb CHANGED Viewed

@@ -36,12 +36,19 @@ class BudMeta #:nodoc: all
     @depanalysis = DepAnalysis.new
     @bud_instance.t_depends_tc.each {|d| @depanalysis.depends_tc << d}
     @bud_instance.t_provides.each {|p| @depanalysis.providing << p}
-    3.times { @depanalysis.tick }
+    3.times { @depanalysis.tick_internal }
     @depanalysis.underspecified.each do |u|
       puts "Warning: underspecified dataflow: #{u.inspect}"
       @bud_instance.t_underspecified << u
     end
+    @depanalysis.source.each do |s|
+      @bud_instance.sources[s.first] = true
+    end
+    @depanalysis.sink.each do |s|
+      @bud_instance.sinks[s.first] = true
+    end
     dump_rewrite(rewritten_strata) if @bud_instance.options[:dump_rewrite]
     return rewritten_strata, no_attr_rewrite_strata
@@ -128,6 +135,8 @@ class BudMeta #:nodoc: all
         next
       end
+      next if i == 1 and n.sexp_type == :nil # a block got rewritten to an empty block
       # Check for a common case
       if n.sexp_type == :lasgn
         return [n, "Illegal operator: '='"]
@@ -170,7 +179,7 @@ class BudMeta #:nodoc: all
   def stratify
     strat = Stratification.new
     @bud_instance.t_depends.each {|d| strat.depends << d}
-    strat.tick
+    strat.tick_internal
     # Copy computed data back into Bud runtime
     strat.stratum.each {|s| @bud_instance.t_stratum << s}

data/lib/bud/bust/bust.rb CHANGED Viewed

@@ -84,7 +84,7 @@ module Bust
               tuple_to_insert[index] = v[0]
             end
             # actually insert the puppy
-            @bud.async_do { (eval "@bud." + table_name) << tuple_to_insert }
+            @bud.async_do { (eval "@bud." + table_name) <+ [tuple_to_insert] }
             @session.print success
           end
         rescue Exception

data/lib/bud/collections.rb CHANGED Viewed

@@ -135,7 +135,7 @@ module Bud
     # project the collection to its key attributes
     public
     def keys
-      self.map{|t| (0..self.key_cols.length-1).map{|i| t[i]}}
+      self.map{|t| @key_colnums.map {|i| t[i]}}
     end
     # project the collection to its non-key attributes
@@ -173,10 +173,23 @@ module Bud
       each_from([@storage, @delta], &block)
     end
+    public
+    def tick_metrics
+      strat_num = bud_instance.this_stratum
+      rule_num = bud_instance.this_rule
+      addr = nil
+      addr = bud_instance.ip_port unless bud_instance.port.nil?
+      rule_txt = nil
+      bud_instance.metrics[:collections] ||= {}
+      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
+      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
+    end
     private
     def each_from(bufs, &block) # :nodoc: all
       bufs.each do |b|
         b.each_value do |v|
+          tick_metrics if bud_instance and bud_instance.options[:metrics]
           yield v
         end
       end
@@ -237,7 +250,7 @@ module Bud
     def include?(item)
       return true if key_cols.nil? or (key_cols.empty? and length > 0)
       return false if item.nil? or item.empty?
-      key = key_cols.map{|k| item[schema.index(k)]}
+      key = @key_colnums.map{|i| item[i]}
       return (item == self[key])
     end
@@ -255,7 +268,7 @@ module Bud
     private
     def raise_pk_error(new_guy, old)
-      keycols = key_cols.map{|k| old[schema.index(k)]}
+      keycols = @key_colnums.map{|i| old[i]}
       raise KeyConstraintError, "Key conflict inserting #{new_guy.inspect} into \"#{tabname}\": existing tuple #{old.inspect}, key_cols = #{keycols.inspect}"
     end
@@ -399,7 +412,22 @@ module Bud
     superator "<+" do |o|
       pending_merge o
     end
+    public
+    superator "<+-" do |o|
+      self <+ o
+      self <- o.map do |t|
+        unless t.nil?
+          self[@key_colnums.map{|k| t[k]}]
+        end
+      end
+    end
+    public
+    superator "<-+" do |o|
+      self <+- o
+    end
     # Called at the end of each timestep: prepare the collection for the next
     # timestep.
     public
@@ -440,8 +468,8 @@ module Bud
     # a generalization of argmin/argmax to arbitrary exemplary aggregates.
-    # for each distinct value in the grouping key columns, return the item in that group
-    # that has the value of the exemplary aggregate +aggname+
+    # for each distinct value of the grouping key columns, return the items in that group
+    # that have the value of the exemplary aggregate +aggname+
     public
     def argagg(aggname, gbkey_cols, collection)
       agg = bud_instance.send(aggname, nil)[0]
@@ -501,15 +529,17 @@ module Bud
       end
     end
-    # for each distinct value in the grouping key columns, return the item in that group
-    # that has the minimum value of the attribute +col+
+    # for each distinct value of the grouping key columns, return the items in
+    # that group that have the minimum value of the attribute +col+. Note that
+    # multiple tuples might be returned.
     public
     def argmin(gbkey_cols, col)
       argagg(:min, gbkey_cols, col)
     end
-    # for each distinct value in the grouping key columns, return the item in that group
-    # that has the maximum value of the attribute +col+
+    # for each distinct value of the grouping key columns, return the items in
+    # that group that have the maximum value of the attribute +col+. Note that
+    # multiple tuples might be returned.
     public
     def argmax(gbkey_cols, col)
       argagg(:max, gbkey_cols, col)
@@ -536,6 +566,14 @@ module Bud
     def *(collection)
       join([self, collection])
     end
+    # AntiJoin
+    public
+    def notin(coll,*preds, &blk)
+      @origpreds = preds
+      @schema = schema
+      return BudJoin.new([self,coll], @bud_instance).anti(*preds,&blk)
+    end
     # SQL-style grouping.  first argument is an array of attributes to group by.
     # Followed by a variable-length list of aggregates over attributes (e.g. +min(:x)+)
@@ -626,13 +664,27 @@ module Bud
       @is_loopback = loopback
       @locspec_idx = nil
+      # We're going to mutate the caller's given_schema (to remove the location
+      # specifier), so make a deep copy first. We also save a ref to the
+      # unmodified given_schema.
+      @raw_schema = given_schema
+      given_schema = Marshal.load(Marshal.dump(given_schema))
       unless @is_loopback
         the_schema, the_key_cols = parse_schema(given_schema)
+        spec_count = the_schema.count {|s| s.to_s.start_with? "@"}
+        if spec_count == 0
+          raise BudError, "Missing location specifier for channel '#{name}'"
+        end
+        if spec_count > 1
+          raise BudError, "Multiple location specifiers for channel '#{name}'"
+        end
         the_val_cols = the_schema - the_key_cols
         @locspec_idx = remove_at_sign!(the_key_cols)
-        @locspec_idx = remove_at_sign!(the_schema) if @locspec_idx.nil?
         if @locspec_idx.nil?
-          raise BudError, "Missing location specifier for channel '#{name}'"
+          val_idx = remove_at_sign!(the_val_cols)
+          @locspec_idx = val_idx + the_key_cols.length
         end
         # We mutate the hash key above, so we need to recreate the hash
@@ -647,7 +699,7 @@ module Bud
     private
     def remove_at_sign!(cols)
-      i = cols.find_index {|c| c.to_s[0].chr == '@'}
+      i = cols.find_index {|c| c.to_s.start_with? "@"}
       unless i.nil?
         cols[i] = cols[i].to_s.delete('@').to_sym
       end
@@ -667,7 +719,7 @@ module Bud
     public
     def clone_empty
-      self.class.new(tabname, bud_instance, @given_schema, @is_loopback)
+      self.class.new(tabname, bud_instance, @raw_schema, @is_loopback)
     end
     public
@@ -757,7 +809,7 @@ module Bud
               socket.send_datagram([tabname, tup].to_msgpack, ip, port)
             end
           end
-        rescue
+        rescue Exception
           puts "terminal reader thread failed: #{$!}"
           print $!.backtrace.join("\n")
           exit
@@ -778,7 +830,7 @@ module Bud
     public
     def tick #:nodoc: all
       @storage = {}
-      raise BudError unless @pending.empty?
+      raise BudError, "orphaned pending tuples in terminal" unless @pending.empty?
     end
     undef merge
@@ -796,6 +848,7 @@ module Bud
     def get_out_io
       rv = @bud_instance.options[:stdout]
       rv ||= $stdout
+      raise BudError, "attempting to write to terminal #{tabname} that was already closed" if rv.closed?
       rv
     end
   end
@@ -816,10 +869,6 @@ module Bud
     superator "<+" do |o|
       raise BudError, "Illegal use of <+ with periodic '#{tabname}' on left"
     end
-    def add_periodic_tuple(id)
-      pending_merge([[id, Time.now]])
-    end
   end
   class BudTable < BudCollection # :nodoc: all
@@ -890,6 +939,7 @@ module Bud
       while (l = @fd.gets)
         t = tuple_accessors([@linenum, l.strip])
         @linenum += 1
+        tick_metrics if bud_instance.options[:metrics]
         yield t
       end
     end
@@ -909,4 +959,12 @@ module Enumerable
     scr.merge(self, scr.storage)
     scr
   end
+  public
+  # We rewrite "map" calls in Bloom blocks to invoke the "pro" method
+  # instead. This is fine when applied to a BudCollection; when applied to a
+  # normal Enumerable, just treat pro as an alias for map.
+  def pro(&blk)
+    map(&blk)
+  end
 end