RubyGems - bud - Versions diffs - 0.0.5 → 0.0.6 - Mend

bud 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

data/bin/budplot +37 -12
data/bin/budtimelines +20 -22
data/bin/budvis +1 -1
data/docs/cheat.md +54 -17
data/docs/operational.md +1 -1
data/lib/bud.rb +31 -18
data/lib/bud/bud_meta.rb +7 -7
data/lib/bud/bust/bust.rb +2 -2
data/lib/bud/collections.rb +80 -68
data/lib/bud/deploy/ec2deploy.rb +1 -1
data/lib/bud/graphs.rb +9 -11
data/lib/bud/joins.rb +29 -20
data/lib/bud/monkeypatch.rb +8 -2
data/lib/bud/rebl.rb +29 -13
data/lib/bud/rewrite.rb +40 -39
data/lib/bud/server.rb +1 -1
data/lib/bud/state.rb +3 -3
data/lib/bud/storage/dbm.rb +4 -4
data/lib/bud/storage/tokyocabinet.rb +4 -4
data/lib/bud/storage/zookeeper.rb +3 -3
data/lib/bud/stratify.rb +6 -3
data/lib/bud/viz.rb +1 -1
data/lib/bud/viz_util.rb +11 -7
metadata +10 -8

data/bin/budplot CHANGED Viewed

@@ -8,7 +8,7 @@ require 'bud/viz_util'
 include VizUtil
-def is_module?(m)
+def is_constant?(m)
   begin
     return (eval("defined?(#{m})") == "constant")
   rescue SyntaxError
@@ -16,16 +16,42 @@ def is_module?(m)
   end
 end
-def process(mods)
+def make_instance(mods)
+  # If we're given a single identifier that names a class, just return an
+  # instance of that class. Otherwise, define a bogus class that includes all
+  # the module names specified by the user and return an instance.
   mods.each do |m|
-    unless is_module? m
-      puts "Error: unable to find definition for module \"#{m}\""
+    unless is_constant? m
+      puts "Error: unable to find definition for module or class \"#{m}\""
+      exit
+    end
+    mod_klass = eval m
+    if mod_klass.class == Class
+      if mods.length == 1
+        return mod_klass.new
+      else
+        puts "Error: cannot intermix classes and modules"
+        exit
+      end
+    elsif mod_klass.class != Module
+      puts "Error: \"#{m}\" is not a module or class name"
       exit
     end
   end
-  classdef = "class FooBar\ninclude Bud\n" + mods.map{|m| "include #{m}"}.join("\n") + "\nend\n FooBar.new"
-  d = eval(classdef)
+  def_lines = ["class FooBar",
+               "include Bud",
+               mods.map {|m| "include #{m}"},
+               "end"
+              ]
+  class_def = def_lines.flatten.join("\n")
+  eval(class_def)
+  FooBar.new
+end
+def process(mods)
+  d = make_instance(mods)
   interfaces = {}
   d.t_provides.each do |name, is_input|
@@ -39,10 +65,10 @@ def process(mods)
   d.tables.each do |t|
     tab = t[0].to_s
     tabinf[tab] = t[1].class.to_s
+    next if d.builtin_tables.has_key? t[0]
     if interfaces[tab].nil?
-      unless tab =~ /^t_/ or tab == "stdio" or tab == "localtick"
-        priv << t
-      end
+      priv << t
     else
       if interfaces[tab]
         inp << t
@@ -84,7 +110,6 @@ def do_table(f, info)
   info.sort{|a, b| a[0].to_s <=> b[0].to_s}.each do |tbl_name, tbl_impl|
     next if tbl_impl.schema.nil?
     key_s = tbl_impl.key_cols.join(", ")
-    key_s = "[]" if key_s == ""
     val_s = tbl_impl.val_cols.join(", ")
     f.puts "<tr><td><b>#{tbl_name}</b></td>"
     f.puts "<td>#{key_s}</td><td>#{val_s}</td></tr>"
@@ -93,7 +118,7 @@ def do_table(f, info)
 end
 if ARGV.length < 2
-  puts "Usage: budplot LIST_OF_FILES LIST_OF_MODULES"
+  puts "Usage: budplot LIST_OF_FILES LIST_OF_MODULES_OR_CLASSES"
   exit
 end
@@ -102,7 +127,7 @@ end
 modules = []
 ARGV.each do |arg|
   if File.exists? arg
-    eval "require '#{arg}'"
+    require arg
   else
     modules << arg
   end

data/bin/budtimelines CHANGED Viewed

@@ -36,7 +36,7 @@ end
 module TPSchema
   state do
     table :deltas, [:bud_time, :tab, :nm]
-    table :zerod_cards, [:bud_time, :table, :cnt]
+    table :zerod_cards, [:bud_time, :table, :cnt, :pred]
     table :nm_tab, [:table]
     table :collapsible_base, [:start, :fin]
     table :collapsible, [:start, :fin]
@@ -49,19 +49,18 @@ end
 module DeltaLogic
   include TPSchema
   bloom do
-    zerod_cards <= cardinalities
+    zerod_cards <= cardinalities{|c| c + [c.bud_time-1]}
     zerod_cards <= (times * depends).pairs do |t, d|
       unless cardinalities{|c| c[1] if c[0] == t.bud_time}.include? d[1]
-        [t.bud_time, d[1], 0]
+        [t.bud_time, d[1], 0, t.bud_time - 1]
       end
     end
     nm_tab <= depends do |d|
       [d[1]] if d[4]
     end
-    deltas <= (zerod_cards * zerod_cards).pairs(:table => :table) do |c1, c2|
+    deltas <= (zerod_cards * zerod_cards).pairs(:table => :table, :bud_time => :pred) do |c1, c2|
       if c1.bud_time == c2.bud_time - 1 and c1.table == c2.table and c1.cnt != c2.cnt
         if nm_tab.include? [c1.table]
           [c2.bud_time, c1.table, true]
@@ -77,26 +76,22 @@ module VanillaTraceProcessing
   include TPSchema
   include DeltaLogic
-  bloom do
-    collapsible_base <= times do |t|
-      unless deltas{|d| d.bud_time if d.nm}.include? t.bud_time
-        [t.bud_time-1, t.bud_time]
-      end
-    end
+  state do
+    scratch :tp, times.schema
+    scratch :bi1, best_interval.schema
+  end
+  bloom do
+    tp <= times.notin(deltas, :bud_time => :bud_time) {|t, d| true if d.nm}
+    collapsible_base <= tp {|t| [t.bud_time-1, t.bud_time]}
     collapsible <= collapsible_base
     collapsible <= (collapsible_base * collapsible).pairs(:fin => :start) do |b, c|
-      puts "another collapsible row; now #{b.inspect} - #{c.inspect}"
       [b.start, c.fin]
     end
-    best_interval <= collapsible do |c|
-      unless collapsible{|c1| c1.start == c.start and c1.fin > c.fin}.any? \
-       or collapsible{|c2| c2.fin == c.fin and c2.start < c.start}.any?
-        c
-      end
-    end
+    bi1 <= collapsible.notin(collapsible, :start => :start) {|c1, c2| true if c2.fin > c1.fin}
+    best_interval <= bi1.notin(collapsible, :fin => :fin) {|c1, c2| true if c2.start < c1.start}
   end
 end
@@ -139,12 +134,12 @@ da = GlobalDepAnalyzer.new
 ARGV.each do |arg_raw|
   elems = arg_raw.split("_")
-  arg = elems[1..3].join("_")
+  arg = elems[1..4].join("_")
   clean_arg << arg
   snd_info[arg] = []
   rcv_info[arg] = []
-  meta, data = get_meta2("#{arg_raw}/bud_")
+  meta, data = get_meta2("#{arg_raw}")
   tp = SimpleTraceProcessor.new
   meta[:depends].each do |m|
@@ -166,7 +161,6 @@ ARGV.each do |arg_raw|
   tp.tick
   puts "entries in collapsible: #{tp.collapsible.length}"
   puts "entries in base: #{tp.collapsible_base.length}"
   puts "entries in deltas: #{tp.deltas.length}"
@@ -182,7 +176,11 @@ da.tick
 nmreach = {}
 da.depends_tc.each do |d|
   nmreach[d[0]] = {} unless nmreach[d[0]]
-  nmreach[d[0]][d[1]] = d[3]
+  if nmreach[d[0]][d[1]]
+    nmreach[d[0]][d[1]] = d[3] or nmreach[d[0]][d[1]]
+  else
+    nmreach[d[0]][d[1]] = d[3]
+  end
 end
 # our local intervals relations are too optimistic.  to say that intervals[foo] = [2, 5]

data/bin/budvis CHANGED Viewed

@@ -7,7 +7,7 @@ require 'bud/viz_util'
 include VizUtil
-BUD_DBM_DIR = "#{ARGV[0]}/bud_"
+BUD_DBM_DIR = "#{ARGV[0]}"
 def usage

data/docs/cheat.md CHANGED Viewed

@@ -82,6 +82,13 @@ State declaration includes interval (in seconds).
     periodic :timer, 0.1
+Note that because periodics are just a simple wrapper over the system clock, Bud
+provides few semantic guarantees about the behavior of periodics. In particular,
+periodics execute in a best-effort manner (there is no guarantee of timely
+delivery of a periodic tuple), and the system clock value stored in the `val`
+field may not be monotonically increasing (e.g., if the system clock is changed
+in the midst of Bud execution).
 ### stdio ###
 Built-in scratch collection for performing terminal I/O.<br>
 System-provided attributes: `[:line] => []`
@@ -151,12 +158,10 @@ update/upsert:
 * `left <+- right` &nbsp;&nbsp;&nbsp; (*deferred*)<br>
 deferred insert of items on rhs and deferred deletion of items with matching
-keys on lhs.
-That is, for each fact produced by the rhs, the upsert operator removes any
-existing tuples that match on the lhs collection's key columns before inserting
-the corresponding rhs fact. Note that both the removal and insertion operators
-happen atomically in the next timestep.
+keys on lhs. That is, for each fact produced by the rhs, the upsert operator
+removes any existing tuples that match on the lhs collection's key columns
+before inserting the corresponding rhs fact. Note that both the removal and
+insertion operations happen atomically in the next timestep.
 ### Collection Methods ###
 Standard Ruby methods used on a BudCollection `bc`:
@@ -184,26 +189,36 @@ implicit map:
 `bc.include?`:
-    t5 <= bc do |t| # like SQL's NOT IN
+    # This is similar to SQL's NOT IN; note that Bud provides a "notin"
+    # collection method that should probably be preferred to this approach.
+    t5 <= bc do |t|
         t unless t2.include?([t.col1, t.col2])
     end
 ## BudCollection-Specific Methods ##
-`bc.keys`: projects `bc` to key columns<br>
+`bc.schema`: returns the schema of `bc` (Hash of key column names => non-key column names)<br>
-`bc.values`: projects `bc` to non-key columns<br>
+`bc.cols`: returns the column names in `bc` as an Array<br>
-`bc.inspected`: shorthand for `bc {|t| [t.inspect]}`
+`bc.key_cols`: returns the key column names in `bc` as an Array<br>
-    stdio <~ bc.inspected
+`bc.val_cols`: returns the non-key column names in `bc` as an Array<br>
+`bc.keys`: projects `bc` to key columns<br>
+`bc.values`: projects `bc` to non-key columns<br>
 `chan.payloads`: projects `chan` to non-address columns. Only defined for channels.
     # at sender
-    msgs <~ requests {|r| "127.0.0.1:12345", r}
+    msgs <~ requests {|r| ["127.0.0.1:12345", r]}
     # at receiver
     requests <= msgs.payloads
+`bc.inspected`: returns a human-readable version of the contents of `bc`
+    stdio <~ bc.inspected
 `bc.exists?`: test for non-empty collection.  Can optionally pass in a block.
     stdio <~ [["Wake Up!"] if timer.exists?]
@@ -211,10 +226,18 @@ implicit map:
       [r.inspect] if msgs.exists?{|m| r.ident == m.ident}
     end
-`bc.notin(bc2, `*optional hash pairs*`)` *optional ruby block*:<br>
-Output each item of `bc` such that (a) it has no match in `bc2` on the hash-pairs attributes, or (b) there is no matching item in `bc2` that leads to a non-nil return value from the block.
-Hash pairs can be fully qualified (`bc.attr1 => bc2.attr2`)
-or shorthand (`:attr1 => :attr2`).
+`bc.notin(bc2, `*optional hash pairs*`, `*optional ruby block*`)`:<br>
+Output the facts in `bc` that do not appear in `bc2`, as follows. First, we form a temporary collection `t` as follows:
+  1. Join `bc` and `bc2` according to the specified hash pairs. Hash pairs can
+     be fully qualified (`bc.attr1 => bc2.attr2`) or shorthand (`:attr1 =>
+     :attr2`).
+  2. If a code block is specified, invoke the block on every pair of matching
+     tuples in the join result. Any matches for which the block returns `nil`
+     are removed from `t`.
+Finally, we output every tuple of `bc` that does *not* appear in `t`.
     # output items from foo if (a) there is no matching key in bar, or
     # (b) all matching keys in bar have a smaller value
@@ -330,13 +353,23 @@ There are two ways to use a module *B* in another Bloom module *A*:
      (facts inserted into a collection defined in `b1` won't also be inserted
      into `b2`'s copy of the collection).
+In practice, a Bloom program is often composed of a collection of modules (which
+may themselves include or import sub-modules) and one "top-level class" that
+includes/imports those modules as well as the `Bud` module. An instance of this
+top-level class represents an instance of the Bud interpreter; it is on this
+top-level class that the `run_fg` method should be invoked, for example.
+Note that to enable the Bloom DSL for a collection of Ruby code, it is
+sufficient to include the `Bud` module *once* in the top-level class. That is,
+you should *not* include `Bud` in every Bloom module that you write.
 ## Skeleton of a Bud Module ##
     require 'rubygems'
     require 'bud'
     module YourModule
-      include Bud
+      import SubModule => :sub_m
       state do
         ...
@@ -355,3 +388,7 @@ There are two ways to use a module *B* in another Bloom module *A*:
       end
     end
+    class TopLevelClass
+      include Bud
+      include YourModule
+    end

data/docs/operational.md CHANGED Viewed

@@ -31,7 +31,7 @@ It is important to understand how the Bloom collection operators fit into these
 ## Atomicity: Timesteps and Deferred Operators ##
-The only instantaneous Bloom operator is a merge (`<=`), which can only introduce additional items into a collection--it can not delete or change existing items.  As a result, all state within a Bloom timestep is *immutable*: once an item is in a collection at timestep *T*, it stays in that collection throughout timestep *T*.  (And forever after, the fact that the item was in that collection at timestep *T* remains true.)
+The only instantaneous Bloom operator is a merge (`<=`), which can only introduce additional items into a collection--it cannot delete or change existing items.  As a result, all state within a Bloom timestep is *immutable*: once an item is in a collection at timestep *T*, it stays in that collection throughout timestep *T*.  (And forever after, the fact that the item was in that collection at timestep *T* remains true.)
 To get atomic state change in Bloom, you exploit the combination of two language features:

data/lib/bud.rb CHANGED Viewed

@@ -5,6 +5,10 @@ require 'socket'
 require 'superators'
 require 'thread'
+# Ruby2Ruby 1.3.1 is buggy (see issue #250)
+gem 'ruby2ruby', '< 1.3.1'
+require 'ruby2ruby'
 require 'bud/monkeypatch'
 require 'bud/aggs'
@@ -61,7 +65,8 @@ $bud_instances = {}        # Map from instance id => Bud instance
 module Bud
   attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
   attr_reader :dsock
-  attr_reader :tables, :channels, :tc_tables, :zk_tables, :dbm_tables, :sources, :sinks
+  attr_reader :builtin_tables, :tables
+  attr_reader :channels, :tc_tables, :zk_tables, :dbm_tables, :sources, :sinks
   attr_reader :stratum_first_iter, :joinstate
   attr_reader :this_stratum, :this_rule, :rule_orig_src
   attr_reader :running_async
@@ -96,8 +101,8 @@ module Bud
   #   * <tt>:deploy</tt>  enable deployment
   #   * <tt>:deploy_child_opts</tt> option hash to pass to deployed instances
   def initialize(options={})
+    @builtin_tables = {}
     @tables = {}
-    @table_meta = []
     @rewritten_strata = []
     @channels = {}
     @tc_tables = {}
@@ -180,11 +185,14 @@ module Bud
   # Rewrite methods defined in the given klass to expand module references and
   # temp collections. Imported modules are rewritten during the import process;
-  # we rewrite the main Bud class and any included modules here. Note that we
-  # only rewrite each distinct Class once.
+  # we rewrite the main class associated with this Bud instance and any included
+  # modules here. Note that we only rewrite each distinct Class once, and we
+  # skip methods defined by the Bud (Ruby) module directly (since we can be sure
+  # those won't reference Bloom modules).
   def self.rewrite_local_methods(klass)
     @done_rewrite ||= {}
     return if @done_rewrite.has_key? klass.name
+    return if klass.name == self.name   # Skip methods defined in the Bud module
     u = Unifier.new
     ref_expander = NestedRefRewriter.new(klass.bud_import_table)
@@ -368,7 +376,7 @@ module Bud
     # If we're called from the EventMachine thread (and EM is running), blocking
     # the current thread would imply deadlocking ourselves.
     if Thread.current == EventMachine::reactor_thread and EventMachine::reactor_running?
-      raise BudError, "Cannot invoke run_fg from inside EventMachine"
+      raise BudError, "cannot invoke run_fg from inside EventMachine"
     end
     q = Queue.new
@@ -486,7 +494,7 @@ module Bud
     cb_id = nil
     schedule_and_wait do
       unless @tables.has_key? tbl_name
-        raise Bud::BudError, "No such table: #{tbl_name}"
+        raise Bud::BudError, "no such table: #{tbl_name}"
       end
       raise Bud::BudError if @callbacks.has_key? @callback_id
@@ -500,7 +508,7 @@ module Bud
   # Unregister the callback that has the given ID.
   def unregister_callback(id)
     schedule_and_wait do
-      raise Bud::BudError, "Missing callback: #{id.inspect}" unless @callbacks.has_key? id
+      raise Bud::BudError, "missing callback: #{id.inspect}" unless @callbacks.has_key? id
       @callbacks.delete(id)
     end
   end
@@ -730,16 +738,19 @@ module Bud
   private
   # Builtin BUD state (predefined collections). We could define this using the
-  # standard "state" syntax, but we want to ensure that builtin state is
+  # standard state block syntax, but we want to ensure that builtin state is
   # initialized before user-defined state.
   def builtin_state
+    # We expect there to be no previously-defined tables
+    raise BudError unless @tables.empty?
     loopback  :localtick, [:col1]
     @stdio = terminal :stdio
     readonly :signals, [:key]
     scratch :halt, [:key]
     @periodics = table :periodics_tbl, [:pername] => [:period]
-    # for Bud reflection
+    # For Bud reflection
     table :t_rules, [:rule_id] => [:lhs, :op, :src, :orig_src]
     table :t_depends, [:rule_id, :lhs, :op, :body] => [:nm]
     table :t_depends_tc, [:head, :body, :via, :neg, :temporal]
@@ -749,6 +760,9 @@ module Bud
     table :t_cycle, [:predicate, :via, :neg, :temporal]
     table :t_table_info, [:tab_name, :tab_type]
     table :t_table_schema, [:tab_name, :col_name, :ord, :loc]
+    # Identify builtin tables as such
+    @builtin_tables = @tables.clone
   end
   # Handle any inbound tuples off the wire. Received messages are placed
@@ -826,7 +840,7 @@ module Bud
           unless new_e.class <= BudError
             new_e = BudError
           end
-          raise new_e, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
+          raise new_e, "exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
         end
       end
       @stratum_first_iter = false
@@ -835,14 +849,13 @@ module Bud
       colls = @stratum_collection_map[strat_num] if @stratum_collection_map
       colls ||= @tables.keys
       colls.each do |name|
-        begin
-          coll = self.send(name)
-          unless coll.delta.empty? and coll.new_delta.empty?
-            coll.tick_deltas
-            fixpoint = false
-          end
-        rescue
-          # ignore missing tables; rebl for example deletes them mid-stream
+        coll = @tables[name]
+        # ignore missing tables; rebl for example deletes them mid-stream
+        next if coll.nil?
+        unless coll.delta.empty? and coll.new_delta.empty?
+          fixpoint = false unless coll.new_delta.empty?
+          coll.tick_deltas
         end
       end
     end while not fixpoint