RubyGems - bud - Versions diffs - 0.9.7 → 0.9.8 - Mend

bud 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

checksums.yaml +13 -5
data/History.txt +18 -0
data/Rakefile +91 -0
data/bin/budplot +7 -2
data/docs/README.md +8 -17
data/docs/cheat.md +1 -1
data/docs/getstarted.md +95 -82
data/docs/operational.md +3 -3
data/examples/basics/paths.rb +2 -2
data/examples/chat/chat_protocol.rb +1 -1
data/lib/bud.rb +67 -51
data/lib/bud/bud_meta.rb +64 -42
data/lib/bud/collections.rb +29 -26
data/lib/bud/executor/elements.rb +6 -6
data/lib/bud/executor/join.rb +63 -52
data/lib/bud/lattice-core.rb +5 -0
data/lib/bud/monkeypatch.rb +38 -11
data/lib/bud/rebl.rb +2 -2
data/lib/bud/rewrite.rb +22 -11
data/lib/bud/state.rb +2 -2
data/lib/bud/storage/zookeeper.rb +7 -0
data/lib/bud/version.rb +3 -0
data/lib/bud/viz.rb +3 -3
metadata +84 -82

data/lib/bud/collections.rb CHANGED

@@ -1,4 +1,3 @@
-$struct_classes = {}
 module Bud
   ########
   #--
@@ -17,7 +16,7 @@ module Bud
     attr_accessor :bud_instance  # :nodoc: all
     attr_reader :tabname, :cols, :key_cols # :nodoc: all
     attr_reader :struct
-    attr_reader :storage, :delta, :new_delta, :pending, :tick_delta # :nodoc: all
+    attr_reader :new_delta, :pending # :nodoc: all
     attr_reader :wired_by, :scanner_cnt
     attr_accessor :invalidated, :rescan
     attr_accessor :is_source
@@ -63,7 +62,7 @@ module Bud
       if @cols.empty?
         @cols = nil
       else
-        @struct = ($struct_classes[@cols] ||= Bud::TupleStruct.new(*@cols))
+        @struct = Bud::TupleStruct.new_struct(@cols)
         @structlen = @struct.members.length
       end
       setup_accessors
@@ -250,7 +249,7 @@ module Bud
     def sort(&blk)
       if @bud_instance.wiring?
         pusher = self.pro
-        pusher.sort("sort#{object_id}", @bud_instance, @cols, &blk)
+        pusher.sort("sort#{object_id}".to_sym, @bud_instance, @cols, &blk)
       else
         @storage.values.sort(&blk)
       end
@@ -275,7 +274,12 @@ module Bud
     public
     def each_raw(&block)
-      @storage.each_value(&block)
+      each_from([@storage], &block)
+    end
+    public
+    def each_delta(&block)
+      each_from([@delta], &block)
     end
     public
@@ -301,37 +305,26 @@ module Bud
     public
     def tick_metrics
       strat_num = bud_instance.this_stratum
-      rule_num = bud_instance.this_rule
-      addr = nil
       addr = bud_instance.ip_port unless bud_instance.port.nil?
+      key = { :addr=>addr, :tabname=>qualified_tabname,
+              :strat_num=>strat_num}
       bud_instance.metrics[:collections] ||= {}
-      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
-      bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
+      bud_instance.metrics[:collections][key] ||= 0
+      bud_instance.metrics[:collections][key] += 1
     end
     private
     def each_from(bufs, &block) # :nodoc: all
+      do_metrics = bud_instance.options[:metrics]
       bufs.each do |b|
         b.each_value do |v|
-          tick_metrics if bud_instance and bud_instance.options[:metrics]
+          tick_metrics if do_metrics
           yield v
         end
       end
     end
-    public
-    def each_from_sym(buf_syms, &block) # :nodoc: all
-      bufs = buf_syms.map do |s|
-        case s
-        when :storage then @storage
-        when :delta then @delta
-        when :new_delta then @new_delta
-        else raise Bud::Error, "bad symbol passed into each_from_sym"
-        end
-      end
-      each_from(bufs, &block)
-    end
     private
     def init_storage
       @storage = {}
@@ -374,7 +367,7 @@ module Bud
     # checks for +item+ in the collection
     public
     def include?(item)
-      return true if key_cols.nil? or (key_cols.empty? and length > 0)
+      return true if key_cols.nil?
       return false if item.nil?
       key = get_key_vals(item)
       return (item == self[key])
@@ -650,6 +643,11 @@ module Bud
       end
     end
+    superator "<~" do |o|
+      # Overridden when <~ is defined (i.e., channels and terminals)
+      raise Bud::CompileError, "#{tabname} cannot appear on the lhs of a <~ operator"
+    end
     def tick
       raise Bud::Error, "tick must be overriden in #{self.class}"
     end
@@ -1111,6 +1109,11 @@ module Bud
       return true
     end
+    public
+    def bootstrap
+      # override BudCollection; pending should not be moved into delta.
+    end
     public
     def flush #:nodoc: all
       out_io = get_out_io
@@ -1257,7 +1260,7 @@ module Bud
     def invalidated=(val)
       # Might be reset to false at end-of-tick, but shouldn't be set to true
-      raise Bud::Error, "cannot not set invalidate on table '#{@tabname}'" if val
+      raise Bud::Error, "cannot set invalidate on table '#{@tabname}'" if val
       super
     end
@@ -1383,7 +1386,7 @@ module Bud
   end
   class BudFileReader < BudReadOnly # :nodoc: all
-    def initialize(name, filename, delimiter, bud_instance) # :nodoc: all
+    def initialize(name, filename, bud_instance) # :nodoc: all
       super(name, bud_instance, {[:lineno] => [:text]})
       @filename = filename
       @storage = {}

data/lib/bud/executor/elements.rb CHANGED

@@ -199,7 +199,7 @@ module Bud
     public
     def pro(the_name=elem_name, the_schema=schema, &blk)
       toplevel = @bud_instance.toplevel
-      elem = Bud::PushElement.new("project#{object_id}",
+      elem = Bud::PushElement.new("project#{object_id}".to_sym,
                                   toplevel.this_rule_context,
                                   @collection_name, the_schema)
       self.wire_to(elem)
@@ -213,7 +213,7 @@ module Bud
     public
     def each_with_index(&blk)
       toplevel = @bud_instance.toplevel
-      elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}",
+      elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}".to_sym,
                                         toplevel.this_rule_context,
                                         @collection_name)
       elem.set_block(&blk)
@@ -284,7 +284,7 @@ module Bud
       aggpairs = prep_aggpairs(aggpairs)
       toplevel = @bud_instance.toplevel
-      g = Bud::PushGroup.new('grp'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
+      g = Bud::PushGroup.new("grp#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
                              @collection_name, keycols, aggpairs, the_schema, &blk)
       self.wire_to(g)
       toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
@@ -302,7 +302,7 @@ module Bud
       end
       aggpairs = [[agg, collection]]
-      aa = Bud::PushArgAgg.new('argagg'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
+      aa = Bud::PushArgAgg.new("argagg#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
                                @collection_name, gbkey_cols, aggpairs, schema, &blk)
       self.wire_to(aa)
       toplevel.push_elems[[self.object_id, :argagg, gbkey_cols, aggpairs, blk]] = aa
@@ -346,7 +346,7 @@ module Bud
     end
     def reduce(initial, &blk)
-      retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}",
+      retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}".to_sym,
                                    @bud_instance, @collection_name,
                                    schema, initial, &blk)
       self.wire_to(retval)
@@ -498,7 +498,7 @@ module Bud
       end
       # send deltas out in all cases
-      @collection.delta.each_value {|item| push_out(item)}
+      @collection.each_delta {|item| push_out(item)}
     end
   end

data/lib/bud/executor/join.rb CHANGED

@@ -1,6 +1,5 @@
 require 'bud/executor/elements'
-$EMPTY = []
 module Bud
   class PushSHJoin < PushStatefulElement
     attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
@@ -41,18 +40,6 @@ module Bud
         @selfjoins << name if cnt == 2
       end
-      # derive schema: one column for each table.
-      # duplicated inputs get distinguishing numeral
-      @cols = []
-      retval = @all_rels_below.reduce({}) do |memo, r|
-        r_name = r.qualified_tabname.to_s
-        memo[r_name] ||= 0
-        newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
-        @cols << newstr.to_sym
-        memo[r_name] += 1
-        memo
-      end
       setup_preds(preds) unless preds.empty?
       setup_state
@@ -143,14 +130,6 @@ module Bud
         if source_elem.rescan
           puts "#{qualified_tabname} rel:#{i}(#{source_elem.qualified_tabname}) invalidated" if $BUD_DEBUG
           @hash_tables[i] = {}
-          if i == 0
-            # Only if i == 0 because outer joins in Bloom are left outer joins.
-            # If i == 1, missing_keys will be corrected when items are populated
-            # in the rhs fork.
-            # XXX This is not modular. We are doing invalidation work for outer
-            # joins, which is part of a separate module PushSHOuterJoin.
-            @missing_keys.clear
-          end
         end
       end
     end
@@ -165,7 +144,7 @@ module Bud
       # referenced in entry.
       subtuple = 0
       all_rels_below[0..all_rels_below.length-1].each_with_index do |t,i|
-        if t.qualified_tabname == entry[0]
+        if t.qualified_tabname == name
           subtuple = i
           break
         end
@@ -183,7 +162,7 @@ module Bud
           elsif k.class <= Array
             [k,v]
           elsif k.class <= Symbol
-            if @all_rels_below and @all_rels_below.length == 2
+            if @all_rels_below.length == 2
               [find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
             else
               [find_attr_match(k), find_attr_match(v)]
@@ -235,9 +214,10 @@ module Bud
     protected
     def canonicalize_localpreds(rel_list, preds) # :nodoc:all
-      retval = preds.map do |p|
-        # reverse if lhs is rel_list[1], *unless* it's a self-join!
-        (p[0][0] == rel_list[1].qualified_tabname and p[0][0] != p[1][0]) ? p.reverse : p
+      second_rel = rel_list[1].qualified_tabname
+      preds.map do |p|
+        # reverse if lhs is second_rel *unless* it's a self-join!
+        (p[0][0] == second_rel and p[0][0] != p[1][0]) ? p.reverse : p
       end
     end
@@ -251,16 +231,15 @@ module Bud
       # again if we didn't rescan now.
       replay_join if @rescan
-      if @selfjoins.include? source.qualified_tabname
+      source_tbl = source.qualified_tabname
+      if @selfjoins.include? source_tbl
         offsets = []
-        @relnames.each_with_index{|r,i| offsets << i if r == source.qualified_tabname}
+        @relnames.each_with_index{|r,i| offsets << i if r == source_tbl}
       else
-        offsets = [@relnames.index(source.qualified_tabname)]
-      end
-      raise Bud::Error, "item #{item.inspect} inserted into join from unknown source #{source.elem_name}" if offsets == $EMPTY
-      offsets.each do |offset|
-        insert_item(item, offset)
+        offsets = [@relnames.index(source_tbl)]
       end
+      offsets.each {|offset| insert_item(item, offset)}
     end
     protected
@@ -332,14 +311,32 @@ module Bud
     ####
     # and now, the Bloom-facing methods
     # given a * expression over n collections, form all combinations of items
-    # subject to an array of predicates, pred
-    # currently supports two options for equijoin predicates:
+    # subject to an array of predicates, +preds+.
+    # currently supports two syntax options for equijoin predicates:
     #    general form: an array of arrays capturing a conjunction of equiv. classes
     #          [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
     #    common form: a hash capturing equality of a column on left with one on right.
     #          :col1 => :col2  (same as  lefttable.col1 => righttable.col2)
     public
     def pairs(*preds, &blk)
+      if @cols.nil?
+        # derive schema if needed: one column for each table.  duplicated inputs
+        # get distinguishing numeral.
+        #
+        # XXX: actually, this seems completely bogus. The schema for the output
+        # of the join should depend on the join's *targetlist*.
+        @cols = []
+        retval = @all_rels_below.reduce({}) do |memo, r|
+          r_name = r.qualified_tabname.to_s
+          memo[r_name] ||= 0
+          newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
+          @cols << newstr.to_sym
+          memo[r_name] += 1
+          memo
+        end
+        setup_accessors
+      end
       @origpreds = preds
       setup_preds(preds) unless preds.empty?
       # given new preds, the state for the join will be different.  set it up again.
@@ -361,20 +358,24 @@ module Bud
     end
     public
-    def rights(*preds, &blk)
-      @cols = blk.nil? ? @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols : nil
-      setup_accessors if blk.nil?
+    def lefts(*preds, &blk)
+      if blk.nil?
+        @cols = @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols
+        setup_accessors
+      end
       pairs(*preds) do |x,y|
-        blk.nil? ? y : blk.call(y)
+        blk.nil? ? x : blk.call(x)
       end
     end
     public
-    def lefts(*preds, &blk)
-      @cols = blk.nil? ? @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols : nil
-      setup_accessors if blk.nil?
+    def rights(*preds, &blk)
+      if blk.nil?
+        @cols = @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols
+        setup_accessors
+      end
       pairs(*preds) do |x,y|
-        blk.nil? ? x : blk.call(x)
+        blk.nil? ? y : blk.call(y)
       end
     end
@@ -405,13 +406,13 @@ module Bud
     public
     def flatten(*preds, &blk)
       if blk.nil?
-        @cols = dupfree_schema(@bud_instance.tables[@cols[0]].cols + @bud_instance.tables[@cols[1]].cols)
+        @cols = dupfree_schema(@rels[0].cols + @rels[1].cols)
       else
         @cols = []
       end
       setup_accessors
       pairs(*preds) do |x,y|
-        blk.nil? ? x.to_a + y.to_a : blk.call(x.to_a + y.to_a)
+        blk.nil? ? x + y : blk.call(x + y)
       end
     end
@@ -484,6 +485,14 @@ module Bud
         end
       end
     end
+    public
+    def invalidate_cache
+      super
+      # Only if need to check left join rel because outer joins in Bloom are
+      # left outer joins.
+      @missing_keys.clear if @rels.first.rescan
+    end
   end
@@ -497,11 +506,11 @@ module Bud
   # first flush, at which point we are sure to have seen all the t-side tuples
   # in this tick.
   class PushNotIn < PushStatefulElement
-    def initialize(rellist, bud_instance, preds=nil, &blk) # :nodoc: all
+    def initialize(rellist, bud_instance, preds, &blk) # :nodoc: all
       @lhs, @rhs = rellist
       @lhs_keycols = nil
       @rhs_keycols = nil
-      name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}"
+      name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}".to_sym
       super(name_in, bud_instance, nil, @lhs.schema)
       setup_preds(preds) unless preds.empty?
       @rhs_rcvd = false
@@ -532,12 +541,13 @@ module Bud
     end
     def find_col(colspec, rel)
-      if colspec.is_a? Symbol
+      case colspec
+      when Symbol
         unless rel.respond_to? colspec
           raise Bud::Error, "attribute :#{colspec} not found in #{rel.qualified_tabname}"
         end
         col_desc = rel.send(colspec)
-      elsif colspec.is_a? Array
+      when Array
         col_desc = colspec
       else
         raise Bud::Error, "symbol or column spec expected. Got #{colspec}"
@@ -546,8 +556,8 @@ module Bud
     end
     def get_key(item, offset)
-      keycols = offset == 0 ? @lhs_keycols : @rhs_keycols
-      keycols.nil? ? $EMPTY : item.values_at(*keycols)
+      keycols = (offset == 0 ? @lhs_keycols : @rhs_keycols)
+      keycols.nil? ? [] : item.values_at(*keycols)
     end
     public
@@ -580,8 +590,9 @@ module Bud
       # growing any more, until the next tick.
       unless @rhs_rcvd
         @rhs_rcvd = true
+        rhs_hash = @hash_tables[1]
         @hash_tables[0].each do |key,values|
-          rhs_values = @hash_tables[1][key]
+          rhs_values = rhs_hash[key]
           values.each {|item| process_match(item, rhs_values)}
         end
       end

data/lib/bud/lattice-core.rb CHANGED

@@ -500,6 +500,11 @@ class Bud::LatticeWrapper
     end
   end
+  superator "<~" do |o|
+    # Overridden when <~ is defined (i.e., channels and terminals)
+    raise Bud::CompileError, "#{tabname} cannot appear on the lhs of a <~ operator"
+  end
   # XXX: refactor with BudCollection to avoid duplication of code
   def add_merge_target
     toplevel = @bud_instance.toplevel

data/lib/bud/monkeypatch.rb CHANGED

@@ -10,17 +10,27 @@ class Class
   end
 end
+$struct_classes = {}
+$struct_lock = Mutex.new
 # FIXME: Should likely override #hash and #eql? as well.
 class Bud::TupleStruct < Struct
   include Comparable
+  def self.new_struct(cols)
+    $struct_lock.synchronize {
+      ($struct_classes[cols] ||= Bud::TupleStruct.new(*cols))
+    }
+  end
+  # XXX: This only considers two TupleStruct instances to be equal if they have
+  # the same schema (column names) AND the same contents; unclear if structural
+  # equality (consider only values, not column names) would be better.
   def <=>(o)
     if o.class == self.class
       self.each_with_index do |e, i|
         other = o[i]
         next if e == other
-        return nil if e.nil?
-        return nil if other.nil?
         return e <=> other
       end
       return 0
@@ -35,18 +45,27 @@ class Bud::TupleStruct < Struct
     if o.class == self.class
       return super
     elsif o.class == Array
-      begin
-        self.each_with_index do |el, i|
-          return false if el != o[i]
-        end
-        return true
-      rescue StandardError
-        return false
+      return false if self.length != o.length
+      self.each_with_index do |el, i|
+        return false if el != o[i]
       end
+      return true
     end
     false
   end
+  def hash
+    self.values.hash
+  end
+  def eql?(o)
+    self == o
+  end
+  def +(o)
+    self.to_ary + o.to_ary
+  end
   def to_msgpack(out=nil)
     self.to_a.to_msgpack(out)
   end
@@ -56,15 +75,23 @@ class Bud::TupleStruct < Struct
   end
   alias :to_s :inspect
+  alias :to_ary :to_a
 end
 # XXX: TEMPORARY/UGLY hack to ensure that arrays and structs compare. This can be
 # removed once tests are rewritten.
 class Array
-  alias :oldeq :==
+  alias :old_eq :==
+  alias :old_eql? :eql?
   def ==(o)
     o = o.to_a if o.kind_of? Bud::TupleStruct
-    self.oldeq(o)
+    self.old_eq(o)
+  end
+  def eql?(o)
+    o = o.to_a if o.kind_of? Bud::TupleStruct
+    self.old_eql?(o)
   end
 end