RubyGems - bud - Versions diffs - 0.9.6 → 0.9.7 - Mend

bud 0.9.6 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

checksums.yaml +6 -14
data/History.txt +41 -0
data/README.md +5 -4
data/docs/cheat.md +108 -9
data/lib/bud/aggs.rb +4 -2
data/lib/bud/bud_meta.rb +1 -4
data/lib/bud/collections.rb +55 -27
data/lib/bud/executor/README.rescan +2 -1
data/lib/bud/executor/elements.rb +33 -37
data/lib/bud/executor/join.rb +88 -110
data/lib/bud/lattice-core.rb +21 -13
data/lib/bud/lattice-lib.rb +73 -41
data/lib/bud/monkeypatch.rb +16 -17
data/lib/bud/rewrite.rb +13 -10
data/lib/bud/source.rb +3 -1
data/lib/bud.rb +85 -46
metadata +41 -27

data/lib/bud/lattice-lib.rb CHANGED Viewed

@@ -118,18 +118,39 @@ class Bud::MapLattice < Bud::Lattice
     if @v.has_key? k
       @v[k]
     else
-      raise Bud::Error if args.empty?
+      if args.empty?
+        raise Bud::Error, "missing key for lmap#at(#{k}) but no bottom type given"
+      end
       args.first.new
     end
   end
+  morph :filter do
+    rv = {}
+    @v.each_pair do |k, val|
+      unless val.class <= Bud::BoolLattice
+        raise Bud::Error, "filter invoked on non-boolean map value: #{val}"
+      end
+      rv[k] = val if val.reveal == true
+    end
+    wrap_unsafe(rv)
+  end
   morph :apply_morph do |sym, *args|
-    raise Bud::Error unless Bud::Lattice.global_morphs.include? sym
+    unless Bud::Lattice.global_morphs.include? sym
+      raise Bud::Error, "apply_morph called with non-morphism: #{sym}"
+    end
     do_apply(sym, args)
   end
-  monotone :apply_monotone do |sym, *args|
-    raise Bud::Error unless Bud::Lattice.global_mfuncs.include? sym
+  # NB: "apply" can be used with both monotone functions and morphisms. We also
+  # provide apply_morph, which is slightly faster when the programmer knows they
+  # are applying a morphism.
+  monotone :apply do |sym, *args|
+    unless Bud::Lattice.global_mfuncs.include?(sym) ||
+           Bud::Lattice.global_morphs.include?(sym)
+      raise Bud::Error, "apply called with non-monotone function: #{sym}"
+    end
     do_apply(sym, args)
   end
@@ -223,19 +244,6 @@ class Bud::SetLattice < Bud::Lattice
     wrap_unsafe(@v & i.reveal)
   end
-  morph :product do |i, &blk|
-    rv = Set.new
-    @v.each do |a|
-      if blk.nil?
-        t = i.pro {|b| [a,b]}
-      else
-        t = i.pro {|b| blk.call(a, b)}
-      end
-      rv.merge(t.reveal)
-    end
-    wrap_unsafe(rv)
-  end
   morph :contains? do |i|
     Bud::BoolLattice.new(@v.member? i)
   end
@@ -254,44 +262,68 @@ class Bud::SetLattice < Bud::Lattice
     Bud::MaxLattice.new(@v.size)
   end
-  # Assuming that this set contains tuples (arrays) as elements, this performs
-  # an equijoin between the current lattice and i. The join predicate is
-  # "self_t[lhs_idx] == i_t[rhs_idx]", for all tuples self_t and i_t in self and
-  # i, respectively. The return value is the result of passing pairs of join
-  # tuples to the user-supplied block.
-  morph :eqjoin do |i, lhs_idx, rhs_idx, &blk|
+  # Assuming that the elements of this set are Structs (tuples with named field
+  # accessors), this performs an equijoin between the current lattice and
+  # i. `preds` is a hash of join predicates; each k/v pair in the hash is an
+  # equality predicate that self_tup[k] == i_tup[v]. The return value is the
+  # result of passing pairs of join tuples to the user-supplied code block
+  # (values for which the code block returns nil are omitted from the
+  # result). Note that if no predicates are passed, this computes the Cartesian
+  # product (in which case the input elements do not need to be Structs).
+  morph :eqjoin do |*args, &blk|
+    # Need to emulate default block arguments for MRI 1.8
+    i, preds = args
+    preds ||= {}
     rv = Set.new
     @v.each do |a|
-      i.probe(rhs_idx, a[lhs_idx]).each do |b|
-        rv << blk.call(a, b)
+      i.probe(a, preds).each do |b|
+        if blk.nil?
+          rv << [a,b]
+        else
+          val = blk.call(a, b)
+          rv << val unless val.nil?
+        end
       end
     end
     wrap_unsafe(rv)
   end
-  # Assuming that this set contains tuples (arrays), this returns a list of
-  # tuples (possibly empty) whose idx'th column has the value "v".
-  # XXX: we assume probe(idx, v) will only be called for a single value of idx!
-  def probe(idx, v)
-    @ht ||= build_ht(idx)
-    return @ht[v] || []
+  # Assuming that this set contains Structs, this method takes a value "val" and
+  # a hash of predicates "preds". It returns all the structs t where val[k] =
+  # t[v] for all k,v in preds; an empty array is returned if no matches found.
+  def probe(val, preds)
+    return @v if preds.empty?
+    probe_val = schema_fetch(val, preds.keys)
+    build_index(preds.values)
+    index = @join_indexes[preds.values]
+    return index[probe_val] || []
   end
   private
-  def build_ht(idx)
-    rv = {}
-    @v.each do |i|
-      field = i[idx]
-      rv[field] ||= []
-      rv[field] << i
+  def schema_fetch(val, cols)
+    cols.map {|s| val[s]}
+  end
+  def build_index(cols)
+    @join_indexes ||= {}
+    return @join_indexes[cols] if @join_indexes.has_key? cols
+    idx = {}
+    @v.each do |val|
+      index_val = schema_fetch(val, cols)
+      idx[index_val] ||= []
+      idx[index_val] << val
     end
-    rv
+    @join_indexes[cols] = idx
+    return idx
   end
 end
-# A set that admits only non-negative numbers. This allows "sum" to be an
-# order-preserving map.  Note that this does duplicate elimination on its input,
-# so it actually computes "SUM(DISTINCT ...)" in SQL.
+# A set that admits only non-negative numbers. This allows "sum" to be a
+# monotone function.  Note that this does duplicate elimination on its input, so
+# it actually computes "SUM(DISTINCT ...)" in SQL.
 #
 # XXX: for methods that take a user-provided code block, we need to ensure that
 # the set continues to contain only non-negative numbers.

data/lib/bud/monkeypatch.rb CHANGED Viewed

@@ -10,33 +10,34 @@ class Class
   end
 end
-# FIXME: Use a subclass of Struct.
-# FIXME: Should likely override eql? as well
-class Struct
+# FIXME: Should likely override #hash and #eql? as well.
+class Bud::TupleStruct < Struct
+  include Comparable
   def <=>(o)
     if o.class == self.class
       self.each_with_index do |e, i|
-        cmp = e <=> o[i]
-        return cmp if cmp != 0
+        other = o[i]
+        next if e == other
+        return nil if e.nil?
+        return nil if other.nil?
+        return e <=> other
       end
       return 0
     elsif o.nil?
-      return -1
+      return nil
     else
       raise "Comparison (<=>) between #{o.class} and #{self.class} not implemented"
     end
   end
-  alias oldeq :==
   def ==(o)
     if o.class == self.class
-      return oldeq(o)
+      return super
     elsif o.class == Array
       begin
         self.each_with_index do |el, i|
-          if el != o[i]
-            return false
-          end
+          return false if el != o[i]
         end
         return true
       rescue StandardError
@@ -62,9 +63,7 @@ end
 class Array
   alias :oldeq :==
   def ==(o)
-    if o.kind_of? Struct
-      o = o.to_a
-    end
+    o = o.to_a if o.kind_of? Bud::TupleStruct
     self.oldeq(o)
   end
 end
@@ -126,7 +125,6 @@ class Module
     @bud_import_tbl
   end
   # the block of Bloom collection declarations.  one per module.
   def state(&block)
     meth_name = Module.make_state_meth_name(self)
@@ -139,8 +137,9 @@ class Module
     define_method(meth_name, &block)
   end
-  # bloom statements to be registered with Bud runtime.  optional +block_name+
-  # allows for multiple bloom blocks per module and method overriding
+  # bloom statements to be registered with Bud runtime.  optional +block_name+
+  # assigns a name for the block; this is useful documentation, and also allows
+  # the block to be overridden in a child class.
   def bloom(block_name=nil, &block)
     # If no block name was specified, generate a unique name
     if block_name.nil?

data/lib/bud/rewrite.rb CHANGED Viewed

@@ -8,7 +8,7 @@ class RuleRewriter < Ruby2Ruby # :nodoc: all
   MONOTONE_WHITELIST = [:==, :+, :<=, :-, :<, :>, :*, :~,
                         :pairs, :matches, :combos, :flatten, :new,
                         :lefts, :rights, :map, :flat_map, :pro, :merge,
-                        :cols, :key_cols, :val_cols, :payloads, :lambda,
+                        :schema, :cols, :key_cols, :val_cols, :payloads, :lambda,
                         :tabname, :current_value].to_set
   def initialize(seed, bud_instance)
@@ -366,7 +366,7 @@ class UnsafeFuncRewriter < SexpProcessor
     # We assume that unsafe funcs have a nil receiver (Bud instance is implicit
     # receiver).
     if recv.nil? and @elem_stack.size > 0
-      unless is_safe_func(op) || is_lattice?(op)
+      unless is_safe_func(op) || is_collection_name?(op)
         @unsafe_func_called = true
       end
     end
@@ -388,8 +388,8 @@ class UnsafeFuncRewriter < SexpProcessor
     return rv
   end
-  def is_lattice?(op)
-    @bud_instance.lattices.has_key? op.to_sym
+  def is_collection_name?(op)
+    @bud_instance.tables.has_key?(op.to_sym) || @bud_instance.lattices.has_key?(op.to_sym)
   end
   def is_safe_func(op)
@@ -560,7 +560,7 @@ class TempExpander < SexpProcessor # :nodoc: all
   attr_reader :tmp_tables
   attr_accessor :did_work
-  KEYWORD = :temp
+  TEMP_KEYWORD = :temp
   def initialize
     super()
@@ -588,8 +588,8 @@ class TempExpander < SexpProcessor # :nodoc: all
       end
       _, recv, meth, meth_args = n
-      if meth == KEYWORD and recv.nil?
-        body[i] = rewrite_me(n)
+      if meth == TEMP_KEYWORD and recv.nil?
+        body[i] = rewrite_temp(n)
         @did_work = true
       end
     end
@@ -602,7 +602,7 @@ class TempExpander < SexpProcessor # :nodoc: all
       call_node = iter_body.first
       _, recv, meth, *meth_args = call_node
-      if meth == KEYWORD and recv.nil?
+      if meth == TEMP_KEYWORD and recv.nil?
         _, lhs, op, rhs = meth_args.first
         new_rhs = s(:iter, rhs, *(iter_body[1..-1]))
         meth_args.first[3] = new_rhs
@@ -612,7 +612,7 @@ class TempExpander < SexpProcessor # :nodoc: all
     return nil
   end
-  def rewrite_me(exp)
+  def rewrite_temp(exp)
     _, recv, meth, *args = exp
     raise Bud::CompileError unless recv.nil?
@@ -620,7 +620,10 @@ class TempExpander < SexpProcessor # :nodoc: all
     raise Bud::CompileError unless nest_call.sexp_type == :call
     nest_recv, nest_op, *nest_args = nest_call.sexp_body
-    raise Bud::CompileError unless nest_recv.sexp_type == :lit
+    unless nest_recv.sexp_type == :lit
+      recv_src = Ruby2Ruby.new.process(Marshal.load(Marshal.dump(nest_recv)))
+      raise Bud::CompileError, "argument to temp must be a symbol: #{recv_src}"
+    end
     tmp_name = nest_recv.sexp_body.first
     @tmp_tables << tmp_name

data/lib/bud/source.rb CHANGED Viewed

@@ -17,7 +17,9 @@ module Source
     lines = cache(filename, num)
     # Note: num is 1-based.
-    parser = RubyParser.for_current_ruby
+    # for_current_ruby might object if the current Ruby version is not supported
+    # by RubyParser; bravely try to continue on regardless
+    parser = RubyParser.for_current_ruby rescue RubyParser.new
     stmt = ""       # collection of lines that form one complete Ruby statement
     ast = nil
     lines[num .. -1].each do |l|

data/lib/bud.rb CHANGED Viewed

@@ -71,6 +71,7 @@ module Bud
   attr_reader :tables, :builtin_tables, :channels, :zk_tables, :dbm_tables, :app_tables, :lattices
   attr_reader :push_sources, :push_elems, :push_joins, :scanners, :merge_targets
   attr_reader :this_stratum, :this_rule, :rule_orig_src, :done_bootstrap
+  attr_reader :inside_tick
   attr_accessor :stratified_rules
   attr_accessor :metrics, :periodics
   attr_accessor :this_rule_context, :qualified_name
@@ -165,11 +166,11 @@ module Bud
     do_rewrite
     if toplevel == self
       # initialize per-stratum state
-      num_strata = @stratified_rules.length
-      @scanners = num_strata.times.map{{}}
-      @push_sources = num_strata.times.map{{}}
-      @push_joins = num_strata.times.map{[]}
-      @merge_targets = num_strata.times.map{Set.new}
+      @num_strata = @stratified_rules.length
+      @scanners = @num_strata.times.map{{}}
+      @push_sources = @num_strata.times.map{{}}
+      @push_joins = @num_strata.times.map{[]}
+      @merge_targets = @num_strata.times.map{Set.new}
     end
   end
@@ -318,8 +319,6 @@ module Bud
   end
   def do_wiring
-    @num_strata = @stratified_rules.length
     @stratified_rules.each_with_index { |rules, stratum| eval_rules(rules, stratum) }
     # Prepare list of tables that will be actively used at run time. First, all
@@ -367,6 +366,24 @@ module Bud
       end
     end
+    # We create "orphan" scanners for collections that don't appear on the RHS
+    # of any rules, but do appear on the LHS of at least one rule. These
+    # scanners aren't needed to compute the fixpoint, but they are used as part
+    # of rescan/invalidation (e.g., if an orphaned collection receives a manual
+    # deletion operation, we need to arrange for the collection to be
+    # re-filled).
+    @orphan_scanners = []       # Pairs of [scanner, stratum]
+    @app_tables.each do |t|
+      next unless t.class <= Bud::BudCollection         # skip lattice wrappers
+      next if t.scanner_cnt > 0
+      stratum = collection_stratum(t.qualified_tabname.to_s)
+      # if the collection also doesn't appear on any LHSs, skip it
+      next if stratum.nil?
+      @orphan_scanners << [Bud::ScannerElement.new(t.tabname, self, t, t.schema),
+                           stratum]
+    end
     # Sanity check
     @push_sorted_elems.each do |stratum_elems|
       stratum_elems.each {|se| se.check_wiring}
@@ -432,19 +449,17 @@ module Bud
   #
   # scanner[stratum].rescan_set = Similar to above.
   def prepare_invalidation_scheme
-    num_strata = @push_sorted_elems.size
     if $BUD_SAFE
       @app_tables = @tables.values + @lattices.values # No collections excluded
       rescan = Set.new
       invalidate = @app_tables.select {|t| t.class <= BudScratch}.to_set
-      num_strata.times do |stratum|
+      @num_strata.times do |stratum|
         @push_sorted_elems[stratum].each do |elem|
           invalidate << elem
           rescan << elem
         end
       end
-      #prune_rescan_invalidate(rescan, invalidate)
       @default_rescan = rescan.to_a
       @default_invalidate = invalidate.to_a
       @reset_list = [] # Nothing to reset at end of tick. It'll be overwritten anyway
@@ -474,7 +489,7 @@ module Bud
     invalidate = @app_tables.select {|t| t.invalidate_at_tick}.to_set
     rescan = Set.new
-    num_strata.times do |stratum|
+    @num_strata.times do |stratum|
       @push_sorted_elems[stratum].each do |elem|
         rescan << elem if elem.rescan_at_tick
@@ -496,29 +511,35 @@ module Bud
       puts "Unsafe targets: #{unsafe_targets.inspect}"
     end
-    # Now compute for each table that is to be scanned, the set of dependent
-    # tables and elements that will be invalidated if that table were to be
-    # invalidated at run time.
+    # For each collection that is to be scanned, compute the set of dependent
+    # tables and elements that will need invalidation and/or rescan if that
+    # table were to be invalidated at runtime.
     dflt_rescan = rescan
     dflt_invalidate = invalidate
     to_reset = rescan + invalidate
-    num_strata.times do |stratum|
-      @scanners[stratum].each_value do |scanner|
-        # If it is going to be always invalidated, it doesn't need further
-        # examination
-        next if dflt_rescan.member? scanner
-        rescan = dflt_rescan + [scanner]  # add scanner to scan set
-        invalidate = dflt_invalidate.clone
-        rescan_invalidate_tc(stratum, rescan, invalidate)
-        prune_rescan_invalidate(rescan, invalidate)
-        to_reset.merge(rescan)
-        to_reset.merge(invalidate)
-        # Give the diffs (from default) to scanner; these are elements that are
-        # dependent on this scanner
-        diffscan = (rescan - dflt_rescan).find_all {|elem| elem.class <= PushElement}
-        scanner.invalidate_at_tick(diffscan, (invalidate - dflt_invalidate).to_a)
-      end
+    each_scanner do |scanner, stratum|
+      # If it is going to be always invalidated, it doesn't need further
+      # examination. Lattice scanners also don't get invalidated.
+      next if dflt_rescan.member? scanner
+      next if scanner.class <= LatticeScanner
+      rescan = dflt_rescan.clone
+      invalidate = dflt_invalidate + [scanner.collection]
+      rescan_invalidate_tc(stratum, rescan, invalidate)
+      prune_rescan_invalidate(rescan, invalidate)
+      # Make sure we reset the rescan/invalidate flag for this scanner at
+      # end-of-tick, but we can remove the scanner from its own
+      # rescan_set/inval_set.
+      to_reset.merge(rescan)
+      to_reset.merge(invalidate)
+      rescan.delete(scanner)
+      invalidate.delete(scanner.collection)
+      # Give the diffs (from default) to scanner; these are elements that are
+      # dependent on this scanner
+      diffscan = (rescan - dflt_rescan).find_all {|elem| elem.class <= PushElement}
+      scanner.invalidate_at_tick(diffscan, (invalidate - dflt_invalidate).to_a)
     end
     @reset_list = to_reset.to_a
@@ -560,6 +581,12 @@ module Bud
   # Given rescan, invalidate sets, compute transitive closure
   def rescan_invalidate_tc(stratum, rescan, invalidate)
+    # XXX: hack. If there's nothing in the given stratum, don't do
+    # anything. This can arise if we have an orphan scanner whose input is a
+    # non-monotonic operator; the stratum(LHS) = stratum(RHS) + 1, but there's
+    # nothing else in stratum(LHS).
+    return if @push_sorted_elems[stratum].nil?
     rescan_len = rescan.size
     invalidate_len = invalidate.size
     while true
@@ -576,6 +603,18 @@ module Bud
     rescan.delete_if {|e| e.rescan_at_tick}
   end
+  def each_scanner
+    @num_strata.times do |stratum|
+      @scanners[stratum].each_value do |scanner|
+        yield scanner, stratum
+      end
+    end
+    @orphan_scanners.each do |scanner,stratum|
+      yield scanner, stratum
+    end
+  end
   def do_rewrite
     @meta_parser = BudMeta.new(self, @declarations)
     @stratified_rules = @meta_parser.meta_rewrite
@@ -1052,23 +1091,23 @@ module Bud
           elem.invalidate_cache unless elem.class <= PushElement
         }
-        num_strata = @push_sorted_elems.size
         # The following loop invalidates additional (non-default) elements and
         # tables that depend on the run-time invalidation state of a table.
         # Loop once to set the flags.
-        num_strata.times do |stratum|
-          @scanners[stratum].each_value do |scanner|
-            if scanner.rescan
-              scanner.rescan_set.each {|e| e.rescan = true}
-              scanner.invalidate_set.each {|e|
-                e.invalidated = true
-                e.invalidate_cache unless e.class <= PushElement
-              }
-            end
+        each_scanner do |scanner, stratum|
+          if scanner.rescan
+            scanner.rescan_set.each {|e| e.rescan = true}
+            scanner.invalidate_set.each {|e|
+              e.invalidated = true
+              e.invalidate_cache unless e.class <= PushElement
+            }
           end
         end
-        # Loop a second time to actually call invalidate_cache
-        num_strata.times do |stratum|
+        # Loop a second time to actually call invalidate_cache.  We can't merge
+        # this with the loops above because some versions of invalidate_cache
+        # (e.g., join) depend on the rescan state of other elements.
+        @num_strata.times do |stratum|
           @push_sorted_elems[stratum].each {|e| e.invalidate_cache if e.invalidated}
         end
       end
@@ -1134,14 +1173,14 @@ module Bud
   end
   # Return the stratum number of the given collection.
-  # NB: if a collection is not referenced by any rules, it is not currently
-  # assigned to a strata.
+  # NB: if a collection does not appear on the lhs or rhs of any rules, it is
+  # not currently assigned to a stratum.
   def collection_stratum(collection)
     t_stratum.each do |t|
       return t.stratum if t.predicate == collection
     end
-    raise Bud::Error, "no such collection: #{collection}"
+    return nil
   end
   private