RubyGems - davidrichards-sirb - Versions diffs - 0.6.14 - Mend

davidrichards-sirb 0.6.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

data/README.rdoc +227 -0
data/VERSION.yml +4 -0
data/bin/sirb +33 -0
data/lib/overrides/array.rb +6 -0
data/lib/overrides/file.rb +17 -0
data/lib/overrides/module.rb +70 -0
data/lib/overrides/symbol.rb +39 -0
data/lib/sirb/enumerable_statistics.rb +350 -0
data/lib/sirb/functional.rb +114 -0
data/lib/sirb/general_statistics.rb +72 -0
data/lib/sirb/inter_enumerable_statistics.rb +139 -0
data/lib/sirb/lib_loader.rb +45 -0
data/lib/sirb/runner.rb +274 -0
data/lib/sirb/sproc/proc.rb +5 -0
data/lib/sirb/sproc/proc_source.rb +130 -0
data/lib/sirb/sproc/sproc.rb +79 -0
data/lib/sirb/sproc/usage_notes.txt +25 -0
data/lib/sirb/sproc.rb +29 -0
data/lib/sirb/thread_support.rb +20 -0
data/lib/sirb/unbound_method.rb +5 -0
data/lib/sirb.rb +52 -0
data/lib/stored_procedures.rb +10 -0
data/spec/lib/overrides/array_spec.rb +7 -0
data/spec/lib/overrides/file_spec.rb +13 -0
data/spec/lib/overrides/module_spec.rb +86 -0
data/spec/lib/overrides/symbol_spec.rb +39 -0
data/spec/lib/sirb/enumerable_statistics_spec.rb +85 -0
data/spec/lib/sirb/functional_spec.rb +75 -0
data/spec/lib/sirb/general_statistics_spec.rb +40 -0
data/spec/lib/sirb/inter_enumerable_statistics_spec.rb +55 -0
data/spec/lib/sirb/lib_loader_spec.rb +39 -0
data/spec/lib/sirb/runner_spec.rb +9 -0
data/spec/lib/sirb/sproc/proc_spec.rb +9 -0
data/spec/lib/sirb/sproc/sproc_spec.rb +25 -0
data/spec/lib/sirb/unbound_method_spec.rb +12 -0
data/spec/lib/sirb_spec.rb +9 -0
data/spec/spec_helper.rb +15 -0
metadata +97 -0

data/lib/sirb/enumerable_statistics.rb ADDED Viewed

@@ -0,0 +1,350 @@
module Sirb #:nodoc:
  # These are the standard R vector functions that I want to add to any
  # Enumerable class for Ruby.  I started by borrowing heavily from
  # Gotoken's math/statistics project
  # (http://raa.ruby-lang.org/project/math-statistics/).  There were a few
  # changes that don't make sense in the idiomatic Ruby that I now use (a
  # few things have changed since 2001).
  #
  # Most methods choose the value to work on in the same order: an explicit
  # block wins, then the default block (see #default_block), then the raw
  # element.
  #
  # The following is a table of values from R to my methods
  #
  # max         | max
  # min         | min
  # sum         | sum
  # mean        | mean
  # median      | median
  # range       | range
  # var         | var variance
  # cor         | cor correlation
  # sort        | sort
  # rank        | rank
  # order       | order
  # quantile    | quantile
  # cumsum      | cum_sum cumulative_sum
  # cumprod     | cum_prod cumulative_product
  # cummax      | cum_max cumulative_max
  # cummin      | cum_min cumulative_min
  # pmax        | p_max
  # pmin        | p_min
  module EnumerableStatistics
    # There are issues with this...
    include GeneralStatistics

    # Hook run when this module is mixed into +mod+.  Refuses anything that
    # is not Enumerable and gives +mod+ a class-level default block stored
    # in the STAT_BLOCK constant.
    def self.append_features(mod)
      # NOTE(review): these aliases are evaluated inside a method body, so
      # it is not obvious which class they land on -- confirm before relying
      # on original_max / original_min.
      alias :original_max :max
      alias :original_min :min
      unless mod < Enumerable
        raise TypeError,
          "`#{self}' can't be included non Enumerable (#{mod})"
      end
      def mod.default_block=(block)
        self.const_set("STAT_BLOCK", block)
      end
      def mod.default_block
        defined?(self::STAT_BLOCK) && self::STAT_BLOCK
      end
      super
    end

    # The block applied to every element when no explicit block is given:
    # the per-instance one if set, otherwise the class-level one.
    def default_block
      @stat_block || self.class.default_block
    end

    # Sets the per-instance default block.
    def default_block=(block)
      @stat_block = block
    end

    # Sum of the (possibly transformed) elements, always as a Float.
    def sum
      total = 0.0
      if block_given?
        each { |i| total += yield(i) }
      elsif default_block
        each { |i| total += default_block[*i] }
      else
        each { |i| total += i }
      end
      total
    end

    # Arithmetic mean: sum / size.
    def average(&block)
      sum(&block) / size
    end
    alias :mean :average
    alias :avg :average

    # Sample variance (divides by size - 1).
    def variance(&block)
      m = mean(&block)
      sum_of_differences = if block_given?
        sum { |i| j = yield(i); (m - j) ** 2 }
      elsif default_block
        sum { |i| j = default_block[*i]; (m - j) ** 2 }
      else
        sum { |i| (m - i) ** 2 }
      end
      sum_of_differences / (size - 1)
    end
    alias :var :variance

    # Square root of the sample variance.
    def standard_deviation(&block)
      Math::sqrt(variance(&block))
    end
    alias :std :standard_deviation

    # Smallest (possibly transformed) value, delegated to Object.min.
    def min(&block)
      list = if block_given?
        map { |x| yield(x) }
      elsif default_block
        map { |x| default_block[*x] }
      else
        self
      end
      Object.min(*list)
    end

    # Index of the first element equal to #min.
    def min_index
      index(min)
    end

    # Largest (possibly transformed) value, delegated to Object.max.
    def max
      list = if block_given?
        map { |x| yield(x) }
      elsif default_block
        map { |x| default_block[*x] }
      else
        self
      end
      Object.max(*list)
    end

    # Index of the first element equal to #max.
    def max_index
      index(max)
    end

    # The slow way is to iterate up to the middle point.  A faster way is to
    # use the index, when available.  If a block is supplied, always iterate
    # to the middle point.
    #
    # +ratio+ is how far to interpolate between the two middle values when
    # they differ (0.5 gives the conventional median).
    def median(ratio=0.5, &block)
      return iterate_midway(ratio, &block) if block_given?
      begin
        mid1, mid2 = middle_two
        sorted = new_sort
        med1, med2 = sorted[mid1], sorted[mid2]
        return med1 if med1 == med2
        return med1 + ((med2 - med1) * ratio)
      rescue
        # Indexed access failed; fall back to walking the sorted values.
        iterate_midway(ratio, &block)
      end
    end

    # Zero-based positions of the two middle elements.  They coincide for an
    # odd size.
    def middle_two
      mid2 = size.div(2)
      mid1 = (size % 2 == 0) ? mid2 - 1 : mid2
      return mid1, mid2
    end
    protected :middle_two

    # Position of the upper middle element.
    def median_position
      middle_two.last
    end
    protected :median_position

    # Copy of the elements from the start through the median position, in
    # the receiver's current order (no sorting happens here).  The block is
    # accepted but not used.
    def first_half(&block)
      self[0..median_position].dup
    end
    protected :first_half

    # Copy of the elements from around the median position to the end, in
    # the receiver's current order.  The block is accepted but not used.
    def second_half(&block)
      # Total crap, but it's the way R does things, and this will most likely
      # only be used to feed R some numbers to plot, if at all.
      size <= 5 ? self[median_position..-1].dup : self[median_position - 1..-1].dup
    end
    protected :second_half

    # An iterative version of median: walks the sorted, already transformed
    # values only as far as the upper middle position.  Returns nil for an
    # empty collection.
    def iterate_midway(ratio, &block)
      mid1, mid2 = middle_two
      med1 = med2 = nil
      # new_sort applies the block (or the default block) exactly once, so
      # the values seen here are the ones being ranked.
      new_sort(&block).each_with_index do |value, j|
        med1 = value if j == mid1
        med2 = value if j == mid2
        break if j >= mid2
      end
      return med1 if med1 == med2
      med1 + ((med2 - med1) * ratio)
    end
    protected :iterate_midway

    # Just an array of [min, max] to comply with R uses of the word.  Use
    # range_as_range if you want a real Range.
    def range(&block)
      [min(&block), max(&block)]
    end

    # Useful for setting a real range class (FixedRange).
    def range_class=(klass)
      @range_class = klass
    end

    # The class used by #range_as_range; Range unless overridden.
    def range_class
      @range_class ||= Range
    end

    # min..max built with #range_class.
    def range_as_range(&block)
      range_class.new(min(&block), max(&block))
    end

    # I don't pass the block to the sort, because a sort block needs to look
    # something like: {|x,y| x <=> y}.  To get around this, set the default
    # block on the object.
    #
    # Returns a new sorted array of the (possibly transformed) values.
    def new_sort(&block)
      if block_given?
        map { |i| yield(i) }.sort.dup
      elsif default_block
        map { |i| default_block[*i] }.sort.dup
      else
        sort().dup
      end
    end

    # One-based position of each element's value in the sorted values; ties
    # share the lowest position.
    #
    # NOTE(review): the guard below is evaluated once, against the module
    # itself, while the file loads -- confirm it really protects things like
    # Matrix#rank as the original comment intended.
    def rank(&block)
      # Sort the same transformed values that are looked up below.
      sorted = new_sort(&block)
      if block_given?
        map { |i| sorted.index(yield(i)) + 1 }
      elsif default_block
        map { |i| sorted.index(default_block[*i]) + 1 }
      else
        map { |i| sorted.index(i) + 1 }
      end
    end unless defined?(rank)

    # Given values like [10,5,5,1]
    # Rank should produce something like [4,2,2,1]
    # And order should produce something like [4,2,3,1]
    # The trick is that rank skips as many as were duplicated, so there
    # could not be a 3 in the rank from the example above.
    def order(&block)
      taken = []
      rank(&block).each do |position|
        slot = position
        # A tied rank moves up to the next free slot.
        slot += 1 while taken.include?(slot)
        taken << slot
      end
      taken
    end

    # First quartile: nth_split_by_m(1, 4)
    # Third quartile: nth_split_by_m(3, 4)
    # Median: nth_split_by_m(1, 2)
    # Doesn't match R, and it's silly to try to.
    # def nth_split_by_m(n, m)
    #   sorted  = new_sort
    #   dividers = m - 1
    #   if size % m == dividers # Divides evenly
    #     # Because we have a 0-based list, we get the floor
    #     i = ((size / m.to_f) * n).floor
    #     j = i
    #   else
    #     # This reflects R's approach, which I don't think I agree with.
    #     i = (((size / m.to_f) * n) - 1)
    #     i = i > (size / m.to_f) ? i.floor : i.ceil
    #     j = i + 1
    #   end
    #   sorted[i] + ((n / m.to_f) * (sorted[j] - sorted[i]))
    # end

    # Five-number summary: [min, lower quartile, median, upper quartile, max].
    # The quartiles are medians of #first_half and #second_half.
    def quantile(&block)
      [
        min(&block),
        first_half(&block).median(0.25, &block),
        median(&block),
        second_half(&block).median(0.75, &block),
        max(&block)
      ]
    end

    # Running totals (Floats).  Pass true to accumulate over the sorted
    # values instead of the current order.
    def cum_sum(sorted=false, &block)
      sum = 0.0
      obj = sorted ? self.new_sort : self
      if block_given?
        obj.map { |i| sum += yield(i) }
      elsif default_block
        obj.map { |i| sum += default_block[*i] }
      else
        obj.map { |i| sum += i }
      end
    end
    alias :cumulative_sum :cum_sum

    # Running products (Floats).  Pass true to accumulate over the sorted
    # values instead of the current order.
    def cum_prod(sorted=false, &block)
      prod = 1.0
      obj = sorted ? self.new_sort : self
      if block_given?
        obj.map { |i| prod *= yield(i) }
      elsif default_block
        obj.map { |i| prod *= default_block[*i] }
      else
        obj.map { |i| prod *= i }
      end
    end
    alias :cumulative_product :cum_prod

    # Running maximum, computed with Object.max.
    def cum_max(&block)
      current_max = nil
      if block_given?
        map { |i| current_max = Object.max(current_max, yield(i)) }
      elsif default_block
        map { |i| current_max = Object.max(current_max, default_block[*i]) }
      else
        map { |i| current_max = Object.max(current_max, i) }
      end
    end
    alias :cumulative_max :cum_max

    # Running minimum, computed with Object.min.
    def cum_min(&block)
      current_min = nil
      if block_given?
        map { |i| current_min = Object.min(current_min, yield(i)) }
      elsif default_block
        map { |i| current_min = Object.min(current_min, default_block[*i]) }
      else
        map { |i| current_min = Object.min(current_min, i) }
      end
    end
    alias :cumulative_min :cum_min
  end
end

data/lib/sirb/functional.rb ADDED Viewed

@@ -0,0 +1,114 @@
+# This is probably border-line for what O'Reilly meant for using their
+# code.  I grabbed six methods from The Ruby Programming Language,
+# section 6.8.  I want to experiment with how this could change some of
+# my methods.
+# This module defines methods and operators for functional programming.
# Methods and operators for functional programming, meant to be mixed into
# callable objects (anything responding to [] / call, and to_proc where
# a block is needed).
module Functional
  # Apply this function to each element of the specified Enumerable,
  # returning an array of results. This is the reverse of Enumerable.map.
  # Use | as an operator alias. Read "|" as "over" or "applied over".
  # An argument that does not respond to map is passed to the function
  # directly.
  #
  # Example:
  #   a = [[1,2],[3,4]]
  #   sum = lambda {|x,y| x+y}
  #   sums = sum|a   # => [3,7]
  def apply(enum)
    enum.respond_to?(:map) ? enum.map(&self) : self.call(enum)
  end
  alias | apply

  # Use this function to "reduce" an enumerable to a single quantity.
  # This is the inverse of Enumerable.inject.
  # Use <= as an operator alias.
  # Mnemonic: <= looks like a needle for injections
  # Example:
  #   data = [1,2,3,4]
  #   sum = lambda {|x,y| x+y}
  #   total = sum<=data   # => 10
  def reduce(enum)
    enum.inject(&self)
  end
  alias <= reduce

  # Return a new lambda that computes self[f[args]].
  # Use * as an operator alias for compose.
  # Examples, using the * alias for this method.
  #
  # f = lambda {|x| x*x }
  # g = lambda {|x| x+1 }
  # (f*g)[2]   # => 9
  # (g*f)[2]   # => 5
  #
  # def polar(x,y)
  #   [Math.hypot(y,x), Math.atan2(y,x)]
  # end
  # def cartesian(magnitude, angle)
  #   [magnitude*Math.cos(angle), magnitude*Math.sin(angle)]
  # end
  # p,c = method :polar, method :cartesian
  # (c*p)[3,4]  # => [3,4]
  #
  def compose(f)
    if self.respond_to?(:arity) && self.arity == 1
      # A one-argument function receives f's result as a single value.
      lambda {|*args| self[f[*args]] }
    else
      # Otherwise f's result is splatted into this function's arguments.
      lambda {|*args| self[*f[*args]] }
    end
  end
  # * is the natural operator for function composition.
  alias * compose

  # Return a lambda equivalent to this one with one or more initial
  # arguments applied. When only a single argument
  # is being specified, the >> alias may be simpler to use.
  # Example:
  #   product = lambda {|x,y| x*y}
  #   doubler = product >> 2
  #
  def apply_head(*first)
    # Build a fresh argument list per call: concat would mutate the
    # captured +first+ and leak arguments from one call into the next.
    lambda {|*rest| self[*(first + rest)] }
  end

  #
  # Return a lambda equivalent to this one with one or more final arguments
  # applied. When only a single argument is being specified,
  # the << alias may be simpler.
  # Example:
  #  difference = lambda {|x,y| x-y }
  #  decrement = difference << 1
  #
  def apply_tail(*last)
    lambda {|*rest| self[*(rest + last)] }
  end

  # Here are operator alternatives for these methods. The angle brackets
  # point to the side on which the argument is shifted in.
  # alias >> apply_head    # g = f >> 2 -- set first arg to 2
  # alias << apply_tail    # g = f << 2 -- set last arg to 2

  # Return a new lambda that caches the results of this function and
  # only calls the function when new arguments are supplied.
  #
  def memoize
    cache = {}  # An empty cache. The lambda captures this in its closure.
    lambda {|*args|
      # notice that the hash key is the entire array of arguments!
      unless cache.has_key?(args)  # If no cached result for these args
        cache[args] = self[*args]  # Compute and cache the result
      end
      cache[args]                  # Return result from cache
    }
  end
  # A (probably unnecessary) unary + operator for memoization
  # Mnemonic: the + operator means "improved"
  alias +@ memoize        # cached_f = +f
end
+# I add these here, instead of in overrides because it makes things a
+# lot simpler for sirb.rb to figure out the load order.
+# Add these functional programming methods to Proc and Method classes.
# Mix the functional helpers into both kinds of callable object.
[Proc, Method].each { |callable| callable.send(:include, Functional) }

data/lib/sirb/general_statistics.rb ADDED Viewed

@@ -0,0 +1,72 @@
module Sirb #:nodoc:
  # These are general statistics, things that should stand on their own as
  # concepts unattached to vectors or scalars or whatever.
  module GeneralStatistics #:nodoc:
    # When mixed in, archives the host's existing max and min (via
    # archive_method, which is defined outside this file) and then adds
    # InstanceMethods on top.
    def self.included(base)
      base.class_eval do
        archive_method(:max)
        archive_method(:min)
        include InstanceMethods
      end
    end

    module InstanceMethods
      # Max of any number of items.  nil arguments are ignored; returns nil
      # when called with no arguments or only nils.  A block can be passed if
      # a special comparison is wanted (not typically); see max2.
      def max(*x, &block)
        x.inject { |best, candidate| max2(best, candidate, &block) }
      end

      # Returns the first index of the max value
      def max_index(*x, &block)
        x.index(max(*x, &block))
      end

      # Returns the max, the non-nil value, or nil (if both are nil).  When a
      # block is given it receives both values and its result is returned, so
      # it must hand back the winner rather than a comparison number.
      def max2(x, y, &block)
        return y if x.nil?
        return x if y.nil?
        if block_given?
          yield(x, y)
        else
          (x <=> y) > 0 ? x : y
        end
      end

      # Min of any number of items.  nil arguments are ignored; returns nil
      # when called with no arguments or only nils.
      def min(*x, &block)
        x.inject { |best, candidate| min2(best, candidate, &block) }
      end

      # Returns the first index of the min value
      def min_index(*x, &block)
        x.index(min(*x, &block))
      end

      # Returns the min, the non-nil value, or nil (if both are nil).  When a
      # block is given it receives both values and its result is returned.
      def min2(x, y, &block)
        return y if x.nil?
        return x if y.nil?
        if block_given?
          yield(x, y)
        else
          (x <=> y) < 0 ? x : y
        end
      end
    end # InstanceMethods
  end # GeneralStatistics
end # Sirb

data/lib/sirb/inter_enumerable_statistics.rb ADDED Viewed

@@ -0,0 +1,139 @@
module Sirb #:nodoc:
  # These are general methods for comparing enumerables.  This list seems
  # to grow quite a bit as I build up other libraries, so expect this to
  # grow.
  #
  # Several methods call min and max with explicit arguments; those are
  # expected to come from the host (they are not defined in this module).
  module InterEnumerableStatistics #:nodoc:
    # Multiplies the values:
    # >> product(1,2,3)
    # => 6.0
    def product(*x)
      x.inject(1.0) { |acc, a| acc * a }
    end

    # Walks x and y in step, up to the shorter of the two, and returns the
    # block's value for each pair.  Without a block the pairs themselves are
    # returned as two-element arrays.
    # >> to_pairs([1,2,3], [4,5]) {|a, b| a + b}
    # => [5, 7]
    def to_pairs(x, y, &block)
      n = min(x.size, y.size)
      (0...n).map { |i| block ? block.call(x[i], y[i]) : [x[i], y[i]] }
    end

    # Finds the tanimoto coefficient: the intersection set size / union set
    # size.  This is used to find the distance between two vectors.
    # >> cor([1,2,3], [2,3,5])
    # => 0.981980506061966
    # >> tanimoto_pairs([1,2,3], [2,3,5])
    # => 0.5
    def tanimoto_pairs(x,y)
      intersect(x,y).size / union(x,y).size.to_f
    end

    # Sometimes it just helps to have things spelled out.  These are all
    # part of the Array class.
    # All of the left and right hand sides, excluding duplicates.
    # "The union of x and y"
    def union(x,y)
      x | y
    end

    # What's shared on the left and right hand sides
    # "The intersection of x and y"
    def intersect(x,y)
      x & y
    end

    # Everything on the left hand side except what's shared on the right
    # hand side.
    # "The relative complement of y in x"
    def compliment(x,y)
      x - y
    end

    # Everything but what's shared
    def exclusive_not(x,y)
      (x | y) - (x & y)
    end

    # Finds the cartesian product, excluding duplicate items and self-
    # referential pairs.  Yields the block value if given.
    def cartesian_product(x,y, &block)
      xs, ys = x.uniq, y.uniq
      # Both sides are already unique, so every pair built here is unique.
      pairs = xs.inject([]) do |acc, a|
        acc + ys.reject { |b| a == b }.map { |b| [a, b] }
      end
      return pairs unless block_given?
      pairs.map { |a, b| yield a, b }
    end
    alias :cp :cartesian_product
    alias :permutations :cartesian_product

    # Sigma of pairs.  Returns a single float, or whatever object is sent in.
    # Example: sigma_pairs([1,2,3], [4,5,6], 0) {|x, y| x + y}
    # returns 21 instead of 21.0.
    def sigma_pairs(x, y, z=0.0, &block)
      to_pairs(x,y,&block).inject(z) { |total, i| total + i }
    end

    # Takes any number of enumerables and returns the range for all of them:
    # the position-wise max minus the position-wise min.
    def range_for(*args)
      range_pairs(p_max(*args), p_min(*args))
    end

    # Returns the range of each position between the two pairs.
    def range_pairs(x,y)
      to_pairs(x,y) {|a,b| max(a,b) - min(a,b)}
    end

    # Returns the Euclidean distance between two enumerables, treated as
    # points.
    def euclidian_distance(x,y)
      Math.sqrt(sigma_pairs(x,y) {|a, b| (a - b) ** 2})
    end

    # Returns a random integer in the range for any number of lists.  This
    # is a way to get a random vector that is tenable based on the sample
    # data.  For example, given two sets of numbers:
    #
    # a = [1,2,3]; b = [8,8,8]
    #
    # rand_in_range will return a value >= 1 and <= 8 in the first
    # place, >= 2 and <= 8 in the second place, and >= 3 and <= 8 in the
    # last place.
    # Works for integers.  Rethink this for floats.  May consider setting up
    # FixedRange for floats.
    def rand_in_range(*args)
      spans = range_for(*args)
      lows = p_min(*args)
      # rand(span + 1) is 0..span inclusive, so the upper bound is reachable.
      (0...spans.size).map { |i| rand(spans[i] + 1) + lows[i] }
    end

    # Finds the correlation between two enumerables.
    # Example: cor([1,2,3], [2,3,5])
    # return 0.981980506061966
    #
    # Relies on x and y responding to sum and std.
    # NOTE(review): n is the shorter length, but sum and std are taken over
    # each whole enumerable -- confirm inputs are always the same length.
    def correlation(x, y)
      n = min(x.size, y.size)
      ( sigma_pairs(x,y) { |a,b| a * b } - (( x.sum * y.sum ) / n.to_f)) / ((n - 1 ) * x.std * y.std)
    end
    alias :cor :correlation

    # Returns the max of two or more enumerables.
    # >> p_max([1,2,3], [4,5,6], [0,2,9])
    # => [4, 5, 9]
    def p_max(*enums)
      n = min(*enums.map{ |x| x.size} )
      (0...n).map { |i| max(*enums.map{ |x| x[i] }) }
    end

    # Returns the min of two or more enumerables.
    # >> p_min([1,2,3], [4,5,6], [0,2,9])
    # => [0, 2, 3]
    def p_min(*enums)
      n = min(*enums.map{ |x| x.size} )
      (0...n).map { |i| min(*enums.map{ |x| x[i] }) }
    end
  end
end