RubyGems - rust - Versions diffs - 0.4 → 0.10 - Mend

rust 0.4 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/bin/ruby-rust +3 -0
data/lib/{rust-csv.rb → rust/core/csv.rb} +35 -4
data/lib/rust/core/rust.rb +221 -0
data/lib/rust/core/types/all.rb +4 -0
data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +324 -244
data/lib/rust/core/types/datatype.rb +195 -0
data/lib/rust/core/types/factor.rb +158 -0
data/lib/rust/core/types/language.rb +199 -0
data/lib/rust/core/types/list.rb +97 -0
data/lib/rust/core/types/matrix.rb +155 -0
data/lib/rust/core/types/s4class.rb +78 -0
data/lib/rust/core/types/utils.rb +122 -0
data/lib/rust/core.rb +7 -0
data/lib/rust/models/all.rb +4 -0
data/lib/rust/models/anova.rb +77 -0
data/lib/rust/models/regression.rb +258 -0
data/lib/rust/plots/all.rb +4 -0
data/lib/rust/plots/basic-plots.rb +143 -0
data/lib/{rust-plots.rb → rust/plots/core.rb} +98 -107
data/lib/rust/plots/distribution-plots.rb +75 -0
data/lib/rust/stats/all.rb +4 -0
data/lib/{rust-basics.rb → rust/stats/correlation.rb} +46 -3
data/lib/rust/stats/descriptive.rb +157 -0
data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +44 -21
data/lib/rust/stats/probabilities.rb +356 -0
data/lib/rust/stats/tests.rb +384 -0
data/lib/rust.rb +4 -8
metadata +31 -12
data/lib/rust-calls.rb +0 -69
data/lib/rust-descriptive.rb +0 -67
data/lib/rust-tests.rb +0 -165

data/lib/rust/stats/descriptive.rb ADDED Viewed

@@ -0,0 +1,157 @@
+require_relative '../core'
+##
+# Module containing utilities for descriptive statistics.
+module Rust::Descriptive
+    class << self
+        ##
+        # Computes the arithmetic mean of the given +data+.
+        def mean(data)
+            raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
+            return data.sum.to_f / data.size
+        end
+        ##
+        # Computes the standard deviation of the given +data+.
+        def standard_deviation(data)
+            raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
+            return Math.sqrt(variance(data))
+        end
+        alias :sd     :standard_deviation
+        alias :stddev :standard_deviation
+        ##
+        # Computes the variance of the given +data+.
+        def variance(data)
+            raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
+            return Float::NAN if data.size < 2
+            mean = mean(data)
+            return data.map { |v| (v - mean) ** 2 }.sum.to_f / (data.size - 1)
+        end
+        alias :var     :variance
+        ##
+        # Computes the median of the given +data+.
+        def median(data)
+            raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
+            sorted = data.sort
+            if data.size == 0
+                return Float::NAN
+            elsif data.size.odd?
+                return sorted[data.size / 2]
+            else
+                i = (data.size / 2)
+                return (sorted[i - 1] + sorted[i]) / 2.0
+            end
+        end
+        ##
+        # Sums the given +data+.
+        def sum(data)
+            raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
+            return data.sum
+        end
+        ##
+        # Returns the quantiles of the given +data+, given the +percentiles+ (optional).
+        def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
+            raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
+            raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
+            raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
+            n = data.size
+            quantiles = percentiles.size
+            percentiles = percentiles.map { |x| x > 1.0 ? 1.0 : (x < 0.0 ? 0.0 : x) }
+            rough_indices = percentiles.map { |x| 1 + [n - 1, 0].max * x - 1 }
+            floor_indices = rough_indices.map { |i| i.floor }
+            ceil_indices = rough_indices.map { |i| i.ceil }
+            data = data.sort
+            result = floor_indices.map { |i| data[i] }
+            result_ceil = ceil_indices.map { |i| data[i] }
+            indices_to_fix = (0...quantiles).select { |i| rough_indices[i] > floor_indices[i] && result_ceil[i] != result[i] }
+            index_approximation_errors = indices_to_fix.map { |i| rough_indices[i] - floor_indices[i] }
+            reduced_index_approximation_errors = index_approximation_errors.map { |i| (1 - i) }
+            hi_indices = indices_to_fix.map { |i| ceil_indices[i] }
+            data_hi_indices = hi_indices.map { |i| data[i] }
+            j = 0
+            indices_to_fix.each do |i|
+                result[i] = reduced_index_approximation_errors[j] * result[i] + index_approximation_errors[j] * data_hi_indices[j]
+                j += 1
+            end
+            return percentiles.zip(result).to_h
+        end
+        ##
+        # Returns the outliers in +data+ using Tukey's fences, with a given +k+.
+        def outliers(data, k=1.5, **opts)
+            outliers_according_to(data, data, k, **opts)
+        end
+        ##
+        # Returns the outliers in +data+ using Tukey's fences, with a given +k+, with respect to different data
+        # distribution (+data_distribution+).
+        def outliers_according_to(data, data_distribution, k=1.5, **opts)
+            quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
+            q1 = quantiles[0.25]
+            q3 = quantiles[0.75]
+            iqr = q3 - q1
+            positive_outliers = data.select { |d| d > q3 + iqr * k }
+            negative_outliers = data.select { |d| d < q1 - iqr * k }
+            outliers = negative_outliers + positive_outliers
+            if opts[:side]
+                case opts[:side].to_sym
+                when :positive, :neg, :n, :+
+                    outliers = positive_outliers
+                when :negative, :pos, :p, :-
+                    outliers = negative_outliers
+                end
+            end
+            return outliers
+        end
+    end
+end
+##
+# Shorthand wrappers that forward to the corresponding methods of Rust::Descriptive.
+module Rust::RBindings
+    ##
+    # Arithmetic mean of +series+; delegates to Rust::Descriptive.mean.
+    def mean(series)
+        Rust::Descriptive.mean(series)
+    end
+    ##
+    # Median of +series+; delegates to Rust::Descriptive.median.
+    def median(series)
+        Rust::Descriptive.median(series)
+    end
+    ##
+    # Variance of +series+; delegates to Rust::Descriptive.variance.
+    def var(series)
+        Rust::Descriptive.variance(series)
+    end
+    ##
+    # Standard deviation of +series+; delegates to Rust::Descriptive.standard_deviation.
+    def sd(series)
+        Rust::Descriptive.standard_deviation(series)
+    end
+    ##
+    # Quantiles of +series+ for the given +percentiles+ (by default 0, 0.25, 0.5, 0.75 and 1); delegates to
+    # Rust::Descriptive.quantile.
+    def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
+        Rust::Descriptive.quantile(series, percentiles)
+    end
+end

data/lib/{rust-effsize.rb → rust/stats/effsize.rb} RENAMED Viewed

@@ -1,10 +1,15 @@
-require 'code-assertions'
+require_relative '../core'
-Rust.exclusive do
-    Rust._eval("library(effsize)")
-end
+Rust.prerequisite('effsize')
+##
+# Module containing utilities for computing effect size statistics.
 module Rust::EffectSize
+    ##
+    # Effect size results.
     class Result
         attr_accessor   :name
         attr_accessor   :estimate
@@ -16,14 +21,23 @@ module Rust::EffectSize
             return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
         end
     end
-end
-module Rust::EffectSize::CliffDelta
-    class << self
-        def compute(d1, d2)
+    ##
+    # Cliff delta effect size statistics.
+    class CliffDelta
+        ##
+        # Computes and returns the effect size for +d1+ and +d2+.
+        def self.compute(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -32,23 +46,32 @@ module Rust::EffectSize::CliffDelta
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cliff's delta"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                         rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))             rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                       rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym   rescue nil
                 return result
             end
         end
     end
-end
-module Rust::EffectSize::CohenD
-    class << self
-        def compute(d1, d2)
+    ##
+    # Cohen D effect size statistics.
+    class CohenD
+        ##
+        # Computes and returns the effect size for +d1+ and +d2+.
+        def self.compute(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -57,10 +80,10 @@ module Rust::EffectSize::CohenD
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cohen's d"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                       rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))           rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                     rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
                 return result
             end

data/lib/rust/stats/probabilities.rb ADDED Viewed

@@ -0,0 +1,356 @@
+require_relative '../core'
+class Numeric
+    ##
+    # Computes the distance between this and another number.
+    def _rust_prob_distance(other)
+        raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
+        return (self - other).abs
+    end
+end
+class Array
+    ##
+    # Computes the distance between this and another array.
+    def _rust_prob_distance(other)
+        raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
+        longest, shortest = self.size > other.size ? [self, other] : [other, self]
+        distance = 0
+        for i in 0...longest.size
+            distance += longest[i].to_i._rust_prob_distance(shortest[i].to_i)
+        end
+        return distance
+    end
+end
+class String
+    ##
+    # Computes the distance between this and another string.
+    def _rust_prob_distance(other)
+        raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
+        return self.bytes._rust_prob_distance other.bytes
+    end
+end
+module Rust
+    ##
+    # Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
+    class RandomVariableSlice
+        ##
+        # Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
+        def initialize(values)
+            raise TypeError, "Expected Hash" unless values.is_a?(Hash)
+            @values = values
+        end
+        ##
+        # Gets the probability of a value +v+. If +v+ is not specified, returns the cumulative probability of the whole
+        # slice.
+        def probability(v=nil)
+            unless v
+                return @values.values.sum
+            else
+                return @values[v]
+            end
+        end
+        ##
+        # Returns the value with the maximum probability.
+        def ml
+            @values.max_by { |k, v| v }[0]
+        end
+        ##
+        # Returns the expected value for this slice.
+        def expected
+            @values.map { |k, v| k*v }.sum
+        end
+        ##
+        # Returns a slice with the values that are greater than +n+.
+        def >(n)
+            self.so_that { |k| k > n }
+        end
+        ##
+        # Returns a slice with the values that are greater than or equal to +n+.
+        def >=(n)
+            self.so_that { |k| k >= n }
+        end
+        ##
+        # Returns a slice with the values that are lower than +n+.
+        def <(n)
+            self.so_that { |k| k < n }
+        end
+        ##
+        # Returns a slice with the values that are lower than or equal to +n+.
+        def <=(n)
+            self.so_that { |k| k <= n }
+        end
+        ##
+        # Returns a slice with the value +n+.
+        def ==(n)
+            self.so_that { |k| k == n }
+        end
+        ##
+        # Returns a slice with the values between +a+ and +b+.
+        def between(a, b)
+            self.so_that { |k| k.between(a, b) }
+        end
+        ##
+        # Returns a slice with the values for which the given block returns true.
+        def so_that
+            RandomVariableSlice.new(@values.select { |k, v| yield(k) })
+        end
+    end
+    ##
+    # Represents a random variable. The cumulative probability of the values must equal 1.
+    class RandomVariable < RandomVariableSlice
+        EPSILON = 1e-7
+        attr_reader    :values
+        ##
+        # Creates a new random variable. +values+ is a hash of values associated with their probabilities.
+        # +exact+ indicates whether this variable, when combined with others, should force to keep all the values, even
+        # the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
+        # removed for efficiency reasons.
+        def initialize(values = {0 => 1.0}, exact = false)
+            @values = values
+            @exact = exact
+            raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
+            raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)"        unless @values.values.sum.between? 1-EPSILON, 1+EPSILON
+            approx!
+        end
+        ##
+        # Returns the probability of value +v+.
+        def probability(v)
+            return @values[v].to_f
+        end
+        ##
+        # Returns a new random variable which represents the sum of this and the +other+ random variable.
+        def +(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    sum_key = my_key + other_key
+                    new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
+                end
+            end
+            return RandomVariable.new(new_hash, @exact)
+        end
+        ##
+        # Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
+        def *(arg)
+            if arg.is_a? Integer
+                return rep(arg)
+            elsif arg.is_a? RandomVariable
+                return mul(arg)
+            else
+                raise "The argument must be an Integer or a RandomVariable"
+            end
+        end
+        ##
+        # Returns a new random variable which represents the product of this and the +other+ random variable.
+        def mul(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    mul_key = my_key * other_key
+                    new_hash[mul_key] = new_hash[mul_key].to_f + (my_value * other_value)
+                end
+            end
+            return RandomVariable.new(new_hash, @exact)
+        end
+        ##
+        # Returns a new random variable which represents the sum of this random variable with itself +n+ times.
+        def rep(times)
+            rv = self
+            (times-1).times do
+                rv += self
+            end
+            return rv
+        end
+        ##
+        # Makes sure that the operations yield all the values, even the most unlikely ones.
+        def exact!
+            @exact = true
+        end
+        ##
+        # If this variable is not exact, the values with probability lower than EPSLION are removed.
+        def approx!
+            return if @exact
+            to_delete = []
+            @values.each do |v, probability|
+                to_delete.push v if probability <= EPSILON
+            end
+            to_delete.each do |v|
+                probability = @values.delete v
+                nearest = @values.keys.min_by { |k| k._rust_prob_distance v }
+                @values[nearest] += probability
+            end
+        end
+        ##
+        # Returns a random value, according to the data distribution.
+        def extract
+            v = rand
+            cumulative = 0
+            @values.sort_by { |k, v| k }.each do |key, prob|
+                cumulative += prob
+                return key if cumulative >= v
+            end
+        end
+        ##
+        # Creates a random variable by partially specifying the values through +hash+. The remaining probability is
+        # attributed to +key+ (0, by default).
+        def self.complete(hash, key=0)
+            hash[key] = 1 - hash.values.sum
+            return RandomVariable.new(hash)
+        end
+    end
+    ##
+    # Represents a uniform random variable.
+    class UniformRandomVariable < RandomVariable
+        ##
+        # Creates random variables for which all the +values+ have the same probability (1 / values.size).
+        def initialize(values, exact = false)
+            super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
+        end
+    end
+    ##
+    # Module that contains utilities for handling random variables.
+    module Probabilities
+        ##
+        # Computes the probability of the random variable +v+.
+        def P(v)
+            if v.is_a? RandomVariableSlice
+                raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
+                return v.probability
+            else
+                raise "Cannot compute the expected value of a #{v.class}"
+            end
+        end
+        ##
+        # Computes the expected value of the random variable +v+.
+        def E(v)
+            if v.is_a? RandomVariableSlice
+                return v.expected
+            else
+                raise "Cannot compute the expected value of a #{v.class}"
+            end
+        end
+    end
+    ##
+    # Module containing examples of commonly-used random variables.
+    module RandomVariableExamples
+        # Random variable over the lowercase letters "a".."z"; the probabilities add up to 1.
+        # NOTE(review): the figures look like relative letter frequencies in English text - confirm the source.
+        ENGLISH_ALPHABET = RandomVariable.new({
+            "a" => 0.08167,
+            "b" => 0.01492,
+            "c" => 0.02782,
+            "d" => 0.04253,
+            "e" => 0.12703,
+            "f" => 0.02228,
+            "g" => 0.02015,
+            "h" => 0.06094,
+            "i" => 0.06966,
+            "j" => 0.00153,
+            "k" => 0.00772,
+            "l" => 0.04025,
+            "m" => 0.02406,
+            "n" => 0.06749,
+            "o" => 0.07507,
+            "p" => 0.01929,
+            "q" => 0.00095,
+            "r" => 0.05987,
+            "s" => 0.06327,
+            "t" => 0.09056,
+            "u" => 0.02758,
+            "v" => 0.00978,
+            "w" => 0.02360,
+            "x" => 0.00150,
+            "y" => 0.01974,
+            "z" => 0.00074
+        })
+        # Uniform random variable over the integers 1 to 6 (a six-sided die).
+        DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
+        # Uniform random variable over the strings "h" and "t" (presumably heads and tails).
+        COIN = UniformRandomVariable.new(["h", "t"])
+    end
+end