RubyGems - rust - Versions diffs - 0.3 → 0.9 - Mend

rust 0.3 → 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/bin/ruby-rust +3 -0
data/lib/{rust-csv.rb → rust/core/csv.rb} +14 -4
data/lib/rust/core/rust.rb +157 -0
data/lib/rust/core/types/all.rb +4 -0
data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +183 -245
data/lib/rust/core/types/datatype.rb +161 -0
data/lib/rust/core/types/factor.rb +131 -0
data/lib/rust/core/types/language.rb +166 -0
data/lib/rust/core/types/list.rb +81 -0
data/lib/rust/core/types/matrix.rb +132 -0
data/lib/rust/core/types/s4class.rb +59 -0
data/lib/rust/core/types/utils.rb +109 -0
data/lib/rust/core.rb +7 -0
data/lib/rust/models/all.rb +4 -0
data/lib/rust/models/anova.rb +60 -0
data/lib/rust/models/regression.rb +205 -0
data/lib/rust/plots/all.rb +4 -0
data/lib/rust/plots/basic-plots.rb +111 -0
data/lib/{rust-plots.rb → rust/plots/core.rb} +64 -129
data/lib/rust/plots/distribution-plots.rb +62 -0
data/lib/rust/stats/all.rb +4 -0
data/lib/{rust-basics.rb → rust/stats/correlation.rb} +11 -5
data/lib/rust/stats/descriptive.rb +128 -0
data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +23 -21
data/lib/rust/stats/probabilities.rb +248 -0
data/lib/rust/stats/tests.rb +292 -0
data/lib/rust.rb +4 -8
metadata +31 -12
data/lib/rust-calls.rb +0 -69
data/lib/rust-descriptive.rb +0 -59
data/lib/rust-tests.rb +0 -165

data/lib/rust/plots/distribution-plots.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require_relative 'core'
module Rust::Plots
    # Common behaviour of the plots that show how one or more data series
    # are distributed: it only collects the series to draw.
    class DistributionPlot < BasePlot
        def initialize
            super()
            @series = []
        end

        # Registers +data+ as a new series together with its +options+.
        # Returns the plot itself, so that calls can be chained.
        def series(data, **options)
            @series.push([data, options])
            self
        end
    end

    # Draws the density of every registered series through the R function
    # "plot" (first series) or "lines" (all the following series).
    class DensityPlot < DistributionPlot
        protected

        def _show
            @series.each_with_index do |(data, options), index|
                Rust["plotter.series"] = data

                command = index.zero? ? "plot" : "lines"

                plot_call = Rust::Function.new(command)
                plot_call.options = self._augmented_options({"col" => options[:color]})
                plot_call.arguments << Rust::Variable.new("density(plotter.series)")
                plot_call.call
            end

            self
        end
    end

    # Draws all the registered series with a single call to the R function
    # "boxplot". A series is labelled with its :name option or, when that is
    # missing, with its 1-based position.
    class BoxPlot < DistributionPlot
        protected

        def _show
            boxplot = Rust::Function.new("boxplot")

            labels = @series.each_with_index.map do |(values, options), index|
                variable = "plotter.series#{index}"
                Rust[variable] = values
                boxplot.arguments << Rust::Variable.new(variable)

                options[:name] || (index + 1).to_s
            end

            boxplot.options = self._augmented_options({'names' => labels})
            boxplot.call

            self
        end
    end
end

data/lib/rust/stats/all.rb ADDED Viewed

@@ -0,0 +1,4 @@
# Requires every other Ruby file found in the directory of this file.
current_file = File.expand_path(__FILE__)
sibling_files = Dir.glob(File.dirname(current_file) + "/*.rb")

sibling_files.reject { |path| path == current_file }.each do |path|
    require_relative path
end

data/lib/{rust-basics.rb → rust/stats/correlation.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
-require_relative 'rust-core'
+require_relative '../core'
-module Rust:: Correlation
+module Rust::Correlation
     class Pearson
         def self.test(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
@@ -10,13 +10,14 @@ module Rust:: Correlation
                 Rust['correlation.a'] = d1
                 Rust['correlation.b'] = d2
-                Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
+                _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')", true)
                 result = Result.new
                 result.name             = "Pearson's product-moment correlation"
                 result.statistics['t']  = Rust._pull('correlation.result$statistic')
                 result.pvalue           = Rust._pull('correlation.result$p.value')
                 result.correlation      = Rust._pull('correlation.result$estimate')
+                result.exact            = !warnings.include?("Cannot compute exact p-value with ties")
                 return result
             end
@@ -36,13 +37,14 @@ module Rust:: Correlation
                 Rust['correlation.a'] = d1
                 Rust['correlation.b'] = d2
-                Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")
+                _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')", true)
                 result = Result.new
                 result.name             = "Spearman's rank correlation rho"
                 result.statistics['S']  = Rust._pull('correlation.result$statistic')
                 result.pvalue           = Rust._pull('correlation.result$p.value')
                 result.correlation      = Rust._pull('correlation.result$estimate')
+                result.exact            = !warnings.include?("Cannot compute exact p-value with ties")
                 return result
             end
@@ -62,13 +64,14 @@ module Rust:: Correlation
                 Rust['correlation.a'] = d1
                 Rust['correlation.b'] = d2
-                Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
+                _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='k')", true)
                 result = Result.new
                 result.name             = "Kendall's rank correlation tau"
                 result.statistics['T']  = Rust._pull('correlation.result$statistic')
                 result.pvalue           = Rust._pull('correlation.result$p.value')
                 result.correlation      = Rust._pull('correlation.result$estimate')
+                result.exact            = !warnings.include?("Cannot compute exact p-value with ties")
                 return result
             end
@@ -84,11 +87,13 @@ module Rust:: Correlation
         attr_accessor   :statistics
         attr_accessor   :pvalue
         attr_accessor   :correlation
+        attr_accessor   :exact
         alias :estimate :correlation
         def initialize
             @statistics = {}
+            @exact = true
         end
         def [](name)
@@ -101,6 +106,7 @@ module Rust:: Correlation
         def to_s
             return "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " +
+                    (!@exact ? "P-value is not exact. " : "") +
                     "#{ statistics.map { |k, v| k.to_s + " -> " + v.to_s  }.join(", ") }."
         end
     end

data/lib/rust/stats/descriptive.rb ADDED Viewed

@@ -0,0 +1,128 @@
+require_relative '../core'
module Rust
    # Descriptive statistics computed in plain Ruby on arrays of numerics.
    #
    # Unless stated otherwise, a method raises TypeError ("Expecting Array of
    # numerics") when its data argument is not an Array made only of Numeric
    # elements.
    module Descriptive
        class << self
            # Arithmetic mean of +data+ (NaN when +data+ is empty).
            def mean(data)
                validate_numeric_array(data)

                data.sum.to_f / data.size
            end

            # Sample standard deviation: the square root of #variance.
            def standard_deviation(data)
                validate_numeric_array(data)

                Math.sqrt(variance(data))
            end
            alias sd     standard_deviation
            alias stddev standard_deviation

            # Sample variance (n - 1 denominator). NaN with fewer than two values.
            def variance(data)
                validate_numeric_array(data)
                return Float::NAN if data.size < 2

                average = mean(data)
                data.sum { |value| (value - average) ** 2 }.to_f / (data.size - 1)
            end
            alias var variance

            # Median of +data+: the middle value of the sorted data or, for an
            # even number of values, the mean of the two middle ones. NaN when
            # +data+ is empty.
            def median(data)
                validate_numeric_array(data)
                return Float::NAN if data.empty?

                sorted = data.sort
                middle = sorted.size / 2
                return sorted[middle] if sorted.size.odd?

                (sorted[middle - 1] + sorted[middle]) / 2.0
            end

            # Sum of all the values in +data+.
            def sum(data)
                validate_numeric_array(data)

                data.sum
            end

            # Quantiles of +data+ for the given +percentiles+ (numbers in [0, 1]).
            #
            # The position of a percentile p in the sorted data is (n - 1) * p;
            # when the position is not an integer the result is the linear
            # interpolation of the two neighbouring values.
            #
            # Returns a Hash mapping each percentile to its quantile.
            # Raises TypeError if +percentiles+ is not an Array of numerics and
            # RuntimeError if a percentile is outside [0, 1].
            def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
                validate_numeric_array(data)
                validate_numeric_array(percentiles)
                raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

                sorted = data.sort
                last_index = [sorted.size - 1, 0].max

                quantiles = percentiles.map do |percentile|
                    position = last_index * percentile
                    low  = sorted[position.floor]
                    high = sorted[position.ceil]

                    if position > position.floor && high != low
                        weight = position - position.floor
                        (1 - weight) * low + weight * high
                    else
                        # Exact position (or equal neighbours): no interpolation needed
                        low
                    end
                end

                percentiles.zip(quantiles).to_h
            end

            # Outliers of +data+ with respect to its own distribution.
            # See #outliers_according_to for the meaning of +k+ and +opts+.
            def outliers(data, k=1.5, **opts)
                outliers_according_to(data, data, k, **opts)
            end

            # Values of +data+ lying more than k * IQR below the first quartile
            # or above the third quartile of +data_distribution+.
            #
            # The :side option restricts the result to one tail:
            #   :positive, :pos, :p, :+  only the values above the upper fence
            #   :negative, :neg, :n, :-  only the values below the lower fence
            # Without :side (or with any other value) both tails are returned,
            # negative outliers first.
            #
            # Only +data_distribution+ is type-checked (through #quantile).
            def outliers_according_to(data, data_distribution, k=1.5, **opts)
                quartiles = quantile(data_distribution, [0.25, 0.75])
                q1 = quartiles[0.25]
                q3 = quartiles[0.75]
                iqr = q3 - q1

                positive_outliers = data.select { |d| d > q3 + iqr * k }
                negative_outliers = data.select { |d| d < q1 - iqr * k }

                side = opts[:side] ? opts[:side].to_sym : nil
                case side
                when :positive, :pos, :p, :+
                    positive_outliers
                when :negative, :neg, :n, :-
                    negative_outliers
                else
                    negative_outliers + positive_outliers
                end
            end

            private

            # Raises TypeError unless +data+ is an Array containing only Numeric values.
            def validate_numeric_array(data)
                raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
            end
        end
    end
end
# R-like shortcuts: each method forwards its arguments to the method of
# Rust::Descriptive shown in its body.
module Rust::RBindings
    # Forwards to Rust::Descriptive.mean
    def mean(series)
        return Rust::Descriptive.mean(series)
    end

    # Forwards to Rust::Descriptive.median
    def median(series)
        return Rust::Descriptive.median(series)
    end

    # Forwards to Rust::Descriptive.variance
    def var(series)
        return Rust::Descriptive.variance(series)
    end

    # Forwards to Rust::Descriptive.standard_deviation
    def sd(series)
        return Rust::Descriptive.standard_deviation(series)
    end

    # Forwards to Rust::Descriptive.quantile
    def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
        return Rust::Descriptive.quantile(series, percentiles)
    end
end

data/lib/{rust-effsize.rb → rust/stats/effsize.rb} RENAMED Viewed

@@ -1,8 +1,6 @@
-require 'code-assertions'
+require_relative '../core'
-Rust.exclusive do
-    Rust._eval("library(effsize)")
-end
+Rust.prerequisite('effsize')
 module Rust::EffectSize
     class Result
@@ -16,14 +14,16 @@ module Rust::EffectSize
             return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
         end
     end
-end
-module Rust::EffectSize::CliffDelta
-    class << self
-        def compute(d1, d2)
+    class CliffDelta
+        def self.compute(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -32,23 +32,25 @@ module Rust::EffectSize::CliffDelta
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cliff's delta"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                         rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))             rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                       rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym   rescue nil
                 return result
             end
         end
     end
-end
-module Rust::EffectSize::CohenD
-    class << self
-        def compute(d1, d2)
+    class CohenD
+        def self.compute(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -57,10 +59,10 @@ module Rust::EffectSize::CohenD
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cohen's d"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                       rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))           rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                     rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
                 return result
             end

data/lib/rust/stats/probabilities.rb ADDED Viewed

@@ -0,0 +1,248 @@
+require_relative '../core'
class Numeric
    # Absolute difference between this number and +other+.
    # Raises TypeError when +other+ is not a Numeric.
    def distance(other)
        unless other.is_a?(Numeric)
            raise TypeError, "no implicit conversion of #{other.class} into Numeric"
        end

        difference = self - other
        difference.abs
    end
end
class Array
    # Sum of the element-wise absolute differences between this array and
    # +other+. Every element is converted with #to_i before the comparison;
    # positions missing from the shorter array count as 0 (nil.to_i).
    #
    # Raises TypeError when +other+ is not an Array.
    def distance(other)
        raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array

        longest, shortest = size > other.size ? [self, other] : [other, self]

        longest.each_with_index.sum do |element, index|
            (element.to_i - shortest[index].to_i).abs
        end
    end
end
class String
    # Distance between this string and +other+, computed by calling
    # Array#distance on the byte sequences of the two strings.
    # Raises TypeError when +other+ is not a String.
    def distance(other)
        unless other.is_a?(String)
            raise TypeError, "no implicit conversion of #{other.class} into String"
        end

        own_bytes = bytes
        own_bytes.distance(other.bytes)
    end
end
module Rust
    # A set of outcomes with their probabilities, stored as a Hash
    # outcome => probability. Unlike RandomVariable, the probabilities of a
    # slice are not required to sum to 1.
    class RandomVariableSlice
        # Raises TypeError ("Expected Hash") if +values+ is not a Hash.
        def initialize(values)
            raise TypeError, "Expected Hash" unless values.is_a?(Hash)

            @values = values
        end

        # Without argument: total probability of the slice. With an outcome
        # +v+: the probability stored for that outcome (nil if it is absent).
        def probability(v=nil)
            return @values.values.sum unless v

            @values[v]
        end

        # Outcome with the highest probability; nil when the slice is empty.
        def ml
            @values.max_by { |_outcome, probability| probability }&.first
        end

        # Sum of outcome * probability over all the outcomes of the slice.
        def expected
            @values.sum { |outcome, probability| outcome * probability }
        end

        # The comparison operators return the slice made of the outcomes that
        # satisfy the comparison with +n+ (note that == does NOT return a boolean).
        def >(n)
            so_that { |k| k > n }
        end

        def >=(n)
            so_that { |k| k >= n }
        end

        def <(n)
            so_that { |k| k < n }
        end

        def <=(n)
            so_that { |k| k <= n }
        end

        def ==(n)
            so_that { |k| k == n }
        end

        # Slice containing only the outcomes for which the block returns true.
        def so_that
            RandomVariableSlice.new(@values.select { |k, _v| yield(k) })
        end

        # Slice containing only the outcomes k such that k.between?(a, b).
        def between(a, b)
            RandomVariableSlice.new(@values.select { |k, _v| k.between? a, b })
        end
    end

    # A discrete random variable: a slice whose probabilities sum to 1
    # (within EPSILON).
    class RandomVariable < RandomVariableSlice
        # Tolerance accepted on the total probability; also the threshold at or
        # below which #approx! merges an outcome into its nearest neighbour.
        EPSILON = 1e-7

        attr_reader    :values

        # +values+ maps each outcome to its probability. Unless +exact+ is
        # true, negligible outcomes are merged right away (see #approx!).
        #
        # Raises TypeError if +values+ is not a Hash and RuntimeError if a
        # probability is outside [0, 1] or the probabilities do not sum to 1.
        def initialize(values = {0 => 1.0}, exact = false)
            super(values)
            @exact = exact

            raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
            raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)"        unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

            approx!
        end

        # Probability of the outcome +v+ (0.0 if the outcome is not present).
        def probability(v)
            @values[v].to_f
        end

        # Random variable whose outcomes are the sums of an outcome of this
        # variable and an outcome of +other+; the probability of each pair is
        # the product of the two probabilities.
        def +(other)
            combine(other) { |mine, theirs| mine + theirs }
        end

        # With an Integer: same as #rep. With a RandomVariable: same as #mul.
        # Raises RuntimeError for any other argument.
        def *(times)
            if times.is_a? Integer
                rep(times)
            elsif times.is_a? RandomVariable
                mul(times)
            else
                raise "The argument must be an Integer or a RandomVariable"
            end
        end

        # Like #+, but the outcomes of the two variables are multiplied.
        def mul(other)
            combine(other) { |mine, theirs| mine * theirs }
        end

        # Adds this variable to itself until +times+ copies have been summed.
        # For +times+ lower than 2 no sum is performed and self is returned.
        def rep(times)
            rv = self
            (times-1).times do
                rv += self
            end

            rv
        end

        # Marks the variable as exact: later calls to #approx! do nothing.
        def exact!
            @exact = true
        end

        # Removes every outcome whose probability is at most EPSILON and adds
        # its probability to the remaining outcome that is nearest according to
        # the #distance method of the outcomes. Does nothing on exact variables.
        def approx!
            return if @exact

            negligible = @values.select { |_outcome, probability| probability <= EPSILON }.keys

            negligible.each do |outcome|
                probability = @values.delete(outcome)
                nearest = @values.keys.min_by { |k| k.distance(outcome) }
                @values[nearest] += probability
            end
        end

        # Draws an outcome at random, following the stored probabilities.
        def extract
            threshold = rand

            cumulative = 0
            @values.each do |key, prob|
                cumulative += prob
                return key if cumulative >= threshold
            end

            # The probabilities may sum to slightly less than 1 (see EPSILON),
            # so the drawn number can exceed the whole cumulative sum: in that
            # case the last outcome is returned.
            @values.keys.last
        end

        # Builds a random variable from +hash+ after assigning to +key+ the
        # probability needed to reach 1. NOTE: +hash+ itself is modified.
        def self.complete(hash, key=0)
            hash[key] = 1 - hash.values.sum
            RandomVariable.new(hash)
        end

        private

        # Builds the variable of all the pairs (own outcome, outcome of other):
        # the block computes the outcome of a pair, its probability is the
        # product of the two probabilities. Equal outcomes are accumulated.
        def combine(other)
            merged = {}

            @values.each do |my_key, my_value|
                other.values.each do |other_key, other_value|
                    key = yield(my_key, other_key)
                    merged[key] = merged[key].to_f + (my_value * other_value)
                end
            end

            RandomVariable.new(merged, @exact)
        end
    end

    # Random variable giving the same probability to each of the given values.
    class UniformRandomVariable < RandomVariable
        # +values+ is the list of the (equally probable) outcomes.
        def initialize(values, exact = false)
            super(values.map { |k| [k, 1.0 / values.size] }.to_h, exact)
        end
    end

    # Mixin providing a compact notation for probabilities and expected values.
    module Probabilities
        # Total probability of the slice +v+. Raises RuntimeError if +v+ is a
        # whole RandomVariable or is not a RandomVariableSlice at all.
        def P(v)
            if v.is_a? RandomVariableSlice
                raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable

                v.probability
            else
                raise "Cannot compute the probability of a #{v.class}"
            end
        end

        # Expected value of +v+. Raises RuntimeError if +v+ is not a
        # RandomVariableSlice.
        def E(v)
            if v.is_a? RandomVariableSlice
                v.expected
            else
                raise "Cannot compute the expected value of a #{v.class}"
            end
        end
    end

    # Ready-made variables. They are defined by reopening the class because
    # DICE and COIN need UniformRandomVariable, which is declared after it.
    class RandomVariable
        ENGLISH_ALPHABET = RandomVariable.new({
            "a" => 0.08167,
            "b" => 0.01492,
            "c" => 0.02782,
            "d" => 0.04253,
            "e" => 0.12703,
            "f" => 0.02228,
            "g" => 0.02015,
            "h" => 0.06094,
            "i" => 0.06966,
            "j" => 0.00153,
            "k" => 0.00772,
            "l" => 0.04025,
            "m" => 0.02406,
            "n" => 0.06749,
            "o" => 0.07507,
            "p" => 0.01929,
            "q" => 0.00095,
            "r" => 0.05987,
            "s" => 0.06327,
            "t" => 0.09056,
            "u" => 0.02758,
            "v" => 0.00978,
            "w" => 0.02360,
            "x" => 0.00150,
            "y" => 0.01974,
            "z" => 0.00074
        })

        DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])

        COIN = UniformRandomVariable.new(["h", "t"])
    end
end