RubyGems - rust - Versions diffs - 0.9 → 0.10 - Mend

rust 0.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

checksums.yaml +4 -4
data/bin/ruby-rust +1 -1
data/lib/rust/core/csv.rb +21 -0
data/lib/rust/core/rust.rb +65 -1
data/lib/rust/core/types/dataframe.rb +146 -0
data/lib/rust/core/types/datatype.rb +34 -0
data/lib/rust/core/types/factor.rb +27 -0
data/lib/rust/core/types/language.rb +44 -11
data/lib/rust/core/types/list.rb +16 -0
data/lib/rust/core/types/matrix.rb +29 -6
data/lib/rust/core/types/s4class.rb +19 -0
data/lib/rust/core/types/utils.rb +14 -1
data/lib/rust/models/anova.rb +17 -0
data/lib/rust/models/regression.rb +54 -1
data/lib/rust/plots/basic-plots.rb +32 -0
data/lib/rust/plots/core.rb +90 -0
data/lib/rust/plots/distribution-plots.rb +13 -0
data/lib/rust/stats/correlation.rb +43 -0
data/lib/rust/stats/descriptive.rb +29 -0
data/lib/rust/stats/effsize.rb +21 -0
data/lib/rust/stats/probabilities.rb +141 -33
data/lib/rust/stats/tests.rb +97 -5
metadata +2 -2

data/lib/rust/stats/probabilities.rb CHANGED Viewed

@@ -1,7 +1,11 @@
 require_relative '../core'
 class Numeric
-    def distance(other)
+    ##
+    # Computes the distance between this and another number.
+    def _rust_prob_distance(other)
         raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
         return (self - other).abs
@@ -9,14 +13,18 @@ class Numeric
 end
 class Array
-    def distance(other)
+    ##
+    # Computes the distance between this and another array.
+    def _rust_prob_distance(other)
         raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
         longest, shortest = self.size > other.size ? [self, other] : [other, self]
         distance = 0
         for i in 0...longest.size
-            distance += longest[i].to_i.distance(shortest[i].to_i)
+            distance += longest[i].to_i._rust_prob_distance(shortest[i].to_i)
         end
         return distance
@@ -24,21 +32,37 @@ class Array
 end
 class String
-    def distance(other)
+    ##
+    # Computes the distance between this and another string.
+    def _rust_prob_distance(other)
         raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
-        return self.bytes.distance other.bytes
+        return self.bytes._rust_prob_distance other.bytes
     end
 end
 module Rust
+    ##
+    # Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
     class RandomVariableSlice
+        ##
+        # Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
         def initialize(values)
             raise TypeError, "Expected Hash" unless values.is_a?(Hash)
             @values = values
         end
+        ##
+        # Gets the probability of a value +v+. If +v+ is not specified, returns the cumulative probability of the whole
+        # slice.
         def probability(v=nil)
             unless v
                 return @values.values.sum
@@ -47,48 +71,84 @@ module Rust
             end
         end
+        ##
+        # Returns the value with the maximum probability.
         def ml
             @values.max_by { |k, v| v }[0]
         end
+        ##
+        # Returns the expected value for this slice.
         def expected
             @values.map { |k, v| k*v }.sum
         end
+        ##
+        # Returns a slice with the values that are greater than +n+.
         def >(n)
-            self.so_that { |k| k > n}
+            self.so_that { |k| k > n }
         end
+        ##
+        # Returns a slice with the values that are greater than or equal to +n+.
         def >=(n)
-            self.so_that { |k| k >= n}
+            self.so_that { |k| k >= n }
         end
+        ##
+        # Returns a slice with the values that are lower than +n+.
         def <(n)
-            self.so_that { |k| k < n}
+            self.so_that { |k| k < n }
         end
+        ##
+        # Returns a slice with the values that are lower than or equal to +n+.
         def <=(n)
-            self.so_that { |k| k <= n}
+            self.so_that { |k| k <= n }
         end
+        ##
+        # Returns a slice with the value +n+.
         def ==(n)
-            self.so_that { |k| k == n}
+            self.so_that { |k| k == n }
         end
-        def so_that
-            RandomVariableSlice.new(@values.select { |k, v| yield(k) })
-        end
+        ##
+        # Returns a slice with the values between +a+ and +b+.
         def between(a, b)
-            RandomVariableSlice.new(@values.select { |k, v| k.between? a, b })
+            self.so_that { |k| k.between(a, b) }
+        end
+        ##
+        # Returns a slice with the values for which the given block returns true.
+        def so_that
+            RandomVariableSlice.new(@values.select { |k, v| yield(k) })
         end
     end
+    ##
+    # Represents a random variable. The cumulative probability of the values must equal 1.
     class RandomVariable < RandomVariableSlice
         EPSILON = 1e-7
         attr_reader    :values
+        ##
+        # Creates a new random variable. +values+ is a hash of values associated with their probabilities.
+        # +exact+ indicates whether this variable, when combined with others, should keep all the values, even
+        # the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
+        # removed for efficiency reasons.
         def initialize(values = {0 => 1.0}, exact = false)
             @values = values
             @exact = exact
@@ -99,34 +159,46 @@ module Rust
             approx!
         end
+        ##
+        # Returns the probability of value +v+.
         def probability(v)
             return @values[v].to_f
         end
-        def +(other)
-        new_hash = {}
+        ##
+        # Returns a new random variable which represents the sum of this and the +other+ random variable.
-        @values.each do |my_key, my_value|
-            other.values.each do |other_key, other_value|
-                sum_key = my_key + other_key
-                new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
+        def +(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    sum_key = my_key + other_key
+                    new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
+                end
             end
+            return RandomVariable.new(new_hash, @exact)
         end
-        return RandomVariable.new(new_hash, @exact)
-        end
+        ##
+        # Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
-        def *(times)
-            if times.is_a? Integer
-                return rep(times)
-            elsif times.is_a? RandomVariable
-                return mul(times)
+        def *(arg)
+            if arg.is_a? Integer
+                return rep(arg)
+            elsif arg.is_a? RandomVariable
+                return mul(arg)
             else
                 raise "The argument must be an Integer or a RandomVariable"
             end
         end
+        ##
+        # Returns a new random variable which represents the product of this and the +other+ random variable.
         def mul(other)
             new_hash = {}
@@ -141,6 +213,9 @@ module Rust
             return RandomVariable.new(new_hash, @exact)
         end
+        ##
+        # Returns a new random variable which represents the sum of +times+ copies of this random variable.
         def rep(times)
             rv = self
             (times-1).times do
@@ -150,10 +225,16 @@ module Rust
             return rv
         end
+        ##
+        # Makes sure that the operations yield all the values, even the most unlikely ones.
         def exact!
             @exact = true
         end
+        ##
+        # If this variable is not exact, the values with probability lower than EPSILON are removed.
         def approx!
             return if @exact
@@ -164,35 +245,56 @@ module Rust
             to_delete.each do |v|
                 probability = @values.delete v
-                nearest = @values.keys.min_by { |k| k.distance v }
+                nearest = @values.keys.min_by { |k| k._rust_prob_distance v }
                 @values[nearest] += probability
             end
         end
+        ##
+        # Returns a random value, according to the data distribution.
         def extract
             v = rand
             cumulative = 0
-            @values.each do |key, prob|
+            @values.sort_by { |k, v| k }.each do |key, prob|
                 cumulative += prob
                 return key if cumulative >= v
             end
         end
+        ##
+        # Creates a random variable by partially specifying the values through +hash+. The remaining probability is
+        # attributed to +key+ (0, by default).
         def self.complete(hash, key=0)
             hash[key] = 1 - hash.values.sum
             return RandomVariable.new(hash)
         end
     end
+    ##
+    # Represents a uniform random variable.
     class UniformRandomVariable < RandomVariable
+        ##
+        # Creates random variables for which all the +values+ have the same probability (1 / values.size).
         def initialize(values, exact = false)
             super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
         end
     end
+    ##
+    # Module that contains utilities for handling random variables.
     module Probabilities
+        ##
+        # Computes the probability of the random variable +v+.
         def P(v)
             if v.is_a? RandomVariableSlice
                 raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
@@ -202,6 +304,9 @@ module Rust
             end
         end
+        ##
+        # Computes the expected value of the random variable +v+.
         def E(v)
             if v.is_a? RandomVariableSlice
                 return v.expected
@@ -211,7 +316,10 @@ module Rust
         end
     end
-    class RandomVariable
+    ##
+    # Module containing examples of commonly-used random variables.
+    module RandomVariableExamples
         ENGLISH_ALPHABET = RandomVariable.new({
             "a" => 0.08167,
             "b" => 0.01492,
@@ -240,9 +348,9 @@ module Rust
             "y" => 0.01974,
             "z" => 0.00074
         })
         DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
         COIN = UniformRandomVariable.new(["h", "t"])
     end
 end

data/lib/rust/stats/tests.rb CHANGED Viewed

@@ -1,6 +1,13 @@
 require_relative '../core'
+##
+# Module with utilities for running statistical hypothesis tests.
 module Rust::StatisticalTests
+    ##
+    # Represents the result of a statistical hypothesis test.
     class Result
         attr_accessor   :name
         attr_accessor   :statistics
@@ -21,16 +28,28 @@ module Rust::StatisticalTests
             @statistics[name.to_sym] = value
         end
+        ##
+        # If a hypothesis is available, returns the adjusted p-value with respect to all the other results obtained for
+        # the same hypothesis. Otherwise, simply returns the p-value for this result.
+        # The +method+ for adjustment can be optionally specified (Bonferroni, by default).
         def adjusted_pvalue(method='bonferroni')
-            return 1 unless @hypothesis
+            return @pvalue unless @hypothesis
             @hypothesis.adjusted_pvalue_for(self, method)
         end
+        ##
+        # Sets the underlying hypothesis for the test. The p-values of the results belonging to the same hypothesis can
+        # be adjusted through the adjusted_pvalue method.
         def hypothesis=(value)
             @hypothesis = value
             @hypothesis.add(self)
         end
+        ##
+        # Returns true if the results are significant according to the specified alpha.
         def significant
             pvalue < alpha
         end
@@ -43,7 +62,13 @@ module Rust::StatisticalTests
         end
     end
-    class Hypothesis
+    ##
+    # Represents a hypothesis behind one or more results.
+    class Hypothesis
+        ##
+        # Returns the hypothesis with the given +title_or_instance+ as title (if String).
         def self.find(title_or_instance)
             return Hypothesis.new(nil) if title_or_instance == nil
@@ -63,18 +88,28 @@ module Rust::StatisticalTests
         attr_reader :results
         attr_reader :title
+        ##
+        # Creates a new hypothesis with a given +title+.
         def initialize(title)
             @title = title
             @results = []
         end
+        ##
+        # Registers a +result+ for this hypothesis.
         def add(result)
             @results << result
         end
-        def adjusted_pvalue_for(instance, method)
+        ##
+        # Returns the adjusted p-value for a specific +result+ with respect to all the other results obtained under this
+        # same hypothesis, using the specified +method+.
+        def adjusted_pvalue_for(result, method)
             p_values = @results.map { |r| r.pvalue }
-            index = @results.index(instance)
+            index = @results.index(result)
             adjusted_pvalues = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)
@@ -85,9 +120,17 @@ module Rust::StatisticalTests
             end
         end
     end
+    ##
+    # Class with utilities for running the Wilcoxon Signed-Rank test and Rank-Sum test (a.k.a. Mann-Whitney U test).
     class Wilcoxon
-         def self.paired(d1, d2, alpha = 0.05, **options)
+        ##
+        # Runs a Wilcoxon Signed-Rank test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
+        # +options+ can be specified and directly passed to the R function.
+        def self.paired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
             raise "The two distributions have different size" if d1.size != d2.size
@@ -109,6 +152,10 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Runs a Wilcoxon Rank-Sum (a.k.a. Mann-Whitney U) test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
+        # +options+ can be specified and directly passed to the R function.
         def self.unpaired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -131,7 +178,15 @@ module Rust::StatisticalTests
         end
     end
+    ##
+    # Class with utilities for running the T test.
     class T
+        ##
+        # Runs a paired T test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
+        # +options+ can be specified and directly passed to the R function.
         def self.paired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -154,6 +209,10 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Runs an unpaired T test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
+        # +options+ can be specified and directly passed to the R function.
         def self.unpaired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -176,7 +235,15 @@ module Rust::StatisticalTests
         end
     end
+    ##
+    # Utilities for the Shapiro normality test.
     class Shapiro
+        ##
+        # Runs the Shapiro normality test for +vector+ and a given +alpha+ (0.05, by default).
+        # +options+ can be specified and directly passed to the R function.
         def self.compute(vector, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
             Rust.exclusive do
@@ -196,7 +263,14 @@ module Rust::StatisticalTests
         end
     end
+    ##
+    # Module with utilities for adjusting the p-values.
     module PValueAdjustment
+        ##
+        # Returns the Ruby class given the R name of the p-value adjustment method.
         def self.method(name)
             name = name.to_s
             case name.downcase
@@ -215,6 +289,9 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Bonferroni p-value adjustment method.
         class Bonferroni
             def self.adjust(*p_values)
                 Rust.exclusive do
@@ -224,6 +301,9 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Holm p-value adjustment method.
         class Holm
             def self.adjust(*p_values)
                 Rust.exclusive do
@@ -233,6 +313,9 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Hochberg p-value adjustment method.
         class Hochberg
             def self.adjust(*p_values)
                 Rust.exclusive do
@@ -242,6 +325,9 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Hommel p-value adjustment method.
         class Hommel
             def self.adjust(*p_values)
                 Rust.exclusive do
@@ -251,6 +337,9 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Benjamini-Hochberg p-value adjustment method.
         class BenjaminiHochberg
             def self.adjust(*p_values)
                 Rust.exclusive do
@@ -260,6 +349,9 @@ module Rust::StatisticalTests
             end
         end
+        ##
+        # Benjamini-Yekutieli p-value adjustment method.
         class BenjaminiYekutieli
             def self.adjust(*p_values)
                 Rust.exclusive do

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rust
 version: !ruby/object:Gem::Version
-  version: '0.9'
+  version: '0.10'
 platform: ruby
 authors:
 - Simone Scalabrino
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-08-09 00:00:00.000000000 Z
+date: 2022-08-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rinruby