RubyGems - rust - Versions diffs - 0.4 → 0.7 - Mend

rust 0.4 → 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ca9e5aaa6bcfff9d1b261c5a3ced5cf74b7731085b4b094dba55df72884aca9a
-  data.tar.gz: 8c90363b44a0c95abc9610fb78c3c632c6ae4265ad2260ff7e9740777245f63e
+  metadata.gz: 35c41ec98f5b286ef597096152249f19825a27bc33030e85ea196df8b778d3b1
+  data.tar.gz: 48b2e61f707ebcd05fa5f593016c8c0fb4b9d1e58751dd5deae83715583bd978
 SHA512:
-  metadata.gz: 0ca17e2c0dda2188138f11e1ae4becaa8a5c4b0d2cc12273775b9ade1fefbc860f3ccee2251fbe353b9dbde55eded960e8cf26642af042070d979ed192b332e3
-  data.tar.gz: 28dacd36f814acf51d222c8e746f65b94ef53ab5f24902ba6f654b05267c926e8db03cca82a876ca1183f55293da96d60ec862c5bfa26f6abd430d1f5e998709
+  metadata.gz: b0cc1289721d52cd376e0a242c2be419ea511f03a0fdd42c4c7a1973cde24e8dfc467e9d355693f85632cccb98af6986777aa020b715db5f06aa88a63d154ea0
+  data.tar.gz: 4c91c808773dcd04913a594cfcaa7956318908e6d590ebec1d9a04beef18986adc70a3c5e480c39bdc9f753b5edf920a1939208b8b530ccfc67124ac153a4804

data/lib/rust-basics.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require_relative 'rust-core'
-module Rust:: Correlation
+module Rust::Correlation
     class Pearson
         def self.test(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }

data/lib/rust-calls.rb CHANGED Viewed

@@ -34,6 +34,17 @@ module Rust
         end
     end
+    # Pairs a dependent term with an independent term so that they can be
+    # rendered as a two-sided R formula.
+    class SimpleFormula
+        # dependent::   left-hand side of the formula
+        # independent:: right-hand side of the formula
+        def initialize(dependent, independent)
+            @dependent, @independent = dependent, independent
+        end
+        # Returns the string "<dependent> ~ <independent>".
+        def to_R
+            "#@dependent ~ #@independent"
+        end
+    end
     class Variable
         def initialize(name)
             @name = name

data/lib/rust-core.rb CHANGED Viewed

@@ -130,7 +130,10 @@ module Rust
                 @labels.each { |label| @data[label] = [] }
             elsif labels_or_data.is_a? Hash
                 @labels = labels_or_data.keys.map { |l| l.to_s }
-                @data = labels_or_data.clone
+                labels_or_data.each do |key, value|
+                    @data[key.to_s] = value.clone
+                end
             end
         end
@@ -142,6 +145,14 @@ module Rust
             end
         end
+        # Returns the values of row +i+ as an Array ordered like @labels, or
+        # nil when +i+ is negative or not smaller than self.rows.
+        def fast_row(i)
+            return nil if i < 0 || i >= self.rows
+            @labels.map { |label| @data[label][i] }
+        end
         def shuffle(*args)
             result = DataFrame.new(@labels)
@@ -174,6 +185,7 @@ module Rust
         def column(name)
             return @data[name]
         end
+        alias :| :column
         def rename_column!(old_name, new_name)
             raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
@@ -195,6 +207,13 @@ module Rust
             return result
         end
+        # Yields every row together with its index (via each_with_index) and
+        # returns true as soon as the block answers truthy, false otherwise.
+        def has_row?
+            self.each_with_index { |row, i| return true if yield(row, i) }
+            false
+        end
         def select_columns(cols=nil)
             raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
@@ -215,6 +234,40 @@ module Rust
             @data.delete(column)
         end
+        # Removes the element at position +i+ from every column, in place.
+        def delete_row(i)
+            @data.each_value { |column| column.delete_at(i) }
+        end
+        # Non-destructive counterpart of uniq_by!: runs it on self.clone and
+        # returns that clone.
+        # NOTE(review): uniq_by! deletes from the column arrays in place, so
+        # this only leaves the receiver untouched if DataFrame#clone copies
+        # the columns - confirm.
+        def uniq_by(by)
+            self.clone.uniq_by!(by)
+        end
+        # Keeps only the first row for every distinct combination of values
+        # in the columns listed in +by+; later duplicates are deleted in
+        # place. Returns self.
+        def uniq_by!(by)
+            seen = {}
+            doomed = []
+            self.each_with_index do |row, i|
+                key = by.map { |colname| row[colname] }
+                if seen[key]
+                    # Earlier deletions shift the remaining rows up, so the
+                    # index is corrected by the number of rows already queued.
+                    doomed << (i - doomed.size)
+                else
+                    seen[key] = i
+                end
+            end
+            doomed.each { |index| self.delete_row(index) }
+            self
+        end
         def column_names
             return @labels.map { |k| k.to_s }
         end
@@ -243,7 +296,7 @@ module Rust
                 row.each do |key, value|
                     @data[key.to_s] << value
                 end
-#
                 return true
             else
                 raise TypeError, "Expected an Array or a Hash"
@@ -275,6 +328,14 @@ module Rust
             return self
         end
+        # Yields every row as an Array (see fast_each_with_index), dropping
+        # the index. Returns self.
+        def fast_each
+            self.fast_each_with_index { |element, _i| yield element }
+            self
+        end
         def each_with_index
             for i in 0...self.rows
                 element = {}
@@ -288,6 +349,19 @@ module Rust
             return self
         end
+        # Yields each row as an Array of values ordered like @labels, followed
+        # by the row index. Returns self.
+        def fast_each_with_index
+            (0...self.rows).each do |i|
+                element = @labels.map { |label| @data[label][i] }
+                yield element, i
+            end
+            self
+        end
         def load_in_r_as(variable_name)
             command = []
@@ -397,6 +471,77 @@ module Rust
             return result
         end
+        # Groups the rows by the value of column +by+ and collapses every
+        # group into one row of a new DataFrame.
+        #
+        # by::          name (String) of the grouping column
+        # aggregators:: column name => Proc; the Proc receives the group's
+        #               values for that column and returns the aggregate
+        # block::       default aggregator for columns without a dedicated Proc
+        #
+        # Raises TypeError when +by+ is not a String or an aggregator is not a
+        # Proc, and RuntimeError when no block is given. An empty frame yields
+        # an empty result.
+        def aggregate(by, **aggregators)
+            raise TypeError, "Expected a string" unless by.is_a?(String)
+            raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
+            raise "Expected a block for default aggregator" unless block_given?
+            aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h
+            sorted = self.sort_by(by)
+            current_value = nil
+            partials = []
+            sorted.column(by).each_with_index do |value, index|
+                # Open a group on the very first row as well: comparing with the
+                # initial nil alone would miss a leading nil key.
+                if partials.empty? || current_value != value
+                    current_value = value
+                    partials << Rust::DataFrame.new(self.column_names)
+                end
+                partials.last << sorted.fast_row(index)
+            end
+            result = Rust::DataFrame.new(self.column_names)
+            partials.each do |group|
+                aggregated_row = {}
+                aggregated_row[by] = group.column(by)[0]
+                (self.column_names - [by]).each do |column|
+                    if aggregators[column]
+                        aggregated_row[column] = aggregators[column].call(group.column(column))
+                    else
+                        aggregated_row[column] = yield group.column(column)
+                    end
+                end
+                result << aggregated_row
+            end
+            return result
+        end
+        # Non-destructive counterpart of sort_by!: sorts self.clone by
+        # +column+ and returns that clone.
+        # NOTE(review): the receiver is only left untouched if
+        # DataFrame#clone copies the column arrays - confirm.
+        def sort_by(column)
+            self.clone.tap { |copy| copy.sort_by!(column) }
+        end
+        # Reorders all the rows so that column +by+ is in ascending order.
+        # Rows with equal keys keep their relative order. Returns the sorted
+        # +by+ column (the same Array object, reordered in place).
+        def sort_by!(by)
+            keys = @data[by]
+            # Row permutation: original indices listed in sorted-key order. The
+            # index is the tie-breaker, which makes the sort stable.
+            order = keys.each_with_index.sort_by { |value, index| [value, index] }.map { |_value, index| index }
+            (self.column_names - [by]).each do |column_name|
+                column = self.column(column_name)
+                @data[column_name] = order.map { |index| column[index] }
+            end
+            keys.replace(order.map { |index| keys[index] })
+        end
         def bind_rows!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -480,7 +625,7 @@ module Rust
         end
     end
-    class Sequence
+    class Sequence < RustDatatype
         attr_reader :min
         attr_reader :max
@@ -511,6 +656,103 @@ module Rust
         def to_R
             "seq(from=#@min, to=#@max, by=#@step)"
         end
+        # Builds the R statement "<variable_name> <- <to_R>" and hands it to
+        # Rust._eval. +variable_name+ is interpolated verbatim into the R code.
+        def load_in_r_as(variable_name)
+            Rust._eval("#{variable_name} <- #{self.to_R}")
+        end
+    end
+    # Array whose elements can be stacked into a single object with bind_all.
+    class DataFrameArray < Array
+        # Clones the first element and appends every following element to the
+        # clone with bind_rows!. Returns nil for an empty array.
+        def bind_all
+            return nil if self.empty?
+            result = self.first.clone
+            self.drop(1).each { |dataframe| result.bind_rows!(dataframe) }
+            result
+        end
+    end
+    # Hash whose values can be stacked into a single object with bind_all.
+    class DataFrameHash < Hash
+        # Clones the first value and appends every following value to the
+        # clone with bind_rows!. Returns nil for an empty hash.
+        def bind_all
+            frames = self.values
+            return nil if frames.empty?
+            result = frames.first.clone
+            frames.drop(1).each { |dataframe| result.bind_rows!(dataframe) }
+            result
+        end
+    end
+    # Array whose arithmetic operators work element by element. The right-hand
+    # operand of -, *, + and / is either an Array of the same size or a Numeric
+    # applied to every element; ** accepts a Numeric only. Every operator
+    # returns a modified clone of the receiver.
+    class MathArray < Array
+        def -(other)
+            elementwise(other) { |mine, theirs| mine - theirs }
+        end
+        def *(other)
+            elementwise(other) { |mine, theirs| mine * theirs }
+        end
+        def +(other)
+            elementwise(other) { |mine, theirs| mine + theirs }
+        end
+        def /(other) # The trailing slash restores syntax highlighting in Kate: /
+            elementwise(other) { |mine, theirs| mine / theirs }
+        end
+        def **(other)
+            raise ArgumentError, "Expected numeric" unless other.is_a?(Numeric)
+            result = self.clone
+            result.each_index { |i| result[i] = result[i] ** other }
+            result
+        end
+        private
+        # Validates +other+, broadcasts a Numeric to the receiver's size and
+        # combines the operands pairwise with the given block.
+        def elementwise(other)
+            raise ArgumentError, "Expected array or numeric" unless other.is_a?(::Array) || other.is_a?(Numeric)
+            raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
+            operands = other.is_a?(Numeric) ? [other] * self.size : other
+            result = self.clone
+            result.each_index { |i| result[i] = yield(result[i], operands[i]) }
+            result
+        end
     end
 end
@@ -554,6 +796,14 @@ class Array
     def to_R
         return "c(#{self.map { |e| e.to_R }.join(",")})"
     end
+    # Returns a Hash mapping every distinct element to the number of times it
+    # occurs in the array.
+    def distribution
+        self.each_with_object({}) do |value, counts|
+            counts[value] = counts[value].to_i + 1
+        end
+    end
 end
 class String
@@ -569,19 +819,21 @@ class Range
 end
 module Rust::RBindings
-    def read_csv(filename, **options)
-        Rust::CSV.read(filename, **options)
-    end
-    def write_csv(filename, dataframe, **options)
-        Rust::CSV.write(filename, dataframe, **options)
-    end
     def data_frame(*args)
         Rust::DataFrame.new(*args)
     end
 end
+module Rust::TestCases
+    # Builds a DataFrame with the given +columns+ and +size+ rows. Each cell
+    # is produced by the block, which receives the row index and the column.
+    def self.sample_dataframe(columns, size=100)
+        size.times.each_with_object(Rust::DataFrame.new(columns)) do |row_index, dataframe|
+            dataframe << columns.map { |column| yield row_index, column }
+        end
+    end
+end
 def bind_r!
     include Rust::RBindings
 end

data/lib/rust-csv.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require_relative 'rust-core'
 module Rust
     class CSV
         def self.read_all(pattern, **options)
-            result = {}
+            result = DataFrameHash.new
             Dir.glob(pattern).each do |filename|
                 result[filename] = CSV.read(filename, **options)
             end
@@ -49,10 +49,9 @@ module Rust
             raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
             write_headers = options[:headers] != false
-            options[:headers] = dataframe.column_names if options[:headers] == nil
+            options[:headers] = dataframe.column_names unless options[:headers]
             hash = {}
-            labels = nil
             ::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
                 dataframe.each do |row|
                     csv << row
@@ -93,3 +92,13 @@ module Rust
         end
     end
 end
+# CSV helpers added to Rust::RBindings; both forward their arguments
+# unchanged to Rust::CSV.
+module Rust::RBindings
+    # Forwards to Rust::CSV.read(filename, **options).
+    def read_csv(filename, **options)
+        Rust::CSV.read(filename, **options)
+    end
+    # Forwards to Rust::CSV.write(filename, dataframe, **options).
+    def write_csv(filename, dataframe, **options)
+        Rust::CSV.write(filename, dataframe, **options)
+    end
+end

data/lib/rust-descriptive.rb CHANGED Viewed

@@ -50,18 +50,59 @@ module Rust::Descriptive
+        # Returns a Hash mapping each requested percentile to its quantile of
+        # +data+, computed in Ruby by linear interpolation between the two
+        # neighbouring order statistics.
+        #
+        # data::        Array of Numeric
+        # percentiles:: Array of Numeric in [0, 1]
+        #
+        # Raises TypeError for non-numeric arrays and RuntimeError for
+        # percentiles outside [0, 1]. Empty +data+ maps every percentile to nil.
+        # NOTE(review): this looks like R's default quantile definition
+        # (type 7), which the removed code obtained from R - confirm.
         def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
             raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
-            raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
+            raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
-            Rust.exclusive do
-                Rust['descriptive.data'] = data
-                Rust['descriptive.percs'] = percentiles
-                call_result = Rust._pull("quantile(descriptive.data, descriptive.percs)")
-                assert { call_result.is_a?(Array) }
-                assert { call_result.size == percentiles.size }
-                return percentiles.zip(call_result).to_h
+            n = data.size
+            quantiles = percentiles.size
+            # Values outside [0, 1] were already rejected above, so this clamp
+            # looks like a no-op.
+            percentiles = percentiles.map { |x| x > 1.0 ? 1.0 : (x < 0.0 ? 0.0 : x) }
+            # Fractional position of each percentile in the sorted data; the
+            # leading 1 and the trailing - 1 cancel, leaving (n - 1) * x.
+            rough_indices = percentiles.map { |x| 1 + [n - 1, 0].max * x - 1 }
+            floor_indices = rough_indices.map { |i| i.floor }
+            ceil_indices = rough_indices.map { |i| i.ceil }
+            data = data.sort
+            result = floor_indices.map { |i| data[i] }
+            result_ceil = ceil_indices.map { |i| data[i] }
+            # Only positions that fall strictly between two different values
+            # need interpolation.
+            indices_to_fix = (0...quantiles).select { |i| rough_indices[i] > floor_indices[i] && result_ceil[i] != result[i] }
+            index_approximation_errors = indices_to_fix.map { |i| rough_indices[i] - floor_indices[i] }
+            reduced_index_approximation_errors = index_approximation_errors.map { |i| (1 - i) }
+            hi_indices = indices_to_fix.map { |i| ceil_indices[i] }
+            data_hi_indices = hi_indices.map { |i| data[i] }
+            # j walks the per-fix arrays in step with indices_to_fix.
+            j = 0
+            indices_to_fix.each do |i|
+                result[i] = reduced_index_approximation_errors[j] * result[i] + index_approximation_errors[j] * data_hi_indices[j]
+                j += 1
             end
+            return percentiles.zip(result).to_h
+        end
+        # Returns the outliers of +data+ with respect to its own distribution;
+        # shorthand for outliers_according_to(data, data, k, **opts).
+        def outliers(data, k=1.5, **opts)
+            outliers_according_to(data, data, k, **opts)
+        end
+        # Returns the elements of +data+ that lie more than k interquartile
+        # ranges above the third quartile or below the first quartile of
+        # +data_distribution+.
+        #
+        # opts[:side] restricts the result: :positive, :pos, :p or :+ keeps
+        # only the high outliers; :negative, :neg, :n or :- keeps only the low
+        # ones. Any other value (or no :side) returns the low outliers
+        # followed by the high ones.
+        def outliers_according_to(data, data_distribution, k=1.5, **opts)
+            quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
+            q1 = quantiles[0.25]
+            q3 = quantiles[0.75]
+            iqr = q3 - q1
+            positive_outliers = data.select { |d| d > q3 + iqr * k }
+            negative_outliers = data.select { |d| d < q1 - iqr * k }
+            return negative_outliers + positive_outliers unless opts[:side]
+            case opts[:side].to_sym
+            when :positive, :pos, :p, :+
+                positive_outliers
+            when :negative, :neg, :n, :-
+                negative_outliers
+            else
+                negative_outliers + positive_outliers
+            end
         end
     end
 end

data/lib/rust-effsize.rb CHANGED Viewed

@@ -24,6 +24,10 @@ module Rust::EffectSize::CliffDelta
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -32,10 +36,10 @@ module Rust::EffectSize::CliffDelta
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cliff's delta"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                         rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))             rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                       rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym   rescue nil
                 return result
             end
@@ -49,6 +53,10 @@ module Rust::EffectSize::CohenD
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -57,10 +65,10 @@ module Rust::EffectSize::CohenD
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cohen's d"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                       rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))           rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                     rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
                 return result
             end

data/lib/rust-plots.rb CHANGED Viewed

@@ -21,6 +21,14 @@ module Rust::Plots
             return self
         end
+        # Returns the colours for +size+ series: ['black'] when size <= 1,
+        # otherwise whatever Rust._pull returns for R's hcl.colors(n=size).
+        def palette(size)
+            return ['black'] if size <= 1
+            Rust._pull("hcl.colors(n=#{size})")
+        end
         def x_range(range)
             @options['xlim'] = range
@@ -127,10 +135,18 @@ module Rust::Plots
     end
     class ScatterPlot < BasePlot
-        def initialize(x, y)
+        # Creates a scatter plot. When both +x+ and +y+ are given they become
+        # the first series, and +options+ are stored with that series.
+        def initialize(x = nil, y = nil, **options)
             super()
-            @x = x
-            @y = y
+            @series = []
+            if x && y
+                # Splat the options: series takes keywords, and Ruby 3 no longer
+                # converts a positional Hash into keyword arguments.
+                self.series(x, y, **options)
+            end
+        end
+        # Appends a series, stored as [x, y, options], and returns self so
+        # that calls can be chained.
+        def series(x, y, **options)
+            @series << [x, y, options]
+            return self
         end
         def thickness(t)
@@ -159,13 +175,66 @@ module Rust::Plots
         protected
+        # Draws every stored series: the first one with R's "plot", each
+        # following one with "lines". Returns self.
         def _show()
-            Rust["plotter.x"] = @x
-            Rust["plotter.y"] = @y
+            first = true
+            palette = self.palette(@series.size)
+            i = 0
+            base_options = {}
+            # Without an explicit 'xlim' option, both axis ranges are derived
+            # from the values of all the series.
+            unless @options['xlim']
+                x_values = @series.map { |v| v[0] }.flatten
+                y_values = @series.map { |v| v[1] }.flatten
+                base_options[:xlim] = [x_values.min, x_values.max]
+                base_options[:ylim] = [y_values.min, y_values.max]
+            end
+            @series.each do |x, y, options|
+                # On a key clash the computed base options win over the ones
+                # stored with the series.
+                options = options.merge(base_options)
+                Rust["plotter.x"] = x
+                Rust["plotter.y"] = y
+                function = nil
+                if first
+                    function = Rust::Function.new("plot")
+                    first = false
+                else
+                    function = Rust::Function.new("lines")
+                end
+                augmented_options = {}
+                # An explicit :color wins; otherwise the i-th palette colour.
+                augmented_options['col'] = options[:color] || palette[i]
+                augmented_options['xlim'] = options[:xlim] if options[:xlim]
+                augmented_options['ylim'] = options[:ylim] if options[:ylim]
+                # NOTE(review): _augmented_options presumably merges these with
+                # @options - confirm against BasePlot.
+                function.options = self._augmented_options(augmented_options)
+                function.arguments << Rust::Variable.new("plotter.x")
+                function.arguments << Rust::Variable.new("plotter.y")
+                function.call
+                i += 1
+            end
+            return self
+        end
+    end
+    class BarPlot < BasePlot
+        def initialize(bars)
+            super()
+            @bars = bars
+        end
+        protected
+        def _show()
+            Rust["plotter.bars"] = @bars.values
+            Rust["plotter.labels"] = @bars.keys
+            Rust._eval("names(plotter.bars) <- plotter.labels")
-            function = Rust::Function.new("plot")
+            function = Rust::Function.new("barplot")
             function.options = self._augmented_options
-            function.arguments << Rust::Variable.new("plotter.x")
-            function.arguments << Rust::Variable.new("plotter.y")
+            function.arguments << Rust::Variable.new("plotter.bars")
             function.call

data/lib/rust-probabilities.rb ADDED Viewed

@@ -0,0 +1,248 @@
+require_relative 'rust-core'
+class Numeric
+    # Absolute difference between the receiver and +other+.
+    # Raises TypeError when +other+ is not a Numeric.
+    def distance(other)
+        unless other.is_a? Numeric
+            raise TypeError, "no implicit conversion of #{other.class} into Numeric"
+        end
+        (self - other).abs
+    end
+end
+class Array
+    # Sum of the element-wise distances between the two arrays. Elements are
+    # converted with to_i, and positions missing from the shorter array count
+    # as 0. Raises TypeError when +other+ is not an Array.
+    def distance(other)
+        unless other.is_a? Array
+            raise TypeError, "no implicit conversion of #{other.class} into Array"
+        end
+        longest, shortest = self.size > other.size ? [self, other] : [other, self]
+        longest.each_index.sum { |i| longest[i].to_i.distance(shortest[i].to_i) }
+    end
+end
+class String
+    # Distance between the byte arrays of the two strings (see
+    # Array#distance). Raises TypeError when +other+ is not a String.
+    def distance(other)
+        unless other.is_a? String
+            raise TypeError, "no implicit conversion of #{other.class} into String"
+        end
+        self.bytes.distance(other.bytes)
+    end
+end
+module Rust
+    # A subset of the outcomes of a random variable, stored as a Hash that
+    # maps each outcome to its probability.
+    class RandomVariableSlice
+        def initialize(values)
+            raise TypeError, "Expected Hash" unless values.is_a?(Hash)
+            @values = values
+        end
+        # With an argument: the stored probability of outcome +v+.
+        # Without (or with nil/false): the total probability of the slice.
+        def probability(v=nil)
+            return @values[v] if v
+            @values.values.sum
+        end
+        # Outcome with the highest probability.
+        def ml
+            @values.max_by { |_outcome, weight| weight }[0]
+        end
+        # Sum of outcome * probability over the slice.
+        def expected
+            @values.map { |outcome, weight| outcome * weight }.sum
+        end
+        # The comparison operators do not return booleans: each returns the
+        # slice of outcomes that satisfy the comparison with +n+.
+        def >(n)
+            self.so_that { |outcome| outcome > n }
+        end
+        def >=(n)
+            self.so_that { |outcome| outcome >= n }
+        end
+        def <(n)
+            self.so_that { |outcome| outcome < n }
+        end
+        def <=(n)
+            self.so_that { |outcome| outcome <= n }
+        end
+        def ==(n)
+            self.so_that { |outcome| outcome == n }
+        end
+        # Slice of the outcomes for which the block is truthy.
+        def so_that
+            RandomVariableSlice.new(@values.select { |outcome, _weight| yield(outcome) })
+        end
+        # Slice of the outcomes within [a, b].
+        def between(a, b)
+            RandomVariableSlice.new(@values.select { |outcome, _weight| outcome.between? a, b })
+        end
+    end
+    # Discrete random variable: a Hash of outcome => probability whose
+    # probabilities must add up to 1 (within EPSILON).
+    class RandomVariable < RandomVariableSlice
+        EPSILON = 1e-7
+        attr_reader    :values
+        # values:: Hash mapping each outcome to its probability
+        # exact::  when true, approx! never prunes negligible outcomes
+        #
+        # Raises RuntimeError when a probability is outside [0, 1] or the
+        # total differs from 1 by more than EPSILON.
+        def initialize(values = {0 => 1.0}, exact = false)
+            @values = values
+            @exact = exact
+            raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
+            raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON
+            approx!
+        end
+        # Probability of outcome +v+; 0.0 for unknown outcomes.
+        def probability(v)
+            return @values[v].to_f
+        end
+        # Distribution of the sum of one draw from each variable: every pair of
+        # outcomes contributes the product of its probabilities.
+        def +(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    sum_key = my_key + other_key
+                    new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
+                end
+            end
+            return RandomVariable.new(new_hash, @exact)
+        end
+        # Integer argument: sum of that many draws (see rep).
+        # RandomVariable argument: product of the two variables (see mul).
+        def *(times)
+            if times.is_a? Integer
+                return rep(times)
+            elsif times.is_a? RandomVariable
+                return mul(times)
+            else
+                raise "The argument must be an Integer or a RandomVariable"
+            end
+        end
+        # Distribution of the product of one draw from each variable.
+        def mul(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    mul_key = my_key * other_key
+                    new_hash[mul_key] = new_hash[mul_key].to_f + (my_value * other_value)
+                end
+            end
+            return RandomVariable.new(new_hash, @exact)
+        end
+        # Adds the variable to itself until +times+ draws are summed. For
+        # times <= 1 the receiver itself is returned.
+        def rep(times)
+            rv = self
+            (times-1).times do
+                rv += self
+            end
+            return rv
+        end
+        # Disables the pruning done by approx! from now on.
+        def exact!
+            @exact = true
+        end
+        # Unless the variable is exact, removes every outcome whose probability
+        # is at most EPSILON and adds that probability to the nearest remaining
+        # outcome, as measured by the outcomes' distance method.
+        def approx!
+            return if @exact
+            to_delete = []
+            @values.each do |v, probability|
+                to_delete.push v if probability <= EPSILON
+            end
+            to_delete.each do |v|
+                probability = @values.delete v
+                nearest = @values.keys.min_by { |k| k.distance v }
+                @values[nearest] += probability
+            end
+        end
+        # Draws a random outcome according to the stored probabilities.
+        def extract
+            v = rand
+            cumulative = 0
+            @values.each do |key, prob|
+                cumulative += prob
+                return key if cumulative >= v
+            end
+            # The total may be a hair below 1 (EPSILON tolerance, rounding), so
+            # the loop can end without a hit; fall back to the last outcome
+            # instead of returning the Hash that each evaluates to.
+            return @values.keys.last
+        end
+        # Assigns the probability that +hash+ is missing to reach 1 to +key+
+        # (modifying +hash+) and wraps the result in a RandomVariable.
+        def self.complete(hash, key=0)
+            hash[key] = 1 - hash.values.sum
+            return RandomVariable.new(hash)
+        end
+    end
+    # RandomVariable in which each of the given outcomes receives the
+    # probability 1.0 / values.size.
+    class UniformRandomVariable < RandomVariable
+        def initialize(values, exact = false)
+            share = 1.0 / values.size
+            super(values.map { |outcome| [outcome, share] }.to_h, exact)
+        end
+    end
+    # Mixin providing the textbook notations P(...) and E(...).
+    module Probabilities
+        # Total probability of a slice, e.g. P(variable > 3).
+        # Raises RuntimeError for a whole RandomVariable and for any argument
+        # that is not a RandomVariableSlice.
+        def P(v)
+            unless v.is_a? RandomVariableSlice
+                raise "Cannot compute the probability of a #{v.class}"
+            end
+            raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
+            return v.probability
+        end
+        # Expected value of a slice or of a whole random variable.
+        # Raises RuntimeError for any other argument.
+        def E(v)
+            unless v.is_a? RandomVariableSlice
+                raise "Cannot compute the expected value of a #{v.class}"
+            end
+            return v.expected
+        end
+    end
+    # Ready-made random variables. The class is reopened here, after
+    # UniformRandomVariable has been defined, because DICE and COIN use it.
+    class RandomVariable
+        # Lowercase letter => weight; the listed weights sum to 1.
+        # NOTE(review): presumably the relative letter frequencies of English
+        # text - confirm the source of the table.
+        ENGLISH_ALPHABET = RandomVariable.new({
+            "a" => 0.08167,
+            "b" => 0.01492,
+            "c" => 0.02782,
+            "d" => 0.04253,
+            "e" => 0.12703,
+            "f" => 0.02228,
+            "g" => 0.02015,
+            "h" => 0.06094,
+            "i" => 0.06966,
+            "j" => 0.00153,
+            "k" => 0.00772,
+            "l" => 0.04025,
+            "m" => 0.02406,
+            "n" => 0.06749,
+            "o" => 0.07507,
+            "p" => 0.01929,
+            "q" => 0.00095,
+            "r" => 0.05987,
+            "s" => 0.06327,
+            "t" => 0.09056,
+            "u" => 0.02758,
+            "v" => 0.00978,
+            "w" => 0.02360,
+            "x" => 0.00150,
+            "y" => 0.01974,
+            "z" => 0.00074
+        })
+        # Uniform variable over the outcomes 1 to 6.
+        DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
+        # Uniform variable over the outcomes "h" and "t".
+        COIN = UniformRandomVariable.new(["h", "t"])
+    end
+end

data/lib/rust-tests.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Rust::StatisticalTests
         attr_accessor   :pvalue
         attr_accessor   :exact
         attr_accessor   :alpha
+        attr_accessor   :hypothesis
         def initialize
             @statistics = {}
@@ -20,6 +21,16 @@ module Rust::StatisticalTests
             @statistics[name.to_sym] = value
         end
+        # P-value of this result adjusted for the other results attached to
+        # the same hypothesis, as computed by the hypothesis with +method+.
+        # Returns 1 when no hypothesis is attached.
+        def adjusted_pvalue(method='bonferroni')
+            @hypothesis ? @hypothesis.adjusted_pvalue_for(self, method) : 1
+        end
+        # Attaches this result to +value+ and registers the result with it
+        # through value.add. Assigning nil only clears the link;
+        # adjusted_pvalue already treats a missing hypothesis as a valid state.
+        def hypothesis=(value)
+            @hypothesis = value
+            @hypothesis.add(self) unless @hypothesis.nil?
+        end
         def significant
             pvalue < alpha
         end
@@ -31,32 +42,150 @@ module Rust::StatisticalTests
                     (!exact ? " P-value is not exact." : "")
         end
     end
+    # A titled group of test results whose p-values are adjusted together.
+    class Hypothesis
+        # Resolves +title_or_instance+ to a Hypothesis.
+        #
+        # * nil          -> a new, untitled Hypothesis on every call
+        # * a Hypothesis -> that same object
+        # * a String     -> a live Hypothesis with that title if one exists,
+        #                   otherwise a new Hypothesis with that title
+        #
+        # Raises TypeError for any other argument.
+        #
+        # The lookup by title scans ObjectSpace, so it sees every Hypothesis
+        # still in memory, including ones that are no longer referenced but
+        # have not been garbage collected yet.
+        def self.find(title_or_instance)
+            case title_or_instance
+            when nil
+                Hypothesis.new(nil)
+            when Hypothesis
+                title_or_instance
+            when String
+                ObjectSpace.each_object(Hypothesis) do |instance|
+                    return instance if instance.title == title_or_instance
+                end
+                Hypothesis.new(title_or_instance)
+            else
+                raise TypeError, "Expected nil, String or Hypothesis"
+            end
+        end
+        # Results registered so far, in insertion order.
+        attr_reader :results
+        # Title given at construction (nil for untitled hypotheses).
+        attr_reader :title
+        def initialize(title)
+            @title = title
+            @results = []
+        end
+        # Registers +result+ and returns the list of results. A result that is
+        # already registered (same object) is not added again: a duplicate
+        # would enlarge the set of p-values passed to the adjustment.
+        def add(result)
+            @results << result unless @results.any? { |r| r.equal?(result) }
+            @results
+        end
+        # Adjusted p-value of +instance+ among all results of this hypothesis.
+        #
+        # +method+ names the correction and is resolved through
+        # PValueAdjustment.method. Raises ArgumentError when +instance+ was
+        # never added; without this check the lookup below would index the
+        # adjusted values with nil.
+        def adjusted_pvalue_for(instance, method)
+            index = @results.index(instance)
+            raise ArgumentError, "The given result does not belong to this hypothesis" if index.nil?
+            p_values = @results.map(&:pvalue)
+            adjusted = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)
+            # A single p-value may come back as a bare number instead of a list.
+            adjusted.is_a?(Numeric) ? adjusted : adjusted[index]
+        end
+    end
+end
+# Multiple-comparison corrections for p-values. Every class below exposes
+# adjust(*p_values) and evaluates R's p.adjust with a fixed method string.
+module Rust::StatisticalTests::PValueAdjustment
+    # Maps a method name to the class implementing that correction.
+    #
+    # +name+ is converted with to_s and matched case-insensitively. Accepted
+    # values: "bonferroni"/"b", "holm"/"h", "hochberg", "hommel",
+    # "benjaminihochberg"/"bh" and "benjaminiyekutieli"/"by".
+    #
+    # Raises ArgumentError for any other name. An unknown name used to fall
+    # through the case and return nil, so callers failed later with an
+    # unrelated NoMethodError on nil.
+    #
+    # NOTE(review): this singleton method shadows Object#method on the module,
+    # so reflection through PValueAdjustment.method(:some_name) is not
+    # available; the name is kept because callers depend on it.
+    def self.method(name)
+        case name.to_s.downcase
+        when "bonferroni", "b"
+            Bonferroni
+        when "holm", "h"
+            Holm
+        when "hochberg"
+            Hochberg
+        when "hommel"
+            Hommel
+        when "benjaminihochberg", "bh"
+            BenjaminiHochberg
+        when "benjaminiyekutieli", "by"
+            BenjaminiYekutieli
+        else
+            raise ArgumentError, "Unknown p-value adjustment method: #{name.inspect}"
+        end
+    end
+    # p.adjust with method="bonferroni".
+    class Bonferroni
+        # Stores +p_values+ under 'adjustment.p' and returns the value pulled
+        # from the R expression; both steps run inside Rust.exclusive.
+        def self.adjust(*p_values)
+            Rust.exclusive do
+                Rust['adjustment.p'] = p_values
+                return Rust._pull("p.adjust(adjustment.p, method=\"bonferroni\")")
+            end
+        end
+    end
+    # p.adjust with method="holm".
+    class Holm
+        # Stores +p_values+ under 'adjustment.p' and returns the value pulled
+        # from the R expression; both steps run inside Rust.exclusive.
+        def self.adjust(*p_values)
+            Rust.exclusive do
+                Rust['adjustment.p'] = p_values
+                return Rust._pull("p.adjust(adjustment.p, method=\"holm\")")
+            end
+        end
+    end
+    # p.adjust with method="hochberg".
+    class Hochberg
+        # Stores +p_values+ under 'adjustment.p' and returns the value pulled
+        # from the R expression; both steps run inside Rust.exclusive.
+        def self.adjust(*p_values)
+            Rust.exclusive do
+                Rust['adjustment.p'] = p_values
+                return Rust._pull("p.adjust(adjustment.p, method=\"hochberg\")")
+            end
+        end
+    end
+    # p.adjust with method="hommel".
+    class Hommel
+        # Stores +p_values+ under 'adjustment.p' and returns the value pulled
+        # from the R expression; both steps run inside Rust.exclusive.
+        def self.adjust(*p_values)
+            Rust.exclusive do
+                Rust['adjustment.p'] = p_values
+                return Rust._pull("p.adjust(adjustment.p, method=\"hommel\")")
+            end
+        end
+    end
+    # p.adjust with method="BH".
+    class BenjaminiHochberg
+        # Stores +p_values+ under 'adjustment.p' and returns the value pulled
+        # from the R expression; both steps run inside Rust.exclusive.
+        def self.adjust(*p_values)
+            Rust.exclusive do
+                Rust['adjustment.p'] = p_values
+                return Rust._pull("p.adjust(adjustment.p, method=\"BH\")")
+            end
+        end
+    end
+    # p.adjust with method="BY".
+    class BenjaminiYekutieli
+        # Stores +p_values+ under 'adjustment.p' and returns the value pulled
+        # from the R expression; both steps run inside Rust.exclusive.
+        def self.adjust(*p_values)
+            Rust.exclusive do
+                Rust['adjustment.p'] = p_values
+                return Rust._pull("p.adjust(adjustment.p, method=\"BY\")")
+            end
+        end
+    end
 end
 module Rust::StatisticalTests::Wilcoxon
     class << self
-        def paired(d1, d2, alpha = 0.05)
+         def paired(d1, d2, alpha = 0.05, **options)  # options[:hypothesis] is resolved with Hypothesis.find below
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
             raise "The two distributions have different size" if d1.size != d2.size  # both samples must have the same size
             Rust.exclusive do
                 Rust["wilcox.a"] = d1
                 Rust["wilcox.b"] = d2
                 _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=T)", true)  # two-sided paired wilcox.test; second returned element is kept as warnings
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Wilcoxon Signed-Rank test"
-                result.pvalue    = Rust._pull("wilcox.result$p.value")
-                result[:w]       = Rust._pull("wilcox.result$statistic")
-                result.exact     = !warnings.include?("cannot compute exact p-value with zeroes")
-                result.alpha     = alpha
+                result.name       = "Wilcoxon Signed-Rank test"
+                result.pvalue     = Rust._pull("wilcox.result$p.value")
+                result[:w]        = Rust._pull("wilcox.result$statistic")
+                result.exact      = !warnings.include?("cannot compute exact p-value with zeroes")  # false when warnings contain that message
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])  # nil, a title String or a Hypothesis is accepted by find
                 return result
             end
         end
-        def unpaired(d1, d2, alpha = 0.05)
+        def unpaired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -66,11 +195,12 @@ module Rust::StatisticalTests::Wilcoxon
                 _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=F)", true)
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
-                result.pvalue    = Rust._pull("wilcox.result$p.value")
-                result[:w]       = Rust._pull("wilcox.result$statistic")
-                result.exact     = !warnings.include?("cannot compute exact p-value with ties")
-                result.alpha     = alpha
+                result.name       = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
+                result.pvalue     = Rust._pull("wilcox.result$p.value")
+                result[:w]        = Rust._pull("wilcox.result$statistic")
+                result.exact      = !warnings.include?("cannot compute exact p-value with ties")
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
@@ -80,7 +210,7 @@ end
 module Rust::StatisticalTests::T
     class << self
-        def paired(d1, d2, alpha = 0.05)
+        def paired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
             raise "The two distributions have different size" if d1.size != d2.size
@@ -91,17 +221,18 @@ module Rust::StatisticalTests::T
                 warnings = Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=T)")
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Paired t-test"
-                result.pvalue    = Rust._pull("t.result$p.value")
-                result[:t]       = Rust._pull("t.result$statistic")
-                result.exact     = true
-                result.alpha     = alpha
+                result.name       = "Paired t-test"
+                result.pvalue     = Rust._pull("t.result$p.value")
+                result[:t]        = Rust._pull("t.result$statistic")
+                result.exact      = true
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
         end
-        def unpaired(d1, d2, alpha = 0.05)
+        def unpaired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -111,11 +242,12 @@ module Rust::StatisticalTests::T
                 Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=F)")
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Welch Two Sample t-test"
-                result.pvalue    = Rust._pull("t.result$p.value")
-                result[:t]       = Rust._pull("t.result$statistic")
-                result.exact     = true
-                result.alpha     = alpha
+                result.name       = "Welch Two Sample t-test"
+                result.pvalue     = Rust._pull("t.result$p.value")
+                result[:t]        = Rust._pull("t.result$statistic")
+                result.exact      = true
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
@@ -125,18 +257,19 @@ end
 module Rust::StatisticalTests::Shapiro
     class << self
-        def compute(vector, alpha = 0.05)
+        def compute(vector, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
             Rust.exclusive do
                 Rust['shapiro.v'] = vector
                 Rust._eval("shapiro.result = shapiro.test(shapiro.v)")
                 result = Rust::StatisticalTests::Result.new
-                result.name     = "Shapiro-Wilk normality test"
-                result.pvalue   = Rust._pull("shapiro.result$p.value")
-                result[:W]      = Rust._pull("shapiro.result$statistic")
-                result.exact    = true
-                result.alpha    = alpha
+                result.name       = "Shapiro-Wilk normality test"
+                result.pvalue     = Rust._pull("shapiro.result$p.value")
+                result[:W]        = Rust._pull("shapiro.result$statistic")
+                result.exact      = true
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end

data/lib/rust.rb CHANGED Viewed

@@ -6,3 +6,4 @@ require_relative 'rust-effsize'
 require_relative 'rust-descriptive'
 require_relative 'rust-plots'
 require_relative 'rust-calls'
+require_relative 'rust-probabilities'

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rust
 version: !ruby/object:Gem::Version
-  version: '0.4'
+  version: '0.7'
 platform: ruby
 authors:
 - Simone Scalabrino
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-09-15 00:00:00.000000000 Z
+date: 2021-02-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rinruby
@@ -63,6 +63,7 @@ files:
 - lib/rust-descriptive.rb
 - lib/rust-effsize.rb
 - lib/rust-plots.rb
+- lib/rust-probabilities.rb
 - lib/rust-tests.rb
 - lib/rust.rb
 homepage: https://github.com/intersimone999/ruby-rust
@@ -84,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.4
+rubygems_version: 3.2.7
 signing_key:
 specification_version: 4
 summary: Ruby advanced statistical library