RubyGems - rust - Versions diffs - 0.4 → 0.7 - Mend

rust 0.4 → 0.7

Files changed (12) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ca9e5aaa6bcfff9d1b261c5a3ced5cf74b7731085b4b094dba55df72884aca9a
-  data.tar.gz: 8c90363b44a0c95abc9610fb78c3c632c6ae4265ad2260ff7e9740777245f63e
+  metadata.gz: 35c41ec98f5b286ef597096152249f19825a27bc33030e85ea196df8b778d3b1
+  data.tar.gz: 48b2e61f707ebcd05fa5f593016c8c0fb4b9d1e58751dd5deae83715583bd978
 SHA512:
-  metadata.gz: 0ca17e2c0dda2188138f11e1ae4becaa8a5c4b0d2cc12273775b9ade1fefbc860f3ccee2251fbe353b9dbde55eded960e8cf26642af042070d979ed192b332e3
-  data.tar.gz: 28dacd36f814acf51d222c8e746f65b94ef53ab5f24902ba6f654b05267c926e8db03cca82a876ca1183f55293da96d60ec862c5bfa26f6abd430d1f5e998709
+  metadata.gz: b0cc1289721d52cd376e0a242c2be419ea511f03a0fdd42c4c7a1973cde24e8dfc467e9d355693f85632cccb98af6986777aa020b715db5f06aa88a63d154ea0
+  data.tar.gz: 4c91c808773dcd04913a594cfcaa7956318908e6d590ebec1d9a04beef18986adc70a3c5e480c39bdc9f753b5edf920a1939208b8b530ccfc67124ac153a4804

data/lib/rust-basics.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require_relative 'rust-core'
-module Rust:: Correlation
+module Rust::Correlation
     class Pearson
         def self.test(d1, d2)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }

data/lib/rust-calls.rb CHANGED Viewed

@@ -34,6 +34,17 @@ module Rust
         end
     end
+    # Two-sided R formula rendered as `dependent ~ independent`.
+    class SimpleFormula
+        # dependent::   left-hand side of the formula
+        # independent:: right-hand side of the formula
+        def initialize(dependent, independent)
+            @dependent, @independent = dependent, independent
+        end
+
+        # Returns the formula as R source text.
+        def to_R
+            "#{@dependent} ~ #{@independent}"
+        end
+    end
     class Variable
         def initialize(name)
             @name = name

data/lib/rust-core.rb CHANGED Viewed

@@ -130,7 +130,10 @@ module Rust
                 @labels.each { |label| @data[label] = [] }
             elsif labels_or_data.is_a? Hash
                 @labels = labels_or_data.keys.map { |l| l.to_s }
-                @data = labels_or_data.clone
+                labels_or_data.each do |key, value|
+                    @data[key.to_s] = value.clone
+                end
             end
         end
@@ -142,6 +145,14 @@ module Rust
             end
         end
+        # Returns row +i+ as a plain Array of cell values ordered like
+        # @labels, or nil when +i+ is outside 0...rows.
+        def fast_row(i)
+            return nil if i < 0 || i >= self.rows
+            @labels.map { |label| @data[label][i] }
+        end
         def shuffle(*args)
             result = DataFrame.new(@labels)
@@ -174,6 +185,7 @@ module Rust
         def column(name)
             return @data[name]
         end
+        alias :| :column
         def rename_column!(old_name, new_name)
             raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
@@ -195,6 +207,13 @@ module Rust
             return result
         end
+        # True when the block returns a truthy value for at least one
+        # (row, index) pair; iteration stops at the first match.
+        def has_row?
+            self.each_with_index { |row, i| return true if yield(row, i) }
+            false
+        end
         def select_columns(cols=nil)
             raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
@@ -215,6 +234,40 @@ module Rust
             @data.delete(column)
         end
+        # Removes the element at index +i+ from every column array.
+        def delete_row(i)
+            @data.each_value { |column| column.delete_at(i) }
+        end
+        # Non-destructive counterpart of uniq_by!: works on a clone and
+        # returns it.
+        def uniq_by(by)
+            self.clone.uniq_by!(by)
+        end
+        # Keeps only the first row for each distinct combination of the
+        # values in the columns listed in +by+. Returns self.
+        def uniq_by!(by)
+            seen = {}
+            doomed = []
+            self.each_with_index do |row, i|
+                key = by.map { |colname| row[colname] }
+                if seen.key?(key)
+                    # every earlier deletion shifts the later rows up by one,
+                    # so store the index the row will have when its turn comes
+                    doomed << (i - doomed.size)
+                else
+                    seen[key] = i
+                end
+            end
+            doomed.each { |i| self.delete_row(i) }
+            self
+        end
         def column_names
             return @labels.map { |k| k.to_s }
         end
@@ -243,7 +296,7 @@ module Rust
                 row.each do |key, value|
                     @data[key.to_s] << value
                 end
-#
                 return true
             else
                 raise TypeError, "Expected an Array or a Hash"
@@ -275,6 +328,14 @@ module Rust
             return self
         end
+        # Yields every row as a plain Array (see fast_each_with_index),
+        # without the index. Returns self.
+        def fast_each
+            self.fast_each_with_index { |element, _i| yield element }
+            self
+        end
         def each_with_index
             for i in 0...self.rows
                 element = {}
@@ -288,6 +349,19 @@ module Rust
             return self
         end
+        # Yields (row, index) for every row, where row is an Array of cell
+        # values in @labels order. Returns self.
+        def fast_each_with_index
+            self.rows.times do |i|
+                yield(@labels.map { |label| @data[label][i] }, i)
+            end
+            self
+        end
         def load_in_r_as(variable_name)
             command = []
@@ -397,6 +471,77 @@ module Rust
             return result
         end
+        # Groups the rows by column +by+ and reduces each group to one row.
+        #
+        # by::          name (String) of the grouping column
+        # aggregators:: per-column Procs keyed by column name; each one is
+        #               called with the group's values for that column
+        # block::       default aggregator, used for every other column
+        #
+        # Returns a new Rust::DataFrame with one row per distinct value of
+        # +by+, in the order produced by sort_by (empty when there are no
+        # rows). Raises TypeError for a non-String +by+ or a non-Proc
+        # aggregator, and RuntimeError when no block is given.
+        def aggregate(by, **aggregators)
+            raise TypeError, "Expected a string" unless by.is_a?(String)
+            raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
+            raise "Expected a block for default aggregator" unless block_given?
+            aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h
+            sorted = self.sort_by(by)
+            current_value = nil
+            partials = []
+            partial = nil
+            sorted.column(by).each_with_index do |value, index|
+                # `partial.nil?` opens the first group even when its key is nil
+                if partial.nil? || current_value != value
+                    current_value = value
+                    partials << partial if partial
+                    partial = Rust::DataFrame.new(self.column_names)
+                end
+                partial << sorted.fast_row(index)
+            end
+            # with no rows no group was ever opened: do not push nil
+            partials << partial if partial
+            result = Rust::DataFrame.new(self.column_names)
+            partials.each do |group|
+                aggregated_row = {}
+                aggregated_row[by] = group.column(by)[0]
+                (self.column_names - [by]).each do |column|
+                    if aggregators[column]
+                        aggregated_row[column] = aggregators[column].call(group.column(column))
+                    else
+                        aggregated_row[column] = yield group.column(column)
+                    end
+                end
+                result << aggregated_row
+            end
+            return result
+        end
+        # Non-destructive counterpart of sort_by!: sorts a clone of this
+        # data frame by +column+ and returns the clone.
+        def sort_by(column)
+            self.clone.tap { |copy| copy.sort_by!(column) }
+        end
+        # Reorders all rows in place so that column +by+ is ascending; rows
+        # with equal keys keep their relative order.
+        #
+        # by:: name of the key column
+        #
+        # Returns the (now sorted) key column array. Raises RuntimeError
+        # when the data frame has no column named +by+.
+        def sort_by!(by)
+            raise "This DataFrame does not contain a column named #{by}" unless @data.key?(by)
+            keys = @data[by]
+            # order[n] is the current index of the row that must end up at
+            # position n; the index is the tie-breaker that keeps the sort
+            # stable
+            order = keys.each_index.sort_by { |i| [keys[i], i] }
+            (self.column_names - [by]).each do |column_name|
+                column = self.column(column_name)
+                @data[column_name] = order.map { |i| column[i] }
+            end
+            # the key column is updated in place, like the other bang methods
+            keys.replace(order.map { |i| keys[i] })
+        end
         def bind_rows!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -480,7 +625,7 @@ module Rust
         end
     end
-    class Sequence
+    class Sequence < RustDatatype
         attr_reader :min
         attr_reader :max
@@ -511,6 +656,103 @@ module Rust
         def to_R
             "seq(from=#@min, to=#@max, by=#@step)"
         end
+        # Evaluates `<variable_name> <- <to_R>` through Rust._eval, binding
+        # the sequence to that name on the R side.
+        def load_in_r_as(variable_name)
+            assignment = "#{variable_name} <- #{self.to_R}"
+            Rust._eval(assignment)
+        end
+    end
+    # Array of data frames that can be stacked into a single one.
+    class DataFrameArray < Array
+        # Row-binds every element, in order, onto a clone of the first one
+        # and returns that clone; nil for an empty collection.
+        def bind_all
+            return nil if self.empty?
+            self.drop(1).each_with_object(self.first.clone) do |frame, merged|
+                merged.bind_rows!(frame)
+            end
+        end
+    end
+    # Hash whose values are data frames that can be stacked into one.
+    class DataFrameHash < Hash
+        # Row-binds every value, in insertion order, onto a clone of the
+        # first one and returns that clone; nil for an empty hash.
+        def bind_all
+            frames = self.values
+            return nil if frames.empty?
+            frames.drop(1).each_with_object(frames.first.clone) do |frame, merged|
+                merged.bind_rows!(frame)
+            end
+        end
+    end
+    # Array with element-wise arithmetic against a same-sized array or a
+    # single number. Every operator returns a clone; the receiver is left
+    # untouched.
+    class MathArray < Array
+        # Element-wise difference.
+        def -(other)
+            _elementwise(other) { |mine, theirs| mine - theirs }
+        end
+
+        # Element-wise product.
+        def *(other)
+            _elementwise(other) { |mine, theirs| mine * theirs }
+        end
+
+        # Element-wise sum.
+        def +(other)
+            _elementwise(other) { |mine, theirs| mine + theirs }
+        end
+
+        # Element-wise quotient.
+        def /(other) # the trailing slash re-balances syntax highlighting in Kate: /
+            _elementwise(other) { |mine, theirs| mine / theirs }
+        end
+
+        # Raises every element to the numeric power +other+.
+        def **(other)
+            raise ArgumentError, "Expected numeric" unless other.is_a?(Numeric)
+            powered = self.clone
+            powered.each_index { |i| powered[i] = powered[i] ** other }
+            powered
+        end
+
+        private
+
+        # Validates +other+, broadcasts a number to the receiver's size and
+        # combines the operands pairwise with the given block.
+        def _elementwise(other)
+            raise ArgumentError, "Expected array or numeric" unless other.is_a?(::Array) || other.is_a?(Numeric)
+            raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
+            operands = other.is_a?(Numeric) ? [other] * self.size : other
+            combined = self.clone
+            combined.each_index { |i| combined[i] = yield(combined[i], operands[i]) }
+            combined
+        end
     end
 end
@@ -554,6 +796,14 @@ class Array
     def to_R
         return "c(#{self.map { |e| e.to_R }.join(",")})"
     end
+    # Counts how many times each distinct element occurs; returns a Hash
+    # mapping element => count.
+    def distribution
+        self.each_with_object({}) do |value, counts|
+            counts[value] = counts[value].to_i + 1
+        end
+    end
 end
 class String
@@ -569,19 +819,21 @@ class Range
 end
 module Rust::RBindings
-    def read_csv(filename, **options)
-        Rust::CSV.read(filename, **options)
-    end
-    def write_csv(filename, dataframe, **options)
-        Rust::CSV.write(filename, dataframe, **options)
-    end
     def data_frame(*args)
         Rust::DataFrame.new(*args)
     end
 end
+# Helpers for building synthetic data.
+module Rust::TestCases
+    # Builds a Rust::DataFrame with the given column names and +size+ rows.
+    # The block is called with (row index, column name) and returns the
+    # value of that cell.
+    def self.sample_dataframe(columns, size=100)
+        frame = Rust::DataFrame.new(columns)
+        size.times do |row_index|
+            cells = columns.map { |column| yield(row_index, column) }
+            frame << cells
+        end
+        frame
+    end
+end
 def bind_r!
     include Rust::RBindings
 end

data/lib/rust-csv.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require_relative 'rust-core'
 module Rust
     class CSV
         def self.read_all(pattern, **options)
-            result = {}
+            result = DataFrameHash.new
             Dir.glob(pattern).each do |filename|
                 result[filename] = CSV.read(filename, **options)
             end
@@ -49,10 +49,9 @@ module Rust
             raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
             write_headers = options[:headers] != false
-            options[:headers] = dataframe.column_names if options[:headers] == nil
+            options[:headers] = dataframe.column_names unless options[:headers]
             hash = {}
-            labels = nil
             ::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
                 dataframe.each do |row|
                     csv << row
@@ -93,3 +92,13 @@ module Rust
         end
     end
 end
+# CSV shortcuts exposed through the R-like bindings.
+module Rust::RBindings
+    # Forwards to Rust::CSV.read with the same arguments.
+    def read_csv(filename, **options)
+        return Rust::CSV.read(filename, **options)
+    end
+
+    # Forwards to Rust::CSV.write with the same arguments.
+    def write_csv(filename, dataframe, **options)
+        return Rust::CSV.write(filename, dataframe, **options)
+    end
+end

data/lib/rust-descriptive.rb CHANGED Viewed

@@ -50,18 +50,59 @@ module Rust::Descriptive
         def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
             raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
-            raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
+            raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
-            Rust.exclusive do
-                Rust['descriptive.data'] = data
-                Rust['descriptive.percs'] = percentiles
-                call_result = Rust._pull("quantile(descriptive.data, descriptive.percs)")
-                assert { call_result.is_a?(Array) }
-                assert { call_result.size == percentiles.size }
-                return percentiles.zip(call_result).to_h
+            n = data.size
+            quantiles = percentiles.size
+            percentiles = percentiles.map { |x| x > 1.0 ? 1.0 : (x < 0.0 ? 0.0 : x) }
+            rough_indices = percentiles.map { |x| 1 + [n - 1, 0].max * x - 1 }
+            floor_indices = rough_indices.map { |i| i.floor }
+            ceil_indices = rough_indices.map { |i| i.ceil }
+            data = data.sort
+            result = floor_indices.map { |i| data[i] }
+            result_ceil = ceil_indices.map { |i| data[i] }
+            indices_to_fix = (0...quantiles).select { |i| rough_indices[i] > floor_indices[i] && result_ceil[i] != result[i] }
+            index_approximation_errors = indices_to_fix.map { |i| rough_indices[i] - floor_indices[i] }
+            reduced_index_approximation_errors = index_approximation_errors.map { |i| (1 - i) }
+            hi_indices = indices_to_fix.map { |i| ceil_indices[i] }
+            data_hi_indices = hi_indices.map { |i| data[i] }
+            j = 0
+            indices_to_fix.each do |i|
+                result[i] = reduced_index_approximation_errors[j] * result[i] + index_approximation_errors[j] * data_hi_indices[j]
+                j += 1
             end
+            return percentiles.zip(result).to_h
+        end
+        # Outliers of +data+ judged against the quartiles of +data+ itself.
+        # See outliers_according_to for +k+ and the options.
+        def outliers(data, k=1.5, **opts)
+            outliers_according_to(data, data, k, **opts)
+        end
+
+        # Values of +data+ that fall outside the fences computed from
+        # +data_distribution+: below Q1 - k*IQR or above Q3 + k*IQR.
+        #
+        # data::              values to test
+        # data_distribution:: values the quartiles are computed from
+        # k::                 fence multiplier (default 1.5)
+        # opts[:side]::       :positive / :pos / :p / :+ keeps only the values
+        #                     above the upper fence, :negative / :neg / :n / :-
+        #                     only those below the lower fence; with no side
+        #                     (or an unrecognized one) both are returned,
+        #                     negative ones first
+        def outliers_according_to(data, data_distribution, k=1.5, **opts)
+            quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
+            q1 = quantiles[0.25]
+            q3 = quantiles[0.75]
+            iqr = q3 - q1
+            positive_outliers = data.select { |d| d > q3 + iqr * k }
+            negative_outliers = data.select { |d| d < q1 - iqr * k }
+            outliers = negative_outliers + positive_outliers
+            if opts[:side]
+                case opts[:side].to_sym
+                # the short aliases belong to the side they abbreviate
+                when :positive, :pos, :p, :+
+                    outliers = positive_outliers
+                when :negative, :neg, :n, :-
+                    outliers = negative_outliers
+                end
+            end
+            return outliers
         end
     end
 end

data/lib/rust-effsize.rb CHANGED Viewed

@@ -24,6 +24,10 @@ module Rust::EffectSize::CliffDelta
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -32,10 +36,10 @@ module Rust::EffectSize::CliffDelta
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cliff's delta"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                         rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))             rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                       rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym   rescue nil
                 return result
             end
@@ -49,6 +53,10 @@ module Rust::EffectSize::CohenD
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            if d1.size <= 1 || d2.size <= 1
+                return Rust::EffectSize::Result.new
+            end
             Rust.exclusive do
                 Rust['effsize.a'] = d1
                 Rust['effsize.b'] = d2
@@ -57,10 +65,10 @@ module Rust::EffectSize::CohenD
                 result = Rust::EffectSize::Result.new
                 result.name                 = "Cohen's d"
-                result.estimate             = Rust._pull("effsize.result$estimate")
-                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))
-                result.confidence_level     = Rust._pull("effsize.result$conf.level")
-                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym
+                result.estimate             = Rust._pull("effsize.result$estimate")                       rescue Float::NAN
+                result.confidence_interval  = Range.new(*Rust._pull("effsize.result$conf.int"))           rescue nil
+                result.confidence_level     = Rust._pull("effsize.result$conf.level")                     rescue Float::NAN
+                result.magnitude            = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
                 return result
             end

data/lib/rust-plots.rb CHANGED Viewed

@@ -21,6 +21,14 @@ module Rust::Plots
             return self
         end
+        # Returns the colors to use for +size+ series: just 'black' for at
+        # most one series, otherwise whatever R's hcl.colors(n=size) yields.
+        def palette(size)
+            return ['black'] if size <= 1
+            Rust._pull("hcl.colors(n=#{size})")
+        end
         def x_range(range)
             @options['xlim'] = range
@@ -127,10 +135,18 @@ module Rust::Plots
     end
     class ScatterPlot < BasePlot
-        def initialize(x, y)
+        # Creates a scatter plot, optionally seeded with a first series.
+        #
+        # x, y::    values of the first series; it is added only when both
+        #           are given
+        # options:: per-series options, forwarded to #series
+        def initialize(x = nil, y = nil, **options)
             super()
-            @x = x
-            @y = y
+            @series = []
+            if x && y
+                # #series takes **options: a positional Hash is no longer
+                # converted to keywords on Ruby >= 3.0, so splat it
+                self.series(x, y, **options)
+            end
+        end
+        # Appends a series (x values, y values, per-series options) and
+        # returns the plot itself so that calls can be chained.
+        def series(x, y, **options)
+            @series.push([x, y, options])
+            self
+        end
         def thickness(t)
@@ -159,13 +175,66 @@ module Rust::Plots
         protected
         def _show()
-            Rust["plotter.x"] = @x
-            Rust["plotter.y"] = @y
+            first = true
+            palette = self.palette(@series.size)
+            i = 0
+            base_options = {}
+            unless @options['xlim']
+                x_values = @series.map { |v| v[0] }.flatten
+                y_values = @series.map { |v| v[1] }.flatten
+                base_options[:xlim] = [x_values.min, x_values.max]
+                base_options[:ylim] = [y_values.min, y_values.max]
+            end
+            @series.each do |x, y, options|
+                options = options.merge(base_options)
+                Rust["plotter.x"] = x
+                Rust["plotter.y"] = y
+                function = nil
+                if first
+                    function = Rust::Function.new("plot")
+                    first = false
+                else
+                    function = Rust::Function.new("lines")
+                end
+                augmented_options = {}
+                augmented_options['col'] = options[:color] || palette[i]
+                augmented_options['xlim'] = options[:xlim] if options[:xlim]
+                augmented_options['ylim'] = options[:ylim] if options[:ylim]
+                function.options = self._augmented_options(augmented_options)
+                function.arguments << Rust::Variable.new("plotter.x")
+                function.arguments << Rust::Variable.new("plotter.y")
+                function.call
+                i += 1
+            end
+            return self
+        end
+    end
+    class BarPlot < BasePlot
+        def initialize(bars)
+            super()
+            @bars = bars
+        end
+        protected
+        def _show()
+            Rust["plotter.bars"] = @bars.values
+            Rust["plotter.labels"] = @bars.keys
+            Rust._eval("names(plotter.bars) <- plotter.labels")
-            function = Rust::Function.new("plot")
+            function = Rust::Function.new("barplot")
             function.options = self._augmented_options
-            function.arguments << Rust::Variable.new("plotter.x")
-            function.arguments << Rust::Variable.new("plotter.y")
+            function.arguments << Rust::Variable.new("plotter.bars")
             function.call

data/lib/rust-probabilities.rb ADDED Viewed

@@ -0,0 +1,248 @@
+require_relative 'rust-core'
+# Distance between two numbers: the absolute value of their difference.
+class Numeric
+    # Raises TypeError when +other+ is not a Numeric.
+    def distance(other)
+        unless other.is_a?(Numeric)
+            raise TypeError, "no implicit conversion of #{other.class} into Numeric"
+        end
+        (self - other).abs
+    end
+end
+# Distance between two arrays: the sum of the position-wise distances of
+# their elements converted with to_i; positions missing from the shorter
+# array count as 0.
+class Array
+    # Raises TypeError when +other+ is not an Array.
+    def distance(other)
+        raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
+        longest, shortest = self.size > other.size ? [self, other] : [other, self]
+        longest.each_with_index.sum do |element, i|
+            element.to_i.distance(shortest[i].to_i)
+        end
+    end
+end
+# Distance between two strings: the Array distance of their byte values.
+class String
+    # Raises TypeError when +other+ is not a String.
+    def distance(other)
+        unless other.is_a?(String)
+            raise TypeError, "no implicit conversion of #{other.class} into String"
+        end
+        self.bytes.distance(other.bytes)
+    end
+end
+module Rust
+    # Subset of the outcomes of a random variable, stored as a Hash that
+    # maps each outcome to its probability.
+    class RandomVariableSlice
+        # Raises TypeError unless +values+ is a Hash.
+        def initialize(values)
+            raise TypeError, "Expected Hash" unless values.is_a?(Hash)
+            @values = values
+        end
+
+        # Without an argument: total probability of the slice. With an
+        # outcome: the probability stored for it (nil when absent).
+        def probability(v=nil)
+            v ? @values[v] : @values.values.sum
+        end
+
+        # Outcome with the highest probability.
+        def ml
+            @values.max_by { |_outcome, chance| chance }.first
+        end
+
+        # Sum of outcome * probability over the slice.
+        def expected
+            weighted = @values.map { |outcome, chance| outcome * chance }
+            weighted.sum
+        end
+
+        # The comparison operators return the sub-slice of the outcomes
+        # that satisfy the comparison with +n+.
+        def >(n)
+            self.so_that { |outcome| outcome > n }
+        end
+
+        def >=(n)
+            self.so_that { |outcome| outcome >= n }
+        end
+
+        def <(n)
+            self.so_that { |outcome| outcome < n }
+        end
+
+        def <=(n)
+            self.so_that { |outcome| outcome <= n }
+        end
+
+        def ==(n)
+            self.so_that { |outcome| outcome == n }
+        end
+
+        # Sub-slice of the outcomes for which the block is truthy.
+        def so_that
+            kept = @values.select { |outcome, _chance| yield(outcome) }
+            RandomVariableSlice.new(kept)
+        end
+
+        # Sub-slice of the outcomes lying between +a+ and +b+ (between?).
+        def between(a, b)
+            kept = @values.select { |outcome, _chance| outcome.between?(a, b) }
+            RandomVariableSlice.new(kept)
+        end
+    end
+    # Discrete random variable: a Hash mapping every outcome to its
+    # probability, with the probabilities adding up to 1 (within EPSILON).
+    class RandomVariable < RandomVariableSlice
+        # Tolerance on the total probability, and threshold at or below
+        # which approx! folds an outcome into its nearest neighbour.
+        EPSILON = 1e-7
+
+        attr_reader    :values
+
+        # values:: Hash outcome => probability; every probability must lie
+        #          in [0, 1] and their sum within EPSILON of 1
+        # exact::  when true, approx! leaves negligible outcomes in place
+        #
+        # Raises RuntimeError when the probabilities are invalid.
+        def initialize(values = {0 => 1.0}, exact = false)
+            @values = values
+            @exact = exact
+            raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
+            raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON
+            approx!
+        end
+
+        # Probability of outcome +v+ as a Float (0.0 when absent).
+        def probability(v)
+            return @values[v].to_f
+        end
+
+        # Distribution of the sum of this variable and +other+, treating
+        # the two as independent (joint probability = product).
+        def +(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    sum_key = my_key + other_key
+                    new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
+                end
+            end
+            return RandomVariable.new(new_hash, @exact)
+        end
+
+        # With an Integer: sum of that many copies (see rep). With a
+        # RandomVariable: product distribution (see mul). Anything else
+        # raises RuntimeError.
+        def *(times)
+            if times.is_a? Integer
+                return rep(times)
+            elsif times.is_a? RandomVariable
+                return mul(times)
+            else
+                raise "The argument must be an Integer or a RandomVariable"
+            end
+        end
+
+        # Distribution of the product of this variable and +other+,
+        # treating the two as independent.
+        def mul(other)
+            new_hash = {}
+            @values.each do |my_key, my_value|
+                other.values.each do |other_key, other_value|
+                    mul_key = my_key * other_key
+                    new_hash[mul_key] = new_hash[mul_key].to_f + (my_value * other_value)
+                end
+            end
+            return RandomVariable.new(new_hash, @exact)
+        end
+
+        # Adds this variable to itself until +times+ copies are summed;
+        # for +times+ <= 1 the receiver itself is returned.
+        def rep(times)
+            rv = self
+            (times-1).times do
+                rv += self
+            end
+            return rv
+        end
+
+        # Turns off the pruning done by approx!.
+        def exact!
+            @exact = true
+        end
+
+        # Unless the variable is exact, removes every outcome whose
+        # probability is at most EPSILON and adds that probability to the
+        # remaining outcome nearest to it (according to #distance).
+        def approx!
+            return if @exact
+            to_delete = []
+            @values.each do |v, probability|
+                to_delete.push v if probability <= EPSILON
+            end
+            to_delete.each do |v|
+                probability = @values.delete v
+                nearest = @values.keys.min_by { |k| k.distance v }
+                @values[nearest] += probability
+            end
+        end
+
+        # Draws one outcome at random according to the probabilities.
+        def extract
+            v = rand
+            cumulative = 0
+            @values.each do |key, prob|
+                cumulative += prob
+                return key if cumulative >= v
+            end
+            # The total may fall short of 1 by up to EPSILON (or by float
+            # rounding), so a draw close to 1 can outrun the running total;
+            # return the last outcome instead of the Hash that each returns.
+            return @values.keys.last
+        end
+
+        # Stores under +key+ the probability missing for +hash+ to add up
+        # to 1 (the Hash passed in is modified) and wraps the result in a
+        # RandomVariable.
+        def self.complete(hash, key=0)
+            hash[key] = 1 - hash.values.sum
+            return RandomVariable.new(hash)
+        end
+    end
+    # Random variable whose outcomes all have the same probability.
+    class UniformRandomVariable < RandomVariable
+        # values:: list of outcomes; each one gets probability 1 / size
+        # exact::  forwarded to RandomVariable#initialize
+        def initialize(values, exact = false)
+            share = 1.0 / values.size
+            super(values.map { |outcome| [outcome, share] }.to_h, exact)
+        end
+    end
+    # Mixin providing the textbook notations P(...) and E(...).
+    module Probabilities
+        # Total probability of a slice, e.g. P(X > 3).
+        #
+        # Raises RuntimeError for a whole RandomVariable (only a slice has
+        # a probability) and for any other kind of object.
+        def P(v)
+            if v.is_a? RandomVariableSlice
+                raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
+                return v.probability
+            else
+                # the message names the operation that was actually requested
+                raise "Cannot compute the probability of a #{v.class}"
+            end
+        end
+
+        # Expected value of a slice or of a whole random variable.
+        #
+        # Raises RuntimeError for any other kind of object.
+        def E(v)
+            if v.is_a? RandomVariableSlice
+                return v.expected
+            else
+                raise "Cannot compute the expected value of a #{v.class}"
+            end
+        end
+    end
+    class RandomVariable
+        ENGLISH_ALPHABET = RandomVariable.new({
+            "a" => 0.08167,
+            "b" => 0.01492,
+            "c" => 0.02782,
+            "d" => 0.04253,
+            "e" => 0.12703,
+            "f" => 0.02228,
+            "g" => 0.02015,
+            "h" => 0.06094,
+            "i" => 0.06966,
+            "j" => 0.00153,
+            "k" => 0.00772,
+            "l" => 0.04025,
+            "m" => 0.02406,
+            "n" => 0.06749,
+            "o" => 0.07507,
+            "p" => 0.01929,
+            "q" => 0.00095,
+            "r" => 0.05987,
+            "s" => 0.06327,
+            "t" => 0.09056,
+            "u" => 0.02758,
+            "v" => 0.00978,
+            "w" => 0.02360,
+            "x" => 0.00150,
+            "y" => 0.01974,
+            "z" => 0.00074
+        })
+        DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
+        COIN = UniformRandomVariable.new(["h", "t"])
+    end
+end

data/lib/rust-tests.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Rust::StatisticalTests
         attr_accessor   :pvalue
         attr_accessor   :exact
         attr_accessor   :alpha
+        attr_accessor   :hypothesis
         def initialize
             @statistics = {}
@@ -20,6 +21,16 @@ module Rust::StatisticalTests
             @statistics[name.to_sym] = value
         end
+        # P-value of this result adjusted, with the given method, across
+        # all the results attached to the same hypothesis; 1 when the
+        # result has no hypothesis.
+        def adjusted_pvalue(method='bonferroni')
+            @hypothesis ? @hypothesis.adjusted_pvalue_for(self, method) : 1
+        end
+        # Attaches this result to +value+ and registers the result with
+        # it. Assigning nil just detaches the result (adjusted_pvalue
+        # already treats a nil hypothesis as "no adjustment").
+        def hypothesis=(value)
+            @hypothesis = value
+            @hypothesis.add(self) if @hypothesis
+        end
         def significant
             pvalue < alpha
         end
@@ -31,32 +42,150 @@ module Rust::StatisticalTests
                     (!exact ? " P-value is not exact." : "")
         end
     end
+    # Groups the results of statistical tests that address the same hypothesis,
+    # so that their p-values can be adjusted together.
+    class Hypothesis
+        # Resolves a hypothesis from a title or an existing instance.
+        #
+        # @param title_or_instance [nil, String, Hypothesis]
+        # @return [Hypothesis] a new untitled hypothesis for nil; for a String,
+        #   a live instance with that title or else a new one; for a
+        #   Hypothesis, the argument itself
+        # @raise [TypeError] for any other kind of argument
+        def self.find(title_or_instance)
+            case title_or_instance
+            when nil
+                Hypothesis.new(nil)
+            when String
+                # There is no registry: titled hypotheses are looked up among the
+                # Hypothesis objects that currently exist in the object space
+                ObjectSpace.each_object(Hypothesis) do |instance|
+                    return instance if instance.title == title_or_instance
+                end
+                Hypothesis.new(title_or_instance)
+            when Hypothesis
+                title_or_instance
+            else
+                raise TypeError, "Expected nil, String or Hypothesis"
+            end
+        end
+        attr_reader :results
+        attr_reader :title
+        # @param title [String, nil] title used by Hypothesis.find to look the instance up
+        def initialize(title)
+            @title = title
+            @results = []
+        end
+        # Registers a test result under this hypothesis.
+        #
+        # @param result the result to append to +results+
+        def add(result)
+            @results << result
+        end
+        # Adjusts the p-values of all registered results together and returns
+        # the one belonging to +instance+.
+        #
+        # @param instance a result previously registered with #add
+        # @param method name of the correction, resolved by PValueAdjustment.method
+        # @return the adjusted p-value for +instance+
+        # @raise [ArgumentError] if +instance+ was never added to this hypothesis
+        def adjusted_pvalue_for(instance, method)
+            index = @results.index(instance)
+            # Fail early and clearly instead of indexing the adjusted values with nil
+            raise ArgumentError, "The given result does not belong to this hypothesis" if index.nil?
+            p_values = @results.map(&:pvalue)
+            adjusted = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)
+            # With a single p-value the adjustment may come back as a bare number
+            adjusted.is_a?(Numeric) ? adjusted : adjusted[index]
+        end
+    end
+end
+module Rust::StatisticalTests::PValueAdjustment
+    # Maps the name of a p-value correction to the class implementing it.
+    # NOTE: being defined on the module itself, this replaces the built-in
+    # Object#method for PValueAdjustment.
+    #
+    # @param name [#to_s] case-insensitive name or abbreviation:
+    #   "bonferroni"/"b", "holm"/"h", "hochberg", "hommel",
+    #   "benjaminihochberg"/"bh", "benjaminiyekutieli"/"by"
+    # @return [Class] the matching adjustment class
+    # @raise [ArgumentError] if the name is not recognised (instead of
+    #   returning nil and failing later on +nil.adjust+)
+    def self.method(name)
+        case name.to_s.downcase
+        when "bonferroni", "b"
+            Bonferroni
+        when "holm", "h"
+            Holm
+        when "hochberg"
+            Hochberg
+        when "hommel"
+            Hommel
+        when "benjaminihochberg", "bh"
+            BenjaminiHochberg
+        when "benjaminiyekutieli", "by"
+            BenjaminiYekutieli
+        else
+            raise ArgumentError, "Unknown p-value adjustment method: #{name}"
+        end
+    end
+    # Bonferroni correction, delegated to R's p.adjust.
+    class Bonferroni
+        class << self
+            # Hands the p-values to R as adjustment.p and pulls back the
+            # result of p.adjust with method="bonferroni".
+            #
+            # @param p_values the p-values to adjust together
+            # @return whatever Rust._pull yields for the p.adjust call
+            def adjust(*p_values)
+                Rust.exclusive do
+                    Rust['adjustment.p'] = p_values
+                    adjusted = Rust._pull("p.adjust(adjustment.p, method=\"bonferroni\")")
+                    return adjusted
+                end
+            end
+        end
+    end
+    # Holm correction, delegated to R's p.adjust.
+    class Holm
+        class << self
+            # Hands the p-values to R as adjustment.p and pulls back the
+            # result of p.adjust with method="holm".
+            #
+            # @param p_values the p-values to adjust together
+            # @return whatever Rust._pull yields for the p.adjust call
+            def adjust(*p_values)
+                Rust.exclusive do
+                    Rust['adjustment.p'] = p_values
+                    adjusted = Rust._pull("p.adjust(adjustment.p, method=\"holm\")")
+                    return adjusted
+                end
+            end
+        end
+    end
+    # Hochberg correction, delegated to R's p.adjust.
+    class Hochberg
+        class << self
+            # Hands the p-values to R as adjustment.p and pulls back the
+            # result of p.adjust with method="hochberg".
+            #
+            # @param p_values the p-values to adjust together
+            # @return whatever Rust._pull yields for the p.adjust call
+            def adjust(*p_values)
+                Rust.exclusive do
+                    Rust['adjustment.p'] = p_values
+                    adjusted = Rust._pull("p.adjust(adjustment.p, method=\"hochberg\")")
+                    return adjusted
+                end
+            end
+        end
+    end
+    # Hommel correction, delegated to R's p.adjust.
+    class Hommel
+        class << self
+            # Hands the p-values to R as adjustment.p and pulls back the
+            # result of p.adjust with method="hommel".
+            #
+            # @param p_values the p-values to adjust together
+            # @return whatever Rust._pull yields for the p.adjust call
+            def adjust(*p_values)
+                Rust.exclusive do
+                    Rust['adjustment.p'] = p_values
+                    adjusted = Rust._pull("p.adjust(adjustment.p, method=\"hommel\")")
+                    return adjusted
+                end
+            end
+        end
+    end
+    # Benjamini-Hochberg correction, delegated to R's p.adjust.
+    class BenjaminiHochberg
+        class << self
+            # Hands the p-values to R as adjustment.p and pulls back the
+            # result of p.adjust with method="BH".
+            #
+            # @param p_values the p-values to adjust together
+            # @return whatever Rust._pull yields for the p.adjust call
+            def adjust(*p_values)
+                Rust.exclusive do
+                    Rust['adjustment.p'] = p_values
+                    adjusted = Rust._pull("p.adjust(adjustment.p, method=\"BH\")")
+                    return adjusted
+                end
+            end
+        end
+    end
+    # Benjamini-Yekutieli correction, delegated to R's p.adjust.
+    class BenjaminiYekutieli
+        class << self
+            # Hands the p-values to R as adjustment.p and pulls back the
+            # result of p.adjust with method="BY".
+            #
+            # @param p_values the p-values to adjust together
+            # @return whatever Rust._pull yields for the p.adjust call
+            def adjust(*p_values)
+                Rust.exclusive do
+                    Rust['adjustment.p'] = p_values
+                    adjusted = Rust._pull("p.adjust(adjustment.p, method=\"BY\")")
+                    return adjusted
+                end
+            end
+        end
+    end
 end
 module Rust::StatisticalTests::Wilcoxon
     class << self
-        def paired(d1, d2, alpha = 0.05)
+         # Runs R's two-sided, paired wilcox.test on two equally sized samples.
+         #
+         # @param d1 [Array<Numeric>] first sample
+         # @param d2 [Array<Numeric>] second sample, same size as d1
+         # @param alpha [Numeric] significance level stored on the result
+         # @param options only :hypothesis is read here; it is passed to Hypothesis.find
+         # @return [Rust::StatisticalTests::Result] carrying name, p-value, the
+         #   :w statistic, the exact flag, alpha and the resolved hypothesis
+         # @raise [TypeError] if d1 or d2 is not an Array of Numeric
+         # @raise [RuntimeError] if the two samples differ in size
+         def paired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
             raise "The two distributions have different size" if d1.size != d2.size
             Rust.exclusive do
                 Rust["wilcox.a"] = d1
                 Rust["wilcox.b"] = d2
                 _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=T)", true)
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Wilcoxon Signed-Rank test"
-                result.pvalue    = Rust._pull("wilcox.result$p.value")
-                result[:w]       = Rust._pull("wilcox.result$statistic")
-                result.exact     = !warnings.include?("cannot compute exact p-value with zeroes")
-                result.alpha     = alpha
+                result.name       = "Wilcoxon Signed-Rank test"
+                result.pvalue     = Rust._pull("wilcox.result$p.value")
+                result[:w]        = Rust._pull("wilcox.result$statistic")
+                # The p-value is flagged as not exact when R warned about zeroes
+                result.exact      = !warnings.include?("cannot compute exact p-value with zeroes")
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
         end
-        def unpaired(d1, d2, alpha = 0.05)
+        def unpaired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -66,11 +195,12 @@ module Rust::StatisticalTests::Wilcoxon
                 _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=F)", true)
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
-                result.pvalue    = Rust._pull("wilcox.result$p.value")
-                result[:w]       = Rust._pull("wilcox.result$statistic")
-                result.exact     = !warnings.include?("cannot compute exact p-value with ties")
-                result.alpha     = alpha
+                result.name       = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
+                result.pvalue     = Rust._pull("wilcox.result$p.value")
+                result[:w]        = Rust._pull("wilcox.result$statistic")
+                result.exact      = !warnings.include?("cannot compute exact p-value with ties")
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
@@ -80,7 +210,7 @@ end
 module Rust::StatisticalTests::T
     class << self
-        def paired(d1, d2, alpha = 0.05)
+        def paired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
             raise "The two distributions have different size" if d1.size != d2.size
@@ -91,17 +221,18 @@ module Rust::StatisticalTests::T
                 warnings = Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=T)")
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Paired t-test"
-                result.pvalue    = Rust._pull("t.result$p.value")
-                result[:t]       = Rust._pull("t.result$statistic")
-                result.exact     = true
-                result.alpha     = alpha
+                result.name       = "Paired t-test"
+                result.pvalue     = Rust._pull("t.result$p.value")
+                result[:t]        = Rust._pull("t.result$statistic")
+                result.exact      = true
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
         end
-        def unpaired(d1, d2, alpha = 0.05)
+        def unpaired(d1, d2, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
             raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -111,11 +242,12 @@ module Rust::StatisticalTests::T
                 Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=F)")
                 result = Rust::StatisticalTests::Result.new
-                result.name      = "Welch Two Sample t-test"
-                result.pvalue    = Rust._pull("t.result$p.value")
-                result[:t]       = Rust._pull("t.result$statistic")
-                result.exact     = true
-                result.alpha     = alpha
+                result.name       = "Welch Two Sample t-test"
+                result.pvalue     = Rust._pull("t.result$p.value")
+                result[:t]        = Rust._pull("t.result$statistic")
+                result.exact      = true
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end
@@ -125,18 +257,19 @@ end
 module Rust::StatisticalTests::Shapiro
     class << self
-        def compute(vector, alpha = 0.05)
+        def compute(vector, alpha = 0.05, **options)
             raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
             Rust.exclusive do
                 Rust['shapiro.v'] = vector
                 Rust._eval("shapiro.result = shapiro.test(shapiro.v)")
                 result = Rust::StatisticalTests::Result.new
-                result.name     = "Shapiro-Wilk normality test"
-                result.pvalue   = Rust._pull("shapiro.result$p.value")
-                result[:W]      = Rust._pull("shapiro.result$statistic")
-                result.exact    = true
-                result.alpha    = alpha
+                result.name       = "Shapiro-Wilk normality test"
+                result.pvalue     = Rust._pull("shapiro.result$p.value")
+                result[:W]        = Rust._pull("shapiro.result$statistic")
+                result.exact      = true
+                result.alpha      = alpha
+                result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
                 return result
             end

data/lib/rust.rb CHANGED Viewed

@@ -6,3 +6,4 @@ require_relative 'rust-effsize'
 require_relative 'rust-descriptive'
 require_relative 'rust-plots'
 require_relative 'rust-calls'
+require_relative 'rust-probabilities'

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rust
 version: !ruby/object:Gem::Version
-  version: '0.4'
+  version: '0.7'
 platform: ruby
 authors:
 - Simone Scalabrino
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-09-15 00:00:00.000000000 Z
+date: 2021-02-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rinruby
@@ -63,6 +63,7 @@ files:
 - lib/rust-descriptive.rb
 - lib/rust-effsize.rb
 - lib/rust-plots.rb
+- lib/rust-probabilities.rb
 - lib/rust-tests.rb
 - lib/rust.rb
 homepage: https://github.com/intersimone999/ruby-rust
@@ -84,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.4
+rubygems_version: 3.2.7
 signing_key:
 specification_version: 4
 summary: Ruby advanced statistical library