RubyGems - rust - Versions diffs - 0.7 → 0.11 - Mend

rust 0.7 → 0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

checksums.yaml +4 -4
data/bin/ruby-rust +3 -0
data/lib/{rust-csv.rb → rust/core/csv.rb} +23 -1
data/lib/rust/core/rust.rb +221 -0
data/lib/rust/core/types/all.rb +4 -0
data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +159 -331
data/lib/rust/core/types/datatype.rb +195 -0
data/lib/rust/core/types/factor.rb +158 -0
data/lib/rust/core/types/language.rb +199 -0
data/lib/rust/core/types/list.rb +97 -0
data/lib/rust/core/types/matrix.rb +155 -0
data/lib/rust/core/types/s4class.rb +78 -0
data/lib/rust/core/types/utils.rb +122 -0
data/lib/rust/core.rb +7 -0
data/lib/rust/external/robustbase.rb +44 -0
data/lib/rust/models/all.rb +4 -0
data/lib/rust/models/anova.rb +77 -0
data/lib/rust/models/regression.rb +258 -0
data/lib/rust/plots/all.rb +4 -0
data/lib/rust/plots/basic-plots.rb +143 -0
data/lib/{rust-plots.rb → rust/plots/core.rb} +89 -167
data/lib/rust/plots/distribution-plots.rb +75 -0
data/lib/rust/stats/all.rb +4 -0
data/lib/{rust-basics.rb → rust/stats/correlation.rb} +45 -2
data/lib/{rust-descriptive.rb → rust/stats/descriptive.rb} +52 -3
data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +28 -13
data/lib/{rust-probabilities.rb → rust/stats/probabilities.rb} +142 -34
data/lib/{rust-tests.rb → rust/stats/tests.rb} +178 -92
data/lib/rust.rb +4 -9
metadata +32 -13
data/lib/rust-calls.rb +0 -80

data/lib/{rust-core.rb → rust/core/types/dataframe.rb} RENAMED Viewed

@@ -1,127 +1,34 @@
-require 'code-assertions'
-require 'stringio'
-require 'rinruby'
-require 'csv'
+require_relative 'datatype'
 module Rust
-    CLIENT_MUTEX = Mutex.new
-    R_MUTEX      = Mutex.new
-    R_ENGINE     = RinRuby.new(echo: false)
+    ##
+    # Mirror of the data-frame type in R.
-    private_constant    :R_ENGINE
-    private_constant    :R_MUTEX
-    private_constant    :CLIENT_MUTEX
-    @@debugging = false
-    @@in_client_mutex = false
-    def self.debug
-        @@debugging = true
-    end
-    def self.exclusive
-        result = nil
-        CLIENT_MUTEX.synchronize do
-            @@in_client_mutex = true
-            result = yield
-            @@in_client_mutex = false
-        end
-        return result
-    end
-    def self.[]=(variable, value)
-        if value.is_a?(RustDatatype)
-            value.load_in_r_as(variable.to_s)
-        elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
-            R_ENGINE.assign(variable, value)
-        else
-            raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
+    class DataFrame < RustDatatype
+        def self.can_pull?(type, klass)
+            return [klass].flatten.include?("data.frame")
         end
-    end
-    def self.[](variable, type=RustDatatype)
-        return type.pull_variable(variable)
-    end
-    def self._eval_big(r_command, return_warnings = false)
-        r_command = r_command.join("\n") if r_command.is_a?(Array)
-        self._rexec(r_command, return_warnings) do |cmd|
-            result = true
-            instructions = cmd.lines
-            while instructions.size > 0
-                current_command = ""
-                while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
-                    current_command << instructions.shift
-                end
-                result &= R_ENGINE.eval(current_command)
-            end
-            result
-        end
-    end
-    def self._pull(r_command, return_warnings = false)
-        self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
-    end
-    def self._eval(r_command, return_warnings = false)
-        self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
-    end
-    def self._rexec(r_command, return_warnings = false)
-        puts "Calling _rexec with command: #{r_command}" if @@debugging
-        R_MUTEX.synchronize do
-            assert("This command must be executed in an exclusive block") { @@in_client_mutex }
-            result = nil
-            begin
-                $stdout = StringIO.new
-                if return_warnings
-                    R_ENGINE.echo(true, true)
-                else
-                    R_ENGINE.echo(false, false)
-                end
-                result = yield(r_command)
-            ensure
-                R_ENGINE.echo(false, false)
-                warnings = $stdout.string
-                $stdout = STDOUT
-            end
-            if return_warnings
-                return result, warnings.lines.map { |w| w.strip.chomp }
-            else
-                return result
-            end
-        end
-    end
-    class RustDatatype
-        def self.pull_variable(variable)
-            return Rust._pull(variable)
+        def self.pull_priority
+            1
         end
-        def load_in_r_as(r_instance, variable_name)
-            raise "Not implemented"
-        end
-    end
-    class DataFrame < RustDatatype
-        def self.pull_variable(variable)
+        def self.pull_variable(variable, type, klass)
             hash = {}
-            colnames = Rust._pull("colnames(#{variable})")
+            colnames = Rust["colnames(#{variable})"]
             colnames.each do |col|
-                hash[col] = Rust._pull("#{variable}$#{col}")
+                hash[col] = Rust["#{variable}$\"#{col}\""]
             end
             return DataFrame.new(hash)
         end
+        ##
+        # Creates a new data-frame.
+        # +labels_or_data+ can be either:
+        # - an Array of column names (creates an empty data-frame)
+        # - a Hash with column names as keys and values as values
         def initialize(labels_or_data)
             @data = {}
@@ -137,6 +44,9 @@ module Rust
             end
         end
+        ##
+        # Returns the +i+-th row of the data-frame
         def row(i)
             if i < 0 || i >= self.rows
                 return nil
@@ -145,6 +55,9 @@ module Rust
             end
         end
+        ##
+        # Returns the +i+-th row of the data-frame. Faster (but harder to interpret) alternative to #row.
         def fast_row(i)
             if i < 0 || i >= self.rows
                 return nil
@@ -153,6 +66,9 @@ module Rust
             end
         end
+        ##
+        # Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
         def shuffle(*args)
             result = DataFrame.new(@labels)
@@ -167,6 +83,10 @@ module Rust
             return result
         end
+        ##
+        # Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
+        # are nil, all the rows/columns are returned.
         def [](rows, cols=nil)
             raise "You must specify either rows or columns to select" if !rows && !cols
             result = self
@@ -182,11 +102,17 @@ module Rust
             return result
         end
+        ##
+        # Returns the column named +name+.
         def column(name)
             return @data[name]
         end
         alias :| :column
+        ##
+        # Renames the column named +old_name+ to +new_name+.
         def rename_column!(old_name, new_name)
             raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
             raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
@@ -195,10 +121,24 @@ module Rust
             @labels[@labels.index(old_name)] = new_name
         end
+        ##
+        # Functionally transforms the column named +column+ by applying the function given as a block.
+        # Example:
+        # df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
+        # df.transform_column!("a") { |v| v + 1 }
+        # df|"a" # => [2, 3, 4]
         def transform_column!(column)
             @data[column].map! { |e| yield e }
         end
+        ##
+        # Returns a copy of the data-frame with only the rows for which the function given in the block returns true.
+        # Example:
+        # df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
+        # df2 = df.select_rows { |r| r['a'].even? }
+        # df2|"b" # => ['b']
         def select_rows
             result = DataFrame.new(self.column_names)
             self.each_with_index do |row, i|
@@ -207,6 +147,9 @@ module Rust
             return result
         end
+        ##
+        # Returns true if the function given in the block returns true for any of the rows in this data-frame.
         def has_row?
             self.each_with_index do |row, i|
                 return true if yield row, i
@@ -214,6 +157,10 @@ module Rust
             return false
         end
+        ##
+        # Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
+        # (only the columns for which the function returns true are kept).
         def select_columns(cols=nil)
             raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
@@ -229,23 +176,35 @@ module Rust
         end
         alias :select_cols :select_columns
+        ##
+        # Deletes the column named +column+.
         def delete_column(column)
             @labels.delete(column)
             @data.delete(column)
         end
+        ##
+        # Deletes the +i+-th row.
         def delete_row(i)
             @data.each do |label, column|
                 column.delete_at(i)
             end
         end
+        ##
+        # Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
         def uniq_by(by)
             result = self.clone
             result.uniq_by!(by)
             return result
         end
+        ##
+        # Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
         def uniq_by!(by)
             my_keys = {}
             to_delete = []
@@ -268,19 +227,33 @@ module Rust
             return self
         end
+        ##
+        # Returns the names of the columns.
         def column_names
             return @labels.map { |k| k.to_s }
         end
         alias :colnames :column_names
+        ##
+        # Returns the number of rows.
         def rows
             @data.values[0].size
         end
+        ##
+        # Returns the number of columns
         def columns
             @labels.size
         end
+        ##
+        # Adds the given +row+ to the data-frame. +row+ can be either:
+        # - An Array of values for all the columns (in the order of #column_names);
+        # - A Hash containing associations between column names and value to be set.
         def add_row(row)
             if row.is_a?(Array)
                 raise "Expected an array of size #{@data.size}" unless row.size == @data.size
@@ -304,6 +277,11 @@ module Rust
         end
         alias :<< :add_row
+        ##
+        # Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
+        # rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
+        # value to assign for the new column.
         def add_column(name, values=nil)
             raise "Column already exists" if @labels.include?(name)
             raise "Values or block required" if !values && !block_given?
@@ -320,6 +298,9 @@ module Rust
             end
         end
+        ##
+        # Yields each row as a Hash containing column names as keys and values as values.
         def each
             self.each_with_index do |element, i|
                 yield element
@@ -328,6 +309,10 @@ module Rust
             return self
         end
+        ##
+        # Yields each row as an Array (rather than a Hash keyed by column name). Faster alternative to
+        # #each.
         def fast_each
             self.fast_each_with_index do |element, i|
                 yield element
@@ -336,6 +321,9 @@ module Rust
             return self
         end
+        ##
+        # Yields each row as a Hash containing column names as keys and values as values and the row index.
         def each_with_index
             for i in 0...self.rows
                 element = {}
@@ -349,6 +337,10 @@ module Rust
             return self
         end
+        ##
+        # Yields each row as an Array (rather than a Hash keyed by column name) and the row index. Faster
+        # alternative to #each_with_index.
         def fast_each_with_index
             for i in 0...self.rows
                 element = []
@@ -373,6 +365,14 @@ module Rust
                 row_index += 1
             end
+            self.column_names.each do |name|
+                column = self.column(name)
+                if column.is_a?(Factor)
+                    command << "#{variable_name}[,#{name.to_R}] <- factor(#{variable_name}[,#{name.to_R}], labels=#{column.levels.to_R})"
+                end
+            end
             Rust._eval_big(command)
         end
@@ -397,6 +397,9 @@ module Rust
             return result
         end
+        ##
+        # Returns a copy of the data-frame containing only the first +n+ rows.
         def head(n=10)
             result = DataFrame.new(self.column_names)
             self.each_with_index do |row, i|
@@ -405,6 +408,11 @@ module Rust
             return result
         end
+        ##
+        # Merges this data-frame with +other+ in terms of the +by+ columns (Array of Strings).
+        # +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
+        # for this and the +other+ data-frame, respectively.
         def merge(other, by, first_alias = "x", second_alias = "y")
             raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
             raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
@@ -471,6 +479,14 @@ module Rust
             return result
         end
+        ##
+        # Aggregates the values in groups depending on the +by+ column (String).
+        # A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
+        # specified as optional arguments in which the name of the argument represents the column name and the value
+        # contains a block for aggregating the specific column.
+        # Both the default and the specialized blocks must take as argument an array of values and must return a
+        # scalar value.
         def aggregate(by, **aggregators)
             raise TypeError, "Expected a string" unless by.is_a?(String)
             raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
@@ -511,12 +527,18 @@ module Rust
             return result
         end
+        ##
+        # Returns a copy of this data-frame in which the rows are sorted by the values of the +column+ column.
         def sort_by(column)
             result = self.clone
             result.sort_by!(column)
             return result
         end
+        ##
+        # Sorts the rows of this data-frame by the values of the +by+ column.
         def sort_by!(by)
             copy = @data[by].clone
             copy.sort!
@@ -542,6 +564,9 @@ module Rust
             @data[by].sort!
         end
+        ##
+        # Adds all the rows in +dataframe+ to this data-frame. The column names must match.
         def bind_rows!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -554,6 +579,9 @@ module Rust
         end
         alias :rbind! :bind_rows!
+        ##
+        # Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
         def bind_columns!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The number of rows are not compatible" if self.rows != dataframe.rows
@@ -567,6 +595,9 @@ module Rust
         end
         alias :cbind! :bind_columns!
+        ##
+        # Returns a copy of this data-frame to which all the rows in +dataframe+ have been added. The column names must match.
         def bind_rows(dataframe)
             result = self.clone
             result.bind_rows!(dataframe)
@@ -574,6 +605,9 @@ module Rust
         end
         alias :rbind :bind_rows
+        ##
+        # Returns a copy of this data-frame to which all the columns in +dataframe+ have been added. The number of rows must match.
         def bind_columns(dataframe)
             result = self.clone
             result.bind_columns!(dataframe)
@@ -581,88 +615,22 @@ module Rust
         end
         alias :cbind :bind_columns
+        ##
+        # Returns a copy of this data-frame.
         def clone
             DataFrame.new(@data)
         end
     end
-    class Matrix < RustDatatype
-        def self.pull_variable(variable)
-            return Rust._pull(variable)
-        end
-        def initialize(data)
-            if data.flatten.size == 0
-                raise "Empty matrices are not allowed"
-            else
-                raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
-                raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all?  { |e| e.is_a?(Numeric) } }
-                raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
-                @data = data.clone
-            end
-        end
-        def [](i, j)
-            return @data[i][j]
-        end
-        def rows
-            @data.size
-        end
-        def cols
-            @data[0].size
-        end
-        def []=(i, j, value)
-            raise "Wrong i" unless i.between?(0, @data.size - 1)
-            raise "Wrong j" unless j.between?(0, @data[0].size - 1)
-            @data[i][j] = value
-        end
-        def load_in_r_as(variable_name)
-            Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=T)")
-        end
-    end
+    ##
+    # Represents an array of DataFrame
-    class Sequence < RustDatatype
-        attr_reader :min
-        attr_reader :max
-        def initialize(min, max, step=1)
-            @min = min
-            @max = max
-            @step = step
-        end
-        def step(step)
-            @step = step
-        end
-        def each
-            (@min..@max).step(@step) do |v|
-                yield v
-            end
-        end
-        def to_a
-            result = []
-            self.each do |v|
-                result << v
-            end
-            return result
-        end
+    class DataFrameArray < Array
-        def to_R
-            "seq(from=#@min, to=#@max, by=#@step)"
-        end
+        ##
+        # Returns a data-frame with the rows in all the data-frames together (if compatible).
-        def load_in_r_as(variable_name)
-            Rust._eval("#{variable_name} <- #{self.to_R}")
-        end
-    end
-    class DataFrameArray < Array
         def bind_all
             return nil if self.size == 0
@@ -676,7 +644,14 @@ module Rust
         end
     end
+    ##
+    # Represents a hash of DataFrame
     class DataFrameHash < Hash
+        ##
+        # Returns a data-frame with the rows in all the data-frames together (if compatible).
         def bind_all
             return nil if self.values.size == 0
@@ -689,151 +664,4 @@ module Rust
             return result
         end
     end
-    class MathArray < Array
-        def -(other)
-            raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
-            raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
-            result = self.clone
-            other = [other] * self.size if other.is_a?(Numeric)
-            for i in 0...self.size
-                result[i] -= other[i]
-            end
-            return result
-        end
-        def *(other)
-            raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
-            raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
-            result = self.clone
-            other = [other] * self.size if other.is_a?(Numeric)
-            for i in 0...self.size
-                result[i] *= other[i]
-            end
-            return result
-        end
-        def +(other)
-            raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
-            raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
-            result = self.clone
-            other = [other] * self.size if other.is_a?(Numeric)
-            for i in 0...self.size
-                result[i] += other[i]
-            end
-            return result
-        end
-        def /(other) #To recover the syntax highlighting but in Kate: /
-            raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
-            raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
-            result = self.clone
-            other = [other] * self.size if other.is_a?(Numeric)
-            for i in 0...self.size
-                result[i] /= other[i]
-            end
-            return result
-        end
-        def **(other)
-            raise ArgumentError, "Expected numeric" if !other.is_a?(Numeric)
-            result = self.clone
-            for i in 0...self.size
-                result[i] = result[i] ** other
-            end
-            return result
-        end
-    end
-end
-class TrueClass
-    def to_R
-        "TRUE"
-    end
-end
-class FalseClass
-    def to_R
-        "FALSE"
-    end
-end
-class Object
-    def to_R
-        raise TypeError, "Unsupported type for #{self.class}"
-    end
-end
-class NilClass
-    def to_R
-        return "NULL"
-    end
-end
-class Numeric
-    def to_R
-        self.inspect
-    end
-end
-class Float
-    def to_R
-        return self.nan? ? "NA" : super
-    end
-end
-class Array
-    def to_R
-        return "c(#{self.map { |e| e.to_R }.join(",")})"
-    end
-    def distribution
-        result = {}
-        self.each do |value|
-            result[value] = result[value].to_i + 1
-        end
-        return result
-    end
-end
-class String
-    def to_R
-        return self.inspect
-    end
-end
-class Range
-    def to_R
-        [range.min, range.max].to_R
-    end
-end
-module Rust::RBindings
-    def data_frame(*args)
-        Rust::DataFrame.new(*args)
-    end
-end
-module Rust::TestCases
-    def self.sample_dataframe(columns, size=100)
-        result = Rust::DataFrame.new(columns)
-        size.times do |i|
-            result << columns.map { |c| yield i, c }
-        end
-        return result
-    end
-end
-def bind_r!
-    include Rust::RBindings
 end