RubyGems - rust - Versions diffs - 0.9 → 0.10 - Mend

rust 0.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

checksums.yaml +4 -4
data/bin/ruby-rust +1 -1
data/lib/rust/core/csv.rb +21 -0
data/lib/rust/core/rust.rb +65 -1
data/lib/rust/core/types/dataframe.rb +146 -0
data/lib/rust/core/types/datatype.rb +34 -0
data/lib/rust/core/types/factor.rb +27 -0
data/lib/rust/core/types/language.rb +44 -11
data/lib/rust/core/types/list.rb +16 -0
data/lib/rust/core/types/matrix.rb +29 -6
data/lib/rust/core/types/s4class.rb +19 -0
data/lib/rust/core/types/utils.rb +14 -1
data/lib/rust/models/anova.rb +17 -0
data/lib/rust/models/regression.rb +54 -1
data/lib/rust/plots/basic-plots.rb +32 -0
data/lib/rust/plots/core.rb +90 -0
data/lib/rust/plots/distribution-plots.rb +13 -0
data/lib/rust/stats/correlation.rb +43 -0
data/lib/rust/stats/descriptive.rb +29 -0
data/lib/rust/stats/effsize.rb +21 -0
data/lib/rust/stats/probabilities.rb +141 -33
data/lib/rust/stats/tests.rb +97 -5
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: aaa404db11033ff42b529516ae4a3f3e252bdddf7677d082847ee06625144f8e
-  data.tar.gz: f72ebc2c95385b87a445f3fb8de3e517579b002e0606209903acd2327616befd
+  metadata.gz: fe8f5c3e0753395fe3925f7a64eab0476308df329a9c9d594c74d1e568419204
+  data.tar.gz: 9f5371713565e77777deba19ba745bb358e0a23dfad6fb562e3940cf90cf8f1e
 SHA512:
-  metadata.gz: 00567c9b6216f7e9dc1a4135dbea263eecae2178273851c023980233531729313e15147e1bec8e9521df2d4fee15e2c4c8bf9bd4a0e2d1475f1d383339a17c21
-  data.tar.gz: 89800fff95be559e6f6bbbc05b6b5f5b52b7d59c54c61cd76ef679876ce5de0c08b405d88eadf0b254c95306ad98f38120e5960a92ce8358fca19ed6cc5c181d
+  metadata.gz: abf20b1c4cea07089c27ab886ff640886ff4e5f74d2b964b3ab959d413cdf1d34d72055e1d7141e3585df5adcdfbf5a53716d3cf7ca7b352ee02d4e39dda020b
+  data.tar.gz: c6c97cc64a50449bcf97a5d584f6a17b4e05762f79321ed9dcccfee496b567933f3fb0b4e8908c4f8029aeb0683cb7d86863586f7c200979f98911aa5b82c258

data/bin/ruby-rust CHANGED Viewed

@@ -1,3 +1,3 @@
 #!/usr/bin/ruby
-system "irb -r \"rust\""
+system "RUBY_RUST_BINDING=1 irb -r \"rust\""

data/lib/rust/core/csv.rb CHANGED Viewed

@@ -2,7 +2,16 @@ require_relative '../core'
 require 'csv'
 module Rust
+    ##
+    # Class that handles CSVs (both loading and saving).
     class CSV
+        ##
+        # Reads a +pattern+ of CSVs (glob-style pattern) and returns a map containing as keys the filenames of the
+        # loaded CSVs and as values the corresponding data-frames. Options can be specified (see #read).
         def self.read_all(pattern, **options)
             result = DataFrameHash.new
             Dir.glob(pattern).each do |filename|
@@ -11,6 +20,13 @@ module Rust
             return result
         end
+        ##
+        # Reads the CSV at +filename+. Options can be specified, such as:
+        # - headers => set to true if the first row contains the headers, false otherwise;
+        # - infer_numbers => if a column contains only numbers, the values are transformed into floats; true by default;
+        # - infer_integers => if infer_numbers is active, it distinguishes between integers and floats;
+        # The other options are the ones that can be used in the R function "read.csv".
         def self.read(filename, **options)
             hash = {}
             labels = nil
@@ -46,6 +62,11 @@ module Rust
             return result
         end
+        ##
+        # Writes the +dataframe+ as a CSV at +filename+. Options can be specified, such as:
+        # - headers => set to true if the first row should contain the headers, false otherwise;
+        # The other options are the ones that can be used in the R function "read.csv".
         def self.write(filename, dataframe, **options)
             raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)

data/lib/rust/core/rust.rb CHANGED Viewed

@@ -2,6 +2,10 @@ require 'code-assertions'
 require 'stringio'
 require 'rinruby'
+##
+# Basic module for the Rust package. It includes a series of sub-modules that provide specific features, such as
+# statistical hypothesis tests, plots, and so on.
 module Rust
     CLIENT_MUTEX = Mutex.new
     R_MUTEX      = Mutex.new
@@ -15,14 +19,23 @@ module Rust
     @@debugging = $RUST_DEBUG || false
     @@in_client_mutex = false
+    ##
+    # Sets the debug mode. Any call to R will be written on the standard output.
     def self.debug
         @@debugging = true
     end
+    ##
+    # Checks if the debug mode is active.
     def self.debug?
         return @@debugging
     end
+    ##
+    # Runs the given block with a mutex. It is mandatory to run any R command with this method.
     def self.exclusive
         result = nil
         CLIENT_MUTEX.synchronize do
@@ -33,6 +46,13 @@ module Rust
         return result
     end
+    ##
+    # Sets a variable in the R environment with a given value.
+    #
+    # Raises an error if the value cannot be translated into an R object.
+    #
+    # Example: Rust['a'] = 0.
     def self.[]=(variable, value)
         if value.is_a?(RustDatatype)
             value.load_in_r_as(variable.to_s)
@@ -41,9 +61,13 @@ module Rust
         else
             raise "Trying to assign #{variable} with #{value.class}; expected RustDatatype, String, Numeric, or Array"
         end
     end
+    ##
+    # Retrieves the value of a variable from the R environment.
+    #
+    # Example: Rust['a']
     def self.[](variable)
         return RustDatatype.pull_variable(variable)
     end
@@ -107,6 +131,9 @@ module Rust
         end
     end
+    ##
+    # Checks if the given +name+ library can be used. Returns true if it is available, false otherwise.
     def self.check_library(name)
         self.exclusive do
             result, _ = self._pull("require(\"#{name}\", character.only = TRUE)", true)
@@ -114,6 +141,9 @@ module Rust
         end
     end
+    ##
+    # Loads the given +name+ library.
     def self.load_library(name)
         self.exclusive do
             self._eval("library(\"#{name}\", character.only = TRUE)")
@@ -122,6 +152,9 @@ module Rust
         return nil
     end
+    ##
+    # Installs the given +name+ library and its dependencies.
     def self.install_library(name)
         self.exclusive do
             self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
@@ -130,12 +163,38 @@ module Rust
         return nil
     end
+    ##
+    # Installs the +library+ library if it is not available and loads it.
     def self.prerequisite(library)
         self.install_library(library) unless self.check_library(library)
         self.load_library(library)
     end
+    ##
+    # Asks for help on a given +mod+ (module). Without arguments, lists the available modules.
+    def self.help!(mod = nil)
+        unless mod
+            puts "You have the following modules:"
+            Rust.constants.map { |c| Rust.const_get(c) }.select { |c| c.class == Module }.each do |mod|
+                puts "\t- #{mod}"
+            end
+            puts "Run \"help! {module}\" for more detailed information about the module"
+        else
+            if mod.methods.include?(:help!)
+                mod.help!
+            else
+                puts "Sorry, no help available for #{mod}"
+            end
+        end
+    end
 end
+##
+# Module that contains methods that allow calling R functions more quickly. Such methods have names resembling the
+# ones available in R (e.g., cor, wilcox_test).
 module Rust::RBindings
     def data_frame(*args)
         Rust::DataFrame.new(*args)
@@ -152,6 +211,11 @@ module Rust::TestCases
     end
 end
+##
+# Shortcut for including the Rust::RBindings module.
 def bind_r!
     include Rust::RBindings
 end
+bind_r! if ENV['RUBY_RUST_BINDING'] == '1'

data/lib/rust/core/types/dataframe.rb CHANGED Viewed

@@ -1,6 +1,10 @@
 require_relative 'datatype'
 module Rust
+    ##
+    # Mirror of the data-frame type in R.
     class DataFrame < RustDatatype
         def self.can_pull?(type, klass)
             return [klass].flatten.include?("data.frame")
@@ -19,6 +23,12 @@ module Rust
             return DataFrame.new(hash)
         end
+        ##
+        # Creates a new data-frame.
+        # +labels_or_data+ can be either:
+        # - an Array of column names (creates an empty data-frame)
+        # - a Hash with column names as keys and values as values
         def initialize(labels_or_data)
             @data = {}
@@ -34,6 +44,9 @@ module Rust
             end
         end
+        ##
+        # Returns the +i+-th row of the data-frame
         def row(i)
             if i < 0 || i >= self.rows
                 return nil
@@ -42,6 +55,9 @@ module Rust
             end
         end
+        ##
+        # Returns the +i+-th row of the data-frame. Faster (but harder to interpret) alternative to #row.
         def fast_row(i)
             if i < 0 || i >= self.rows
                 return nil
@@ -50,6 +66,9 @@ module Rust
             end
         end
+        ##
+        # Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
         def shuffle(*args)
             result = DataFrame.new(@labels)
@@ -64,6 +83,10 @@ module Rust
             return result
         end
+        ##
+        # Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
+        # are nil, all the rows/columns are returned.
         def [](rows, cols=nil)
             raise "You must specify either rows or columns to select" if !rows && !cols
             result = self
@@ -79,11 +102,17 @@ module Rust
             return result
         end
+        ##
+        # Returns the column named +name+.
         def column(name)
             return @data[name]
         end
         alias :| :column
+        ##
+        # Renames the column named +old_name+ to +new_name+.
         def rename_column!(old_name, new_name)
             raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
             raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
@@ -92,10 +121,24 @@ module Rust
             @labels[@labels.index(old_name)] = new_name
         end
+        ##
+        # Functionally transforms the column named +column+ by applying the function given as a block.
+        # Example:
+        # df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
+        # df.transform_column!("a") { |v| v + 1 }
+        # df|"a" # => [2, 3, 4]
         def transform_column!(column)
             @data[column].map! { |e| yield e }
         end
+        ##
+        # Returns a copy of the data-frame with only the rows for which the function given in the block returns true.
+        # Example:
+        # df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
+        # df2 = df.select_rows { |r| r['a'].even? }
+        # df2|"b" # => ['b']
         def select_rows
             result = DataFrame.new(self.column_names)
             self.each_with_index do |row, i|
@@ -104,6 +147,9 @@ module Rust
             return result
         end
+        ##
+        # Returns true if the function given in the block returns true for any of the rows in this data-frame.
         def has_row?
             self.each_with_index do |row, i|
                 return true if yield row, i
@@ -111,6 +157,10 @@ module Rust
             return false
         end
+        ##
+        # Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
+        # (only the columns for which the function returns true are kept).
         def select_columns(cols=nil)
             raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
@@ -126,23 +176,35 @@ module Rust
         end
         alias :select_cols :select_columns
+        ##
+        # Deletes the column named +column+.
         def delete_column(column)
             @labels.delete(column)
             @data.delete(column)
         end
+        ##
+        # Deletes the +i+-th row.
         def delete_row(i)
             @data.each do |label, column|
                 column.delete_at(i)
             end
         end
+        ##
+        # Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
         def uniq_by(by)
             result = self.clone
             result.uniq_by!(by)
             return result
         end
+        ##
+        # Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
         def uniq_by!(by)
             my_keys = {}
             to_delete = []
@@ -165,19 +227,33 @@ module Rust
             return self
         end
+        ##
+        # Returns the names of the columns.
         def column_names
             return @labels.map { |k| k.to_s }
         end
         alias :colnames :column_names
+        ##
+        # Returns the number of rows.
         def rows
             @data.values[0].size
         end
+        ##
+        # Returns the number of columns
         def columns
             @labels.size
         end
+        ##
+        # Adds the given +row+ to the data-frame. +row+ can be either:
+        # - An Array of values for all the columns (in the order of #column_names);
+        # - A Hash containing associations between column names and value to be set.
         def add_row(row)
             if row.is_a?(Array)
                 raise "Expected an array of size #{@data.size}" unless row.size == @data.size
@@ -201,6 +277,11 @@ module Rust
         end
         alias :<< :add_row
+        ##
+        # Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
+        # rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
+        # value to assign for the new column.
         def add_column(name, values=nil)
             raise "Column already exists" if @labels.include?(name)
             raise "Values or block required" if !values && !block_given?
@@ -217,6 +298,9 @@ module Rust
             end
         end
+        ##
+        # Yields each row as a Hash containing column names as keys and values as values.
         def each
             self.each_with_index do |element, i|
                 yield element
@@ -225,6 +309,10 @@ module Rust
             return self
         end
+        ##
+        # Yields each row as an Array of values rather than a Hash (rows come from #fast_each_with_index). Faster
+        # alternative to #each.
         def fast_each
             self.fast_each_with_index do |element, i|
                 yield element
@@ -233,6 +321,9 @@ module Rust
             return self
         end
+        ##
+        # Yields each row as a Hash containing column names as keys and values as values and the row index.
         def each_with_index
             for i in 0...self.rows
                 element = {}
@@ -246,6 +337,10 @@ module Rust
             return self
         end
+        ##
+        # Yields each row as an Array of values (rather than a Hash) together with the row index. Faster
+        # alternative to #each_with_index.
         def fast_each_with_index
             for i in 0...self.rows
                 element = []
@@ -302,6 +397,9 @@ module Rust
             return result
         end
+        ##
+        # Returns a copy of the data-frame containing only the first +n+ rows.
         def head(n=10)
             result = DataFrame.new(self.column_names)
             self.each_with_index do |row, i|
@@ -310,6 +408,11 @@ module Rust
             return result
         end
+        ##
+        # Merges this data-frame with +other+ in terms of the +by+ columns (Array of Strings).
+        # +first_alias+ and +second_alias+ specify the prefixes to be used for the columns not in +by+
+        # for this and the +other+ data-frame, respectively.
         def merge(other, by, first_alias = "x", second_alias = "y")
             raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
             raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
@@ -376,6 +479,14 @@ module Rust
             return result
         end
+        ##
+        # Aggregates the values in groups depending on the +by+ column (String).
+        # A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
+        # specified as optional arguments in which the name of the argument represents the column name and the value
+        # contains a block for aggregating the specific column.
+        # Both the default and the specialized blocks must take as argument an array of values and must return a
+        # scalar value.
         def aggregate(by, **aggregators)
             raise TypeError, "Expected a string" unless by.is_a?(String)
             raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
@@ -416,12 +527,18 @@ module Rust
             return result
         end
+        ##
+        # Returns a copy of this data-frame in which the rows are sorted by the values of the +column+ column.
         def sort_by(column)
             result = self.clone
             result.sort_by!(column)
             return result
         end
+        ##
+        # Sorts the rows of this data-frame by the values of the +by+ column.
         def sort_by!(by)
             copy = @data[by].clone
             copy.sort!
@@ -447,6 +564,9 @@ module Rust
             @data[by].sort!
         end
+        ##
+        # Adds all the rows in +dataframe+ to this data-frame. The column names must match.
         def bind_rows!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -459,6 +579,9 @@ module Rust
         end
         alias :rbind! :bind_rows!
+        ##
+        # Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
         def bind_columns!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The number of rows are not compatible" if self.rows != dataframe.rows
@@ -472,6 +595,9 @@ module Rust
         end
         alias :cbind! :bind_columns!
+        ##
+        # Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
         def bind_rows(dataframe)
             result = self.clone
             result.bind_rows!(dataframe)
@@ -479,6 +605,9 @@ module Rust
         end
         alias :rbind :bind_rows
+        ##
+        # Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
         def bind_columns(dataframe)
             result = self.clone
             result.bind_columns!(dataframe)
@@ -486,12 +615,22 @@ module Rust
         end
         alias :cbind :bind_columns
+        ##
+        # Returns a copy of this data-frame.
         def clone
             DataFrame.new(@data)
         end
     end
+    ##
+    # Represents an array of DataFrame
     class DataFrameArray < Array
+        ##
+        # Returns a data-frame with the rows in all the data-frames together (if compatible).
         def bind_all
             return nil if self.size == 0
@@ -505,7 +644,14 @@ module Rust
         end
     end
+    ##
+    # Represents a hash of DataFrame
     class DataFrameHash < Hash
+        ##
+        # Returns a data-frame with the rows in all the data-frames together (if compatible).
         def bind_all
             return nil if self.values.size == 0

data/lib/rust/core/types/datatype.rb CHANGED Viewed

@@ -1,7 +1,18 @@
 require_relative '../rust'
 module Rust
+    ##
+    # Represents a data-type that can be loaded from and written to R.
     class RustDatatype
+        ##
+        # Retrieves the given +variable+ from R and transforms it into the appropriate Ruby counterpart.
+        # To infer the type, it uses the class method #can_pull? of all the RustDatatype classes to check the types
+        # that are compatible with the given R variable (type and class). If more than one candidate is available, the one
+        # with maximum #pull_priority is chosen.
         def self.pull_variable(variable, forced_interpreter = nil)
             r_type = Rust._pull("as.character(typeof(#{variable}))")
             r_class = Rust._pull("as.character(class(#{variable}))")
@@ -36,14 +47,24 @@ module Rust
             end
         end
+        ##
+        # Returns the priority of this type when a #pull_variable operation is performed. Higher priority means that
+        # the type is to be preferred over other candidate types.
         def self.pull_priority
             0
         end
+        ##
+        # Writes the current object in R as +variable_name+.
         def load_in_r_as(variable_name)
             raise "Loading #{self.class} in R was not implemented"
         end
+        ##
+        # EXPERIMENTAL: Do not use
         def r_mirror_to(other_variable)
             varname = self.mirrored_R_variable_name
@@ -53,6 +74,9 @@ module Rust
             return varname
         end
+        ##
+        # EXPERIMENTAL: Do not use
         def r_mirror
             varname = self.mirrored_R_variable_name
@@ -67,6 +91,9 @@ module Rust
             return varname
         end
+        ##
+        # Returns the hash of the current object as a String.
         def r_hash
             self.hash.to_s
         end
@@ -77,6 +104,9 @@ module Rust
         end
     end
+    ##
+    # The null value in R
     class Null < RustDatatype
         def self.can_pull?(type, klass)
             return type == "NULL" && klass == "NULL"
@@ -101,6 +131,10 @@ class FalseClass
 end
 class Object
+    ##
+    # Returns a string with the R representation of this object. Raises an exception for unsupported objects.
     def to_R
         raise TypeError, "Unsupported type for #{self.class}"
     end