RubyGems - rust - Versions diffs - 0.3 → 0.9 - Mend

rust 0.3 → 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

checksums.yaml +4 -4
data/bin/ruby-rust +3 -0
data/lib/{rust-csv.rb → rust/core/csv.rb} +14 -4
data/lib/rust/core/rust.rb +157 -0
data/lib/rust/core/types/all.rb +4 -0
data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +183 -245
data/lib/rust/core/types/datatype.rb +161 -0
data/lib/rust/core/types/factor.rb +131 -0
data/lib/rust/core/types/language.rb +166 -0
data/lib/rust/core/types/list.rb +81 -0
data/lib/rust/core/types/matrix.rb +132 -0
data/lib/rust/core/types/s4class.rb +59 -0
data/lib/rust/core/types/utils.rb +109 -0
data/lib/rust/core.rb +7 -0
data/lib/rust/models/all.rb +4 -0
data/lib/rust/models/anova.rb +60 -0
data/lib/rust/models/regression.rb +205 -0
data/lib/rust/plots/all.rb +4 -0
data/lib/rust/plots/basic-plots.rb +111 -0
data/lib/{rust-plots.rb → rust/plots/core.rb} +64 -129
data/lib/rust/plots/distribution-plots.rb +62 -0
data/lib/rust/stats/all.rb +4 -0
data/lib/{rust-basics.rb → rust/stats/correlation.rb} +11 -5
data/lib/rust/stats/descriptive.rb +128 -0
data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +23 -21
data/lib/rust/stats/probabilities.rb +248 -0
data/lib/rust/stats/tests.rb +292 -0
data/lib/rust.rb +4 -8
metadata +31 -12
data/lib/rust-calls.rb +0 -69
data/lib/rust-descriptive.rb +0 -59
data/lib/rust-tests.rb +0 -165

data/lib/{rust-core.rb → rust/core/types/dataframe.rb} RENAMED

@@ -1,123 +1,20 @@
-require 'code-assertions'
-require 'stringio'
-require 'rinruby'
-require 'csv'
+require_relative 'datatype'
 module Rust
-    CLIENT_MUTEX = Mutex.new
-    R_MUTEX      = Mutex.new
-    R_ENGINE     = RinRuby.new(echo: false)
-    private_constant    :R_ENGINE
-    private_constant    :R_MUTEX
-    private_constant    :CLIENT_MUTEX
-    @@debugging = false
-    @@in_client_mutex = false
-    def self.debug
-        @@debugging = true
-    end
-    def self.exclusive
-        result = nil
-        CLIENT_MUTEX.synchronize do
-            @@in_client_mutex = true
-            result = yield
-            @@in_client_mutex = false
-        end
-        return result
-    end
-    def self.[]=(variable, value)
-        if value.is_a?(RustDatatype)
-            value.load_in_r_as(variable.to_s)
-        elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
-            R_ENGINE.assign(variable, value)
-        else
-            raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
+    class DataFrame < RustDatatype
+        def self.can_pull?(type, klass)
+            return [klass].flatten.include?("data.frame")
         end
-    end
-    def self.[](variable, type=RustDatatype)
-        return type.pull_variable(variable)
-    end
-    def self._eval_big(r_command, return_warnings = false)
-        r_command = r_command.join("\n") if r_command.is_a?(Array)
-        self._rexec(r_command, return_warnings) do |cmd|
-            result = true
-            instructions = cmd.lines
-            while instructions.size > 0
-                current_command = ""
-                while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
-                    current_command << instructions.shift
-                end
-                result &= R_ENGINE.eval(current_command)
-            end
-            result
-        end
-    end
-    def self._pull(r_command, return_warnings = false)
-        self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
-    end
-    def self._eval(r_command, return_warnings = false)
-        self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
-    end
-    def self._rexec(r_command, return_warnings = false)
-        puts "Calling _rexec with command: #{r_command}" if @@debugging
-        R_MUTEX.synchronize do
-            assert("This command must be executed in an exclusive block") { @@in_client_mutex }
-            result = nil
-            begin
-                $stdout = StringIO.new
-                if return_warnings
-                    R_ENGINE.echo(true, true)
-                else
-                    R_ENGINE.echo(false, false)
-                end
-                result = yield(r_command)
-            ensure
-                R_ENGINE.echo(false, false)
-                warnings = $stdout.string
-                $stdout = STDOUT
-            end
-            if return_warnings
-                return result, warnings.lines.map { |w| w.strip.chomp }
-            else
-                return result
-            end
-        end
-    end
-    class RustDatatype
-        def self.pull_variable(variable)
-            return Rust._pull(variable)
+        def self.pull_priority
+            1
         end
-        def load_in_r_as(r_instance, variable_name)
-            raise "Not implemented"
-        end
-    end
-    class DataFrame < RustDatatype
-        def self.pull_variable(variable)
+        def self.pull_variable(variable, type, klass)
             hash = {}
-            colnames = Rust._pull("colnames(#{variable})")
+            colnames = Rust["colnames(#{variable})"]
             colnames.each do |col|
-                hash[col] = Rust._pull("#{variable}$#{col}")
+                hash[col] = Rust["#{variable}$\"#{col}\""]
             end
             return DataFrame.new(hash)
         end
@@ -130,7 +27,10 @@ module Rust
                 @labels.each { |label| @data[label] = [] }
             elsif labels_or_data.is_a? Hash
                 @labels = labels_or_data.keys.map { |l| l.to_s }
-                @data = labels_or_data.clone
+                labels_or_data.each do |key, value|
+                    @data[key.to_s] = value.clone
+                end
             end
         end
@@ -142,6 +42,14 @@ module Rust
             end
         end
+        def fast_row(i)
+            if i < 0 || i >= self.rows
+                return nil
+            else
+                return @labels.map { |label| @data[label][i] }
+            end
+        end
         def shuffle(*args)
             result = DataFrame.new(@labels)
@@ -174,6 +82,7 @@ module Rust
         def column(name)
             return @data[name]
         end
+        alias :| :column
         def rename_column!(old_name, new_name)
             raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
@@ -195,6 +104,13 @@ module Rust
             return result
         end
+        def has_row?
+            self.each_with_index do |row, i|
+                return true if yield row, i
+            end
+            return false
+        end
         def select_columns(cols=nil)
             raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
@@ -215,6 +131,40 @@ module Rust
             @data.delete(column)
         end
+        def delete_row(i)
+            @data.each do |label, column|
+                column.delete_at(i)
+            end
+        end
+        def uniq_by(by)
+            result = self.clone
+            result.uniq_by!(by)
+            return result
+        end
+        def uniq_by!(by)
+            my_keys = {}
+            to_delete = []
+            self.each_with_index do |row, i|
+                key = []
+                by.each do |colname|
+                    key << row[colname]
+                end
+                unless my_keys[key]
+                    my_keys[key] = i
+                else
+                    to_delete << (i-to_delete.size)
+                end
+            end
+            to_delete.each do |i|
+                self.delete_row(i)
+            end
+            return self
+        end
         def column_names
             return @labels.map { |k| k.to_s }
         end
@@ -243,7 +193,7 @@ module Rust
                 row.each do |key, value|
                     @data[key.to_s] << value
                 end
-#
                 return true
             else
                 raise TypeError, "Expected an Array or a Hash"
@@ -275,6 +225,14 @@ module Rust
             return self
         end
+        def fast_each
+            self.fast_each_with_index do |element, i|
+                yield element
+            end
+            return self
+        end
         def each_with_index
             for i in 0...self.rows
                 element = {}
@@ -288,6 +246,19 @@ module Rust
             return self
         end
+        def fast_each_with_index
+            for i in 0...self.rows
+                element = []
+                @labels.each do |label|
+                    element << @data[label][i]
+                end
+                yield element, i
+            end
+            return self
+        end
         def load_in_r_as(variable_name)
             command = []
@@ -299,6 +270,14 @@ module Rust
                 row_index += 1
             end
+            self.column_names.each do |name|
+                column = self.column(name)
+                if column.is_a?(Factor)
+                    command << "#{variable_name}[,#{name.to_R}] <- factor(#{variable_name}[,#{name.to_R}], labels=#{column.levels.to_R})"
+                end
+            end
             Rust._eval_big(command)
         end
@@ -397,6 +376,77 @@ module Rust
             return result
         end
+        def aggregate(by, **aggregators)
+            raise TypeError, "Expected a string" unless by.is_a?(String)
+            raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
+            raise "Expected a block for default aggregator" unless block_given?
+            aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h
+            sorted = self.sort_by(by)
+            current_value = nil
+            partials = []
+            partial = nil
+            sorted.column(by).each_with_index do |value, index|
+                if current_value != value
+                    current_value = value
+                    partials << partial if partial
+                    partial = Rust::DataFrame.new(self.column_names)
+                end
+                partial << sorted.fast_row(index)
+            end
+            partials << partial
+            result = Rust::DataFrame.new(self.column_names)
+            partials.each do |partial|
+                aggregated_row = {}
+                aggregated_row[by] = partial.column(by)[0]
+                (self.column_names - [by]).each do |column|
+                    if aggregators[column]
+                        aggregated_row[column] = aggregators[column].call(partial.column(column))
+                    else
+                        aggregated_row[column] = yield partial.column(column)
+                    end
+                end
+                result << aggregated_row
+            end
+            return result
+        end
+        def sort_by(column)
+            result = self.clone
+            result.sort_by!(column)
+            return result
+        end
+        def sort_by!(by)
+            copy = @data[by].clone
+            copy.sort!
+            indices = []
+            @data[by].each_with_index do |value, i|
+                index = copy.index(value)
+                indices << index
+                copy[index] = NilClass
+            end
+            (self.column_names - [by]).each do |column_name|
+                sorted = []
+                column = self.column(column_name)
+                column_i = 0
+                indices.each do |i|
+                    sorted[i] = column[column_i]
+                    column_i += 1
+                end
+                @data[column_name] = sorted
+            end
+            @data[by].sort!
+        end
         def bind_rows!(dataframe)
             raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
             raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -441,143 +491,31 @@ module Rust
         end
     end
-    class Matrix < RustDatatype
-        def self.pull_variable(variable)
-            return Rust._pull(variable)
-        end
-        def initialize(data)
-            if data.flatten.size == 0
-                raise "Empty matrices are not allowed"
-            else
-                raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
-                raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all?  { |e| e.is_a?(Numeric) } }
-                raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
-                @data = data.clone
+    class DataFrameArray < Array
+        def bind_all
+            return nil if self.size == 0
+            result = self.first.clone
+            for i in 1...self.size
+                result .bind_rows!(self[i])
             end
-        end
-        def [](i, j)
-            return @data[i][j]
-        end
-        def rows
-            @data.size
-        end
-        def cols
-            @data[0].size
-        end
-        def []=(i, j, value)
-            raise "Wrong i" unless i.between?(0, @data.size - 1)
-            raise "Wrong j" unless j.between?(0, @data[0].size - 1)
-            @data[i][j] = value
-        end
-        def load_in_r_as(variable_name)
-            Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=T)")
+            return result
         end
     end
-    class Sequence
-        attr_reader :min
-        attr_reader :max
-        def initialize(min, max, step=1)
-            @min = min
-            @max = max
-            @step = step
-        end
-        def step(step)
-            @step = step
-        end
-        def each
-            (@min..@max).step(@step) do |v|
-                yield v
-            end
-        end
-        def to_a
-            result = []
-            self.each do |v|
-                result << v
+    class DataFrameHash < Hash
+        def bind_all
+            return nil if self.values.size == 0
+            result = self.values.first.clone
+            for i in 1...self.values.size
+                result .bind_rows!(self.values[i])
             end
             return result
         end
-        def to_R
-            "seq(from=#@min, to=#@max, by=#@step)"
-        end
-    end
-end
-class TrueClass
-    def to_R
-        "TRUE"
-    end
-end
-class FalseClass
-    def to_R
-        "FALSE"
-    end
-end
-class Object
-    def to_R
-        raise TypeError, "Unsupported type for #{self.class}"
-    end
-end
-class NilClass
-    def to_R
-        return "NULL"
-    end
-end
-class Numeric
-    def to_R
-        self.inspect
-    end
-end
-class Float
-    def to_R
-        return self.nan? ? "NA" : super
-    end
-end
-class Array
-    def to_R
-        return "c(#{self.map { |e| e.to_R }.join(",")})"
-    end
-end
-class String
-    def to_R
-        return self.inspect
-    end
-end
-class Range
-    def to_R
-        [range.min, range.max].to_R
-    end
-end
-module Rust::RBindings
-    def read_csv(filename, **options)
-        Rust::CSV.read(filename, **options)
-    end
-    def write_csv(filename, dataframe, **options)
-        Rust::CSV.write(filename, dataframe, **options)
-    end
-    def data_frame(*args)
-        Rust::DataFrame.new(*args)
     end
 end

data/lib/rust/core/types/datatype.rb ADDED

@@ -0,0 +1,161 @@
+require_relative '../rust'
+module Rust
+    class RustDatatype
+        def self.pull_variable(variable, forced_interpreter = nil)
+            r_type = Rust._pull("as.character(typeof(#{variable}))")
+            r_class = Rust._pull("as.character(class(#{variable}))")
+            if forced_interpreter
+                raise ArgumentError, "Expected null or class as forced_interpreter" if forced_interpreter && !forced_interpreter.is_a?(Class)
+                raise ArgumentError, "Class #{forced_interpreter} can not handle type #{r_type}, class #{r_class}" unless forced_interpreter.can_pull?(r_type, r_class)
+                return forced_interpreter.pull_variable(variable, r_type, r_class)
+            end
+            candidates = []
+            ObjectSpace.each_object(Class) do |type|
+                if type < RustDatatype
+                    if type.can_pull?(r_type, r_class)
+                        candidates << type
+                    end
+                end
+            end
+            if candidates.size > 0
+                type = candidates.max_by { |c| c.pull_priority }
+                puts "Using #{type} to pull #{variable}" if Rust.debug?
+                return type.pull_variable(variable, r_type, r_class)
+            else
+                if Rust._pull("length(#{variable})") == 0
+                    return []
+                else
+                    return Rust._pull(variable)
+                end
+            end
+        end
+        def self.pull_priority
+            0
+        end
+        def load_in_r_as(variable_name)
+            raise "Loading #{self.class} in R was not implemented"
+        end
+        def r_mirror_to(other_variable)
+            varname = self.mirrored_R_variable_name
+            Rust._eval("#{varname} = #{other_variable}")
+            Rust["#{varname}.hash"] = self.r_hash
+            return varname
+        end
+        def r_mirror
+            varname = self.mirrored_R_variable_name
+            if !Rust._pull("exists(\"#{varname}\")") || Rust._pull("#{varname}.hash") != self.r_hash
+                puts "Loading #{varname}" if Rust.debug?
+                Rust[varname] = self
+                Rust["#{varname}.hash"] = self.r_hash
+            else
+                puts "Using cached value for #{varname}" if Rust.debug?
+            end
+            return varname
+        end
+        def r_hash
+            self.hash.to_s
+        end
+        private
+        def mirrored_R_variable_name
+            return "rust.mirrored.#{self.object_id}"
+        end
+    end
+    class Null < RustDatatype
+        def self.can_pull?(type, klass)
+            return type == "NULL" && klass == "NULL"
+        end
+        def self.pull_variable(variable, type, klass)
+            return nil
+        end
+    end
+end
+class TrueClass
+    def to_R
+        "TRUE"
+    end
+end
+class FalseClass
+    def to_R
+        "FALSE"
+    end
+end
+class Object
+    def to_R
+        raise TypeError, "Unsupported type for #{self.class}"
+    end
+end
+class NilClass
+    def to_R
+        return "NULL"
+    end
+    def load_in_r_as(variable)
+        Rust._eval("#{variable} <- NULL")
+    end
+end
+class Numeric
+    def to_R
+        self.inspect
+    end
+end
+class Float
+    def to_R
+        return self.nan? ? "NA" : super
+    end
+end
+class Symbol
+    def to_R
+        return self.to_s.inspect
+    end
+end
+class Array
+    def to_R
+        return "c(#{self.map { |e| e.to_R }.join(",")})"
+    end
+    def distribution
+        result = {}
+        self.each do |value|
+            result[value] = result[value].to_i + 1
+        end
+        return result
+    end
+end
+class String
+    def to_R
+        return self.inspect
+    end
+end
+class Range
+    def to_R
+        [range.min, range.max].to_R
+    end
+end