RubyGems - rust - Versions diffs - 0.2 → 0.3 - Mend

rust 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a39315e623da717c4f035a11fbe7363bf3aae0d8885f922c4f0fca689bc7b90a
-  data.tar.gz: e0a1ec7e485a0f9521a42191f6a97d833df8eb35c961d31c9ba67f0a6f0c0c22
+  metadata.gz: 985f7e940ab123fa452dae63792bde90613b134176ff4eaf5b59a719dd8a1ed5
+  data.tar.gz: bef0bb5028c99cb43c8e5453fdbbaf687bad65f8e6b54b06f949e5df6fb61bda
 SHA512:
-  metadata.gz: eaffc66a8a3250f7f687bdf1dec96421681f25ff394010f0c15d7e65b5ed87a9deb25111c7f56baa60b19a53a4849bd92a665bfe507f9ba9b096b722cddeed53
-  data.tar.gz: c5fc8ce8b55347ca402783ab352310719fcdd008eaa2f1ed3b00ca93137736ca6a2d8c9d9b2169865212e7a09ca955e5909d7983b0461bda0c539b51f1c0e379
+  metadata.gz: 418181b9357665ecc654e9a765e24aa792d6287dd5998b1b1bd8f3278e6951d785fff8afe9594c6c95e1f7a384cb08f844939728b62a96ab416259de1c14512b
+  data.tar.gz: 810f14821924bd1b0cebf4fbf9f52e510abc0e935be6cc2852294fd374d54411bcd2a1943f5b60d2ce93501abcaff9cac25dd458e1a8b354b665aab199705a4a

data/lib/rust-basics.rb ADDED

@@ -0,0 +1,126 @@
+require_relative 'rust-core'
+module Rust:: Correlation
+    class Pearson
+        def self.test(d1, d2)
+            raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
+            raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            Rust.exclusive do
+                Rust['correlation.a'] = d1
+                Rust['correlation.b'] = d2
+                Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
+                result = Result.new
+                result.name             = "Pearson's product-moment correlation"
+                result.statistics['t']  = Rust._pull('correlation.result$statistic')
+                result.pvalue           = Rust._pull('correlation.result$p.value')
+                result.correlation      = Rust._pull('correlation.result$estimate')
+                return result
+            end
+        end
+        def self.estimate(d1, d2)
+            self.test(d1, d2).correlation
+        end
+    end
+    class Spearman
+        def self.test(d1, d2)
+            raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
+            raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            Rust.exclusive do
+                Rust['correlation.a'] = d1
+                Rust['correlation.b'] = d2
+                Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")
+                result = Result.new
+                result.name             = "Spearman's rank correlation rho"
+                result.statistics['S']  = Rust._pull('correlation.result$statistic')
+                result.pvalue           = Rust._pull('correlation.result$p.value')
+                result.correlation      = Rust._pull('correlation.result$estimate')
+                return result
+            end
+        end
+        def self.estimate(d1, d2)
+            self.test(d1, d2).correlation
+        end
+    end
+    class Kendall
+        def self.test(d1, d2)
+            raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
+            raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
+            Rust.exclusive do
+                Rust['correlation.a'] = d1
+                Rust['correlation.b'] = d2
+                Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
+                result = Result.new
+                result.name             = "Kendall's rank correlation tau"
+                result.statistics['T']  = Rust._pull('correlation.result$statistic')
+                result.pvalue           = Rust._pull('correlation.result$p.value')
+                result.correlation      = Rust._pull('correlation.result$estimate')
+                return result
+            end
+        end
+        def self.estimate(d1, d2)
+            self.test(d1, d2).correlation
+        end
+    end
+    class Result
+        attr_accessor   :name
+        attr_accessor   :statistics
+        attr_accessor   :pvalue
+        attr_accessor   :correlation
+        alias :estimate :correlation
+        def initialize
+            @statistics = {}
+        end
+        def [](name)
+            return @statistics[name.to_sym]
+        end
+        def []=(name, value)
+            @statistics[name.to_sym] = value
+        end
+        def to_s
+            return "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " +
+                    "#{ statistics.map { |k, v| k.to_s + " -> " + v.to_s  }.join(", ") }."
+        end
+    end
+end
+module Rust::RBindings
+    def cor(d1, d2, **options)
+        return cor_test(d1, d2, **options).correlation
+    end
+    def cor_test(d1, d2, **options)
+        method = options[:method].to_s.downcase
+        if "pearson".start_with?(method)
+            return Rust::Correlation::Pearson.test(d1, d2)
+        elsif "spearman".start_with?(method)
+            return Rust::Correlation::Spearman.test(d1, d2)
+        elsif "kendall".start_with?(method)
+            return Rust::Correlation::Kendall.test(d1, d2)
+        else
+            raise "Unsupported method #{method}"
+        end
+    end
+end

data/lib/rust-calls.rb ADDED

@@ -0,0 +1,69 @@
+require_relative 'rust-core'
+module Rust
+    class Function
+        attr_reader     :name
+        attr_reader     :arguments
+        attr_reader     :options
+        def initialize(name)
+            @function = name
+            @arguments  = Arguments.new
+            @options    = Options.new
+        end
+        def options=(options)
+            raise TypeError, "Expected Options" unless options.is_a?(Options)
+            @options = options
+        end
+        def arguments=(arguments)
+            raise TypeError, "Expected Arguments" unless options.is_a?(Arguments)
+            @arguments = arguments
+        end
+        def to_R
+            params = [@arguments.to_R, @options.to_R].select { |v| v != "" }.join(",")
+            return "#@function(#{params})"
+        end
+        def call
+            Rust._eval(self.to_R)
+        end
+    end
+    class Variable
+        def initialize(name)
+            @name = name
+        end
+        def to_R
+            @name
+        end
+    end
+    class Arguments < Array
+        def to_R
+            return self.map { |v| v.to_R }.join(", ")
+        end
+    end
+    class Options < Hash
+        def to_R
+            return self.map { |k, v| "#{k}=#{v.to_R}" }.join(", ")
+        end
+        def self.from_hash(hash)
+            options = Options.new
+            hash.each do |key, value|
+                options[key.to_s] = value
+            end
+            return options
+        end
+    end
+end
+# Reopens the R-style bindings module; this file adds no methods to it.
+module Rust::RBindings
+end

data/lib/rust-core.rb CHANGED

@@ -9,8 +9,17 @@ module Rust
     R_ENGINE     = RinRuby.new(echo: false)
+    private_constant    :R_ENGINE
+    private_constant    :R_MUTEX
+    private_constant    :CLIENT_MUTEX
+    @@debugging = false
     @@in_client_mutex = false
+    # Switches debug output on: once set, _rexec prints every R command
+    # before running it.
+    def self.debug
+        @@debugging = true
+    end
     def self.exclusive
         result = nil
         CLIENT_MUTEX.synchronize do
@@ -27,7 +36,7 @@ module Rust
         elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
             R_ENGINE.assign(variable, value)
         else
-            raise "Given #{variable.class}, expected RustDatatype, String, Numeric, or Array"
+            raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
         end
     end
@@ -66,6 +75,7 @@ module Rust
     end
     def self._rexec(r_command, return_warnings = false)
+        puts "Calling _rexec with command: #{r_command}" if @@debugging
         R_MUTEX.synchronize do
             assert("This command must be executed in an exclusive block") { @@in_client_mutex }
@@ -120,41 +130,85 @@ module Rust
                 @labels.each { |label| @data[label] = [] }
             elsif labels_or_data.is_a? Hash
                 @labels = labels_or_data.keys.map { |l| l.to_s }
-                @labels.each { |label| @data[label] = [] }
-                for i in 0...labels_or_data.values[0].size
-                    self.add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
-                end
+                @data = labels_or_data.clone
             end
         end
         def row(i)
-            return @data.map { |label, values| [label, values[i]] }.to_h
+            if i < 0 || i >= self.rows
+                return nil
+            else
+                return @data.map { |label, values| [label, values[i]] }.to_h
+            end
+        end
+        def shuffle(*args)
+            result = DataFrame.new(@labels)
+            buffer = []
+            self.each do |row|
+                buffer << row
+            end
+            buffer.shuffle!(*args).each do |row|
+                result << row
+            end
+            return result
+        end
+        def [](rows, cols=nil)
+            raise "You must specify either rows or columns to select" if !rows && !cols
+            result = self
+            if rows && (rows.is_a?(Range) || rows.is_a?(Array))
+                result = result.select_rows { |row, i| rows.include?(i) }
+            end
+            if cols && cols.is_a?(Array)
+                cols = cols.map { |c| c.to_s }
+                result = result.select_columns(cols)
+            end
+            return result
         end
-        alias :[] :row
         def column(name)
             return @data[name]
         end
+        def rename_column!(old_name, new_name)
+            raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
+            raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
+            @data[new_name.to_s] = @data.delete(old_name)
+            @labels[@labels.index(old_name)] = new_name
+        end
         def transform_column!(column)
             @data[column].map! { |e| yield e }
         end
         def select_rows
             result = DataFrame.new(self.column_names)
-            self.each do |row|
-                result << row if yield row
+            self.each_with_index do |row, i|
+                result << row if yield row, i
             end
             return result
         end
-        def select_cols
+        def select_columns(cols=nil)
+            raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
             result = self.clone
             @labels.each do |label|
-                result.delete_column(label) unless yield label
+                if cols
+                    result.delete_column(label) unless cols.include?(label)
+                else
+                    result.delete_column(label) unless yield label
+                end
             end
             return result
         end
+        alias :select_cols :select_columns
         def delete_column(column)
             @labels.delete(column)
@@ -162,70 +216,18 @@ module Rust
         end
         def column_names
-            return @data.keys.map { |k| k.to_s }
+            return @labels.map { |k| k.to_s }
         end
         alias :colnames :column_names
-        def merge(other, by, first_alias = "x", second_alias = "y")
-            raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
-            raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
-            raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
-            raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
-            raise "The aliases can not have the same value" if first_alias == second_alias
-            my_keys = {}
-            self.each_with_index do |row, i|
-                key = []
-                by.each do |colname|
-                    key << row[colname]
-                end
-                my_keys[key] = i
-            end
-            merged_column_self  = (self.column_names - by)
-            merged_column_other = (other.column_names - by)
-            first_alias =  first_alias + "."     if first_alias.length > 0
-            second_alias = second_alias + "."    if second_alias.length > 0
-            merged_columns = merged_column_self.map { |colname| "#{first_alias}#{colname}" } + merged_column_other.map { |colname| "#{second_alias}#{colname}" }
-            columns = by + merged_columns
-            result = DataFrame.new(columns)
-            other.each do |other_row|
-                key = []
-                by.each do |colname|
-                    key << other_row[colname]
-                end
-                my_row_index = my_keys[key]
-                if my_row_index
-                    my_row = self[my_row_index]
-                    to_add = {}
-                    by.each do |colname|
-                        to_add[colname] = my_row[colname]
-                    end
-                    merged_column_self.each do |colname|
-                        to_add["#{first_alias}#{colname}"] = my_row[colname]
-                    end
-                    merged_column_other.each do |colname|
-                        to_add["#{second_alias}#{colname}"] = other_row[colname]
-                    end
-                    result << to_add
-                end
-            end
-            return result
-        end
         def rows
             @data.values[0].size
         end
+        def columns
+            @labels.size
+        end
         def add_row(row)
             if row.is_a?(Array)
                 raise "Expected an array of size #{@data.size}" unless row.size == @data.size
@@ -249,6 +251,22 @@ module Rust
         end
         alias :<< :add_row
+        def add_column(name, values=nil)
+            raise "Column already exists" if @labels.include?(name)
+            raise "Values or block required" if !values && !block_given?
+            raise "Number of values not matching" if values && values.size != self.rows
+            @labels << name
+            if values
+                @data[name] = values.clone
+            else
+                @data[name] = []
+                self.each_with_index do |row, i|
+                    @data[name][i] = yield row
+                end
+            end
+        end
         def each
             self.each_with_index do |element, i|
                 yield element
@@ -276,9 +294,7 @@ module Rust
             command << "#{variable_name} <- data.frame()"
             row_index = 1
             self.each do |row|
-                keys    = row.keys.map { |v| v.inspect }.join(",")
-                values  = row.values.map { |v| v.inspect }.join(",")
-                command << "#{variable_name}[#{row_index}, c(#{keys})] <- c(#{values})"
+                command << "#{variable_name}[#{row_index.to_R}, #{row.keys.to_R}] <- #{row.values.to_R}"
                 row_index += 1
             end
@@ -289,20 +305,140 @@ module Rust
         def inspect
             separator = " | "
             col_widths = self.column_names.map { |colname| [colname, ([colname.length] + @data[colname].map { |e| e.inspect.length }).max] }.to_h
-            col_widths[:rowscol] = self.rows.inspect.length + 3
+            col_widths[:rowscol] = (self.rows - 1).inspect.length + 3
             result = ""
             result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length)) + "\n"
             result << (" " * col_widths[:rowscol]) + self.column_names.map { |colname| (" " * (col_widths[colname] - colname.length)) + colname }.join(separator) + "\n"
             result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length)) + "\n"
             self.each_with_index do |row, i|
-                result << "[#{i}] " + row.map { |colname, value| (" " * (col_widths[colname] - value.inspect.length)) + value.inspect }.join(separator) + "\n"
+                index_part = "[" + (" " * (col_widths[:rowscol] - i.inspect.length - 3)) + "#{i}] "
+                row_part   = row.map { |colname, value| (" " * (col_widths[colname] - value.inspect.length)) + value.inspect }.join(separator)
+                result << index_part + row_part + "\n"
             end
             result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
             return result
         end
+        def head(n=10)
+            result = DataFrame.new(self.column_names)
+            self.each_with_index do |row, i|
+                result << row if i < n
+            end
+            return result
+        end
+        # Joins this DataFrame with another one on the columns listed in +by+.
+        # other::        the DataFrame to join with.
+        # by::           Array of String column names present in both frames.
+        # first_alias::  prefix (followed by ".") for this frame's non-key columns.
+        # second_alias:: prefix (followed by ".") for the other frame's non-key columns.
+        # Returns a new DataFrame whose columns are the +by+ columns followed by
+        # the prefixed non-key columns of self and then of +other+. Only rows of
+        # +other+ whose key values also occur in self produce a row.
+        # Raises TypeError for a wrong +other+ or +by+, and a RuntimeError when a
+        # key column is missing or the aliases would make column names clash.
+        def merge(other, by, first_alias = "x", second_alias = "y")
+            raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
+            raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
+            raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
+            raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
+            # Equal aliases are only accepted when both are empty and the non-key
+            # columns of the two frames do not overlap.
+            if first_alias == second_alias
+                if first_alias == ""
+                    my_columns = self.column_names - by
+                    other_columns = other.column_names - by
+                    intersection = my_columns & other_columns
+                    raise "Cannot merge because the following columns would overlap: #{intersection}" if intersection.size > 0
+                else
+                    raise "The aliases can not have the same value"
+                end
+            end
+            # Index the rows of self by their key values; when several rows share
+            # a key, the last one wins.
+            my_keys = {}
+            self.each_with_index do |row, i|
+                key = []
+                by.each do |colname|
+                    key << row[colname]
+                end
+                my_keys[key] = i
+            end
+            merged_column_self  = (self.column_names - by)
+            merged_column_other = (other.column_names - by)
+            # A non-empty alias becomes an "alias." prefix.
+            first_alias =  first_alias + "."     if first_alias.length > 0
+            second_alias = second_alias + "."    if second_alias.length > 0
+            merged_columns = merged_column_self.map { |colname| "#{first_alias}#{colname}" } + merged_column_other.map { |colname| "#{second_alias}#{colname}" }
+            columns = by + merged_columns
+            result = DataFrame.new(columns)
+            # Walk the other frame and emit one merged row per key found in self.
+            other.each do |other_row|
+                key = []
+                by.each do |colname|
+                    key << other_row[colname]
+                end
+                my_row_index = my_keys[key]
+                if my_row_index
+                    my_row = self.row(my_row_index)
+                    to_add = {}
+                    by.each do |colname|
+                        to_add[colname] = my_row[colname]
+                    end
+                    merged_column_self.each do |colname|
+                        to_add["#{first_alias}#{colname}"] = my_row[colname]
+                    end
+                    merged_column_other.each do |colname|
+                        to_add["#{second_alias}#{colname}"] = other_row[colname]
+                    end
+                    result << to_add
+                end
+            end
+            return result
+        end
+        def bind_rows!(dataframe)
+            raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
+            raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
+            dataframe.each do |row|
+                self << row
+            end
+            return true
+        end
+        alias :rbind! :bind_rows!
+        def bind_columns!(dataframe)
+            raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
+            raise "The number of rows are not compatible" if self.rows != dataframe.rows
+            raise "The dataset would override some columns" if (self.column_names & dataframe.column_names).size > 0
+            dataframe.column_names.each do |column_name|
+                self.add_column(column_name, dataframe.column(column_name))
+            end
+            return true
+        end
+        alias :cbind! :bind_columns!
+        def bind_rows(dataframe)
+            result = self.clone
+            result.bind_rows!(dataframe)
+            return result
+        end
+        alias :rbind :bind_rows
+        def bind_columns(dataframe)
+            result = self.clone
+            result.bind_columns!(dataframe)
+            return result
+        end
+        alias :cbind :bind_columns
+        def clone
+            DataFrame.new(@data)
+        end
     end
     class Matrix < RustDatatype
@@ -344,36 +480,104 @@ module Rust
         end
     end
-    class CSV
-        def self.read(filename, **options)
-            hash = {}
-            labels = nil
-            ::CSV.parse(File.read(filename), **options) do |row|
-                labels = row.headers || (1..row.size).to_a.map { |e| "X#{e}" } unless labels
-                labels.each do |label|
-                    hash[label] = [] unless hash[label]
-                    hash[label] << row[label]
-                end
+    class Sequence
+        attr_reader :min
+        attr_reader :max
+        def initialize(min, max, step=1)
+            @min = min
+            @max = max
+            @step = step
+        end
+        def step(step)
+            @step = step
+        end
+        def each
+            (@min..@max).step(@step) do |v|
+                yield v
             end
-            return Rust::DataFrame.new(hash)
         end
-        def self.write(filename, dataframe, **options)
-            raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
-            x[:headers] = dataframe.column_names if x[:headers]
-            hash = {}
-            labels = nil
-            ::CSV.open(filename, 'w', write_headers: (x[:headers] ? true : false), **options) do |csv|
-                dataframe.each do |row|
-                    csv << row
-                end
+        def to_a
+            result = []
+            self.each do |v|
+                result << v
             end
-            return true
+            return result
         end
+        def to_R
+            "seq(from=#@min, to=#@max, by=#@step)"
+        end
+    end
+end
+class TrueClass
+    def to_R
+        "TRUE"
+    end
+end
+class FalseClass
+    def to_R
+        "FALSE"
+    end
+end
+class Object
+    def to_R
+        raise TypeError, "Unsupported type for #{self.class}"
+    end
+end
+class NilClass
+    def to_R
+        return "NULL"
+    end
+end
+class Numeric
+    def to_R
+        self.inspect
+    end
+end
+class Float
+    def to_R
+        return self.nan? ? "NA" : super
+    end
+end
+class Array
+    def to_R
+        return "c(#{self.map { |e| e.to_R }.join(",")})"
+    end
+end
+class String
+    def to_R
+        return self.inspect
+    end
+end
+class Range
+    def to_R
+        [range.min, range.max].to_R
+    end
+end
+module Rust::RBindings
+    def read_csv(filename, **options)
+        Rust::CSV.read(filename, **options)
+    end
+    def write_csv(filename, dataframe, **options)
+        Rust::CSV.write(filename, dataframe, **options)
+    end
+    def data_frame(*args)
+        Rust::DataFrame.new(*args)
     end
 end