rust 0.9 → 0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ruby-rust +1 -1
- data/lib/rust/core/csv.rb +21 -0
- data/lib/rust/core/rust.rb +65 -1
- data/lib/rust/core/types/dataframe.rb +146 -0
- data/lib/rust/core/types/datatype.rb +34 -0
- data/lib/rust/core/types/factor.rb +27 -0
- data/lib/rust/core/types/language.rb +45 -12
- data/lib/rust/core/types/list.rb +16 -0
- data/lib/rust/core/types/matrix.rb +29 -6
- data/lib/rust/core/types/s4class.rb +19 -0
- data/lib/rust/core/types/utils.rb +14 -1
- data/lib/rust/external/ggplot2/core.rb +171 -0
- data/lib/rust/external/ggplot2/geoms.rb +83 -0
- data/lib/rust/external/ggplot2/helper.rb +122 -0
- data/lib/rust/external/ggplot2/plot_builder.rb +188 -0
- data/lib/rust/external/ggplot2/themes.rb +435 -0
- data/lib/rust/external/ggplot2.rb +5 -0
- data/lib/rust/external/robustbase.rb +44 -0
- data/lib/rust/models/anova.rb +17 -0
- data/lib/rust/models/regression.rb +54 -1
- data/lib/rust/plots/basic-plots.rb +32 -0
- data/lib/rust/plots/core.rb +90 -0
- data/lib/rust/plots/distribution-plots.rb +13 -0
- data/lib/rust/stats/correlation.rb +43 -0
- data/lib/rust/stats/descriptive.rb +29 -0
- data/lib/rust/stats/effsize.rb +21 -0
- data/lib/rust/stats/probabilities.rb +141 -33
- data/lib/rust/stats/tests.rb +97 -5
- data/lib/rust.rb +19 -0
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8eb6e3759ef38070603a941ef348ac46e4b6c08b4638c493edb2169acf16c793
|
4
|
+
data.tar.gz: f855ca774695688ee64d7513ac94c8b98d3bae154bfae56b803ed1da567cb282
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c281de698d8b4750832d77971dac857dbee2c31252b7950abf91777d37763bbb79577098ab48efeee6f8a5ef2a28949ef72816d33703eaad14887d588b4aac32
|
7
|
+
data.tar.gz: d44c0532c19ff8eb2f505d4da62e82b4b4f32dada4b39a05cddf57679a3a725dd38ceecf63aaf2d551a8ae691841196bad7cc0694d3613b928a84a9b6291ef6a
|
data/bin/ruby-rust
CHANGED
data/lib/rust/core/csv.rb
CHANGED
@@ -2,7 +2,16 @@ require_relative '../core'
|
|
2
2
|
require 'csv'
|
3
3
|
|
4
4
|
module Rust
|
5
|
+
|
6
|
+
##
|
7
|
+
# Class that handles CSVs (both loading and saving).
|
8
|
+
|
5
9
|
class CSV
|
10
|
+
|
11
|
+
##
|
12
|
+
# Reads a +pattern+ of CSVs (glob-style pattern) and returns a map containing as keys the filenames of the
|
13
|
+
# loaded CSVs and as values the corresponding data-frames. Options can be specified (see #read).
|
14
|
+
|
6
15
|
def self.read_all(pattern, **options)
|
7
16
|
result = DataFrameHash.new
|
8
17
|
Dir.glob(pattern).each do |filename|
|
@@ -11,6 +20,13 @@ module Rust
|
|
11
20
|
return result
|
12
21
|
end
|
13
22
|
|
23
|
+
##
|
24
|
+
# Reads the CSV at +filename+. Options can be specified, such as:
|
25
|
+
# - headers => set to true if the first row contains the headers, false otherwise;
|
26
|
+
# - infer_numbers => if a column contains only numbers, the values are transformed into floats; true by default;
|
27
|
+
# - infer_integers => if infer_numbers is active, it distinguishes between integers and floats;
|
28
|
+
# The other options are the ones that can be used in the R function "read.csv".
|
29
|
+
|
14
30
|
def self.read(filename, **options)
|
15
31
|
hash = {}
|
16
32
|
labels = nil
|
@@ -46,6 +62,11 @@ module Rust
|
|
46
62
|
return result
|
47
63
|
end
|
48
64
|
|
65
|
+
##
|
66
|
+
# Writes the +dataframe+ as a CSV at +filename+. Options can be specified, such as:
|
67
|
+
# - headers => set to true if the first row should contain the headers, false otherwise;
|
68
|
+
# The other options are the ones that can be used in the R function "read.csv".
|
69
|
+
|
49
70
|
def self.write(filename, dataframe, **options)
|
50
71
|
raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
|
51
72
|
|
data/lib/rust/core/rust.rb
CHANGED
@@ -2,6 +2,10 @@ require 'code-assertions'
|
|
2
2
|
require 'stringio'
|
3
3
|
require 'rinruby'
|
4
4
|
|
5
|
+
##
|
6
|
+
# Basic module for the Rust package. It includes a series of sub-modules that provide specific features, such as
|
7
|
+
# statistical hypothesis tests, plots, and so on.
|
8
|
+
|
5
9
|
module Rust
|
6
10
|
CLIENT_MUTEX = Mutex.new
|
7
11
|
R_MUTEX = Mutex.new
|
@@ -15,14 +19,23 @@ module Rust
|
|
15
19
|
@@debugging = $RUST_DEBUG || false
|
16
20
|
@@in_client_mutex = false
|
17
21
|
|
22
|
+
##
|
23
|
+
# Sets the debug mode. Any call to R will be written on the standard output.
|
24
|
+
|
18
25
|
def self.debug
|
19
26
|
@@debugging = true
|
20
27
|
end
|
21
28
|
|
29
|
+
##
|
30
|
+
# Checks if the debug mode is active.
|
31
|
+
|
22
32
|
def self.debug?
|
23
33
|
return @@debugging
|
24
34
|
end
|
25
35
|
|
36
|
+
##
|
37
|
+
# Runs the given block with a mutex. It is mandatory to run any R command with this method.
|
38
|
+
|
26
39
|
def self.exclusive
|
27
40
|
result = nil
|
28
41
|
CLIENT_MUTEX.synchronize do
|
@@ -33,6 +46,13 @@ module Rust
|
|
33
46
|
return result
|
34
47
|
end
|
35
48
|
|
49
|
+
##
|
50
|
+
# Sets a variable in the R environment with a given value.
|
51
|
+
#
|
52
|
+
# Raises an error if the value can not be translated into an R object.
|
53
|
+
#
|
54
|
+
# Example: Rust['a'] = 0.
|
55
|
+
|
36
56
|
def self.[]=(variable, value)
|
37
57
|
if value.is_a?(RustDatatype)
|
38
58
|
value.load_in_r_as(variable.to_s)
|
@@ -41,9 +61,13 @@ module Rust
|
|
41
61
|
else
|
42
62
|
raise "Trying to assign #{variable} with #{value.class}; expected RustDatatype, String, Numeric, or Array"
|
43
63
|
end
|
44
|
-
|
45
64
|
end
|
46
65
|
|
66
|
+
##
|
67
|
+
# Retrieves the value of a variable from the R environment.
|
68
|
+
#
|
69
|
+
# Example: Rust['a']
|
70
|
+
|
47
71
|
def self.[](variable)
|
48
72
|
return RustDatatype.pull_variable(variable)
|
49
73
|
end
|
@@ -107,6 +131,9 @@ module Rust
|
|
107
131
|
end
|
108
132
|
end
|
109
133
|
|
134
|
+
##
|
135
|
+
# Checks if the given +name+ library can be used. Returns true if it is available, false otherwise.
|
136
|
+
|
110
137
|
def self.check_library(name)
|
111
138
|
self.exclusive do
|
112
139
|
result, _ = self._pull("require(\"#{name}\", character.only = TRUE)", true)
|
@@ -114,6 +141,9 @@ module Rust
|
|
114
141
|
end
|
115
142
|
end
|
116
143
|
|
144
|
+
##
|
145
|
+
# Loads the given +name+ library.
|
146
|
+
|
117
147
|
def self.load_library(name)
|
118
148
|
self.exclusive do
|
119
149
|
self._eval("library(\"#{name}\", character.only = TRUE)")
|
@@ -122,6 +152,9 @@ module Rust
|
|
122
152
|
return nil
|
123
153
|
end
|
124
154
|
|
155
|
+
##
|
156
|
+
# Installs the given +name+ library and its dependencies.
|
157
|
+
|
125
158
|
def self.install_library(name)
|
126
159
|
self.exclusive do
|
127
160
|
self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
|
@@ -130,12 +163,38 @@ module Rust
|
|
130
163
|
return nil
|
131
164
|
end
|
132
165
|
|
166
|
+
##
|
167
|
+
# Installs the +library+ library if it is not available and loads it.
|
168
|
+
|
133
169
|
def self.prerequisite(library)
|
134
170
|
self.install_library(library) unless self.check_library(library)
|
135
171
|
self.load_library(library)
|
136
172
|
end
|
173
|
+
|
174
|
+
##
|
175
|
+
# Prints help for the given +mod+. If +mod+ is omitted, lists the available modules.
|
176
|
+
|
177
|
+
def self.help!(mod = nil)
|
178
|
+
unless mod
|
179
|
+
puts "You have the following modules:"
|
180
|
+
Rust.constants.map { |c| Rust.const_get(c) }.select { |c| c.class == Module }.each do |mod|
|
181
|
+
puts "\t- #{mod}"
|
182
|
+
end
|
183
|
+
puts "Run \"help! {module}\" for more detailed information about the module"
|
184
|
+
else
|
185
|
+
if mod.methods.include?(:help!)
|
186
|
+
mod.help!
|
187
|
+
else
|
188
|
+
puts "Sorry, no help available for #{mod}"
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
137
192
|
end
|
138
193
|
|
194
|
+
##
|
195
|
+
# Module that contains methods that allow calling R functions faster. Such methods have names resembling the ones
|
196
|
+
# available in R (e.g., cor, wilcox_test).
|
197
|
+
|
139
198
|
module Rust::RBindings
|
140
199
|
def data_frame(*args)
|
141
200
|
Rust::DataFrame.new(*args)
|
@@ -152,6 +211,11 @@ module Rust::TestCases
|
|
152
211
|
end
|
153
212
|
end
|
154
213
|
|
214
|
+
##
|
215
|
+
# Shortcut for including the Rust::RBindings module
|
216
|
+
|
155
217
|
def bind_r!
|
156
218
|
include Rust::RBindings
|
157
219
|
end
|
220
|
+
|
221
|
+
bind_r! if ENV['RUBY_RUST_BINDING'] == '1'
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require_relative 'datatype'
|
2
2
|
|
3
3
|
module Rust
|
4
|
+
|
5
|
+
##
|
6
|
+
# Mirror of the data-frame type in R.
|
7
|
+
|
4
8
|
class DataFrame < RustDatatype
|
5
9
|
def self.can_pull?(type, klass)
|
6
10
|
return [klass].flatten.include?("data.frame")
|
@@ -19,6 +23,12 @@ module Rust
|
|
19
23
|
return DataFrame.new(hash)
|
20
24
|
end
|
21
25
|
|
26
|
+
##
|
27
|
+
# Creates a new data-frame.
|
28
|
+
# +labels_or_data+ can be either:
|
29
|
+
# - an Array of column names (creates an empty data-frame)
|
30
|
+
# - a Hash with column names as keys and values as values
|
31
|
+
|
22
32
|
def initialize(labels_or_data)
|
23
33
|
@data = {}
|
24
34
|
|
@@ -34,6 +44,9 @@ module Rust
|
|
34
44
|
end
|
35
45
|
end
|
36
46
|
|
47
|
+
##
|
48
|
+
# Returns the +i+-th row of the data-frame
|
49
|
+
|
37
50
|
def row(i)
|
38
51
|
if i < 0 || i >= self.rows
|
39
52
|
return nil
|
@@ -42,6 +55,9 @@ module Rust
|
|
42
55
|
end
|
43
56
|
end
|
44
57
|
|
58
|
+
##
|
59
|
+
# Returns the +i+-th row of the data-frame. Faster (but harder to interpret) alternative to #row.
|
60
|
+
|
45
61
|
def fast_row(i)
|
46
62
|
if i < 0 || i >= self.rows
|
47
63
|
return nil
|
@@ -50,6 +66,9 @@ module Rust
|
|
50
66
|
end
|
51
67
|
end
|
52
68
|
|
69
|
+
##
|
70
|
+
# Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
|
71
|
+
|
53
72
|
def shuffle(*args)
|
54
73
|
result = DataFrame.new(@labels)
|
55
74
|
|
@@ -64,6 +83,10 @@ module Rust
|
|
64
83
|
return result
|
65
84
|
end
|
66
85
|
|
86
|
+
##
|
87
|
+
# Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
|
88
|
+
# are nil, all the rows/columns are returned.
|
89
|
+
|
67
90
|
def [](rows, cols=nil)
|
68
91
|
raise "You must specify either rows or columns to select" if !rows && !cols
|
69
92
|
result = self
|
@@ -79,11 +102,17 @@ module Rust
|
|
79
102
|
return result
|
80
103
|
end
|
81
104
|
|
105
|
+
##
|
106
|
+
# Return the column named +name+.
|
107
|
+
|
82
108
|
def column(name)
|
83
109
|
return @data[name]
|
84
110
|
end
|
85
111
|
alias :| :column
|
86
112
|
|
113
|
+
##
|
114
|
+
# Renames the column named +old_name+ to +new_name+.
|
115
|
+
|
87
116
|
def rename_column!(old_name, new_name)
|
88
117
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
89
118
|
raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
|
@@ -92,10 +121,24 @@ module Rust
|
|
92
121
|
@labels[@labels.index(old_name)] = new_name
|
93
122
|
end
|
94
123
|
|
124
|
+
##
|
125
|
+
# Functionally transforms the column named +column+ by applying the function given as a block.
|
126
|
+
# Example:
|
127
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
|
128
|
+
# df.transform_column!("a") { |v| v + 1 }
|
129
|
+
# df|"a" # => [2, 3, 4]
|
130
|
+
|
95
131
|
def transform_column!(column)
|
96
132
|
@data[column].map! { |e| yield e }
|
97
133
|
end
|
98
134
|
|
135
|
+
##
|
136
|
+
# Returns a copy of the data-frame with only the rows for which the function given in the block returns true.
|
137
|
+
# Example:
|
138
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
|
139
|
+
# df2 = df.select_rows { |r| r['a'].even? }
|
140
|
+
# df2|"b" # => ['b']
|
141
|
+
|
99
142
|
def select_rows
|
100
143
|
result = DataFrame.new(self.column_names)
|
101
144
|
self.each_with_index do |row, i|
|
@@ -104,6 +147,9 @@ module Rust
|
|
104
147
|
return result
|
105
148
|
end
|
106
149
|
|
150
|
+
##
|
151
|
+
# Returns true if the function given in the block returns true for any of the rows in this data-frame.
|
152
|
+
|
107
153
|
def has_row?
|
108
154
|
self.each_with_index do |row, i|
|
109
155
|
return true if yield row, i
|
@@ -111,6 +157,10 @@ module Rust
|
|
111
157
|
return false
|
112
158
|
end
|
113
159
|
|
160
|
+
##
|
161
|
+
# Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
|
162
|
+
# (only the columns for which the function returns true are kept).
|
163
|
+
|
114
164
|
def select_columns(cols=nil)
|
115
165
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
116
166
|
|
@@ -126,23 +176,35 @@ module Rust
|
|
126
176
|
end
|
127
177
|
alias :select_cols :select_columns
|
128
178
|
|
179
|
+
##
|
180
|
+
# Deletes the column named +column+.
|
181
|
+
|
129
182
|
def delete_column(column)
|
130
183
|
@labels.delete(column)
|
131
184
|
@data.delete(column)
|
132
185
|
end
|
133
186
|
|
187
|
+
##
|
188
|
+
# Deletes the +i+-th row.
|
189
|
+
|
134
190
|
def delete_row(i)
|
135
191
|
@data.each do |label, column|
|
136
192
|
column.delete_at(i)
|
137
193
|
end
|
138
194
|
end
|
139
195
|
|
196
|
+
##
|
197
|
+
# Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
|
198
|
+
|
140
199
|
def uniq_by(by)
|
141
200
|
result = self.clone
|
142
201
|
result.uniq_by!(by)
|
143
202
|
return result
|
144
203
|
end
|
145
204
|
|
205
|
+
##
|
206
|
+
# Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
|
207
|
+
|
146
208
|
def uniq_by!(by)
|
147
209
|
my_keys = {}
|
148
210
|
to_delete = []
|
@@ -165,19 +227,33 @@ module Rust
|
|
165
227
|
return self
|
166
228
|
end
|
167
229
|
|
230
|
+
##
|
231
|
+
# Return the names of the columns.
|
232
|
+
|
168
233
|
def column_names
|
169
234
|
return @labels.map { |k| k.to_s }
|
170
235
|
end
|
171
236
|
alias :colnames :column_names
|
172
237
|
|
238
|
+
##
|
239
|
+
# Returns the number of rows.
|
240
|
+
|
173
241
|
def rows
|
174
242
|
@data.values[0].size
|
175
243
|
end
|
176
244
|
|
245
|
+
##
|
246
|
+
# Returns the number of columns
|
247
|
+
|
177
248
|
def columns
|
178
249
|
@labels.size
|
179
250
|
end
|
180
251
|
|
252
|
+
##
|
253
|
+
# Adds the given +row+ to the data-frame. +row+ can be either:
|
254
|
+
# - An Array of values for all the columns (in the order of #column_names);
|
255
|
+
# - A Hash containing associations between column names and value to be set.
|
256
|
+
|
181
257
|
def add_row(row)
|
182
258
|
if row.is_a?(Array)
|
183
259
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -201,6 +277,11 @@ module Rust
|
|
201
277
|
end
|
202
278
|
alias :<< :add_row
|
203
279
|
|
280
|
+
##
|
281
|
+
# Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
|
282
|
+
# rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
|
283
|
+
# value to assign for the new column.
|
284
|
+
|
204
285
|
def add_column(name, values=nil)
|
205
286
|
raise "Column already exists" if @labels.include?(name)
|
206
287
|
raise "Values or block required" if !values && !block_given?
|
@@ -217,6 +298,9 @@ module Rust
|
|
217
298
|
end
|
218
299
|
end
|
219
300
|
|
301
|
+
##
|
302
|
+
# Yields each row as a Hash containing column names as keys and values as values.
|
303
|
+
|
220
304
|
def each
|
221
305
|
self.each_with_index do |element, i|
|
222
306
|
yield element
|
@@ -225,6 +309,10 @@ module Rust
|
|
225
309
|
return self
|
226
310
|
end
|
227
311
|
|
312
|
+
##
|
313
|
+
# Yields each row as an Array of values (instead of a Hash). Faster alternative to
|
314
|
+
# #each.
|
315
|
+
|
228
316
|
def fast_each
|
229
317
|
self.fast_each_with_index do |element, i|
|
230
318
|
yield element
|
@@ -233,6 +321,9 @@ module Rust
|
|
233
321
|
return self
|
234
322
|
end
|
235
323
|
|
324
|
+
##
|
325
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index.
|
326
|
+
|
236
327
|
def each_with_index
|
237
328
|
for i in 0...self.rows
|
238
329
|
element = {}
|
@@ -246,6 +337,10 @@ module Rust
|
|
246
337
|
return self
|
247
338
|
end
|
248
339
|
|
340
|
+
##
|
341
|
+
# Yields each row as an Array of values (instead of a Hash) and the row index. Faster
|
342
|
+
# alternative to #each_with_index.
|
343
|
+
|
249
344
|
def fast_each_with_index
|
250
345
|
for i in 0...self.rows
|
251
346
|
element = []
|
@@ -302,6 +397,9 @@ module Rust
|
|
302
397
|
return result
|
303
398
|
end
|
304
399
|
|
400
|
+
##
|
401
|
+
# Returns a copy of the data-frame containing only the first +n+ rows.
|
402
|
+
|
305
403
|
def head(n=10)
|
306
404
|
result = DataFrame.new(self.column_names)
|
307
405
|
self.each_with_index do |row, i|
|
@@ -310,6 +408,11 @@ module Rust
|
|
310
408
|
return result
|
311
409
|
end
|
312
410
|
|
411
|
+
##
|
412
|
+
# Merges this data-frame with +other+ in terms of the +by+ columns (Array of Strings).
|
413
|
+
# +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
|
414
|
+
# for this and the +other+ data-frame, respectively.
|
415
|
+
|
313
416
|
def merge(other, by, first_alias = "x", second_alias = "y")
|
314
417
|
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
315
418
|
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
@@ -376,6 +479,14 @@ module Rust
|
|
376
479
|
return result
|
377
480
|
end
|
378
481
|
|
482
|
+
##
|
483
|
+
# Aggregates the values in groups depending on the +by+ column (String).
|
484
|
+
# A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
|
485
|
+
# specified as optional arguments in which the name of the argument represents the column name and the value
|
486
|
+
# contains a block for aggregating the specific column.
|
487
|
+
# Both the default and the specialized blocks must take as argument an array of values and must return a
|
488
|
+
# scalar value.
|
489
|
+
|
379
490
|
def aggregate(by, **aggregators)
|
380
491
|
raise TypeError, "Expected a string" unless by.is_a?(String)
|
381
492
|
raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
|
@@ -416,12 +527,18 @@ module Rust
|
|
416
527
|
return result
|
417
528
|
end
|
418
529
|
|
530
|
+
##
|
531
|
+
# Returns a copy of this data-frame in which the rows are sorted by the values of the column named +column+.
|
532
|
+
|
419
533
|
def sort_by(column)
|
420
534
|
result = self.clone
|
421
535
|
result.sort_by!(column)
|
422
536
|
return result
|
423
537
|
end
|
424
538
|
|
539
|
+
##
|
540
|
+
# Sorts the rows of this data-frame by the values of the +by+ column.
|
541
|
+
|
425
542
|
def sort_by!(by)
|
426
543
|
copy = @data[by].clone
|
427
544
|
copy.sort!
|
@@ -447,6 +564,9 @@ module Rust
|
|
447
564
|
@data[by].sort!
|
448
565
|
end
|
449
566
|
|
567
|
+
##
|
568
|
+
# Adds all the rows in +dataframe+ to this data-frame. The column names must match.
|
569
|
+
|
450
570
|
def bind_rows!(dataframe)
|
451
571
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
452
572
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -459,6 +579,9 @@ module Rust
|
|
459
579
|
end
|
460
580
|
alias :rbind! :bind_rows!
|
461
581
|
|
582
|
+
##
|
583
|
+
# Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
|
584
|
+
|
462
585
|
def bind_columns!(dataframe)
|
463
586
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
464
587
|
raise "The number of rows are not compatible" if self.rows != dataframe.rows
|
@@ -472,6 +595,9 @@ module Rust
|
|
472
595
|
end
|
473
596
|
alias :cbind! :bind_columns!
|
474
597
|
|
598
|
+
##
|
599
|
+
# Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
|
600
|
+
|
475
601
|
def bind_rows(dataframe)
|
476
602
|
result = self.clone
|
477
603
|
result.bind_rows!(dataframe)
|
@@ -479,6 +605,9 @@ module Rust
|
|
479
605
|
end
|
480
606
|
alias :rbind :bind_rows
|
481
607
|
|
608
|
+
##
|
609
|
+
# Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
|
610
|
+
|
482
611
|
def bind_columns(dataframe)
|
483
612
|
result = self.clone
|
484
613
|
result.bind_columns!(dataframe)
|
@@ -486,12 +615,22 @@ module Rust
|
|
486
615
|
end
|
487
616
|
alias :cbind :bind_columns
|
488
617
|
|
618
|
+
##
|
619
|
+
# Returns a copy of this data-frame.
|
620
|
+
|
489
621
|
def clone
|
490
622
|
DataFrame.new(@data)
|
491
623
|
end
|
492
624
|
end
|
493
625
|
|
626
|
+
##
|
627
|
+
# Represents an array of DataFrame
|
628
|
+
|
494
629
|
class DataFrameArray < Array
|
630
|
+
|
631
|
+
##
|
632
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
633
|
+
|
495
634
|
def bind_all
|
496
635
|
return nil if self.size == 0
|
497
636
|
|
@@ -505,7 +644,14 @@ module Rust
|
|
505
644
|
end
|
506
645
|
end
|
507
646
|
|
647
|
+
##
|
648
|
+
# Represents a hash of DataFrame
|
649
|
+
|
508
650
|
class DataFrameHash < Hash
|
651
|
+
|
652
|
+
##
|
653
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
654
|
+
|
509
655
|
def bind_all
|
510
656
|
return nil if self.values.size == 0
|
511
657
|
|
@@ -1,7 +1,18 @@
|
|
1
1
|
require_relative '../rust'
|
2
2
|
|
3
3
|
module Rust
|
4
|
+
|
5
|
+
##
|
6
|
+
# Represents a data-type that can be loaded from and written to R.
|
7
|
+
|
4
8
|
class RustDatatype
|
9
|
+
|
10
|
+
##
|
11
|
+
# Retrieves the given +variable+ from R and transforms it into the appropriate Ruby counterpart.
|
12
|
+
# To infer the type, it uses the class method #can_pull? of all the RustDatatype classes to check the types
|
13
|
+
# that are compatible with the given R variable (type and class). If more than one candidate is available, the one
|
14
|
+
# with maximum #pull_priority is chosen.
|
15
|
+
|
5
16
|
def self.pull_variable(variable, forced_interpreter = nil)
|
6
17
|
r_type = Rust._pull("as.character(typeof(#{variable}))")
|
7
18
|
r_class = Rust._pull("as.character(class(#{variable}))")
|
@@ -36,14 +47,24 @@ module Rust
|
|
36
47
|
end
|
37
48
|
end
|
38
49
|
|
50
|
+
##
|
51
|
+
# Returns the priority of this type when a #pull_variable operation is performed. Higher priority means that
|
52
|
+
# the type is to be preferred over other candidate types.
|
53
|
+
|
39
54
|
def self.pull_priority
|
40
55
|
0
|
41
56
|
end
|
42
57
|
|
58
|
+
##
|
59
|
+
# Writes the current object in R as +variable_name+.
|
60
|
+
|
43
61
|
def load_in_r_as(variable_name)
|
44
62
|
raise "Loading #{self.class} in R was not implemented"
|
45
63
|
end
|
46
64
|
|
65
|
+
##
|
66
|
+
# EXPERIMENTAL: Do not use
|
67
|
+
|
47
68
|
def r_mirror_to(other_variable)
|
48
69
|
varname = self.mirrored_R_variable_name
|
49
70
|
|
@@ -53,6 +74,9 @@ module Rust
|
|
53
74
|
return varname
|
54
75
|
end
|
55
76
|
|
77
|
+
##
|
78
|
+
# EXPERIMENTAL: Do not use
|
79
|
+
|
56
80
|
def r_mirror
|
57
81
|
varname = self.mirrored_R_variable_name
|
58
82
|
|
@@ -67,6 +91,9 @@ module Rust
|
|
67
91
|
return varname
|
68
92
|
end
|
69
93
|
|
94
|
+
##
|
95
|
+
# Returns the hash of the current object.
|
96
|
+
|
70
97
|
def r_hash
|
71
98
|
self.hash.to_s
|
72
99
|
end
|
@@ -77,6 +104,9 @@ module Rust
|
|
77
104
|
end
|
78
105
|
end
|
79
106
|
|
107
|
+
##
|
108
|
+
# The null value in R
|
109
|
+
|
80
110
|
class Null < RustDatatype
|
81
111
|
def self.can_pull?(type, klass)
|
82
112
|
return type == "NULL" && klass == "NULL"
|
@@ -101,6 +131,10 @@ class FalseClass
|
|
101
131
|
end
|
102
132
|
|
103
133
|
class Object
|
134
|
+
|
135
|
+
##
|
136
|
+
# Returns a string with the R representation of this object. Raises an exception for unsupported objects.
|
137
|
+
|
104
138
|
def to_R
|
105
139
|
raise TypeError, "Unsupported type for #{self.class}"
|
106
140
|
end
|