rust 0.9 → 0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ruby-rust +1 -1
- data/lib/rust/core/csv.rb +21 -0
- data/lib/rust/core/rust.rb +65 -1
- data/lib/rust/core/types/dataframe.rb +146 -0
- data/lib/rust/core/types/datatype.rb +34 -0
- data/lib/rust/core/types/factor.rb +27 -0
- data/lib/rust/core/types/language.rb +44 -11
- data/lib/rust/core/types/list.rb +16 -0
- data/lib/rust/core/types/matrix.rb +29 -6
- data/lib/rust/core/types/s4class.rb +19 -0
- data/lib/rust/core/types/utils.rb +14 -1
- data/lib/rust/models/anova.rb +17 -0
- data/lib/rust/models/regression.rb +54 -1
- data/lib/rust/plots/basic-plots.rb +32 -0
- data/lib/rust/plots/core.rb +90 -0
- data/lib/rust/plots/distribution-plots.rb +13 -0
- data/lib/rust/stats/correlation.rb +43 -0
- data/lib/rust/stats/descriptive.rb +29 -0
- data/lib/rust/stats/effsize.rb +21 -0
- data/lib/rust/stats/probabilities.rb +141 -33
- data/lib/rust/stats/tests.rb +97 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe8f5c3e0753395fe3925f7a64eab0476308df329a9c9d594c74d1e568419204
|
4
|
+
data.tar.gz: 9f5371713565e77777deba19ba745bb358e0a23dfad6fb562e3940cf90cf8f1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abf20b1c4cea07089c27ab886ff640886ff4e5f74d2b964b3ab959d413cdf1d34d72055e1d7141e3585df5adcdfbf5a53716d3cf7ca7b352ee02d4e39dda020b
|
7
|
+
data.tar.gz: c6c97cc64a50449bcf97a5d584f6a17b4e05762f79321ed9dcccfee496b567933f3fb0b4e8908c4f8029aeb0683cb7d86863586f7c200979f98911aa5b82c258
|
data/bin/ruby-rust
CHANGED
data/lib/rust/core/csv.rb
CHANGED
@@ -2,7 +2,16 @@ require_relative '../core'
|
|
2
2
|
require 'csv'
|
3
3
|
|
4
4
|
module Rust
|
5
|
+
|
6
|
+
##
|
7
|
+
# Class that handles CSVs (both loading and saving).
|
8
|
+
|
5
9
|
class CSV
|
10
|
+
|
11
|
+
##
|
12
|
+
# Reads a +pattern+ of CSVs (glob-style pattern) and returns a map containing as keys the filenames of the
|
13
|
+
# loaded CSVs and as values the corresponding data-frames. Options can be specified (see #read).
|
14
|
+
|
6
15
|
def self.read_all(pattern, **options)
|
7
16
|
result = DataFrameHash.new
|
8
17
|
Dir.glob(pattern).each do |filename|
|
@@ -11,6 +20,13 @@ module Rust
|
|
11
20
|
return result
|
12
21
|
end
|
13
22
|
|
23
|
+
##
|
24
|
+
# Reads the CSV at +filename+. Options can be specified, such as:
|
25
|
+
# - headers => set to true if the first row contains the headers, false otherwise;
|
26
|
+
# - infer_numbers => if a column contains only numbers, the values are transformed into floats; true by default;
|
27
|
+
# - infer_integers => if infer_numbers is active, it distinguishes between integers and floats;
|
28
|
+
# The other options are the ones that can be used in the R function "read.csv".
|
29
|
+
|
14
30
|
def self.read(filename, **options)
|
15
31
|
hash = {}
|
16
32
|
labels = nil
|
@@ -46,6 +62,11 @@ module Rust
|
|
46
62
|
return result
|
47
63
|
end
|
48
64
|
|
65
|
+
##
|
66
|
+
# Writes the +dataframe+ as a CSV at +filename+. Options can be specified, such as:
|
67
|
+
# - headers => set to true if the first row should contain the headers, false otherwise;
|
68
|
+
# The other options are the ones that can be used in the R function "read.csv".
|
69
|
+
|
49
70
|
def self.write(filename, dataframe, **options)
|
50
71
|
raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
|
51
72
|
|
data/lib/rust/core/rust.rb
CHANGED
@@ -2,6 +2,10 @@ require 'code-assertions'
|
|
2
2
|
require 'stringio'
|
3
3
|
require 'rinruby'
|
4
4
|
|
5
|
+
##
|
6
|
+
# Basic module for the Rust package. It includes a series of sub-modules that provide specific features, such as
|
7
|
+
# statistical hypothesis tests, plots, and so on.
|
8
|
+
|
5
9
|
module Rust
|
6
10
|
CLIENT_MUTEX = Mutex.new
|
7
11
|
R_MUTEX = Mutex.new
|
@@ -15,14 +19,23 @@ module Rust
|
|
15
19
|
@@debugging = $RUST_DEBUG || false
|
16
20
|
@@in_client_mutex = false
|
17
21
|
|
22
|
+
##
|
23
|
+
# Sets the debug mode. Any call to R will be written on the standard output.
|
24
|
+
|
18
25
|
def self.debug
|
19
26
|
@@debugging = true
|
20
27
|
end
|
21
28
|
|
29
|
+
##
|
30
|
+
# Checks if the debug mode is active.
|
31
|
+
|
22
32
|
def self.debug?
|
23
33
|
return @@debugging
|
24
34
|
end
|
25
35
|
|
36
|
+
##
|
37
|
+
# Runs the given block with a mutex. It is mandatory to run any R command with this method.
|
38
|
+
|
26
39
|
def self.exclusive
|
27
40
|
result = nil
|
28
41
|
CLIENT_MUTEX.synchronize do
|
@@ -33,6 +46,13 @@ module Rust
|
|
33
46
|
return result
|
34
47
|
end
|
35
48
|
|
49
|
+
##
|
50
|
+
# Sets a variable in the R environment with a given value.
|
51
|
+
#
|
52
|
+
# Raises an error if the value can not be translated into an R object.
|
53
|
+
#
|
54
|
+
# Example: Rust['a'] = 0.
|
55
|
+
|
36
56
|
def self.[]=(variable, value)
|
37
57
|
if value.is_a?(RustDatatype)
|
38
58
|
value.load_in_r_as(variable.to_s)
|
@@ -41,9 +61,13 @@ module Rust
|
|
41
61
|
else
|
42
62
|
raise "Trying to assign #{variable} with #{value.class}; expected RustDatatype, String, Numeric, or Array"
|
43
63
|
end
|
44
|
-
|
45
64
|
end
|
46
65
|
|
66
|
+
##
|
67
|
+
# Retrieves the value of a variable from the R environment.
|
68
|
+
#
|
69
|
+
# Example: Rust['a']
|
70
|
+
|
47
71
|
def self.[](variable)
|
48
72
|
return RustDatatype.pull_variable(variable)
|
49
73
|
end
|
@@ -107,6 +131,9 @@ module Rust
|
|
107
131
|
end
|
108
132
|
end
|
109
133
|
|
134
|
+
##
|
135
|
+
# Checks if the given +name+ library can be used. Returns true if it is available, false otherwise.
|
136
|
+
|
110
137
|
def self.check_library(name)
|
111
138
|
self.exclusive do
|
112
139
|
result, _ = self._pull("require(\"#{name}\", character.only = TRUE)", true)
|
@@ -114,6 +141,9 @@ module Rust
|
|
114
141
|
end
|
115
142
|
end
|
116
143
|
|
144
|
+
##
|
145
|
+
# Loads the given +name+ library.
|
146
|
+
|
117
147
|
def self.load_library(name)
|
118
148
|
self.exclusive do
|
119
149
|
self._eval("library(\"#{name}\", character.only = TRUE)")
|
@@ -122,6 +152,9 @@ module Rust
|
|
122
152
|
return nil
|
123
153
|
end
|
124
154
|
|
155
|
+
##
|
156
|
+
# Installs the given +name+ library and its dependencies.
|
157
|
+
|
125
158
|
def self.install_library(name)
|
126
159
|
self.exclusive do
|
127
160
|
self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
|
@@ -130,12 +163,38 @@ module Rust
|
|
130
163
|
return nil
|
131
164
|
end
|
132
165
|
|
166
|
+
##
|
167
|
+
# Installs the +library+ library if it is not available and loads it.
|
168
|
+
|
133
169
|
def self.prerequisite(library)
|
134
170
|
self.install_library(library) unless self.check_library(library)
|
135
171
|
self.load_library(library)
|
136
172
|
end
|
173
|
+
|
174
|
+
##
|
175
|
+
# Asks for help on a given +mod+. If no module is given, lists the available modules.
|
176
|
+
|
177
|
+
def self.help!(mod = nil)
|
178
|
+
unless mod
|
179
|
+
puts "You have the following modules:"
|
180
|
+
Rust.constants.map { |c| Rust.const_get(c) }.select { |c| c.class == Module }.each do |mod|
|
181
|
+
puts "\t- #{mod}"
|
182
|
+
end
|
183
|
+
puts "Run \"help! {module}\" for more detailed information about the module"
|
184
|
+
else
|
185
|
+
if mod.methods.include?(:help!)
|
186
|
+
mod.help!
|
187
|
+
else
|
188
|
+
puts "Sorry, no help available for #{mod}"
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
137
192
|
end
|
138
193
|
|
194
|
+
##
|
195
|
+
# Module that contains methods that allow calling R functions faster. Such methods have names resembling the ones
|
196
|
+
# available in R (e.g., cor, wilcox_test).
|
197
|
+
|
139
198
|
module Rust::RBindings
|
140
199
|
def data_frame(*args)
|
141
200
|
Rust::DataFrame.new(*args)
|
@@ -152,6 +211,11 @@ module Rust::TestCases
|
|
152
211
|
end
|
153
212
|
end
|
154
213
|
|
214
|
+
##
|
215
|
+
# Shortcut for including the RBindings module
|
216
|
+
|
155
217
|
def bind_r!
|
156
218
|
include Rust::RBindings
|
157
219
|
end
|
220
|
+
|
221
|
+
bind_r! if ENV['RUBY_RUST_BINDING'] == '1'
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require_relative 'datatype'
|
2
2
|
|
3
3
|
module Rust
|
4
|
+
|
5
|
+
##
|
6
|
+
# Mirror of the data-frame type in R.
|
7
|
+
|
4
8
|
class DataFrame < RustDatatype
|
5
9
|
def self.can_pull?(type, klass)
|
6
10
|
return [klass].flatten.include?("data.frame")
|
@@ -19,6 +23,12 @@ module Rust
|
|
19
23
|
return DataFrame.new(hash)
|
20
24
|
end
|
21
25
|
|
26
|
+
##
|
27
|
+
# Creates a new data-frame.
|
28
|
+
# +labels_or_data+ can be either:
|
29
|
+
# - an Array of column names (creates an empty data-frame)
|
30
|
+
# - a Hash with column names as keys and values as values
|
31
|
+
|
22
32
|
def initialize(labels_or_data)
|
23
33
|
@data = {}
|
24
34
|
|
@@ -34,6 +44,9 @@ module Rust
|
|
34
44
|
end
|
35
45
|
end
|
36
46
|
|
47
|
+
##
|
48
|
+
# Returns the +i+-th row of the data-frame
|
49
|
+
|
37
50
|
def row(i)
|
38
51
|
if i < 0 || i >= self.rows
|
39
52
|
return nil
|
@@ -42,6 +55,9 @@ module Rust
|
|
42
55
|
end
|
43
56
|
end
|
44
57
|
|
58
|
+
##
|
59
|
+
# Returns the +i+-th row of the data-frame. Faster (but harder to interpret) alternative to #row.
|
60
|
+
|
45
61
|
def fast_row(i)
|
46
62
|
if i < 0 || i >= self.rows
|
47
63
|
return nil
|
@@ -50,6 +66,9 @@ module Rust
|
|
50
66
|
end
|
51
67
|
end
|
52
68
|
|
69
|
+
##
|
70
|
+
# Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
|
71
|
+
|
53
72
|
def shuffle(*args)
|
54
73
|
result = DataFrame.new(@labels)
|
55
74
|
|
@@ -64,6 +83,10 @@ module Rust
|
|
64
83
|
return result
|
65
84
|
end
|
66
85
|
|
86
|
+
##
|
87
|
+
# Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
|
88
|
+
# are nil, all the rows/columns are returned.
|
89
|
+
|
67
90
|
def [](rows, cols=nil)
|
68
91
|
raise "You must specify either rows or columns to select" if !rows && !cols
|
69
92
|
result = self
|
@@ -79,11 +102,17 @@ module Rust
|
|
79
102
|
return result
|
80
103
|
end
|
81
104
|
|
105
|
+
##
|
106
|
+
# Returns the column named +name+.
|
107
|
+
|
82
108
|
def column(name)
|
83
109
|
return @data[name]
|
84
110
|
end
|
85
111
|
alias :| :column
|
86
112
|
|
113
|
+
##
|
114
|
+
# Renames the column named +old_name+ to +new_name+.
|
115
|
+
|
87
116
|
def rename_column!(old_name, new_name)
|
88
117
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
89
118
|
raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
|
@@ -92,10 +121,24 @@ module Rust
|
|
92
121
|
@labels[@labels.index(old_name)] = new_name
|
93
122
|
end
|
94
123
|
|
124
|
+
##
|
125
|
+
# Functionally transforms the column named +column+ by applying the function given as a block.
|
126
|
+
# Example:
|
127
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
|
128
|
+
# df.transform_column!("a") { |v| v + 1 }
|
129
|
+
# df|"a" # => [2, 3, 4]
|
130
|
+
|
95
131
|
def transform_column!(column)
|
96
132
|
@data[column].map! { |e| yield e }
|
97
133
|
end
|
98
134
|
|
135
|
+
##
|
136
|
+
# Returns a copy of the data-frame with only the rows for which the function given in the block returns true.
|
137
|
+
# Example:
|
138
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
|
139
|
+
# df2 = df.select_rows { |r| r['a'].even? }
|
140
|
+
# df2|"b" # => ['b']
|
141
|
+
|
99
142
|
def select_rows
|
100
143
|
result = DataFrame.new(self.column_names)
|
101
144
|
self.each_with_index do |row, i|
|
@@ -104,6 +147,9 @@ module Rust
|
|
104
147
|
return result
|
105
148
|
end
|
106
149
|
|
150
|
+
##
|
151
|
+
# Returns true if the function given in the block returns true for any of the rows in this data-frame.
|
152
|
+
|
107
153
|
def has_row?
|
108
154
|
self.each_with_index do |row, i|
|
109
155
|
return true if yield row, i
|
@@ -111,6 +157,10 @@ module Rust
|
|
111
157
|
return false
|
112
158
|
end
|
113
159
|
|
160
|
+
##
|
161
|
+
# Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
|
162
|
+
# (only the columns for which the function returns true are kept).
|
163
|
+
|
114
164
|
def select_columns(cols=nil)
|
115
165
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
116
166
|
|
@@ -126,23 +176,35 @@ module Rust
|
|
126
176
|
end
|
127
177
|
alias :select_cols :select_columns
|
128
178
|
|
179
|
+
##
|
180
|
+
# Deletes the column named +column+.
|
181
|
+
|
129
182
|
def delete_column(column)
|
130
183
|
@labels.delete(column)
|
131
184
|
@data.delete(column)
|
132
185
|
end
|
133
186
|
|
187
|
+
##
|
188
|
+
# Deletes the +i+-th row.
|
189
|
+
|
134
190
|
def delete_row(i)
|
135
191
|
@data.each do |label, column|
|
136
192
|
column.delete_at(i)
|
137
193
|
end
|
138
194
|
end
|
139
195
|
|
196
|
+
##
|
197
|
+
# Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
|
198
|
+
|
140
199
|
def uniq_by(by)
|
141
200
|
result = self.clone
|
142
201
|
result.uniq_by!(by)
|
143
202
|
return result
|
144
203
|
end
|
145
204
|
|
205
|
+
##
|
206
|
+
# Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
|
207
|
+
|
146
208
|
def uniq_by!(by)
|
147
209
|
my_keys = {}
|
148
210
|
to_delete = []
|
@@ -165,19 +227,33 @@ module Rust
|
|
165
227
|
return self
|
166
228
|
end
|
167
229
|
|
230
|
+
##
|
231
|
+
# Returns the names of the columns.
|
232
|
+
|
168
233
|
def column_names
|
169
234
|
return @labels.map { |k| k.to_s }
|
170
235
|
end
|
171
236
|
alias :colnames :column_names
|
172
237
|
|
238
|
+
##
|
239
|
+
# Returns the number of rows.
|
240
|
+
|
173
241
|
def rows
|
174
242
|
@data.values[0].size
|
175
243
|
end
|
176
244
|
|
245
|
+
##
|
246
|
+
# Returns the number of columns
|
247
|
+
|
177
248
|
def columns
|
178
249
|
@labels.size
|
179
250
|
end
|
180
251
|
|
252
|
+
##
|
253
|
+
# Adds the given +row+ to the data-frame. +row+ can be either:
|
254
|
+
# - An Array of values for all the columns (in the order of #column_names);
|
255
|
+
# - A Hash containing associations between column names and value to be set.
|
256
|
+
|
181
257
|
def add_row(row)
|
182
258
|
if row.is_a?(Array)
|
183
259
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -201,6 +277,11 @@ module Rust
|
|
201
277
|
end
|
202
278
|
alias :<< :add_row
|
203
279
|
|
280
|
+
##
|
281
|
+
# Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
|
282
|
+
# rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
|
283
|
+
# value to assign for the new column.
|
284
|
+
|
204
285
|
def add_column(name, values=nil)
|
205
286
|
raise "Column already exists" if @labels.include?(name)
|
206
287
|
raise "Values or block required" if !values && !block_given?
|
@@ -217,6 +298,9 @@ module Rust
|
|
217
298
|
end
|
218
299
|
end
|
219
300
|
|
301
|
+
##
|
302
|
+
# Yields each row as a Hash containing column names as keys and values as values.
|
303
|
+
|
220
304
|
def each
|
221
305
|
self.each_with_index do |element, i|
|
222
306
|
yield element
|
@@ -225,6 +309,10 @@ module Rust
|
|
225
309
|
return self
|
226
310
|
end
|
227
311
|
|
312
|
+
##
|
313
|
+
# Yields each row as an Array of values (rather than a Hash). Faster alternative to
|
314
|
+
# #each.
|
315
|
+
|
228
316
|
def fast_each
|
229
317
|
self.fast_each_with_index do |element, i|
|
230
318
|
yield element
|
@@ -233,6 +321,9 @@ module Rust
|
|
233
321
|
return self
|
234
322
|
end
|
235
323
|
|
324
|
+
##
|
325
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index.
|
326
|
+
|
236
327
|
def each_with_index
|
237
328
|
for i in 0...self.rows
|
238
329
|
element = {}
|
@@ -246,6 +337,10 @@ module Rust
|
|
246
337
|
return self
|
247
338
|
end
|
248
339
|
|
340
|
+
##
|
341
|
+
# Yields each row as an Array of values (rather than a Hash) and the row index. Faster
|
342
|
+
# alternative to #each_with_index.
|
343
|
+
|
249
344
|
def fast_each_with_index
|
250
345
|
for i in 0...self.rows
|
251
346
|
element = []
|
@@ -302,6 +397,9 @@ module Rust
|
|
302
397
|
return result
|
303
398
|
end
|
304
399
|
|
400
|
+
##
|
401
|
+
# Returns a copy of the data-frame containing only the first +n+ rows.
|
402
|
+
|
305
403
|
def head(n=10)
|
306
404
|
result = DataFrame.new(self.column_names)
|
307
405
|
self.each_with_index do |row, i|
|
@@ -310,6 +408,11 @@ module Rust
|
|
310
408
|
return result
|
311
409
|
end
|
312
410
|
|
411
|
+
##
|
412
|
+
# Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String).
|
413
|
+
# +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
|
414
|
+
# for this and the +other+ data-frame, respectively.
|
415
|
+
|
313
416
|
def merge(other, by, first_alias = "x", second_alias = "y")
|
314
417
|
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
315
418
|
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
@@ -376,6 +479,14 @@ module Rust
|
|
376
479
|
return result
|
377
480
|
end
|
378
481
|
|
482
|
+
##
|
483
|
+
# Aggregates the values in groups depending on the +by+ column (String).
|
484
|
+
# A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
|
485
|
+
# specified as optional arguments in which the name of the argument represents the column name and the value
|
486
|
+
# contains a block for aggregating the specific column.
|
487
|
+
# Both the default and the specialized blocks must take as argument an array of values and must return a
|
488
|
+
# scalar value.
|
489
|
+
|
379
490
|
def aggregate(by, **aggregators)
|
380
491
|
raise TypeError, "Expected a string" unless by.is_a?(String)
|
381
492
|
raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
|
@@ -416,12 +527,18 @@ module Rust
|
|
416
527
|
return result
|
417
528
|
end
|
418
529
|
|
530
|
+
##
|
531
|
+
# Returns a copy of this data-frame in which the rows are sorted by the values of the +column+ column.
|
532
|
+
|
419
533
|
def sort_by(column)
|
420
534
|
result = self.clone
|
421
535
|
result.sort_by!(column)
|
422
536
|
return result
|
423
537
|
end
|
424
538
|
|
539
|
+
##
|
540
|
+
# Sorts the rows of this data-frame by the values of the +by+ column.
|
541
|
+
|
425
542
|
def sort_by!(by)
|
426
543
|
copy = @data[by].clone
|
427
544
|
copy.sort!
|
@@ -447,6 +564,9 @@ module Rust
|
|
447
564
|
@data[by].sort!
|
448
565
|
end
|
449
566
|
|
567
|
+
##
|
568
|
+
# Adds all the rows in +dataframe+ to this data-frame. The column names must match.
|
569
|
+
|
450
570
|
def bind_rows!(dataframe)
|
451
571
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
452
572
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -459,6 +579,9 @@ module Rust
|
|
459
579
|
end
|
460
580
|
alias :rbind! :bind_rows!
|
461
581
|
|
582
|
+
##
|
583
|
+
# Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
|
584
|
+
|
462
585
|
def bind_columns!(dataframe)
|
463
586
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
464
587
|
raise "The number of rows are not compatible" if self.rows != dataframe.rows
|
@@ -472,6 +595,9 @@ module Rust
|
|
472
595
|
end
|
473
596
|
alias :cbind! :bind_columns!
|
474
597
|
|
598
|
+
##
|
599
|
+
# Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
|
600
|
+
|
475
601
|
def bind_rows(dataframe)
|
476
602
|
result = self.clone
|
477
603
|
result.bind_rows!(dataframe)
|
@@ -479,6 +605,9 @@ module Rust
|
|
479
605
|
end
|
480
606
|
alias :rbind :bind_rows
|
481
607
|
|
608
|
+
##
|
609
|
+
# Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
|
610
|
+
|
482
611
|
def bind_columns(dataframe)
|
483
612
|
result = self.clone
|
484
613
|
result.bind_columns!(dataframe)
|
@@ -486,12 +615,22 @@ module Rust
|
|
486
615
|
end
|
487
616
|
alias :cbind :bind_columns
|
488
617
|
|
618
|
+
##
|
619
|
+
# Returns a copy of this data-frame.
|
620
|
+
|
489
621
|
def clone
|
490
622
|
DataFrame.new(@data)
|
491
623
|
end
|
492
624
|
end
|
493
625
|
|
626
|
+
##
|
627
|
+
# Represents an array of DataFrame
|
628
|
+
|
494
629
|
class DataFrameArray < Array
|
630
|
+
|
631
|
+
##
|
632
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
633
|
+
|
495
634
|
def bind_all
|
496
635
|
return nil if self.size == 0
|
497
636
|
|
@@ -505,7 +644,14 @@ module Rust
|
|
505
644
|
end
|
506
645
|
end
|
507
646
|
|
647
|
+
##
|
648
|
+
# Represents a hash of DataFrame
|
649
|
+
|
508
650
|
class DataFrameHash < Hash
|
651
|
+
|
652
|
+
##
|
653
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
654
|
+
|
509
655
|
def bind_all
|
510
656
|
return nil if self.values.size == 0
|
511
657
|
|
@@ -1,7 +1,18 @@
|
|
1
1
|
require_relative '../rust'
|
2
2
|
|
3
3
|
module Rust
|
4
|
+
|
5
|
+
##
|
6
|
+
# Represents a data-type that can be loaded from and written to R.
|
7
|
+
|
4
8
|
class RustDatatype
|
9
|
+
|
10
|
+
##
|
11
|
+
# Retrieves the given +variable+ from R and transforms it into the appropriate Ruby counterpart.
|
12
|
+
# To infer the type, it uses the class method #can_pull? of all the RustDatatype classes to check the types
|
13
|
+
# that are compatible with the given R variable (type and class). If more than one candidate is available, the one
|
14
|
+
# with maximum #pull_priority is chosen.
|
15
|
+
|
5
16
|
def self.pull_variable(variable, forced_interpreter = nil)
|
6
17
|
r_type = Rust._pull("as.character(typeof(#{variable}))")
|
7
18
|
r_class = Rust._pull("as.character(class(#{variable}))")
|
@@ -36,14 +47,24 @@ module Rust
|
|
36
47
|
end
|
37
48
|
end
|
38
49
|
|
50
|
+
##
|
51
|
+
# Returns the priority of this type when a #pull_variable operation is performed. Higher priority means that
|
52
|
+
# the type is to be preferred over other candidate types.
|
53
|
+
|
39
54
|
def self.pull_priority
|
40
55
|
0
|
41
56
|
end
|
42
57
|
|
58
|
+
##
|
59
|
+
# Writes the current object in R as +variable_name+.
|
60
|
+
|
43
61
|
def load_in_r_as(variable_name)
|
44
62
|
raise "Loading #{self.class} in R was not implemented"
|
45
63
|
end
|
46
64
|
|
65
|
+
##
|
66
|
+
# EXPERIMENTAL: Do not use
|
67
|
+
|
47
68
|
def r_mirror_to(other_variable)
|
48
69
|
varname = self.mirrored_R_variable_name
|
49
70
|
|
@@ -53,6 +74,9 @@ module Rust
|
|
53
74
|
return varname
|
54
75
|
end
|
55
76
|
|
77
|
+
##
|
78
|
+
# EXPERIMENTAL: Do not use
|
79
|
+
|
56
80
|
def r_mirror
|
57
81
|
varname = self.mirrored_R_variable_name
|
58
82
|
|
@@ -67,6 +91,9 @@ module Rust
|
|
67
91
|
return varname
|
68
92
|
end
|
69
93
|
|
94
|
+
##
|
95
|
+
# Returns the hash of the current object.
|
96
|
+
|
70
97
|
def r_hash
|
71
98
|
self.hash.to_s
|
72
99
|
end
|
@@ -77,6 +104,9 @@ module Rust
|
|
77
104
|
end
|
78
105
|
end
|
79
106
|
|
107
|
+
##
|
108
|
+
# The null value in R
|
109
|
+
|
80
110
|
class Null < RustDatatype
|
81
111
|
def self.can_pull?(type, klass)
|
82
112
|
return type == "NULL" && klass == "NULL"
|
@@ -101,6 +131,10 @@ class FalseClass
|
|
101
131
|
end
|
102
132
|
|
103
133
|
class Object
|
134
|
+
|
135
|
+
##
|
136
|
+
# Returns a string with the R representation of this object. Raises an exception for unsupported objects.
|
137
|
+
|
104
138
|
def to_R
|
105
139
|
raise TypeError, "Unsupported type for #{self.class}"
|
106
140
|
end
|