rust 0.4 → 0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +35 -4
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +324 -244
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +98 -107
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +46 -3
- data/lib/rust/stats/descriptive.rb +157 -0
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +44 -21
- data/lib/rust/stats/probabilities.rb +356 -0
- data/lib/rust/stats/tests.rb +384 -0
- data/lib/rust.rb +4 -8
- metadata +31 -12
- data/lib/rust-calls.rb +0 -69
- data/lib/rust-descriptive.rb +0 -67
- data/lib/rust-tests.rb +0 -165
@@ -1,127 +1,34 @@
|
|
1
|
-
|
2
|
-
require 'stringio'
|
3
|
-
require 'rinruby'
|
4
|
-
require 'csv'
|
1
|
+
require_relative 'datatype'
|
5
2
|
|
6
3
|
module Rust
|
7
|
-
CLIENT_MUTEX = Mutex.new
|
8
|
-
R_MUTEX = Mutex.new
|
9
4
|
|
10
|
-
|
5
|
+
##
|
6
|
+
# Mirror of the data-frame type in R.
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@@debugging = false
|
17
|
-
@@in_client_mutex = false
|
18
|
-
|
19
|
-
def self.debug
|
20
|
-
@@debugging = true
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.exclusive
|
24
|
-
result = nil
|
25
|
-
CLIENT_MUTEX.synchronize do
|
26
|
-
@@in_client_mutex = true
|
27
|
-
result = yield
|
28
|
-
@@in_client_mutex = false
|
29
|
-
end
|
30
|
-
return result
|
31
|
-
end
|
32
|
-
|
33
|
-
def self.[]=(variable, value)
|
34
|
-
if value.is_a?(RustDatatype)
|
35
|
-
value.load_in_r_as(variable.to_s)
|
36
|
-
elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
|
37
|
-
R_ENGINE.assign(variable, value)
|
38
|
-
else
|
39
|
-
raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
|
8
|
+
class DataFrame < RustDatatype
|
9
|
+
def self.can_pull?(type, klass)
|
10
|
+
return [klass].flatten.include?("data.frame")
|
40
11
|
end
|
41
12
|
|
42
|
-
|
43
|
-
|
44
|
-
def self.[](variable, type=RustDatatype)
|
45
|
-
return type.pull_variable(variable)
|
46
|
-
end
|
47
|
-
|
48
|
-
def self._eval_big(r_command, return_warnings = false)
|
49
|
-
r_command = r_command.join("\n") if r_command.is_a?(Array)
|
50
|
-
|
51
|
-
self._rexec(r_command, return_warnings) do |cmd|
|
52
|
-
result = true
|
53
|
-
instructions = cmd.lines
|
54
|
-
|
55
|
-
while instructions.size > 0
|
56
|
-
current_command = ""
|
57
|
-
|
58
|
-
while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
|
59
|
-
current_command << instructions.shift
|
60
|
-
end
|
61
|
-
|
62
|
-
result &= R_ENGINE.eval(current_command)
|
63
|
-
end
|
64
|
-
|
65
|
-
result
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def self._pull(r_command, return_warnings = false)
|
70
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
|
71
|
-
end
|
72
|
-
|
73
|
-
def self._eval(r_command, return_warnings = false)
|
74
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
|
75
|
-
end
|
76
|
-
|
77
|
-
def self._rexec(r_command, return_warnings = false)
|
78
|
-
puts "Calling _rexec with command: #{r_command}" if @@debugging
|
79
|
-
R_MUTEX.synchronize do
|
80
|
-
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
81
|
-
|
82
|
-
result = nil
|
83
|
-
begin
|
84
|
-
$stdout = StringIO.new
|
85
|
-
if return_warnings
|
86
|
-
R_ENGINE.echo(true, true)
|
87
|
-
else
|
88
|
-
R_ENGINE.echo(false, false)
|
89
|
-
end
|
90
|
-
result = yield(r_command)
|
91
|
-
ensure
|
92
|
-
R_ENGINE.echo(false, false)
|
93
|
-
warnings = $stdout.string
|
94
|
-
$stdout = STDOUT
|
95
|
-
end
|
96
|
-
|
97
|
-
if return_warnings
|
98
|
-
return result, warnings.lines.map { |w| w.strip.chomp }
|
99
|
-
else
|
100
|
-
return result
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class RustDatatype
|
106
|
-
def self.pull_variable(variable)
|
107
|
-
return Rust._pull(variable)
|
13
|
+
def self.pull_priority
|
14
|
+
1
|
108
15
|
end
|
109
16
|
|
110
|
-
def
|
111
|
-
raise "Not implemented"
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class DataFrame < RustDatatype
|
116
|
-
def self.pull_variable(variable)
|
17
|
+
def self.pull_variable(variable, type, klass)
|
117
18
|
hash = {}
|
118
|
-
colnames = Rust
|
19
|
+
colnames = Rust["colnames(#{variable})"]
|
119
20
|
colnames.each do |col|
|
120
|
-
hash[col] = Rust
|
21
|
+
hash[col] = Rust["#{variable}$\"#{col}\""]
|
121
22
|
end
|
122
23
|
return DataFrame.new(hash)
|
123
24
|
end
|
124
25
|
|
26
|
+
##
|
27
|
+
# Creates a new data-frame.
|
28
|
+
# +labels_or_data+ can be either:
|
29
|
+
# - an Array of column names (creates an empty data-frame)
|
30
|
+
# - a Hash with column names as keys and values as values
|
31
|
+
|
125
32
|
def initialize(labels_or_data)
|
126
33
|
@data = {}
|
127
34
|
|
@@ -130,10 +37,16 @@ module Rust
|
|
130
37
|
@labels.each { |label| @data[label] = [] }
|
131
38
|
elsif labels_or_data.is_a? Hash
|
132
39
|
@labels = labels_or_data.keys.map { |l| l.to_s }
|
133
|
-
|
40
|
+
|
41
|
+
labels_or_data.each do |key, value|
|
42
|
+
@data[key.to_s] = value.clone
|
43
|
+
end
|
134
44
|
end
|
135
45
|
end
|
136
46
|
|
47
|
+
##
|
48
|
+
# Returns the +i+-th row of the data-frame
|
49
|
+
|
137
50
|
def row(i)
|
138
51
|
if i < 0 || i >= self.rows
|
139
52
|
return nil
|
@@ -142,6 +55,20 @@ module Rust
|
|
142
55
|
end
|
143
56
|
end
|
144
57
|
|
58
|
+
##
|
59
|
+
# Returns the +i+-th row of the data-frame as an Array of values (in the order of the column labels). Faster (but harder to interpret) alternative to #row.
|
60
|
+
|
61
|
+
def fast_row(i)
|
62
|
+
if i < 0 || i >= self.rows
|
63
|
+
return nil
|
64
|
+
else
|
65
|
+
return @labels.map { |label| @data[label][i] }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
|
71
|
+
|
145
72
|
def shuffle(*args)
|
146
73
|
result = DataFrame.new(@labels)
|
147
74
|
|
@@ -156,6 +83,10 @@ module Rust
|
|
156
83
|
return result
|
157
84
|
end
|
158
85
|
|
86
|
+
##
|
87
|
+
# Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
|
88
|
+
# are nil, all the rows/columns are returned.
|
89
|
+
|
159
90
|
def [](rows, cols=nil)
|
160
91
|
raise "You must specify either rows or columns to select" if !rows && !cols
|
161
92
|
result = self
|
@@ -171,9 +102,16 @@ module Rust
|
|
171
102
|
return result
|
172
103
|
end
|
173
104
|
|
105
|
+
##
|
106
|
+
# Returns the column named +name+.
|
107
|
+
|
174
108
|
def column(name)
|
175
109
|
return @data[name]
|
176
110
|
end
|
111
|
+
alias :| :column
|
112
|
+
|
113
|
+
##
|
114
|
+
# Renames the column named +old_name+ to +new_name+.
|
177
115
|
|
178
116
|
def rename_column!(old_name, new_name)
|
179
117
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
@@ -183,10 +121,24 @@ module Rust
|
|
183
121
|
@labels[@labels.index(old_name)] = new_name
|
184
122
|
end
|
185
123
|
|
124
|
+
##
|
125
|
+
# Functionally transforms the column named +column+ by applying the function given as a block.
|
126
|
+
# Example:
|
127
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
|
128
|
+
# df.transform_column!("a") { |v| v + 1 }
|
129
|
+
# df|"a" # => [2, 3, 4]
|
130
|
+
|
186
131
|
def transform_column!(column)
|
187
132
|
@data[column].map! { |e| yield e }
|
188
133
|
end
|
189
134
|
|
135
|
+
##
|
136
|
+
# Returns a copy of the data-frame with only the rows for which the function given in the block returns true.
|
137
|
+
# Example:
|
138
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
|
139
|
+
# df2 = df.select_rows { |r| r['a'].even? }
|
140
|
+
# df2|"b" # => ['b']
|
141
|
+
|
190
142
|
def select_rows
|
191
143
|
result = DataFrame.new(self.column_names)
|
192
144
|
self.each_with_index do |row, i|
|
@@ -195,6 +147,20 @@ module Rust
|
|
195
147
|
return result
|
196
148
|
end
|
197
149
|
|
150
|
+
##
|
151
|
+
# Returns true if the function given in the block returns true for any of the rows in this data-frame.
|
152
|
+
|
153
|
+
def has_row?
|
154
|
+
self.each_with_index do |row, i|
|
155
|
+
return true if yield row, i
|
156
|
+
end
|
157
|
+
return false
|
158
|
+
end
|
159
|
+
|
160
|
+
##
|
161
|
+
# Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
|
162
|
+
# (only the columns for which the function returns true are kept).
|
163
|
+
|
198
164
|
def select_columns(cols=nil)
|
199
165
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
200
166
|
|
@@ -210,24 +176,84 @@ module Rust
|
|
210
176
|
end
|
211
177
|
alias :select_cols :select_columns
|
212
178
|
|
179
|
+
##
|
180
|
+
# Deletes the column named +column+.
|
181
|
+
|
213
182
|
def delete_column(column)
|
214
183
|
@labels.delete(column)
|
215
184
|
@data.delete(column)
|
216
185
|
end
|
217
186
|
|
187
|
+
##
|
188
|
+
# Deletes the +i+-th row.
|
189
|
+
|
190
|
+
def delete_row(i)
|
191
|
+
@data.each do |label, column|
|
192
|
+
column.delete_at(i)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
##
|
197
|
+
# Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
|
198
|
+
|
199
|
+
def uniq_by(by)
|
200
|
+
result = self.clone
|
201
|
+
result.uniq_by!(by)
|
202
|
+
return result
|
203
|
+
end
|
204
|
+
|
205
|
+
##
|
206
|
+
# Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
|
207
|
+
|
208
|
+
def uniq_by!(by)
|
209
|
+
my_keys = {}
|
210
|
+
to_delete = []
|
211
|
+
self.each_with_index do |row, i|
|
212
|
+
key = []
|
213
|
+
by.each do |colname|
|
214
|
+
key << row[colname]
|
215
|
+
end
|
216
|
+
unless my_keys[key]
|
217
|
+
my_keys[key] = i
|
218
|
+
else
|
219
|
+
to_delete << (i-to_delete.size)
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
to_delete.each do |i|
|
224
|
+
self.delete_row(i)
|
225
|
+
end
|
226
|
+
|
227
|
+
return self
|
228
|
+
end
|
229
|
+
|
230
|
+
##
|
231
|
+
# Returns the names of the columns.
|
232
|
+
|
218
233
|
def column_names
|
219
234
|
return @labels.map { |k| k.to_s }
|
220
235
|
end
|
221
236
|
alias :colnames :column_names
|
222
237
|
|
238
|
+
##
|
239
|
+
# Returns the number of rows.
|
240
|
+
|
223
241
|
def rows
|
224
242
|
@data.values[0].size
|
225
243
|
end
|
226
244
|
|
245
|
+
##
|
246
|
+
# Returns the number of columns
|
247
|
+
|
227
248
|
def columns
|
228
249
|
@labels.size
|
229
250
|
end
|
230
251
|
|
252
|
+
##
|
253
|
+
# Adds the given +row+ to the data-frame. +row+ can be either:
|
254
|
+
# - An Array of values for all the columns (in the order of #column_names);
|
255
|
+
# - A Hash containing associations between column names and value to be set.
|
256
|
+
|
231
257
|
def add_row(row)
|
232
258
|
if row.is_a?(Array)
|
233
259
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -243,7 +269,7 @@ module Rust
|
|
243
269
|
row.each do |key, value|
|
244
270
|
@data[key.to_s] << value
|
245
271
|
end
|
246
|
-
|
272
|
+
|
247
273
|
return true
|
248
274
|
else
|
249
275
|
raise TypeError, "Expected an Array or a Hash"
|
@@ -251,6 +277,11 @@ module Rust
|
|
251
277
|
end
|
252
278
|
alias :<< :add_row
|
253
279
|
|
280
|
+
##
|
281
|
+
# Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
|
282
|
+
# rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
|
283
|
+
# value to assign for the new column.
|
284
|
+
|
254
285
|
def add_column(name, values=nil)
|
255
286
|
raise "Column already exists" if @labels.include?(name)
|
256
287
|
raise "Values or block required" if !values && !block_given?
|
@@ -267,6 +298,9 @@ module Rust
|
|
267
298
|
end
|
268
299
|
end
|
269
300
|
|
301
|
+
##
|
302
|
+
# Yields each row as a Hash containing column names as keys and values as values.
|
303
|
+
|
270
304
|
def each
|
271
305
|
self.each_with_index do |element, i|
|
272
306
|
yield element
|
@@ -275,6 +309,21 @@ module Rust
|
|
275
309
|
return self
|
276
310
|
end
|
277
311
|
|
312
|
+
##
|
313
|
+
# Yields each row as an Array of values, ordered as the column labels. Faster alternative to
|
314
|
+
# #each.
|
315
|
+
|
316
|
+
def fast_each
|
317
|
+
self.fast_each_with_index do |element, i|
|
318
|
+
yield element
|
319
|
+
end
|
320
|
+
|
321
|
+
return self
|
322
|
+
end
|
323
|
+
|
324
|
+
##
|
325
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index.
|
326
|
+
|
278
327
|
def each_with_index
|
279
328
|
for i in 0...self.rows
|
280
329
|
element = {}
|
@@ -288,6 +337,23 @@ module Rust
|
|
288
337
|
return self
|
289
338
|
end
|
290
339
|
|
340
|
+
##
|
341
|
+
# Yields each row as an Array of values, ordered as the column labels, and the row index. Faster
|
342
|
+
# alternative to #each_with_index.
|
343
|
+
|
344
|
+
def fast_each_with_index
|
345
|
+
for i in 0...self.rows
|
346
|
+
element = []
|
347
|
+
@labels.each do |label|
|
348
|
+
element << @data[label][i]
|
349
|
+
end
|
350
|
+
|
351
|
+
yield element, i
|
352
|
+
end
|
353
|
+
|
354
|
+
return self
|
355
|
+
end
|
356
|
+
|
291
357
|
def load_in_r_as(variable_name)
|
292
358
|
command = []
|
293
359
|
|
@@ -299,6 +365,14 @@ module Rust
|
|
299
365
|
row_index += 1
|
300
366
|
end
|
301
367
|
|
368
|
+
self.column_names.each do |name|
|
369
|
+
column = self.column(name)
|
370
|
+
|
371
|
+
if column.is_a?(Factor)
|
372
|
+
command << "#{variable_name}[,#{name.to_R}] <- factor(#{variable_name}[,#{name.to_R}], labels=#{column.levels.to_R})"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
302
376
|
Rust._eval_big(command)
|
303
377
|
end
|
304
378
|
|
@@ -323,6 +397,9 @@ module Rust
|
|
323
397
|
return result
|
324
398
|
end
|
325
399
|
|
400
|
+
##
|
401
|
+
# Returns a copy of the data-frame containing only the first +n+ rows.
|
402
|
+
|
326
403
|
def head(n=10)
|
327
404
|
result = DataFrame.new(self.column_names)
|
328
405
|
self.each_with_index do |row, i|
|
@@ -331,6 +408,11 @@ module Rust
|
|
331
408
|
return result
|
332
409
|
end
|
333
410
|
|
411
|
+
##
|
412
|
+
# Merges this data-frame with +other+ in terms of the +by+ columns (Array of String).
|
413
|
+
# +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
|
414
|
+
# for this and the +other+ data-frame, respectively.
|
415
|
+
|
334
416
|
def merge(other, by, first_alias = "x", second_alias = "y")
|
335
417
|
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
336
418
|
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
@@ -397,6 +479,94 @@ module Rust
|
|
397
479
|
return result
|
398
480
|
end
|
399
481
|
|
482
|
+
##
|
483
|
+
# Aggregates the values in groups depending on the +by+ column (String).
|
484
|
+
# A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
|
485
|
+
# specified as optional arguments in which the name of the argument represents the column name and the value
|
486
|
+
# contains a block for aggregating the specific column.
|
487
|
+
# Both the default and the specialized blocks must take as argument an array of values and must return a
|
488
|
+
# scalar value.
|
489
|
+
|
490
|
+
def aggregate(by, **aggregators)
|
491
|
+
raise TypeError, "Expected a string" unless by.is_a?(String)
|
492
|
+
raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
|
493
|
+
raise "Expected a block for default aggregator" unless block_given?
|
494
|
+
|
495
|
+
aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h
|
496
|
+
|
497
|
+
sorted = self.sort_by(by)
|
498
|
+
|
499
|
+
current_value = nil
|
500
|
+
partials = []
|
501
|
+
partial = nil
|
502
|
+
sorted.column(by).each_with_index do |value, index|
|
503
|
+
if current_value != value
|
504
|
+
current_value = value
|
505
|
+
partials << partial if partial
|
506
|
+
partial = Rust::DataFrame.new(self.column_names)
|
507
|
+
end
|
508
|
+
partial << sorted.fast_row(index)
|
509
|
+
end
|
510
|
+
partials << partial
|
511
|
+
|
512
|
+
result = Rust::DataFrame.new(self.column_names)
|
513
|
+
partials.each do |partial|
|
514
|
+
aggregated_row = {}
|
515
|
+
aggregated_row[by] = partial.column(by)[0]
|
516
|
+
(self.column_names - [by]).each do |column|
|
517
|
+
if aggregators[column]
|
518
|
+
aggregated_row[column] = aggregators[column].call(partial.column(column))
|
519
|
+
else
|
520
|
+
aggregated_row[column] = yield partial.column(column)
|
521
|
+
end
|
522
|
+
end
|
523
|
+
|
524
|
+
result << aggregated_row
|
525
|
+
end
|
526
|
+
|
527
|
+
return result
|
528
|
+
end
|
529
|
+
|
530
|
+
##
|
531
|
+
# Returns a copy of this data-frame in which the rows are sorted by the values of the column named +column+.
|
532
|
+
|
533
|
+
def sort_by(column)
|
534
|
+
result = self.clone
|
535
|
+
result.sort_by!(column)
|
536
|
+
return result
|
537
|
+
end
|
538
|
+
|
539
|
+
##
|
540
|
+
# Sorts the rows of this data-frame by the values of the +by+ column.
|
541
|
+
|
542
|
+
def sort_by!(by)
|
543
|
+
copy = @data[by].clone
|
544
|
+
copy.sort!
|
545
|
+
|
546
|
+
indices = []
|
547
|
+
@data[by].each_with_index do |value, i|
|
548
|
+
index = copy.index(value)
|
549
|
+
indices << index
|
550
|
+
|
551
|
+
copy[index] = NilClass
|
552
|
+
end
|
553
|
+
|
554
|
+
(self.column_names - [by]).each do |column_name|
|
555
|
+
sorted = []
|
556
|
+
column = self.column(column_name)
|
557
|
+
column_i = 0
|
558
|
+
indices.each do |i|
|
559
|
+
sorted[i] = column[column_i]
|
560
|
+
column_i += 1
|
561
|
+
end
|
562
|
+
@data[column_name] = sorted
|
563
|
+
end
|
564
|
+
@data[by].sort!
|
565
|
+
end
|
566
|
+
|
567
|
+
##
|
568
|
+
# Adds all the rows in +dataframe+ to this data-frame. The column names must match.
|
569
|
+
|
400
570
|
def bind_rows!(dataframe)
|
401
571
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
402
572
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -409,6 +579,9 @@ module Rust
|
|
409
579
|
end
|
410
580
|
alias :rbind! :bind_rows!
|
411
581
|
|
582
|
+
##
|
583
|
+
# Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
|
584
|
+
|
412
585
|
def bind_columns!(dataframe)
|
413
586
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
414
587
|
raise "The number of rows are not compatible" if self.rows != dataframe.rows
|
@@ -422,6 +595,9 @@ module Rust
|
|
422
595
|
end
|
423
596
|
alias :cbind! :bind_columns!
|
424
597
|
|
598
|
+
##
|
599
|
+
# Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
|
600
|
+
|
425
601
|
def bind_rows(dataframe)
|
426
602
|
result = self.clone
|
427
603
|
result.bind_rows!(dataframe)
|
@@ -429,6 +605,9 @@ module Rust
|
|
429
605
|
end
|
430
606
|
alias :rbind :bind_rows
|
431
607
|
|
608
|
+
##
|
609
|
+
# Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
|
610
|
+
|
432
611
|
def bind_columns(dataframe)
|
433
612
|
result = self.clone
|
434
613
|
result.bind_columns!(dataframe)
|
@@ -436,152 +615,53 @@ module Rust
|
|
436
615
|
end
|
437
616
|
alias :cbind :bind_columns
|
438
617
|
|
618
|
+
##
|
619
|
+
# Returns a copy of this data-frame.
|
620
|
+
|
439
621
|
def clone
|
440
622
|
DataFrame.new(@data)
|
441
623
|
end
|
442
624
|
end
|
443
625
|
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
def initialize(data)
|
450
|
-
if data.flatten.size == 0
|
451
|
-
raise "Empty matrices are not allowed"
|
452
|
-
else
|
453
|
-
raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
|
454
|
-
raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
|
455
|
-
raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
|
456
|
-
@data = data.clone
|
457
|
-
end
|
458
|
-
end
|
459
|
-
|
460
|
-
def [](i, j)
|
461
|
-
return @data[i][j]
|
462
|
-
end
|
463
|
-
|
464
|
-
def rows
|
465
|
-
@data.size
|
466
|
-
end
|
467
|
-
|
468
|
-
def cols
|
469
|
-
@data[0].size
|
470
|
-
end
|
626
|
+
##
|
627
|
+
# Represents an array of DataFrame
|
628
|
+
|
629
|
+
class DataFrameArray < Array
|
471
630
|
|
472
|
-
|
473
|
-
|
474
|
-
raise "Wrong j" unless j.between?(0, @data[0].size - 1)
|
475
|
-
@data[i][j] = value
|
476
|
-
end
|
631
|
+
##
|
632
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
477
633
|
|
478
|
-
def
|
479
|
-
|
634
|
+
def bind_all
|
635
|
+
return nil if self.size == 0
|
636
|
+
|
637
|
+
result = self.first.clone
|
638
|
+
|
639
|
+
for i in 1...self.size
|
640
|
+
result .bind_rows!(self[i])
|
641
|
+
end
|
642
|
+
|
643
|
+
return result
|
480
644
|
end
|
481
645
|
end
|
482
646
|
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
def initialize(min, max, step=1)
|
488
|
-
@min = min
|
489
|
-
@max = max
|
490
|
-
@step = step
|
491
|
-
end
|
492
|
-
|
493
|
-
def step(step)
|
494
|
-
@step = step
|
495
|
-
end
|
647
|
+
##
|
648
|
+
# Represents a hash of DataFrame
|
649
|
+
|
650
|
+
class DataFrameHash < Hash
|
496
651
|
|
497
|
-
|
498
|
-
|
499
|
-
yield v
|
500
|
-
end
|
501
|
-
end
|
652
|
+
##
|
653
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
502
654
|
|
503
|
-
def
|
504
|
-
|
505
|
-
|
506
|
-
|
655
|
+
def bind_all
|
656
|
+
return nil if self.values.size == 0
|
657
|
+
|
658
|
+
result = self.values.first.clone
|
659
|
+
|
660
|
+
for i in 1...self.values.size
|
661
|
+
result .bind_rows!(self.values[i])
|
507
662
|
end
|
663
|
+
|
508
664
|
return result
|
509
665
|
end
|
510
|
-
|
511
|
-
def to_R
|
512
|
-
"seq(from=#@min, to=#@max, by=#@step)"
|
513
|
-
end
|
514
|
-
end
|
515
|
-
end
|
516
|
-
|
517
|
-
class TrueClass
|
518
|
-
def to_R
|
519
|
-
"TRUE"
|
520
|
-
end
|
521
|
-
end
|
522
|
-
|
523
|
-
class FalseClass
|
524
|
-
def to_R
|
525
|
-
"FALSE"
|
526
|
-
end
|
527
|
-
end
|
528
|
-
|
529
|
-
class Object
|
530
|
-
def to_R
|
531
|
-
raise TypeError, "Unsupported type for #{self.class}"
|
532
666
|
end
|
533
667
|
end
|
534
|
-
|
535
|
-
class NilClass
|
536
|
-
def to_R
|
537
|
-
return "NULL"
|
538
|
-
end
|
539
|
-
end
|
540
|
-
|
541
|
-
class Numeric
|
542
|
-
def to_R
|
543
|
-
self.inspect
|
544
|
-
end
|
545
|
-
end
|
546
|
-
|
547
|
-
class Float
|
548
|
-
def to_R
|
549
|
-
return self.nan? ? "NA" : super
|
550
|
-
end
|
551
|
-
end
|
552
|
-
|
553
|
-
class Array
|
554
|
-
def to_R
|
555
|
-
return "c(#{self.map { |e| e.to_R }.join(",")})"
|
556
|
-
end
|
557
|
-
end
|
558
|
-
|
559
|
-
class String
|
560
|
-
def to_R
|
561
|
-
return self.inspect
|
562
|
-
end
|
563
|
-
end
|
564
|
-
|
565
|
-
class Range
|
566
|
-
def to_R
|
567
|
-
[range.min, range.max].to_R
|
568
|
-
end
|
569
|
-
end
|
570
|
-
|
571
|
-
module Rust::RBindings
|
572
|
-
def read_csv(filename, **options)
|
573
|
-
Rust::CSV.read(filename, **options)
|
574
|
-
end
|
575
|
-
|
576
|
-
def write_csv(filename, dataframe, **options)
|
577
|
-
Rust::CSV.write(filename, dataframe, **options)
|
578
|
-
end
|
579
|
-
|
580
|
-
def data_frame(*args)
|
581
|
-
Rust::DataFrame.new(*args)
|
582
|
-
end
|
583
|
-
end
|
584
|
-
|
585
|
-
def bind_r!
|
586
|
-
include Rust::RBindings
|
587
|
-
end
|