rust 0.4 → 0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +35 -4
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +324 -244
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +98 -107
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +46 -3
- data/lib/rust/stats/descriptive.rb +157 -0
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +44 -21
- data/lib/rust/stats/probabilities.rb +356 -0
- data/lib/rust/stats/tests.rb +384 -0
- data/lib/rust.rb +4 -8
- metadata +31 -12
- data/lib/rust-calls.rb +0 -69
- data/lib/rust-descriptive.rb +0 -67
- data/lib/rust-tests.rb +0 -165
@@ -1,127 +1,34 @@
|
|
1
|
-
|
2
|
-
require 'stringio'
|
3
|
-
require 'rinruby'
|
4
|
-
require 'csv'
|
1
|
+
require_relative 'datatype'
|
5
2
|
|
6
3
|
module Rust
|
7
|
-
CLIENT_MUTEX = Mutex.new
|
8
|
-
R_MUTEX = Mutex.new
|
9
4
|
|
10
|
-
|
5
|
+
##
|
6
|
+
# Mirror of the data-frame type in R.
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@@debugging = false
|
17
|
-
@@in_client_mutex = false
|
18
|
-
|
19
|
-
def self.debug
|
20
|
-
@@debugging = true
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.exclusive
|
24
|
-
result = nil
|
25
|
-
CLIENT_MUTEX.synchronize do
|
26
|
-
@@in_client_mutex = true
|
27
|
-
result = yield
|
28
|
-
@@in_client_mutex = false
|
29
|
-
end
|
30
|
-
return result
|
31
|
-
end
|
32
|
-
|
33
|
-
def self.[]=(variable, value)
|
34
|
-
if value.is_a?(RustDatatype)
|
35
|
-
value.load_in_r_as(variable.to_s)
|
36
|
-
elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
|
37
|
-
R_ENGINE.assign(variable, value)
|
38
|
-
else
|
39
|
-
raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
|
8
|
+
class DataFrame < RustDatatype
|
9
|
+
##
# Tells whether an R variable can be pulled as a DataFrame: true when "data.frame" appears among the R class
# name(s) given in +klass+ (a single name or a possibly nested Array of names). +type+ is not inspected.

def self.can_pull?(type, klass)
  [klass].flatten.include?("data.frame")
end
|
41
12
|
|
42
|
-
|
43
|
-
|
44
|
-
def self.[](variable, type=RustDatatype)
|
45
|
-
return type.pull_variable(variable)
|
46
|
-
end
|
47
|
-
|
48
|
-
def self._eval_big(r_command, return_warnings = false)
|
49
|
-
r_command = r_command.join("\n") if r_command.is_a?(Array)
|
50
|
-
|
51
|
-
self._rexec(r_command, return_warnings) do |cmd|
|
52
|
-
result = true
|
53
|
-
instructions = cmd.lines
|
54
|
-
|
55
|
-
while instructions.size > 0
|
56
|
-
current_command = ""
|
57
|
-
|
58
|
-
while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
|
59
|
-
current_command << instructions.shift
|
60
|
-
end
|
61
|
-
|
62
|
-
result &= R_ENGINE.eval(current_command)
|
63
|
-
end
|
64
|
-
|
65
|
-
result
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def self._pull(r_command, return_warnings = false)
|
70
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
|
71
|
-
end
|
72
|
-
|
73
|
-
def self._eval(r_command, return_warnings = false)
|
74
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
|
75
|
-
end
|
76
|
-
|
77
|
-
def self._rexec(r_command, return_warnings = false)
|
78
|
-
puts "Calling _rexec with command: #{r_command}" if @@debugging
|
79
|
-
R_MUTEX.synchronize do
|
80
|
-
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
81
|
-
|
82
|
-
result = nil
|
83
|
-
begin
|
84
|
-
$stdout = StringIO.new
|
85
|
-
if return_warnings
|
86
|
-
R_ENGINE.echo(true, true)
|
87
|
-
else
|
88
|
-
R_ENGINE.echo(false, false)
|
89
|
-
end
|
90
|
-
result = yield(r_command)
|
91
|
-
ensure
|
92
|
-
R_ENGINE.echo(false, false)
|
93
|
-
warnings = $stdout.string
|
94
|
-
$stdout = STDOUT
|
95
|
-
end
|
96
|
-
|
97
|
-
if return_warnings
|
98
|
-
return result, warnings.lines.map { |w| w.strip.chomp }
|
99
|
-
else
|
100
|
-
return result
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class RustDatatype
|
106
|
-
def self.pull_variable(variable)
|
107
|
-
return Rust._pull(variable)
|
13
|
+
##
# Returns the pull priority of this type, which is the constant 1.
# NOTE(review): presumably used to rank the types able to pull the same R variable - confirm against the
# RustDatatype base class.

def self.pull_priority
  1
end
|
109
16
|
|
110
|
-
def
|
111
|
-
raise "Not implemented"
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class DataFrame < RustDatatype
|
116
|
-
def self.pull_variable(variable)
|
17
|
+
##
# Builds a DataFrame from the R variable named +variable+: the column names are read through
# <tt>colnames(variable)</tt> and every column is then pulled with <tt>variable$"column"</tt>.
# +type+ and +klass+ are not used by this method.

def self.pull_variable(variable, type, klass)
  pulled = {}

  Rust["colnames(#{variable})"].each do |col|
    pulled[col] = Rust["#{variable}$\"#{col}\""]
  end

  DataFrame.new(pulled)
end
|
124
25
|
|
26
|
+
##
|
27
|
+
# Creates a new data-frame.
|
28
|
+
# +labels_or_data+ can be either:
|
29
|
+
# - an Array of column names (creates an empty data-frame)
|
30
|
+
# - a Hash with column names as keys and values as values
|
31
|
+
|
125
32
|
def initialize(labels_or_data)
|
126
33
|
@data = {}
|
127
34
|
|
@@ -130,10 +37,16 @@ module Rust
|
|
130
37
|
@labels.each { |label| @data[label] = [] }
|
131
38
|
elsif labels_or_data.is_a? Hash
|
132
39
|
@labels = labels_or_data.keys.map { |l| l.to_s }
|
133
|
-
|
40
|
+
|
41
|
+
labels_or_data.each do |key, value|
|
42
|
+
@data[key.to_s] = value.clone
|
43
|
+
end
|
134
44
|
end
|
135
45
|
end
|
136
46
|
|
47
|
+
##
|
48
|
+
# Returns the +i+-th row of the data-frame
|
49
|
+
|
137
50
|
def row(i)
|
138
51
|
if i < 0 || i >= self.rows
|
139
52
|
return nil
|
@@ -142,6 +55,20 @@ module Rust
|
|
142
55
|
end
|
143
56
|
end
|
144
57
|
|
58
|
+
##
# Returns the +i+-th row of the data-frame as an Array of values ordered like the column labels, or nil when
# +i+ is negative or not smaller than the number of rows. Faster (but harder to interpret) alternative to #row.

def fast_row(i)
  return nil if i < 0 || i >= self.rows

  @labels.map { |label| @data[label][i] }
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
|
71
|
+
|
145
72
|
def shuffle(*args)
|
146
73
|
result = DataFrame.new(@labels)
|
147
74
|
|
@@ -156,6 +83,10 @@ module Rust
|
|
156
83
|
return result
|
157
84
|
end
|
158
85
|
|
86
|
+
##
|
87
|
+
# Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
|
88
|
+
# are nil, all the rows/columns are returned.
|
89
|
+
|
159
90
|
def [](rows, cols=nil)
|
160
91
|
raise "You must specify either rows or columns to select" if !rows && !cols
|
161
92
|
result = self
|
@@ -171,9 +102,16 @@ module Rust
|
|
171
102
|
return result
|
172
103
|
end
|
173
104
|
|
105
|
+
##
# Returns the column named +name+ (the stored object, not a copy), or nil if there is no such column.
# Also available through the <tt>|</tt> operator, e.g. <tt>df|"a"</tt>.

def column(name)
  @data[name]
end
alias :| :column
|
112
|
+
|
113
|
+
##
|
114
|
+
# Renames the column named +old_name+ to +new_name+.
|
177
115
|
|
178
116
|
def rename_column!(old_name, new_name)
|
179
117
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
@@ -183,10 +121,24 @@ module Rust
|
|
183
121
|
@labels[@labels.index(old_name)] = new_name
|
184
122
|
end
|
185
123
|
|
124
|
+
##
# Functionally transforms the column named +column+ in place: every value is replaced by the result of
# applying the given block to it. Returns the transformed column.
# Example:
# df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
# df.transform_column!("a") { |v| v + 1 }
# df|"a" # => [2, 3, 4]

def transform_column!(column)
  @data[column].map! { |value| yield value }
end
|
189
134
|
|
135
|
+
##
|
136
|
+
# Returns a copy of the data-frame containing only the rows for which the function given in the block returns true.
|
137
|
+
# Example:
|
138
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
|
139
|
+
# df2 = df.select_rows { |r| r['a'].even? }
|
140
|
+
# df2|"b" # => ['b']
|
141
|
+
|
190
142
|
def select_rows
|
191
143
|
result = DataFrame.new(self.column_names)
|
192
144
|
self.each_with_index do |row, i|
|
@@ -195,6 +147,20 @@ module Rust
|
|
195
147
|
return result
|
196
148
|
end
|
197
149
|
|
150
|
+
##
# Returns true if the function given in the block returns a truthy value for any of the rows in this
# data-frame, false otherwise. The block receives each row (as yielded by #each_with_index) and its index.

def has_row?
  self.each_with_index do |row, index|
    return true if yield row, index
  end

  false
end
|
159
|
+
|
160
|
+
##
|
161
|
+
# Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
|
162
|
+
# (only the columns for which the function returns true are kept).
|
163
|
+
|
198
164
|
def select_columns(cols=nil)
|
199
165
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
200
166
|
|
@@ -210,24 +176,84 @@ module Rust
|
|
210
176
|
end
|
211
177
|
alias :select_cols :select_columns
|
212
178
|
|
179
|
+
##
# Deletes the column named +column+, removing both its label and its data. Returns the removed column, or nil
# if there is no column with that name.

def delete_column(column)
  @labels.delete(column)
  @data.delete(column)
end
|
217
186
|
|
187
|
+
##
# Deletes the +i+-th row by removing the element at index +i+ from every column.

def delete_row(i)
  @data.each_value { |column| column.delete_at(i) }
end
|
195
|
+
|
196
|
+
##
# Returns a copy of this data-frame in which the rows are unique in terms of all the given columns named +by+.
# The receiver is left untouched: the copy is made with #clone and de-duplicated with #uniq_by!.

def uniq_by(by)
  self.clone.tap { |copy| copy.uniq_by!(by) }
end
|
204
|
+
|
205
|
+
##
# Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+
# (an Array of column names; a single column name is accepted as well). For every combination of values only
# the first row is kept. Returns the data-frame itself.

def uniq_by!(by)
  key_columns = Array(by)
  seen = {}
  duplicates = []

  self.each_with_index do |row, i|
    key = key_columns.map { |colname| row[colname] }

    if seen.key?(key)
      duplicates << i
    else
      seen[key] = i
    end
  end

  # Delete from the bottom up, so that the indices still to be processed are not shifted by earlier deletions.
  duplicates.reverse_each { |i| self.delete_row(i) }

  self
end
|
229
|
+
|
230
|
+
##
# Returns the names of the columns as a new Array of strings, in label order. Also available as #colnames.

def column_names
  @labels.map(&:to_s)
end
alias :colnames :column_names
|
222
237
|
|
238
|
+
##
# Returns the number of rows, i.e. the size of the first column, or 0 when the data-frame has no columns.

def rows
  first_column = @data.values.first
  first_column ? first_column.size : 0
end
|
226
244
|
|
245
|
+
##
# Returns the number of columns, i.e. the number of column labels.

def columns
  @labels.size
end
|
230
251
|
|
252
|
+
##
|
253
|
+
# Adds the given +row+ to the data-frame. +row+ can be either:
|
254
|
+
# - An Array of values for all the columns (in the order of #column_names);
|
255
|
+
# - A Hash containing associations between column names and value to be set.
|
256
|
+
|
231
257
|
def add_row(row)
|
232
258
|
if row.is_a?(Array)
|
233
259
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -243,7 +269,7 @@ module Rust
|
|
243
269
|
row.each do |key, value|
|
244
270
|
@data[key.to_s] << value
|
245
271
|
end
|
246
|
-
|
272
|
+
|
247
273
|
return true
|
248
274
|
else
|
249
275
|
raise TypeError, "Expected an Array or a Hash"
|
@@ -251,6 +277,11 @@ module Rust
|
|
251
277
|
end
|
252
278
|
alias :<< :add_row
|
253
279
|
|
280
|
+
##
|
281
|
+
# Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
|
282
|
+
# rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
|
283
|
+
# value to assign for the new column.
|
284
|
+
|
254
285
|
def add_column(name, values=nil)
|
255
286
|
raise "Column already exists" if @labels.include?(name)
|
256
287
|
raise "Values or block required" if !values && !block_given?
|
@@ -267,6 +298,9 @@ module Rust
|
|
267
298
|
end
|
268
299
|
end
|
269
300
|
|
301
|
+
##
|
302
|
+
# Yields each row as a Hash containing column names as keys and values as values.
|
303
|
+
|
270
304
|
def each
|
271
305
|
self.each_with_index do |element, i|
|
272
306
|
yield element
|
@@ -275,6 +309,21 @@ module Rust
|
|
275
309
|
return self
|
276
310
|
end
|
277
311
|
|
312
|
+
##
# Yields each row as an Array of values (as produced by #fast_each_with_index, i.e. one value per column
# label), not as a Hash. Faster alternative to #each. Returns the data-frame itself.

def fast_each
  self.fast_each_with_index { |element, _index| yield element }

  self
end
|
323
|
+
|
324
|
+
##
|
325
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index.
|
326
|
+
|
278
327
|
def each_with_index
|
279
328
|
for i in 0...self.rows
|
280
329
|
element = {}
|
@@ -288,6 +337,23 @@ module Rust
|
|
288
337
|
return self
|
289
338
|
end
|
290
339
|
|
340
|
+
##
# Yields each row as an Array of values (one per column label, in label order) together with the row index.
# Faster alternative to #each_with_index, which yields Hashes. Returns the data-frame itself.

def fast_each_with_index
  self.rows.times do |i|
    element = @labels.map { |label| @data[label][i] }

    yield element, i
  end

  self
end
|
356
|
+
|
291
357
|
def load_in_r_as(variable_name)
|
292
358
|
command = []
|
293
359
|
|
@@ -299,6 +365,14 @@ module Rust
|
|
299
365
|
row_index += 1
|
300
366
|
end
|
301
367
|
|
368
|
+
self.column_names.each do |name|
|
369
|
+
column = self.column(name)
|
370
|
+
|
371
|
+
if column.is_a?(Factor)
|
372
|
+
command << "#{variable_name}[,#{name.to_R}] <- factor(#{variable_name}[,#{name.to_R}], labels=#{column.levels.to_R})"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
302
376
|
Rust._eval_big(command)
|
303
377
|
end
|
304
378
|
|
@@ -323,6 +397,9 @@ module Rust
|
|
323
397
|
return result
|
324
398
|
end
|
325
399
|
|
400
|
+
##
|
401
|
+
# Returns a copy of the data-frame containing only the first +n+ rows.
|
402
|
+
|
326
403
|
def head(n=10)
|
327
404
|
result = DataFrame.new(self.column_names)
|
328
405
|
self.each_with_index do |row, i|
|
@@ -331,6 +408,11 @@ module Rust
|
|
331
408
|
return result
|
332
409
|
end
|
333
410
|
|
411
|
+
##
|
412
|
+
# Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String).
|
413
|
+
# +first_alias+ and +second_alias+ specify the prefixes that should be used for the columns not in +by+
|
414
|
+
# for this and the +other+ data-frame, respectively.
|
415
|
+
|
334
416
|
def merge(other, by, first_alias = "x", second_alias = "y")
|
335
417
|
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
336
418
|
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
@@ -397,6 +479,94 @@ module Rust
|
|
397
479
|
return result
|
398
480
|
end
|
399
481
|
|
482
|
+
##
# Aggregates the values in groups depending on the +by+ column (String).
# A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
# specified as optional arguments in which the name of the argument represents the column name and the value
# contains a Proc for aggregating the specific column.
# Both the default and the specialized blocks must take as argument an array of values and must return a
# scalar value.
# Returns a new data-frame with the same columns and one row for each distinct value of +by+ (in sorted
# order); an empty data-frame gives an empty result.
# Raises TypeError if +by+ is not a String or an aggregator is not a Proc, and RuntimeError if no block is given.

def aggregate(by, **aggregators)
  raise TypeError, "Expected a string" unless by.is_a?(String)
  raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
  raise "Expected a block for default aggregator" unless block_given?

  # Keyword names are symbols, column names are strings.
  aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h

  sorted = self.sort_by(by)

  # Split the sorted rows into runs sharing the same +by+ value. A group is opened explicitly for the first row,
  # so a leading nil value is handled like any other value and no group exists for an empty data-frame.
  groups = []
  group_value = nil
  sorted.column(by).each_with_index do |value, index|
    if groups.empty? || group_value != value
      group_value = value
      groups << Rust::DataFrame.new(self.column_names)
    end

    groups.last << sorted.fast_row(index)
  end

  other_columns = self.column_names - [by]

  result = Rust::DataFrame.new(self.column_names)
  groups.each do |group|
    aggregated_row = {}
    aggregated_row[by] = group.column(by)[0]

    other_columns.each do |column|
      values = group.column(column)

      if aggregators[column]
        aggregated_row[column] = aggregators[column].call(values)
      else
        aggregated_row[column] = yield values
      end
    end

    result << aggregated_row
  end

  result
end
|
529
|
+
|
530
|
+
##
# Returns a copy of this data-frame in which the rows are sorted by the values of the column named +column+.
# The receiver is left untouched: the copy is made with #clone and sorted with #sort_by!.

def sort_by(column)
  self.clone.tap { |copy| copy.sort_by!(column) }
end
|
538
|
+
|
539
|
+
##
# Sorts the rows of this data-frame in place by the values of the column named +by+. Rows with equal values
# keep their relative order. The other columns are replaced by new, reordered Arrays, while the +by+ column is
# sorted in place. Returns the sorted +by+ column.

def sort_by!(by)
  keys = @data[by]

  # Row indices in sorted order; the index is used as a tie-breaker to keep the sort stable.
  order = (0...keys.size).sort_by { |i| [keys[i], i] }

  (self.column_names - [by]).each do |column_name|
    column = self.column(column_name)
    @data[column_name] = order.map { |i| column[i] }
  end

  keys.sort!
end
|
566
|
+
|
567
|
+
##
|
568
|
+
# Adds all the rows in +dataframe+ to this data-frame. The column names must match.
|
569
|
+
|
400
570
|
def bind_rows!(dataframe)
|
401
571
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
402
572
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -409,6 +579,9 @@ module Rust
|
|
409
579
|
end
|
410
580
|
alias :rbind! :bind_rows!
|
411
581
|
|
582
|
+
##
|
583
|
+
# Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
|
584
|
+
|
412
585
|
def bind_columns!(dataframe)
|
413
586
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
414
587
|
raise "The number of rows are not compatible" if self.rows != dataframe.rows
|
@@ -422,6 +595,9 @@ module Rust
|
|
422
595
|
end
|
423
596
|
alias :cbind! :bind_columns!
|
424
597
|
|
598
|
+
##
|
599
|
+
# Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
|
600
|
+
|
425
601
|
def bind_rows(dataframe)
|
426
602
|
result = self.clone
|
427
603
|
result.bind_rows!(dataframe)
|
@@ -429,6 +605,9 @@ module Rust
|
|
429
605
|
end
|
430
606
|
alias :rbind :bind_rows
|
431
607
|
|
608
|
+
##
|
609
|
+
# Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
|
610
|
+
|
432
611
|
def bind_columns(dataframe)
|
433
612
|
result = self.clone
|
434
613
|
result.bind_columns!(dataframe)
|
@@ -436,152 +615,53 @@ module Rust
|
|
436
615
|
end
|
437
616
|
alias :cbind :bind_columns
|
438
617
|
|
618
|
+
##
# Returns a copy of this data-frame, built by passing the current column data to DataFrame.new.

def clone
  DataFrame.new(@data)
end
|
442
624
|
end
|
443
625
|
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
def initialize(data)
|
450
|
-
if data.flatten.size == 0
|
451
|
-
raise "Empty matrices are not allowed"
|
452
|
-
else
|
453
|
-
raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
|
454
|
-
raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
|
455
|
-
raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
|
456
|
-
@data = data.clone
|
457
|
-
end
|
458
|
-
end
|
459
|
-
|
460
|
-
def [](i, j)
|
461
|
-
return @data[i][j]
|
462
|
-
end
|
463
|
-
|
464
|
-
def rows
|
465
|
-
@data.size
|
466
|
-
end
|
467
|
-
|
468
|
-
def cols
|
469
|
-
@data[0].size
|
470
|
-
end
|
626
|
+
##
|
627
|
+
# Represents an array of DataFrame
|
628
|
+
|
629
|
+
class DataFrameArray < Array
|
471
630
|
|
472
|
-
|
473
|
-
|
474
|
-
raise "Wrong j" unless j.between?(0, @data[0].size - 1)
|
475
|
-
@data[i][j] = value
|
476
|
-
end
|
631
|
+
##
# Returns a data-frame with the rows in all the data-frames together (if compatible): a clone of the first
# data-frame to which the rows of all the following ones are added with bind_rows!. Returns nil if the array
# is empty. The data-frames in the array are not modified.

def bind_all
  return nil if self.size == 0

  result = self.first.clone
  self.drop(1).each { |dataframe| result.bind_rows!(dataframe) }

  result
end
|
481
645
|
end
|
482
646
|
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
def initialize(min, max, step=1)
|
488
|
-
@min = min
|
489
|
-
@max = max
|
490
|
-
@step = step
|
491
|
-
end
|
492
|
-
|
493
|
-
def step(step)
|
494
|
-
@step = step
|
495
|
-
end
|
647
|
+
##
|
648
|
+
# Represents a hash of DataFrame
|
649
|
+
|
650
|
+
class DataFrameHash < Hash
|
496
651
|
|
497
|
-
|
498
|
-
|
499
|
-
yield v
|
500
|
-
end
|
501
|
-
end
|
652
|
+
##
# Returns a data-frame with the rows in all the data-frames together (if compatible): a clone of the first
# value of the hash to which the rows of all the following values are added with bind_rows!. Returns nil if
# the hash is empty. The data-frames in the hash are not modified.

def bind_all
  dataframes = self.values
  return nil if dataframes.size == 0

  result = dataframes.first.clone
  dataframes.drop(1).each { |dataframe| result.bind_rows!(dataframe) }

  result
end
|
510
|
-
|
511
|
-
def to_R
|
512
|
-
"seq(from=#@min, to=#@max, by=#@step)"
|
513
|
-
end
|
514
|
-
end
|
515
|
-
end
|
516
|
-
|
517
|
-
class TrueClass
|
518
|
-
def to_R
|
519
|
-
"TRUE"
|
520
|
-
end
|
521
|
-
end
|
522
|
-
|
523
|
-
class FalseClass
|
524
|
-
def to_R
|
525
|
-
"FALSE"
|
526
|
-
end
|
527
|
-
end
|
528
|
-
|
529
|
-
class Object
|
530
|
-
def to_R
|
531
|
-
raise TypeError, "Unsupported type for #{self.class}"
|
532
666
|
end
|
533
667
|
end
|
534
|
-
|
535
|
-
class NilClass
|
536
|
-
def to_R
|
537
|
-
return "NULL"
|
538
|
-
end
|
539
|
-
end
|
540
|
-
|
541
|
-
class Numeric
|
542
|
-
def to_R
|
543
|
-
self.inspect
|
544
|
-
end
|
545
|
-
end
|
546
|
-
|
547
|
-
class Float
|
548
|
-
def to_R
|
549
|
-
return self.nan? ? "NA" : super
|
550
|
-
end
|
551
|
-
end
|
552
|
-
|
553
|
-
class Array
|
554
|
-
def to_R
|
555
|
-
return "c(#{self.map { |e| e.to_R }.join(",")})"
|
556
|
-
end
|
557
|
-
end
|
558
|
-
|
559
|
-
class String
|
560
|
-
def to_R
|
561
|
-
return self.inspect
|
562
|
-
end
|
563
|
-
end
|
564
|
-
|
565
|
-
class Range
|
566
|
-
def to_R
|
567
|
-
[range.min, range.max].to_R
|
568
|
-
end
|
569
|
-
end
|
570
|
-
|
571
|
-
module Rust::RBindings
|
572
|
-
def read_csv(filename, **options)
|
573
|
-
Rust::CSV.read(filename, **options)
|
574
|
-
end
|
575
|
-
|
576
|
-
def write_csv(filename, dataframe, **options)
|
577
|
-
Rust::CSV.write(filename, dataframe, **options)
|
578
|
-
end
|
579
|
-
|
580
|
-
def data_frame(*args)
|
581
|
-
Rust::DataFrame.new(*args)
|
582
|
-
end
|
583
|
-
end
|
584
|
-
|
585
|
-
def bind_r!
|
586
|
-
include Rust::RBindings
|
587
|
-
end
|