rust 0.7 → 0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +23 -1
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +159 -331
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/external/robustbase.rb +44 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +89 -167
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +45 -2
- data/lib/{rust-descriptive.rb → rust/stats/descriptive.rb} +52 -3
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +28 -13
- data/lib/{rust-probabilities.rb → rust/stats/probabilities.rb} +142 -34
- data/lib/{rust-tests.rb → rust/stats/tests.rb} +178 -92
- data/lib/rust.rb +4 -9
- metadata +32 -13
- data/lib/rust-calls.rb +0 -80
@@ -1,127 +1,34 @@
|
|
1
|
-
|
2
|
-
require 'stringio'
|
3
|
-
require 'rinruby'
|
4
|
-
require 'csv'
|
1
|
+
require_relative 'datatype'
|
5
2
|
|
6
3
|
module Rust
|
7
|
-
CLIENT_MUTEX = Mutex.new
|
8
|
-
R_MUTEX = Mutex.new
|
9
4
|
|
10
|
-
|
5
|
+
##
|
6
|
+
# Mirror of the data-frame type in R.
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@@debugging = false
|
17
|
-
@@in_client_mutex = false
|
18
|
-
|
19
|
-
def self.debug
|
20
|
-
@@debugging = true
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.exclusive
|
24
|
-
result = nil
|
25
|
-
CLIENT_MUTEX.synchronize do
|
26
|
-
@@in_client_mutex = true
|
27
|
-
result = yield
|
28
|
-
@@in_client_mutex = false
|
29
|
-
end
|
30
|
-
return result
|
31
|
-
end
|
32
|
-
|
33
|
-
def self.[]=(variable, value)
|
34
|
-
if value.is_a?(RustDatatype)
|
35
|
-
value.load_in_r_as(variable.to_s)
|
36
|
-
elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
|
37
|
-
R_ENGINE.assign(variable, value)
|
38
|
-
else
|
39
|
-
raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
|
8
|
+
class DataFrame < RustDatatype
|
9
|
+
def self.can_pull?(type, klass)
|
10
|
+
return [klass].flatten.include?("data.frame")
|
40
11
|
end
|
41
12
|
|
42
|
-
|
43
|
-
|
44
|
-
def self.[](variable, type=RustDatatype)
|
45
|
-
return type.pull_variable(variable)
|
46
|
-
end
|
47
|
-
|
48
|
-
def self._eval_big(r_command, return_warnings = false)
|
49
|
-
r_command = r_command.join("\n") if r_command.is_a?(Array)
|
50
|
-
|
51
|
-
self._rexec(r_command, return_warnings) do |cmd|
|
52
|
-
result = true
|
53
|
-
instructions = cmd.lines
|
54
|
-
|
55
|
-
while instructions.size > 0
|
56
|
-
current_command = ""
|
57
|
-
|
58
|
-
while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
|
59
|
-
current_command << instructions.shift
|
60
|
-
end
|
61
|
-
|
62
|
-
result &= R_ENGINE.eval(current_command)
|
63
|
-
end
|
64
|
-
|
65
|
-
result
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def self._pull(r_command, return_warnings = false)
|
70
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
|
71
|
-
end
|
72
|
-
|
73
|
-
def self._eval(r_command, return_warnings = false)
|
74
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
|
75
|
-
end
|
76
|
-
|
77
|
-
def self._rexec(r_command, return_warnings = false)
|
78
|
-
puts "Calling _rexec with command: #{r_command}" if @@debugging
|
79
|
-
R_MUTEX.synchronize do
|
80
|
-
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
81
|
-
|
82
|
-
result = nil
|
83
|
-
begin
|
84
|
-
$stdout = StringIO.new
|
85
|
-
if return_warnings
|
86
|
-
R_ENGINE.echo(true, true)
|
87
|
-
else
|
88
|
-
R_ENGINE.echo(false, false)
|
89
|
-
end
|
90
|
-
result = yield(r_command)
|
91
|
-
ensure
|
92
|
-
R_ENGINE.echo(false, false)
|
93
|
-
warnings = $stdout.string
|
94
|
-
$stdout = STDOUT
|
95
|
-
end
|
96
|
-
|
97
|
-
if return_warnings
|
98
|
-
return result, warnings.lines.map { |w| w.strip.chomp }
|
99
|
-
else
|
100
|
-
return result
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class RustDatatype
|
106
|
-
def self.pull_variable(variable)
|
107
|
-
return Rust._pull(variable)
|
13
|
+
def self.pull_priority
|
14
|
+
1
|
108
15
|
end
|
109
16
|
|
110
|
-
def
|
111
|
-
raise "Not implemented"
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class DataFrame < RustDatatype
|
116
|
-
def self.pull_variable(variable)
|
17
|
+
def self.pull_variable(variable, type, klass)
|
117
18
|
hash = {}
|
118
|
-
colnames = Rust
|
19
|
+
colnames = Rust["colnames(#{variable})"]
|
119
20
|
colnames.each do |col|
|
120
|
-
hash[col] = Rust
|
21
|
+
hash[col] = Rust["#{variable}$\"#{col}\""]
|
121
22
|
end
|
122
23
|
return DataFrame.new(hash)
|
123
24
|
end
|
124
25
|
|
26
|
+
##
|
27
|
+
# Creates a new data-frame.
|
28
|
+
# +labels_or_data+ can be either:
|
29
|
+
# - an Array of column names (creates an empty data-frame)
|
30
|
+
# - a Hash with column names as keys and values as values
|
31
|
+
|
125
32
|
def initialize(labels_or_data)
|
126
33
|
@data = {}
|
127
34
|
|
@@ -137,6 +44,9 @@ module Rust
|
|
137
44
|
end
|
138
45
|
end
|
139
46
|
|
47
|
+
##
|
48
|
+
# Returns the +i+-th row of the data-frame
|
49
|
+
|
140
50
|
def row(i)
|
141
51
|
if i < 0 || i >= self.rows
|
142
52
|
return nil
|
@@ -145,6 +55,9 @@ module Rust
|
|
145
55
|
end
|
146
56
|
end
|
147
57
|
|
58
|
+
##
|
59
|
+
# Returns the +i+-th row of the data-frame. Faster (but harder to interpret) alternative to #row.
|
60
|
+
|
148
61
|
def fast_row(i)
|
149
62
|
if i < 0 || i >= self.rows
|
150
63
|
return nil
|
@@ -153,6 +66,9 @@ module Rust
|
|
153
66
|
end
|
154
67
|
end
|
155
68
|
|
69
|
+
##
|
70
|
+
# Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
|
71
|
+
|
156
72
|
def shuffle(*args)
|
157
73
|
result = DataFrame.new(@labels)
|
158
74
|
|
@@ -167,6 +83,10 @@ module Rust
|
|
167
83
|
return result
|
168
84
|
end
|
169
85
|
|
86
|
+
##
|
87
|
+
# Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
|
88
|
+
# are nil, all the rows/columns are returned.
|
89
|
+
|
170
90
|
def [](rows, cols=nil)
|
171
91
|
raise "You must specify either rows or columns to select" if !rows && !cols
|
172
92
|
result = self
|
@@ -182,11 +102,17 @@ module Rust
|
|
182
102
|
return result
|
183
103
|
end
|
184
104
|
|
105
|
+
##
|
106
|
+
# Return the column named +name+.
|
107
|
+
|
185
108
|
def column(name)
|
186
109
|
return @data[name]
|
187
110
|
end
|
188
111
|
alias :| :column
|
189
112
|
|
113
|
+
##
|
114
|
+
# Renames the column named +old_name+ in +new_name+.
|
115
|
+
|
190
116
|
def rename_column!(old_name, new_name)
|
191
117
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
192
118
|
raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
|
@@ -195,10 +121,24 @@ module Rust
|
|
195
121
|
@labels[@labels.index(old_name)] = new_name
|
196
122
|
end
|
197
123
|
|
124
|
+
##
|
125
|
+
# Functionally transforms the column named +column+ by applying the function given as a block.
|
126
|
+
# Example:
|
127
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
|
128
|
+
# df.transform_column!("a") { |v| v + 1 }
|
129
|
+
# df|"a" # => [2, 3, 4]
|
130
|
+
|
198
131
|
def transform_column!(column)
|
199
132
|
@data[column].map! { |e| yield e }
|
200
133
|
end
|
201
134
|
|
135
|
+
##
|
136
|
+
# Returns a copy data-frame with only the rows for which the function given in the block returns true.
|
137
|
+
# Example:
|
138
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
|
139
|
+
# df2 = df.select_rows { |r| r['a'].even? }
|
140
|
+
# df2|"b" # => ['b']
|
141
|
+
|
202
142
|
def select_rows
|
203
143
|
result = DataFrame.new(self.column_names)
|
204
144
|
self.each_with_index do |row, i|
|
@@ -207,6 +147,9 @@ module Rust
|
|
207
147
|
return result
|
208
148
|
end
|
209
149
|
|
150
|
+
##
|
151
|
+
# Returns true if the function given in the block returns true for any of the rows in this data-frame.
|
152
|
+
|
210
153
|
def has_row?
|
211
154
|
self.each_with_index do |row, i|
|
212
155
|
return true if yield row, i
|
@@ -214,6 +157,10 @@ module Rust
|
|
214
157
|
return false
|
215
158
|
end
|
216
159
|
|
160
|
+
##
|
161
|
+
# Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
|
162
|
+
# (only the columns for which the function returns true are kept).
|
163
|
+
|
217
164
|
def select_columns(cols=nil)
|
218
165
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
219
166
|
|
@@ -229,23 +176,35 @@ module Rust
|
|
229
176
|
end
|
230
177
|
alias :select_cols :select_columns
|
231
178
|
|
179
|
+
##
|
180
|
+
# Deletes the column named +column+.
|
181
|
+
|
232
182
|
def delete_column(column)
|
233
183
|
@labels.delete(column)
|
234
184
|
@data.delete(column)
|
235
185
|
end
|
236
186
|
|
187
|
+
##
|
188
|
+
# Deletes the +i+-th row.
|
189
|
+
|
237
190
|
def delete_row(i)
|
238
191
|
@data.each do |label, column|
|
239
192
|
column.delete_at(i)
|
240
193
|
end
|
241
194
|
end
|
242
195
|
|
196
|
+
##
|
197
|
+
# Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
|
198
|
+
|
243
199
|
def uniq_by(by)
|
244
200
|
result = self.clone
|
245
201
|
result.uniq_by!(by)
|
246
202
|
return result
|
247
203
|
end
|
248
204
|
|
205
|
+
##
|
206
|
+
# Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
|
207
|
+
|
249
208
|
def uniq_by!(by)
|
250
209
|
my_keys = {}
|
251
210
|
to_delete = []
|
@@ -268,19 +227,33 @@ module Rust
|
|
268
227
|
return self
|
269
228
|
end
|
270
229
|
|
230
|
+
##
|
231
|
+
# Return the names of the columns.
|
232
|
+
|
271
233
|
def column_names
|
272
234
|
return @labels.map { |k| k.to_s }
|
273
235
|
end
|
274
236
|
alias :colnames :column_names
|
275
237
|
|
238
|
+
##
|
239
|
+
# Returns the number of rows.
|
240
|
+
|
276
241
|
def rows
|
277
242
|
@data.values[0].size
|
278
243
|
end
|
279
244
|
|
245
|
+
##
|
246
|
+
# Returns the number of columns
|
247
|
+
|
280
248
|
def columns
|
281
249
|
@labels.size
|
282
250
|
end
|
283
251
|
|
252
|
+
##
|
253
|
+
# Adds the given +row+ to the data-frame. +row+ can be either:
|
254
|
+
# - An Array of values for all the columns (in the order of #column_names);
|
255
|
+
# - A Hash containing associations between column names and value to be set.
|
256
|
+
|
284
257
|
def add_row(row)
|
285
258
|
if row.is_a?(Array)
|
286
259
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -304,6 +277,11 @@ module Rust
|
|
304
277
|
end
|
305
278
|
alias :<< :add_row
|
306
279
|
|
280
|
+
##
|
281
|
+
# Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
|
282
|
+
# rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
|
283
|
+
# value to assign for the new column.
|
284
|
+
|
307
285
|
def add_column(name, values=nil)
|
308
286
|
raise "Column already exists" if @labels.include?(name)
|
309
287
|
raise "Values or block required" if !values && !block_given?
|
@@ -320,6 +298,9 @@ module Rust
|
|
320
298
|
end
|
321
299
|
end
|
322
300
|
|
301
|
+
##
|
302
|
+
# Yields each row as a Hash containing column names as keys and values as values.
|
303
|
+
|
323
304
|
def each
|
324
305
|
self.each_with_index do |element, i|
|
325
306
|
yield element
|
@@ -328,6 +309,10 @@ module Rust
|
|
328
309
|
return self
|
329
310
|
end
|
330
311
|
|
312
|
+
##
|
313
|
+
# Yields each row as a Hash containing column names as keys and values as values. Faster alternative to
|
314
|
+
# #each.
|
315
|
+
|
331
316
|
def fast_each
|
332
317
|
self.fast_each_with_index do |element, i|
|
333
318
|
yield element
|
@@ -336,6 +321,9 @@ module Rust
|
|
336
321
|
return self
|
337
322
|
end
|
338
323
|
|
324
|
+
##
|
325
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index.
|
326
|
+
|
339
327
|
def each_with_index
|
340
328
|
for i in 0...self.rows
|
341
329
|
element = {}
|
@@ -349,6 +337,10 @@ module Rust
|
|
349
337
|
return self
|
350
338
|
end
|
351
339
|
|
340
|
+
##
|
341
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index. Faster
|
342
|
+
# alternative to #each_with_index.
|
343
|
+
|
352
344
|
def fast_each_with_index
|
353
345
|
for i in 0...self.rows
|
354
346
|
element = []
|
@@ -373,6 +365,14 @@ module Rust
|
|
373
365
|
row_index += 1
|
374
366
|
end
|
375
367
|
|
368
|
+
self.column_names.each do |name|
|
369
|
+
column = self.column(name)
|
370
|
+
|
371
|
+
if column.is_a?(Factor)
|
372
|
+
command << "#{variable_name}[,#{name.to_R}] <- factor(#{variable_name}[,#{name.to_R}], labels=#{column.levels.to_R})"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
376
|
Rust._eval_big(command)
|
377
377
|
end
|
378
378
|
|
@@ -397,6 +397,9 @@ module Rust
|
|
397
397
|
return result
|
398
398
|
end
|
399
399
|
|
400
|
+
##
|
401
|
+
# Returns a copy of the data-frame containing only the first +n+ rows.
|
402
|
+
|
400
403
|
def head(n=10)
|
401
404
|
result = DataFrame.new(self.column_names)
|
402
405
|
self.each_with_index do |row, i|
|
@@ -405,6 +408,11 @@ module Rust
|
|
405
408
|
return result
|
406
409
|
end
|
407
410
|
|
411
|
+
##
|
412
|
+
# Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String).
|
413
|
+
# +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
|
414
|
+
# for this and the +other+ data-frame, respectively.
|
415
|
+
|
408
416
|
def merge(other, by, first_alias = "x", second_alias = "y")
|
409
417
|
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
410
418
|
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
@@ -471,6 +479,14 @@ module Rust
|
|
471
479
|
return result
|
472
480
|
end
|
473
481
|
|
482
|
+
##
|
483
|
+
# Aggregate the value in groups depending on the +by+ column (String).
|
484
|
+
# A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
|
485
|
+
# specified as optional arguments in which the name of the argument represents the column name and the value
|
486
|
+
# contains a block for aggregating the specific column.
|
487
|
+
# Both the default and the specialized blocks must take as argument an array of values and must return a
|
488
|
+
# scalar value.
|
489
|
+
|
474
490
|
def aggregate(by, **aggregators)
|
475
491
|
raise TypeError, "Expected a string" unless by.is_a?(String)
|
476
492
|
raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
|
@@ -511,12 +527,18 @@ module Rust
|
|
511
527
|
return result
|
512
528
|
end
|
513
529
|
|
530
|
+
##
|
531
|
+
# Returns a copy of this data-frame in which the rows are sorted by the values of the +by+ column.
|
532
|
+
|
514
533
|
def sort_by(column)
|
515
534
|
result = self.clone
|
516
535
|
result.sort_by!(column)
|
517
536
|
return result
|
518
537
|
end
|
519
538
|
|
539
|
+
##
|
540
|
+
# Sorts the rows of this data-frame by the values of the +by+ column.
|
541
|
+
|
520
542
|
def sort_by!(by)
|
521
543
|
copy = @data[by].clone
|
522
544
|
copy.sort!
|
@@ -542,6 +564,9 @@ module Rust
|
|
542
564
|
@data[by].sort!
|
543
565
|
end
|
544
566
|
|
567
|
+
##
|
568
|
+
# Adds all the rows in +dataframe+ to this data-frame. The column names must match.
|
569
|
+
|
545
570
|
def bind_rows!(dataframe)
|
546
571
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
547
572
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -554,6 +579,9 @@ module Rust
|
|
554
579
|
end
|
555
580
|
alias :rbind! :bind_rows!
|
556
581
|
|
582
|
+
##
|
583
|
+
# Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
|
584
|
+
|
557
585
|
def bind_columns!(dataframe)
|
558
586
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
559
587
|
raise "The number of rows are not compatible" if self.rows != dataframe.rows
|
@@ -567,6 +595,9 @@ module Rust
|
|
567
595
|
end
|
568
596
|
alias :cbind! :bind_columns!
|
569
597
|
|
598
|
+
##
|
599
|
+
# Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
|
600
|
+
|
570
601
|
def bind_rows(dataframe)
|
571
602
|
result = self.clone
|
572
603
|
result.bind_rows!(dataframe)
|
@@ -574,6 +605,9 @@ module Rust
|
|
574
605
|
end
|
575
606
|
alias :rbind :bind_rows
|
576
607
|
|
608
|
+
##
|
609
|
+
# Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
|
610
|
+
|
577
611
|
def bind_columns(dataframe)
|
578
612
|
result = self.clone
|
579
613
|
result.bind_columns!(dataframe)
|
@@ -581,88 +615,22 @@ module Rust
|
|
581
615
|
end
|
582
616
|
alias :cbind :bind_columns
|
583
617
|
|
618
|
+
##
|
619
|
+
# Returns a copy of this data-frame.
|
620
|
+
|
584
621
|
def clone
|
585
622
|
DataFrame.new(@data)
|
586
623
|
end
|
587
624
|
end
|
588
625
|
|
589
|
-
|
590
|
-
|
591
|
-
return Rust._pull(variable)
|
592
|
-
end
|
593
|
-
|
594
|
-
def initialize(data)
|
595
|
-
if data.flatten.size == 0
|
596
|
-
raise "Empty matrices are not allowed"
|
597
|
-
else
|
598
|
-
raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
|
599
|
-
raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
|
600
|
-
raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
|
601
|
-
@data = data.clone
|
602
|
-
end
|
603
|
-
end
|
604
|
-
|
605
|
-
def [](i, j)
|
606
|
-
return @data[i][j]
|
607
|
-
end
|
608
|
-
|
609
|
-
def rows
|
610
|
-
@data.size
|
611
|
-
end
|
612
|
-
|
613
|
-
def cols
|
614
|
-
@data[0].size
|
615
|
-
end
|
616
|
-
|
617
|
-
def []=(i, j, value)
|
618
|
-
raise "Wrong i" unless i.between?(0, @data.size - 1)
|
619
|
-
raise "Wrong j" unless j.between?(0, @data[0].size - 1)
|
620
|
-
@data[i][j] = value
|
621
|
-
end
|
622
|
-
|
623
|
-
def load_in_r_as(variable_name)
|
624
|
-
Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=T)")
|
625
|
-
end
|
626
|
-
end
|
626
|
+
##
|
627
|
+
# Represents an array of DataFrame
|
627
628
|
|
628
|
-
class
|
629
|
-
attr_reader :min
|
630
|
-
attr_reader :max
|
631
|
-
|
632
|
-
def initialize(min, max, step=1)
|
633
|
-
@min = min
|
634
|
-
@max = max
|
635
|
-
@step = step
|
636
|
-
end
|
637
|
-
|
638
|
-
def step(step)
|
639
|
-
@step = step
|
640
|
-
end
|
641
|
-
|
642
|
-
def each
|
643
|
-
(@min..@max).step(@step) do |v|
|
644
|
-
yield v
|
645
|
-
end
|
646
|
-
end
|
647
|
-
|
648
|
-
def to_a
|
649
|
-
result = []
|
650
|
-
self.each do |v|
|
651
|
-
result << v
|
652
|
-
end
|
653
|
-
return result
|
654
|
-
end
|
629
|
+
class DataFrameArray < Array
|
655
630
|
|
656
|
-
|
657
|
-
|
658
|
-
end
|
631
|
+
##
|
632
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
659
633
|
|
660
|
-
def load_in_r_as(variable_name)
|
661
|
-
Rust._eval("#{variable_name} <- #{self.to_R}")
|
662
|
-
end
|
663
|
-
end
|
664
|
-
|
665
|
-
class DataFrameArray < Array
|
666
634
|
def bind_all
|
667
635
|
return nil if self.size == 0
|
668
636
|
|
@@ -676,7 +644,14 @@ module Rust
|
|
676
644
|
end
|
677
645
|
end
|
678
646
|
|
647
|
+
##
|
648
|
+
# Represents a hash of DataFrame
|
649
|
+
|
679
650
|
class DataFrameHash < Hash
|
651
|
+
|
652
|
+
##
|
653
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
654
|
+
|
680
655
|
def bind_all
|
681
656
|
return nil if self.values.size == 0
|
682
657
|
|
@@ -689,151 +664,4 @@ module Rust
|
|
689
664
|
return result
|
690
665
|
end
|
691
666
|
end
|
692
|
-
|
693
|
-
class MathArray < Array
|
694
|
-
def -(other)
|
695
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
696
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
697
|
-
|
698
|
-
result = self.clone
|
699
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
700
|
-
for i in 0...self.size
|
701
|
-
result[i] -= other[i]
|
702
|
-
end
|
703
|
-
|
704
|
-
return result
|
705
|
-
end
|
706
|
-
|
707
|
-
def *(other)
|
708
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
709
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
710
|
-
|
711
|
-
result = self.clone
|
712
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
713
|
-
for i in 0...self.size
|
714
|
-
result[i] *= other[i]
|
715
|
-
end
|
716
|
-
|
717
|
-
return result
|
718
|
-
end
|
719
|
-
|
720
|
-
def +(other)
|
721
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
722
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
723
|
-
|
724
|
-
result = self.clone
|
725
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
726
|
-
for i in 0...self.size
|
727
|
-
result[i] += other[i]
|
728
|
-
end
|
729
|
-
|
730
|
-
return result
|
731
|
-
end
|
732
|
-
|
733
|
-
def /(other) #To recover the syntax highlighting but in Kate: /
|
734
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
735
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
736
|
-
|
737
|
-
result = self.clone
|
738
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
739
|
-
for i in 0...self.size
|
740
|
-
result[i] /= other[i]
|
741
|
-
end
|
742
|
-
|
743
|
-
return result
|
744
|
-
end
|
745
|
-
|
746
|
-
def **(other)
|
747
|
-
raise ArgumentError, "Expected numeric" if !other.is_a?(Numeric)
|
748
|
-
|
749
|
-
result = self.clone
|
750
|
-
for i in 0...self.size
|
751
|
-
result[i] = result[i] ** other
|
752
|
-
end
|
753
|
-
|
754
|
-
return result
|
755
|
-
end
|
756
|
-
end
|
757
|
-
end
|
758
|
-
|
759
|
-
class TrueClass
|
760
|
-
def to_R
|
761
|
-
"TRUE"
|
762
|
-
end
|
763
|
-
end
|
764
|
-
|
765
|
-
class FalseClass
|
766
|
-
def to_R
|
767
|
-
"FALSE"
|
768
|
-
end
|
769
|
-
end
|
770
|
-
|
771
|
-
class Object
|
772
|
-
def to_R
|
773
|
-
raise TypeError, "Unsupported type for #{self.class}"
|
774
|
-
end
|
775
|
-
end
|
776
|
-
|
777
|
-
class NilClass
|
778
|
-
def to_R
|
779
|
-
return "NULL"
|
780
|
-
end
|
781
|
-
end
|
782
|
-
|
783
|
-
class Numeric
|
784
|
-
def to_R
|
785
|
-
self.inspect
|
786
|
-
end
|
787
|
-
end
|
788
|
-
|
789
|
-
class Float
|
790
|
-
def to_R
|
791
|
-
return self.nan? ? "NA" : super
|
792
|
-
end
|
793
|
-
end
|
794
|
-
|
795
|
-
class Array
|
796
|
-
def to_R
|
797
|
-
return "c(#{self.map { |e| e.to_R }.join(",")})"
|
798
|
-
end
|
799
|
-
|
800
|
-
def distribution
|
801
|
-
result = {}
|
802
|
-
self.each do |value|
|
803
|
-
result[value] = result[value].to_i + 1
|
804
|
-
end
|
805
|
-
return result
|
806
|
-
end
|
807
|
-
end
|
808
|
-
|
809
|
-
class String
|
810
|
-
def to_R
|
811
|
-
return self.inspect
|
812
|
-
end
|
813
|
-
end
|
814
|
-
|
815
|
-
class Range
|
816
|
-
def to_R
|
817
|
-
[range.min, range.max].to_R
|
818
|
-
end
|
819
|
-
end
|
820
|
-
|
821
|
-
module Rust::RBindings
|
822
|
-
def data_frame(*args)
|
823
|
-
Rust::DataFrame.new(*args)
|
824
|
-
end
|
825
|
-
end
|
826
|
-
|
827
|
-
module Rust::TestCases
|
828
|
-
def self.sample_dataframe(columns, size=100)
|
829
|
-
result = Rust::DataFrame.new(columns)
|
830
|
-
size.times do |i|
|
831
|
-
result << columns.map { |c| yield i, c }
|
832
|
-
end
|
833
|
-
return result
|
834
|
-
end
|
835
|
-
end
|
836
|
-
|
837
|
-
def bind_r!
|
838
|
-
include Rust::RBindings
|
839
667
|
end
|