rust 0.7 → 0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +23 -1
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +159 -331
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/external/robustbase.rb +44 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +89 -167
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +45 -2
- data/lib/{rust-descriptive.rb → rust/stats/descriptive.rb} +52 -3
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +28 -13
- data/lib/{rust-probabilities.rb → rust/stats/probabilities.rb} +142 -34
- data/lib/{rust-tests.rb → rust/stats/tests.rb} +178 -92
- data/lib/rust.rb +4 -9
- metadata +32 -13
- data/lib/rust-calls.rb +0 -80
@@ -1,127 +1,34 @@
|
|
1
|
-
|
2
|
-
require 'stringio'
|
3
|
-
require 'rinruby'
|
4
|
-
require 'csv'
|
1
|
+
require_relative 'datatype'
|
5
2
|
|
6
3
|
module Rust
|
7
|
-
CLIENT_MUTEX = Mutex.new
|
8
|
-
R_MUTEX = Mutex.new
|
9
4
|
|
10
|
-
|
5
|
+
##
|
6
|
+
# Mirror of the data-frame type in R.
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@@debugging = false
|
17
|
-
@@in_client_mutex = false
|
18
|
-
|
19
|
-
def self.debug
|
20
|
-
@@debugging = true
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.exclusive
|
24
|
-
result = nil
|
25
|
-
CLIENT_MUTEX.synchronize do
|
26
|
-
@@in_client_mutex = true
|
27
|
-
result = yield
|
28
|
-
@@in_client_mutex = false
|
29
|
-
end
|
30
|
-
return result
|
31
|
-
end
|
32
|
-
|
33
|
-
def self.[]=(variable, value)
|
34
|
-
if value.is_a?(RustDatatype)
|
35
|
-
value.load_in_r_as(variable.to_s)
|
36
|
-
elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
|
37
|
-
R_ENGINE.assign(variable, value)
|
38
|
-
else
|
39
|
-
raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
|
8
|
+
class DataFrame < RustDatatype
|
9
|
+
def self.can_pull?(type, klass)
|
10
|
+
return [klass].flatten.include?("data.frame")
|
40
11
|
end
|
41
12
|
|
42
|
-
|
43
|
-
|
44
|
-
def self.[](variable, type=RustDatatype)
|
45
|
-
return type.pull_variable(variable)
|
46
|
-
end
|
47
|
-
|
48
|
-
def self._eval_big(r_command, return_warnings = false)
|
49
|
-
r_command = r_command.join("\n") if r_command.is_a?(Array)
|
50
|
-
|
51
|
-
self._rexec(r_command, return_warnings) do |cmd|
|
52
|
-
result = true
|
53
|
-
instructions = cmd.lines
|
54
|
-
|
55
|
-
while instructions.size > 0
|
56
|
-
current_command = ""
|
57
|
-
|
58
|
-
while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
|
59
|
-
current_command << instructions.shift
|
60
|
-
end
|
61
|
-
|
62
|
-
result &= R_ENGINE.eval(current_command)
|
63
|
-
end
|
64
|
-
|
65
|
-
result
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def self._pull(r_command, return_warnings = false)
|
70
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
|
71
|
-
end
|
72
|
-
|
73
|
-
def self._eval(r_command, return_warnings = false)
|
74
|
-
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
|
75
|
-
end
|
76
|
-
|
77
|
-
def self._rexec(r_command, return_warnings = false)
|
78
|
-
puts "Calling _rexec with command: #{r_command}" if @@debugging
|
79
|
-
R_MUTEX.synchronize do
|
80
|
-
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
81
|
-
|
82
|
-
result = nil
|
83
|
-
begin
|
84
|
-
$stdout = StringIO.new
|
85
|
-
if return_warnings
|
86
|
-
R_ENGINE.echo(true, true)
|
87
|
-
else
|
88
|
-
R_ENGINE.echo(false, false)
|
89
|
-
end
|
90
|
-
result = yield(r_command)
|
91
|
-
ensure
|
92
|
-
R_ENGINE.echo(false, false)
|
93
|
-
warnings = $stdout.string
|
94
|
-
$stdout = STDOUT
|
95
|
-
end
|
96
|
-
|
97
|
-
if return_warnings
|
98
|
-
return result, warnings.lines.map { |w| w.strip.chomp }
|
99
|
-
else
|
100
|
-
return result
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class RustDatatype
|
106
|
-
def self.pull_variable(variable)
|
107
|
-
return Rust._pull(variable)
|
13
|
+
def self.pull_priority
|
14
|
+
1
|
108
15
|
end
|
109
16
|
|
110
|
-
def
|
111
|
-
raise "Not implemented"
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class DataFrame < RustDatatype
|
116
|
-
def self.pull_variable(variable)
|
17
|
+
def self.pull_variable(variable, type, klass)
|
117
18
|
hash = {}
|
118
|
-
colnames = Rust
|
19
|
+
colnames = Rust["colnames(#{variable})"]
|
119
20
|
colnames.each do |col|
|
120
|
-
hash[col] = Rust
|
21
|
+
hash[col] = Rust["#{variable}$\"#{col}\""]
|
121
22
|
end
|
122
23
|
return DataFrame.new(hash)
|
123
24
|
end
|
124
25
|
|
26
|
+
##
|
27
|
+
# Creates a new data-frame.
|
28
|
+
# +labels_or_data+ can be either:
|
29
|
+
# - an Array of column names (creates an empty data-frame)
|
30
|
+
# - a Hash with column names as keys and values as values
|
31
|
+
|
125
32
|
def initialize(labels_or_data)
|
126
33
|
@data = {}
|
127
34
|
|
@@ -137,6 +44,9 @@ module Rust
|
|
137
44
|
end
|
138
45
|
end
|
139
46
|
|
47
|
+
##
|
48
|
+
# Returns the +i+-th row of the data-frame
|
49
|
+
|
140
50
|
def row(i)
|
141
51
|
if i < 0 || i >= self.rows
|
142
52
|
return nil
|
@@ -145,6 +55,9 @@ module Rust
|
|
145
55
|
end
|
146
56
|
end
|
147
57
|
|
58
|
+
##
|
59
|
+
# Returns the +i+-th row of the data-frame. Faster (but harder to interpret) alternative to #row.
|
60
|
+
|
148
61
|
def fast_row(i)
|
149
62
|
if i < 0 || i >= self.rows
|
150
63
|
return nil
|
@@ -153,6 +66,9 @@ module Rust
|
|
153
66
|
end
|
154
67
|
end
|
155
68
|
|
69
|
+
##
|
70
|
+
# Shuffles the rows in the data-frame. The arguments are passed to the Array#shuffle method.
|
71
|
+
|
156
72
|
def shuffle(*args)
|
157
73
|
result = DataFrame.new(@labels)
|
158
74
|
|
@@ -167,6 +83,10 @@ module Rust
|
|
167
83
|
return result
|
168
84
|
end
|
169
85
|
|
86
|
+
##
|
87
|
+
# Returns a copy of the data-frame containing only the specified +rows+ and/or +cols+. If +rows+ and/or +cols+
|
88
|
+
# are nil, all the rows/columns are returned.
|
89
|
+
|
170
90
|
def [](rows, cols=nil)
|
171
91
|
raise "You must specify either rows or columns to select" if !rows && !cols
|
172
92
|
result = self
|
@@ -182,11 +102,17 @@ module Rust
|
|
182
102
|
return result
|
183
103
|
end
|
184
104
|
|
105
|
+
##
|
106
|
+
# Returns the column named +name+.
|
107
|
+
|
185
108
|
def column(name)
|
186
109
|
return @data[name]
|
187
110
|
end
|
188
111
|
alias :| :column
|
189
112
|
|
113
|
+
##
|
114
|
+
# Renames the column named +old_name+ to +new_name+.
|
115
|
+
|
190
116
|
def rename_column!(old_name, new_name)
|
191
117
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
192
118
|
raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)
|
@@ -195,10 +121,24 @@ module Rust
|
|
195
121
|
@labels[@labels.index(old_name)] = new_name
|
196
122
|
end
|
197
123
|
|
124
|
+
##
|
125
|
+
# Functionally transforms the column named +column+ by applying the function given as a block.
|
126
|
+
# Example:
|
127
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: [3,4,5]})
|
128
|
+
# df.transform_column!("a") { |v| v + 1 }
|
129
|
+
# df|"a" # => [2, 3, 4]
|
130
|
+
|
198
131
|
def transform_column!(column)
|
199
132
|
@data[column].map! { |e| yield e }
|
200
133
|
end
|
201
134
|
|
135
|
+
##
|
136
|
+
# Returns a copy of the data-frame with only the rows for which the function given in the block returns true.
|
137
|
+
# Example:
|
138
|
+
# df = Rust::DataFrame.new({a: [1,2,3], b: ['a','b','c']})
|
139
|
+
# df2 = df.select_rows { |r| r['a'].even? }
|
140
|
+
# df2|"b" # => ['b']
|
141
|
+
|
202
142
|
def select_rows
|
203
143
|
result = DataFrame.new(self.column_names)
|
204
144
|
self.each_with_index do |row, i|
|
@@ -207,6 +147,9 @@ module Rust
|
|
207
147
|
return result
|
208
148
|
end
|
209
149
|
|
150
|
+
##
|
151
|
+
# Returns true if the function given in the block returns true for any of the rows in this data-frame.
|
152
|
+
|
210
153
|
def has_row?
|
211
154
|
self.each_with_index do |row, i|
|
212
155
|
return true if yield row, i
|
@@ -214,6 +157,10 @@ module Rust
|
|
214
157
|
return false
|
215
158
|
end
|
216
159
|
|
160
|
+
##
|
161
|
+
# Returns a copy of the data-frame with only the columns in +cols+. As an alternative, a block can be used
|
162
|
+
# (only the columns for which the function returns true are kept).
|
163
|
+
|
217
164
|
def select_columns(cols=nil)
|
218
165
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
219
166
|
|
@@ -229,23 +176,35 @@ module Rust
|
|
229
176
|
end
|
230
177
|
alias :select_cols :select_columns
|
231
178
|
|
179
|
+
##
|
180
|
+
# Deletes the column named +column+.
|
181
|
+
|
232
182
|
def delete_column(column)
|
233
183
|
@labels.delete(column)
|
234
184
|
@data.delete(column)
|
235
185
|
end
|
236
186
|
|
187
|
+
##
|
188
|
+
# Deletes the +i+-th row.
|
189
|
+
|
237
190
|
def delete_row(i)
|
238
191
|
@data.each do |label, column|
|
239
192
|
column.delete_at(i)
|
240
193
|
end
|
241
194
|
end
|
242
195
|
|
196
|
+
##
|
197
|
+
# Returns a data-frame in which the rows are unique in terms of all the given columns named +by+.
|
198
|
+
|
243
199
|
def uniq_by(by)
|
244
200
|
result = self.clone
|
245
201
|
result.uniq_by!(by)
|
246
202
|
return result
|
247
203
|
end
|
248
204
|
|
205
|
+
##
|
206
|
+
# Makes sure that in this data-frame the rows are unique in terms of all the given columns named +by+.
|
207
|
+
|
249
208
|
def uniq_by!(by)
|
250
209
|
my_keys = {}
|
251
210
|
to_delete = []
|
@@ -268,19 +227,33 @@ module Rust
|
|
268
227
|
return self
|
269
228
|
end
|
270
229
|
|
230
|
+
##
|
231
|
+
# Returns the names of the columns.
|
232
|
+
|
271
233
|
def column_names
|
272
234
|
return @labels.map { |k| k.to_s }
|
273
235
|
end
|
274
236
|
alias :colnames :column_names
|
275
237
|
|
238
|
+
##
|
239
|
+
# Returns the number of rows.
|
240
|
+
|
276
241
|
def rows
|
277
242
|
@data.values[0].size
|
278
243
|
end
|
279
244
|
|
245
|
+
##
|
246
|
+
# Returns the number of columns
|
247
|
+
|
280
248
|
def columns
|
281
249
|
@labels.size
|
282
250
|
end
|
283
251
|
|
252
|
+
##
|
253
|
+
# Adds the given +row+ to the data-frame. +row+ can be either:
|
254
|
+
# - An Array of values for all the columns (in the order of #column_names);
|
255
|
+
# - A Hash containing associations between column names and value to be set.
|
256
|
+
|
284
257
|
def add_row(row)
|
285
258
|
if row.is_a?(Array)
|
286
259
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -304,6 +277,11 @@ module Rust
|
|
304
277
|
end
|
305
278
|
alias :<< :add_row
|
306
279
|
|
280
|
+
##
|
281
|
+
# Adds a column named +name+ with the given +values+ (array). The size of +values+ must match the number of
|
282
|
+
# rows of this data-frame. As an alternative, it can be passed a block which returns, for a given row, the
|
283
|
+
# value to assign for the new column.
|
284
|
+
|
307
285
|
def add_column(name, values=nil)
|
308
286
|
raise "Column already exists" if @labels.include?(name)
|
309
287
|
raise "Values or block required" if !values && !block_given?
|
@@ -320,6 +298,9 @@ module Rust
|
|
320
298
|
end
|
321
299
|
end
|
322
300
|
|
301
|
+
##
|
302
|
+
# Yields each row as a Hash containing column names as keys and values as values.
|
303
|
+
|
323
304
|
def each
|
324
305
|
self.each_with_index do |element, i|
|
325
306
|
yield element
|
@@ -328,6 +309,10 @@ module Rust
|
|
328
309
|
return self
|
329
310
|
end
|
330
311
|
|
312
|
+
##
|
313
|
+
# Yields each row as an Array of values (not a Hash, unlike #each). Faster alternative to
|
314
|
+
# #each.
|
315
|
+
|
331
316
|
def fast_each
|
332
317
|
self.fast_each_with_index do |element, i|
|
333
318
|
yield element
|
@@ -336,6 +321,9 @@ module Rust
|
|
336
321
|
return self
|
337
322
|
end
|
338
323
|
|
324
|
+
##
|
325
|
+
# Yields each row as a Hash containing column names as keys and values as values and the row index.
|
326
|
+
|
339
327
|
def each_with_index
|
340
328
|
for i in 0...self.rows
|
341
329
|
element = {}
|
@@ -349,6 +337,10 @@ module Rust
|
|
349
337
|
return self
|
350
338
|
end
|
351
339
|
|
340
|
+
##
|
341
|
+
# Yields each row as an Array of values (not a Hash), together with the row index. Faster
|
342
|
+
# alternative to #each_with_index.
|
343
|
+
|
352
344
|
def fast_each_with_index
|
353
345
|
for i in 0...self.rows
|
354
346
|
element = []
|
@@ -373,6 +365,14 @@ module Rust
|
|
373
365
|
row_index += 1
|
374
366
|
end
|
375
367
|
|
368
|
+
self.column_names.each do |name|
|
369
|
+
column = self.column(name)
|
370
|
+
|
371
|
+
if column.is_a?(Factor)
|
372
|
+
command << "#{variable_name}[,#{name.to_R}] <- factor(#{variable_name}[,#{name.to_R}], labels=#{column.levels.to_R})"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
376
|
Rust._eval_big(command)
|
377
377
|
end
|
378
378
|
|
@@ -397,6 +397,9 @@ module Rust
|
|
397
397
|
return result
|
398
398
|
end
|
399
399
|
|
400
|
+
##
|
401
|
+
# Returns a copy of the data-frame containing only the first +n+ rows.
|
402
|
+
|
400
403
|
def head(n=10)
|
401
404
|
result = DataFrame.new(self.column_names)
|
402
405
|
self.each_with_index do |row, i|
|
@@ -405,6 +408,11 @@ module Rust
|
|
405
408
|
return result
|
406
409
|
end
|
407
410
|
|
411
|
+
##
|
412
|
+
# Merges this data-frame with +other+ in terms of the +by+ columns (Array of String).
|
413
|
+
# +first_alias+ and +second_alias+ specify the prefixes to be used for the columns not in +by+
|
414
|
+
# for this and the +other+ data-frame, respectively.
|
415
|
+
|
408
416
|
def merge(other, by, first_alias = "x", second_alias = "y")
|
409
417
|
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
410
418
|
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
@@ -471,6 +479,14 @@ module Rust
|
|
471
479
|
return result
|
472
480
|
end
|
473
481
|
|
482
|
+
##
|
483
|
+
# Aggregates the values in groups depending on the +by+ column (String).
|
484
|
+
# A block must be passed to specify how to aggregate the columns. Aggregators for specific columns can be
|
485
|
+
# specified as optional arguments in which the name of the argument represents the column name and the value
|
486
|
+
# contains a block for aggregating the specific column.
|
487
|
+
# Both the default and the specialized blocks must take as argument an array of values and must return a
|
488
|
+
# scalar value.
|
489
|
+
|
474
490
|
def aggregate(by, **aggregators)
|
475
491
|
raise TypeError, "Expected a string" unless by.is_a?(String)
|
476
492
|
raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
|
@@ -511,12 +527,18 @@ module Rust
|
|
511
527
|
return result
|
512
528
|
end
|
513
529
|
|
530
|
+
##
|
531
|
+
# Returns a copy of this data-frame in which the rows are sorted by the values of the column named +column+.
|
532
|
+
|
514
533
|
def sort_by(column)
|
515
534
|
result = self.clone
|
516
535
|
result.sort_by!(column)
|
517
536
|
return result
|
518
537
|
end
|
519
538
|
|
539
|
+
##
|
540
|
+
# Sorts the rows of this data-frame by the values of the +by+ column.
|
541
|
+
|
520
542
|
def sort_by!(by)
|
521
543
|
copy = @data[by].clone
|
522
544
|
copy.sort!
|
@@ -542,6 +564,9 @@ module Rust
|
|
542
564
|
@data[by].sort!
|
543
565
|
end
|
544
566
|
|
567
|
+
##
|
568
|
+
# Adds all the rows in +dataframe+ to this data-frame. The column names must match.
|
569
|
+
|
545
570
|
def bind_rows!(dataframe)
|
546
571
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
547
572
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -554,6 +579,9 @@ module Rust
|
|
554
579
|
end
|
555
580
|
alias :rbind! :bind_rows!
|
556
581
|
|
582
|
+
##
|
583
|
+
# Adds all the columns in +dataframe+ to this data-frame. The number of rows must match.
|
584
|
+
|
557
585
|
def bind_columns!(dataframe)
|
558
586
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
559
587
|
raise "The number of rows are not compatible" if self.rows != dataframe.rows
|
@@ -567,6 +595,9 @@ module Rust
|
|
567
595
|
end
|
568
596
|
alias :cbind! :bind_columns!
|
569
597
|
|
598
|
+
##
|
599
|
+
# Returns a copy of this dataframe and adds all the rows in +dataframe+ to it. The column names must match.
|
600
|
+
|
570
601
|
def bind_rows(dataframe)
|
571
602
|
result = self.clone
|
572
603
|
result.bind_rows!(dataframe)
|
@@ -574,6 +605,9 @@ module Rust
|
|
574
605
|
end
|
575
606
|
alias :rbind :bind_rows
|
576
607
|
|
608
|
+
##
|
609
|
+
# Returns a copy of this dataframe and adds all the columns in +dataframe+ to it. The number of rows must match.
|
610
|
+
|
577
611
|
def bind_columns(dataframe)
|
578
612
|
result = self.clone
|
579
613
|
result.bind_columns!(dataframe)
|
@@ -581,88 +615,22 @@ module Rust
|
|
581
615
|
end
|
582
616
|
alias :cbind :bind_columns
|
583
617
|
|
618
|
+
##
|
619
|
+
# Returns a copy of this data-frame.
|
620
|
+
|
584
621
|
def clone
|
585
622
|
DataFrame.new(@data)
|
586
623
|
end
|
587
624
|
end
|
588
625
|
|
589
|
-
|
590
|
-
|
591
|
-
return Rust._pull(variable)
|
592
|
-
end
|
593
|
-
|
594
|
-
def initialize(data)
|
595
|
-
if data.flatten.size == 0
|
596
|
-
raise "Empty matrices are not allowed"
|
597
|
-
else
|
598
|
-
raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
|
599
|
-
raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
|
600
|
-
raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
|
601
|
-
@data = data.clone
|
602
|
-
end
|
603
|
-
end
|
604
|
-
|
605
|
-
def [](i, j)
|
606
|
-
return @data[i][j]
|
607
|
-
end
|
608
|
-
|
609
|
-
def rows
|
610
|
-
@data.size
|
611
|
-
end
|
612
|
-
|
613
|
-
def cols
|
614
|
-
@data[0].size
|
615
|
-
end
|
616
|
-
|
617
|
-
def []=(i, j, value)
|
618
|
-
raise "Wrong i" unless i.between?(0, @data.size - 1)
|
619
|
-
raise "Wrong j" unless j.between?(0, @data[0].size - 1)
|
620
|
-
@data[i][j] = value
|
621
|
-
end
|
622
|
-
|
623
|
-
def load_in_r_as(variable_name)
|
624
|
-
Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=T)")
|
625
|
-
end
|
626
|
-
end
|
626
|
+
##
|
627
|
+
# Represents an array of DataFrame
|
627
628
|
|
628
|
-
class
|
629
|
-
attr_reader :min
|
630
|
-
attr_reader :max
|
631
|
-
|
632
|
-
def initialize(min, max, step=1)
|
633
|
-
@min = min
|
634
|
-
@max = max
|
635
|
-
@step = step
|
636
|
-
end
|
637
|
-
|
638
|
-
def step(step)
|
639
|
-
@step = step
|
640
|
-
end
|
641
|
-
|
642
|
-
def each
|
643
|
-
(@min..@max).step(@step) do |v|
|
644
|
-
yield v
|
645
|
-
end
|
646
|
-
end
|
647
|
-
|
648
|
-
def to_a
|
649
|
-
result = []
|
650
|
-
self.each do |v|
|
651
|
-
result << v
|
652
|
-
end
|
653
|
-
return result
|
654
|
-
end
|
629
|
+
class DataFrameArray < Array
|
655
630
|
|
656
|
-
|
657
|
-
|
658
|
-
end
|
631
|
+
##
|
632
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
659
633
|
|
660
|
-
def load_in_r_as(variable_name)
|
661
|
-
Rust._eval("#{variable_name} <- #{self.to_R}")
|
662
|
-
end
|
663
|
-
end
|
664
|
-
|
665
|
-
class DataFrameArray < Array
|
666
634
|
def bind_all
|
667
635
|
return nil if self.size == 0
|
668
636
|
|
@@ -676,7 +644,14 @@ module Rust
|
|
676
644
|
end
|
677
645
|
end
|
678
646
|
|
647
|
+
##
|
648
|
+
# Represents a hash of DataFrame
|
649
|
+
|
679
650
|
class DataFrameHash < Hash
|
651
|
+
|
652
|
+
##
|
653
|
+
# Returns a data-frame with the rows in all the data-frames together (if compatible).
|
654
|
+
|
680
655
|
def bind_all
|
681
656
|
return nil if self.values.size == 0
|
682
657
|
|
@@ -689,151 +664,4 @@ module Rust
|
|
689
664
|
return result
|
690
665
|
end
|
691
666
|
end
|
692
|
-
|
693
|
-
class MathArray < Array
|
694
|
-
def -(other)
|
695
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
696
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
697
|
-
|
698
|
-
result = self.clone
|
699
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
700
|
-
for i in 0...self.size
|
701
|
-
result[i] -= other[i]
|
702
|
-
end
|
703
|
-
|
704
|
-
return result
|
705
|
-
end
|
706
|
-
|
707
|
-
def *(other)
|
708
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
709
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
710
|
-
|
711
|
-
result = self.clone
|
712
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
713
|
-
for i in 0...self.size
|
714
|
-
result[i] *= other[i]
|
715
|
-
end
|
716
|
-
|
717
|
-
return result
|
718
|
-
end
|
719
|
-
|
720
|
-
def +(other)
|
721
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
722
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
723
|
-
|
724
|
-
result = self.clone
|
725
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
726
|
-
for i in 0...self.size
|
727
|
-
result[i] += other[i]
|
728
|
-
end
|
729
|
-
|
730
|
-
return result
|
731
|
-
end
|
732
|
-
|
733
|
-
def /(other) #To recover the syntax highlighting but in Kate: /
|
734
|
-
raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
|
735
|
-
raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size
|
736
|
-
|
737
|
-
result = self.clone
|
738
|
-
other = [other] * self.size if other.is_a?(Numeric)
|
739
|
-
for i in 0...self.size
|
740
|
-
result[i] /= other[i]
|
741
|
-
end
|
742
|
-
|
743
|
-
return result
|
744
|
-
end
|
745
|
-
|
746
|
-
def **(other)
|
747
|
-
raise ArgumentError, "Expected numeric" if !other.is_a?(Numeric)
|
748
|
-
|
749
|
-
result = self.clone
|
750
|
-
for i in 0...self.size
|
751
|
-
result[i] = result[i] ** other
|
752
|
-
end
|
753
|
-
|
754
|
-
return result
|
755
|
-
end
|
756
|
-
end
|
757
|
-
end
|
758
|
-
|
759
|
-
class TrueClass
|
760
|
-
def to_R
|
761
|
-
"TRUE"
|
762
|
-
end
|
763
|
-
end
|
764
|
-
|
765
|
-
class FalseClass
|
766
|
-
def to_R
|
767
|
-
"FALSE"
|
768
|
-
end
|
769
|
-
end
|
770
|
-
|
771
|
-
class Object
|
772
|
-
def to_R
|
773
|
-
raise TypeError, "Unsupported type for #{self.class}"
|
774
|
-
end
|
775
|
-
end
|
776
|
-
|
777
|
-
class NilClass
|
778
|
-
def to_R
|
779
|
-
return "NULL"
|
780
|
-
end
|
781
|
-
end
|
782
|
-
|
783
|
-
class Numeric
|
784
|
-
def to_R
|
785
|
-
self.inspect
|
786
|
-
end
|
787
|
-
end
|
788
|
-
|
789
|
-
class Float
|
790
|
-
def to_R
|
791
|
-
return self.nan? ? "NA" : super
|
792
|
-
end
|
793
|
-
end
|
794
|
-
|
795
|
-
class Array
|
796
|
-
def to_R
|
797
|
-
return "c(#{self.map { |e| e.to_R }.join(",")})"
|
798
|
-
end
|
799
|
-
|
800
|
-
def distribution
|
801
|
-
result = {}
|
802
|
-
self.each do |value|
|
803
|
-
result[value] = result[value].to_i + 1
|
804
|
-
end
|
805
|
-
return result
|
806
|
-
end
|
807
|
-
end
|
808
|
-
|
809
|
-
class String
|
810
|
-
def to_R
|
811
|
-
return self.inspect
|
812
|
-
end
|
813
|
-
end
|
814
|
-
|
815
|
-
class Range
|
816
|
-
def to_R
|
817
|
-
[range.min, range.max].to_R
|
818
|
-
end
|
819
|
-
end
|
820
|
-
|
821
|
-
module Rust::RBindings
|
822
|
-
def data_frame(*args)
|
823
|
-
Rust::DataFrame.new(*args)
|
824
|
-
end
|
825
|
-
end
|
826
|
-
|
827
|
-
module Rust::TestCases
|
828
|
-
def self.sample_dataframe(columns, size=100)
|
829
|
-
result = Rust::DataFrame.new(columns)
|
830
|
-
size.times do |i|
|
831
|
-
result << columns.map { |c| yield i, c }
|
832
|
-
end
|
833
|
-
return result
|
834
|
-
end
|
835
|
-
end
|
836
|
-
|
837
|
-
def bind_r!
|
838
|
-
include Rust::RBindings
|
839
667
|
end
|