rust 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rust-basics.rb +126 -0
- data/lib/rust-calls.rb +69 -0
- data/lib/rust-core.rb +303 -99
- data/lib/rust-csv.rb +95 -0
- data/lib/rust-descriptive.rb +3 -3
- data/lib/rust-effsize.rb +14 -14
- data/lib/rust-plots.rb +351 -0
- data/lib/rust-tests.rb +41 -20
- data/lib/rust.rb +5 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 985f7e940ab123fa452dae63792bde90613b134176ff4eaf5b59a719dd8a1ed5
|
4
|
+
data.tar.gz: bef0bb5028c99cb43c8e5453fdbbaf687bad65f8e6b54b06f949e5df6fb61bda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 418181b9357665ecc654e9a765e24aa792d6287dd5998b1b1bd8f3278e6951d785fff8afe9594c6c95e1f7a384cb08f844939728b62a96ab416259de1c14512b
|
7
|
+
data.tar.gz: 810f14821924bd1b0cebf4fbf9f52e510abc0e935be6cc2852294fd374d54411bcd2a1943f5b60d2ce93501abcaff9cac25dd458e1a8b354b665aab199705a4a
|
data/lib/rust-basics.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
require_relative 'rust-core'
|
2
|
+
|
3
|
+
module Rust:: Correlation
|
4
|
+
# Pearson's product-moment correlation, delegated to R's cor.test.
class Pearson
  # Runs the correlation test on two samples.
  #
  # @param d1 [Array<Numeric>] first sample
  # @param d2 [Array<Numeric>] second sample
  # @return [Result] test name, 't' statistic, p-value and correlation estimate
  # @raise [TypeError] if either argument is not an Array of Numerics
  def self.test(d1, d2)
    [d1, d2].each do |sample|
      numeric_array = sample.is_a?(Array) && sample.all? { |e| e.is_a?(Numeric) }
      raise TypeError, "Expecting Array of numerics" unless numeric_array
    end

    Rust.exclusive do
      Rust['correlation.a'] = d1
      Rust['correlation.b'] = d2
      Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")

      outcome = Result.new
      outcome.name = "Pearson's product-moment correlation"
      outcome.statistics['t'] = Rust._pull('correlation.result$statistic')
      outcome.pvalue = Rust._pull('correlation.result$p.value')
      outcome.correlation = Rust._pull('correlation.result$estimate')

      # Returns from Pearson.test itself, straight out of the exclusive block.
      return outcome
    end
  end

  # Convenience wrapper returning only the correlation estimate of {test}.
  def self.estimate(d1, d2)
    test(d1, d2).correlation
  end
end
|
29
|
+
|
30
|
+
# Spearman's rank correlation, delegated to R's cor.test.
class Spearman
  # Runs the correlation test on two samples.
  #
  # @param d1 [Array<Numeric>] first sample
  # @param d2 [Array<Numeric>] second sample
  # @return [Result] test name, 'S' statistic, p-value and correlation estimate
  # @raise [TypeError] if either argument is not an Array of Numerics
  def self.test(d1, d2)
    [d1, d2].each do |sample|
      numeric_array = sample.is_a?(Array) && sample.all? { |e| e.is_a?(Numeric) }
      raise TypeError, "Expecting Array of numerics" unless numeric_array
    end

    Rust.exclusive do
      Rust['correlation.a'] = d1
      Rust['correlation.b'] = d2
      Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")

      outcome = Result.new
      outcome.name = "Spearman's rank correlation rho"
      outcome.statistics['S'] = Rust._pull('correlation.result$statistic')
      outcome.pvalue = Rust._pull('correlation.result$p.value')
      outcome.correlation = Rust._pull('correlation.result$estimate')

      # Returns from Spearman.test itself, straight out of the exclusive block.
      return outcome
    end
  end

  # Convenience wrapper returning only the correlation estimate of {test}.
  def self.estimate(d1, d2)
    test(d1, d2).correlation
  end
end
|
55
|
+
|
56
|
+
# Kendall's rank correlation (tau), delegated to R's cor.test.
class Kendall
  # Runs the correlation test on two samples.
  #
  # @param d1 [Array<Numeric>] first sample
  # @param d2 [Array<Numeric>] second sample
  # @return [Result] test name, 'T' statistic, p-value and correlation estimate
  # @raise [TypeError] if either argument is not an Array of Numerics
  def self.test(d1, d2)
    raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }

    Rust.exclusive do
      Rust['correlation.a'] = d1
      Rust['correlation.b'] = d2

      # method='k' selects Kendall; the previous 'p' silently ran a Pearson
      # test and reported it under Kendall's name.
      Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='k')")

      result = Result.new
      result.name = "Kendall's rank correlation tau"
      result.statistics['T'] = Rust._pull('correlation.result$statistic')
      result.pvalue = Rust._pull('correlation.result$p.value')
      result.correlation = Rust._pull('correlation.result$estimate')

      return result
    end
  end

  # Convenience wrapper returning only the correlation estimate of {test}.
  def self.estimate(d1, d2)
    self.test(d1, d2).correlation
  end
end
|
81
|
+
|
82
|
+
# Outcome of a correlation test: its name, the test statistics, the p-value
# and the correlation estimate.
class Result
  attr_accessor :name
  attr_accessor :statistics
  attr_accessor :pvalue
  attr_accessor :correlation

  alias :estimate :correlation

  def initialize
    @statistics = {}
  end

  # Looks up a statistic by name.
  #
  # Symbol keys (as written by #[]=) are tried first. String keys are then
  # tried as a fallback, because callers that fill +statistics+ directly
  # (e.g. <tt>result.statistics['t'] = ...</tt>) store them as strings and
  # would otherwise never be found here.
  #
  # @param name [#to_sym, #to_s] statistic name
  # @return [Object, nil] the stored value, or nil when absent
  def [](name)
    return @statistics[name.to_sym] if @statistics.key?(name.to_sym)

    @statistics[name.to_s]
  end

  # Stores a statistic under the symbolized name.
  def []=(name, value)
    @statistics[name.to_sym] = value
  end

  # One-line human-readable summary of the result.
  def to_s
    return "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " +
           "#{ statistics.map { |k, v| k.to_s + " -> " + v.to_s }.join(", ") }."
  end
end
|
107
|
+
end
|
108
|
+
|
109
|
+
# R-flavoured helpers for correlation analysis.
module Rust::RBindings
  # Correlation estimate between two samples; accepts the same options as
  # #cor_test.
  def cor(d1, d2, **options)
    cor_test(d1, d2, **options).correlation
  end

  # Runs a correlation test chosen by the :method option.
  #
  # The option is matched as a case-insensitive prefix of "pearson",
  # "spearman" or "kendall", tried in that order, so an absent or empty
  # method selects Pearson.
  #
  # @raise [RuntimeError] when the method matches none of the three names
  def cor_test(d1, d2, **options)
    method = options[:method].to_s.downcase

    full_name = %w[pearson spearman kendall].find { |candidate| candidate.start_with?(method) }
    raise "Unsupported method #{method}" unless full_name

    Rust::Correlation.const_get(full_name.capitalize).test(d1, d2)
  end
end
|
data/lib/rust-calls.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require_relative 'rust-core'
|
2
|
+
|
3
|
+
module Rust
|
4
|
+
# An R function call: a function name plus positional arguments and named
# options, renderable as R source through #to_R.
class Function
  attr_reader :name
  attr_reader :arguments
  attr_reader :options

  # @param name [String] name of the R function to call
  def initialize(name)
    # Stored as @name so the +name+ reader actually returns it; it was
    # previously kept in @function, leaving +name+ always nil.
    @name = name
    @arguments = Arguments.new
    @options = Options.new
  end

  # Replaces the named options.
  #
  # @raise [TypeError] unless +options+ is an Options
  def options=(options)
    raise TypeError, "Expected Options" unless options.is_a?(Options)

    @options = options
  end

  # Replaces the positional arguments.
  #
  # @raise [TypeError] unless +arguments+ is an Arguments
  def arguments=(arguments)
    # Checks the value being assigned; the old check tested +options+ and so
    # rejected every assignment.
    raise TypeError, "Expected Arguments" unless arguments.is_a?(Arguments)

    @arguments = arguments
  end

  # Renders the call as R source, omitting whichever of the arguments and
  # options renders to an empty string.
  def to_R
    params = [@arguments.to_R, @options.to_R].select { |v| v != "" }.join(",")
    return "#{@name}(#{params})"
  end

  # Evaluates the rendered call through Rust._eval.
  def call
    Rust._eval(self.to_R)
  end
end
|
36
|
+
|
37
|
+
# A reference to an R variable; renders as the bare name given at creation.
class Variable
  # @param name [String] name of the R variable
  def initialize(name)
    @name = name
  end

  # @return the name exactly as it was passed to the constructor
  def to_R
    return @name
  end
end
|
46
|
+
|
47
|
+
# Positional arguments of an R call; each element must respond to #to_R.
class Arguments < Array
  # Renders every element and joins the pieces with ", ".
  def to_R
    rendered = map(&:to_R)
    rendered.join(", ")
  end
end
|
52
|
+
|
53
|
+
# Named options of an R call; each value must respond to #to_R.
class Options < Hash
  # Renders the entries as "key=value" pairs joined with ", ".
  def to_R
    pairs = []
    each_pair { |key, value| pairs << "#{key}=#{value.to_R}" }
    pairs.join(", ")
  end

  # Builds an Options from any hash, converting every key to a String.
  #
  # @param hash [Hash] source key/value pairs
  # @return [Options]
  def self.from_hash(hash)
    hash.each_with_object(Options.new) do |(key, value), options|
      options[key.to_s] = value
    end
  end
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# Reopened without adding anything: this file contributes no R-style bindings.
module Rust::RBindings
end
|
data/lib/rust-core.rb
CHANGED
@@ -9,8 +9,17 @@ module Rust
|
|
9
9
|
|
10
10
|
R_ENGINE = RinRuby.new(echo: false)
|
11
11
|
|
12
|
+
private_constant :R_ENGINE
|
13
|
+
private_constant :R_MUTEX
|
14
|
+
private_constant :CLIENT_MUTEX
|
15
|
+
|
16
|
+
@@debugging = false
|
12
17
|
@@in_client_mutex = false
|
13
18
|
|
19
|
+
# Switches on debug tracing. Once set, _rexec prints each R command it is
# asked to run. Nothing in the visible code switches it back off.
def self.debug
  @@debugging = true
end
|
22
|
+
|
14
23
|
def self.exclusive
|
15
24
|
result = nil
|
16
25
|
CLIENT_MUTEX.synchronize do
|
@@ -27,7 +36,7 @@ module Rust
|
|
27
36
|
elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
|
28
37
|
R_ENGINE.assign(variable, value)
|
29
38
|
else
|
30
|
-
raise "Given #{
|
39
|
+
raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
|
31
40
|
end
|
32
41
|
|
33
42
|
end
|
@@ -66,6 +75,7 @@ module Rust
|
|
66
75
|
end
|
67
76
|
|
68
77
|
def self._rexec(r_command, return_warnings = false)
|
78
|
+
puts "Calling _rexec with command: #{r_command}" if @@debugging
|
69
79
|
R_MUTEX.synchronize do
|
70
80
|
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
71
81
|
|
@@ -120,41 +130,85 @@ module Rust
|
|
120
130
|
@labels.each { |label| @data[label] = [] }
|
121
131
|
elsif labels_or_data.is_a? Hash
|
122
132
|
@labels = labels_or_data.keys.map { |l| l.to_s }
|
123
|
-
@
|
124
|
-
for i in 0...labels_or_data.values[0].size
|
125
|
-
self.add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
|
126
|
-
end
|
133
|
+
@data = labels_or_data.clone
|
127
134
|
end
|
128
135
|
end
|
129
136
|
|
130
137
|
# Returns row +i+ as a Hash mapping each column label to that row's value.
#
# @param i [Integer] zero-based row index
# @return [Hash, nil] nil when +i+ is negative or past the last row
def row(i)
  return nil if i < 0 || i >= rows

  @data.each_with_object({}) do |(label, values), record|
    record[label] = values[i]
  end
end
|
144
|
+
|
145
|
+
# Returns a new DataFrame with the same columns and the rows in random order.
#
# @param args forwarded unchanged to Array#shuffle! (e.g. +random:+)
# @return [DataFrame]
def shuffle(*args)
  shuffled = DataFrame.new(@labels)

  collected = []
  each { |row| collected.push(row) }
  collected.shuffle!(*args)
  collected.each { |row| shuffled << row }

  shuffled
end
|
158
|
+
|
159
|
+
# Selects a sub-frame by row indices and/or column names.
#
# @param rows [Range, Array, nil] row indices to keep; any other value leaves
#   the rows untouched
# @param cols [Array, nil] column names to keep (converted to Strings); any
#   other value leaves the columns untouched
# @return [DataFrame] the selection, or self when neither filter applies
# @raise [RuntimeError] when both +rows+ and +cols+ are nil/false
def [](rows, cols=nil)
  raise "You must specify either rows or columns to select" unless rows || cols

  selection = self

  case rows
  when Range, Array
    selection = selection.select_rows { |_row, index| rows.include?(index) }
  end

  if cols.is_a?(Array)
    selection = selection.select_columns(cols.map { |c| c.to_s })
  end

  selection
end
|
133
|
-
alias :[] :row
|
134
173
|
|
135
174
|
# Returns the values stored under column +name+, or nil when there is no
# such column. The stored array itself is returned, not a copy.
def column(name)
  @data[name]
end
|
138
177
|
|
178
|
+
# Renames a column in place.
#
# The new name is converted to a String up front, so that the duplicate
# check, the label list and the data hash all see the same key. Previously
# only the data hash key was stringified. The column also keeps its position
# in the data hash, so row hashes stay in label order.
#
# @param old_name [String] existing column label
# @param new_name [#to_s] replacement label
# @return [String] the new label
# @raise [RuntimeError] if +old_name+ is absent or +new_name+ already exists
def rename_column!(old_name, new_name)
  new_name = new_name.to_s

  raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
  raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_name)

  # Rebuild the hash in its original order, swapping only the renamed key,
  # then update @data in place.
  renamed = @data.map { |label, values| [label == old_name ? new_name : label, values] }.to_h
  @data.replace(renamed)

  @labels[@labels.index(old_name)] = new_name
end
|
185
|
+
|
139
186
|
# Replaces, in place, every value of +column+ with the block's result for
# that value.
#
# @param column [String] label of the column to transform
# @yieldparam value the current cell value
# @return [Array] the transformed column
def transform_column!(column)
  @data[column].map! { |value| yield(value) }
end
|
142
189
|
|
143
190
|
# Returns a new DataFrame holding only the rows for which the block is
# truthy.
#
# @yieldparam row the current row
# @yieldparam index [Integer] the row's index
# @return [DataFrame]
def select_rows
  selected = DataFrame.new(self.column_names)
  each_with_index do |row, index|
    keep = yield(row, index)
    selected << row if keep
  end
  selected
end
|
150
197
|
|
151
|
-
def
|
198
|
+
# Returns a copy of this DataFrame restricted to some of its columns.
#
# @param cols [#include?, nil] labels to keep; when nil the block decides
# @yieldparam label a column label; a truthy result keeps the column
# @return [DataFrame]
# @raise [RuntimeError] when neither +cols+ nor a block is given
def select_columns(cols=nil)
  unless cols || block_given?
    raise "You must specify either the columns you want to select or a selection block"
  end

  result = self.clone
  @labels.each do |label|
    keep = cols ? cols.include?(label) : yield(label)
    result.delete_column(label) unless keep
  end
  result
end
|
211
|
+
alias :select_cols :select_columns
|
158
212
|
|
159
213
|
def delete_column(column)
|
160
214
|
@labels.delete(column)
|
@@ -162,70 +216,18 @@ module Rust
|
|
162
216
|
end
|
163
217
|
|
164
218
|
# Returns the column labels, each converted to a String, as a new Array.
def column_names
  @labels.map(&:to_s)
end
|
167
221
|
alias :colnames :column_names
|
168
222
|
|
169
|
-
def merge(other, by, first_alias = "x", second_alias = "y")
|
170
|
-
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
171
|
-
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
172
|
-
raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
|
173
|
-
raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
|
174
|
-
raise "The aliases can not have the same value" if first_alias == second_alias
|
175
|
-
|
176
|
-
my_keys = {}
|
177
|
-
self.each_with_index do |row, i|
|
178
|
-
key = []
|
179
|
-
by.each do |colname|
|
180
|
-
key << row[colname]
|
181
|
-
end
|
182
|
-
|
183
|
-
my_keys[key] = i
|
184
|
-
end
|
185
|
-
|
186
|
-
merged_column_self = (self.column_names - by)
|
187
|
-
merged_column_other = (other.column_names - by)
|
188
|
-
|
189
|
-
first_alias = first_alias + "." if first_alias.length > 0
|
190
|
-
second_alias = second_alias + "." if second_alias.length > 0
|
191
|
-
|
192
|
-
merged_columns = merged_column_self.map { |colname| "#{first_alias}#{colname}" } + merged_column_other.map { |colname| "#{second_alias}#{colname}" }
|
193
|
-
columns = by + merged_columns
|
194
|
-
result = DataFrame.new(columns)
|
195
|
-
other.each do |other_row|
|
196
|
-
key = []
|
197
|
-
by.each do |colname|
|
198
|
-
key << other_row[colname]
|
199
|
-
end
|
200
|
-
|
201
|
-
my_row_index = my_keys[key]
|
202
|
-
if my_row_index
|
203
|
-
my_row = self[my_row_index]
|
204
|
-
|
205
|
-
to_add = {}
|
206
|
-
by.each do |colname|
|
207
|
-
to_add[colname] = my_row[colname]
|
208
|
-
end
|
209
|
-
|
210
|
-
merged_column_self.each do |colname|
|
211
|
-
to_add["#{first_alias}#{colname}"] = my_row[colname]
|
212
|
-
end
|
213
|
-
|
214
|
-
merged_column_other.each do |colname|
|
215
|
-
to_add["#{second_alias}#{colname}"] = other_row[colname]
|
216
|
-
end
|
217
|
-
|
218
|
-
result << to_add
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
return result
|
223
|
-
end
|
224
|
-
|
225
223
|
# Number of rows, taken from the length of the first column.
#
# @return [Integer] 0 when the DataFrame has no columns at all (previously
#   this case raised NoMethodError on nil)
def rows
  first_column = @data.values.first
  first_column ? first_column.size : 0
end
|
228
226
|
|
227
|
+
# Number of columns, i.e. the number of labels.
def columns
  @labels.length
end
|
230
|
+
|
229
231
|
def add_row(row)
|
230
232
|
if row.is_a?(Array)
|
231
233
|
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
@@ -249,6 +251,22 @@ module Rust
|
|
249
251
|
end
|
250
252
|
alias :<< :add_row
|
251
253
|
|
254
|
+
# Appends a column, filled either from +values+ or by calling the block once
# per existing row.
#
# @param name label of the new column
# @param values [Array, nil] one value per row; a copy is stored
# @yieldparam row the current row; the block's result becomes the new cell
# @raise [RuntimeError] if the label already exists, if neither values nor a
#   block is given, or if the number of values differs from the row count
def add_column(name, values=nil)
  raise "Column already exists" if @labels.include?(name)
  raise "Values or block required" unless values || block_given?
  raise "Number of values not matching" if values && values.size != rows

  @labels.push(name)
  if values
    @data[name] = values.clone
  else
    # The empty column is registered before iterating, as in the original.
    column = @data[name] = []
    each_with_index { |row, index| column[index] = yield(row) }
  end
end
|
269
|
+
|
252
270
|
def each
|
253
271
|
self.each_with_index do |element, i|
|
254
272
|
yield element
|
@@ -276,9 +294,7 @@ module Rust
|
|
276
294
|
command << "#{variable_name} <- data.frame()"
|
277
295
|
row_index = 1
|
278
296
|
self.each do |row|
|
279
|
-
|
280
|
-
values = row.values.map { |v| v.inspect }.join(",")
|
281
|
-
command << "#{variable_name}[#{row_index}, c(#{keys})] <- c(#{values})"
|
297
|
+
command << "#{variable_name}[#{row_index.to_R}, #{row.keys.to_R}] <- #{row.values.to_R}"
|
282
298
|
|
283
299
|
row_index += 1
|
284
300
|
end
|
@@ -289,20 +305,140 @@ module Rust
|
|
289
305
|
# Renders the DataFrame as a text table: a rule, a right-aligned header, a
# rule, one "[index] values" line per row, and a closing rule. Cells are
# shown through #inspect and separated by " | ".
#
# @return [String] the table, without a trailing newline
def inspect
  separator = " | "

  col_widths = self.column_names.map do |colname|
    cell_lengths = @data[colname].map { |e| e.inspect.length }
    [colname, ([colname.length] + cell_lengths).max]
  end.to_h
  # Width of the index gutter: digits of the last row index plus "[", "]"
  # and one space.
  col_widths[:rowscol] = (self.rows - 1).inspect.length + 3

  rule = "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
  header = (" " * col_widths[:rowscol]) +
           self.column_names.map { |colname| (" " * (col_widths[colname] - colname.length)) + colname }.join(separator)

  lines = [rule, header, rule]
  self.each_with_index do |row, i|
    gutter_padding = " " * (col_widths[:rowscol] - i.inspect.length - 3)
    index_part = "[" + gutter_padding + "#{i}] "
    row_part = row.map do |colname, value|
      shown = value.inspect
      (" " * (col_widths[colname] - shown.length)) + shown
    end.join(separator)

    lines << index_part + row_part
  end
  lines << rule

  lines.join("\n")
end
|
325
|
+
|
326
|
+
# Returns a new DataFrame holding the first +n+ rows (all rows when there
# are fewer).
#
# @param n [Integer] number of leading rows to keep
# @return [DataFrame]
def head(n=10)
  top = DataFrame.new(self.column_names)
  each_with_index do |row, index|
    next unless index < n

    top << row
  end
  top
end
|
333
|
+
|
334
|
+
# Joins this DataFrame with +other+ on the columns listed in +by+, keeping
# only rows of +other+ whose key also occurs here.
#
# Columns not used as keys are prefixed with "<alias>." so the two sides
# cannot clash. Two empty aliases are allowed only when the non-key columns
# of both frames are disjoint.
#
# @param other [DataFrame] frame to join with
# @param by [Array<String>] key columns, present in both frames
# @param first_alias [String] prefix for this frame's non-key columns
# @param second_alias [String] prefix for +other+'s non-key columns
# @return [DataFrame] key columns, then this frame's columns, then +other+'s
# @raise [TypeError] on a wrong +other+ or +by+ type
# @raise [RuntimeError] on missing key columns, equal non-empty aliases, or
#   overlapping columns with empty aliases
def merge(other, by, first_alias = "x", second_alias = "y")
  raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
  raise TypeError, "Expected list of strings" unless by.is_a?(Array) && by.all? { |e| e.is_a?(String) }
  raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
  raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size

  if first_alias == second_alias
    raise "The aliases can not have the same value" unless first_alias == ""

    intersection = (self.column_names - by) & (other.column_names - by)
    raise "Cannot merge because the following columns would overlap: #{intersection}" if intersection.size > 0
  end

  # Index this frame's rows by key; when a key repeats, the last row wins.
  my_keys = {}
  self.each_with_index do |row, i|
    my_keys[by.map { |colname| row[colname] }] = i
  end

  merged_column_self = self.column_names - by
  merged_column_other = other.column_names - by

  first_alias = first_alias + "." if first_alias.length > 0
  second_alias = second_alias + "." if second_alias.length > 0

  renamed_self = merged_column_self.map { |colname| "#{first_alias}#{colname}" }
  renamed_other = merged_column_other.map { |colname| "#{second_alias}#{colname}" }
  result = DataFrame.new(by + (renamed_self + renamed_other))

  other.each do |other_row|
    my_row_index = my_keys[by.map { |colname| other_row[colname] }]
    next unless my_row_index

    my_row = self.row(my_row_index)

    to_add = {}
    by.each { |colname| to_add[colname] = my_row[colname] }
    merged_column_self.each { |colname| to_add["#{first_alias}#{colname}"] = my_row[colname] }
    merged_column_other.each { |colname| to_add["#{second_alias}#{colname}"] = other_row[colname] }

    result << to_add
  end

  result
end
|
399
|
+
|
400
|
+
# Appends every row of +dataframe+ to this DataFrame, in place.
#
# @param dataframe [DataFrame] source of the rows
# @return [true]
# @raise [TypeError] unless +dataframe+ is a DataFrame
# @raise [RuntimeError] unless every column of this frame also exists in
#   +dataframe+
def bind_rows!(dataframe)
  raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)

  unless (self.column_names & dataframe.column_names).size == self.columns
    only_here = self.column_names - dataframe.column_names
    only_there = dataframe.column_names - self.column_names
    raise "The columns are not compatible: #{only_here} - #{only_there}"
  end

  dataframe.each { |row| self << row }

  true
end
|
410
|
+
alias :rbind! :bind_rows!
|
411
|
+
|
412
|
+
# Appends every column of +dataframe+ to this DataFrame, in place.
#
# @param dataframe [DataFrame] source of the columns
# @return [true]
# @raise [TypeError] unless +dataframe+ is a DataFrame
# @raise [RuntimeError] if the row counts differ or a column name is shared
def bind_columns!(dataframe)
  raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
  raise "The number of rows are not compatible" unless self.rows == dataframe.rows

  shared = self.column_names & dataframe.column_names
  raise "The dataset would override some columns" if shared.size > 0

  dataframe.column_names.each do |column_name|
    add_column(column_name, dataframe.column(column_name))
  end

  true
end
|
423
|
+
alias :cbind! :bind_columns!
|
424
|
+
|
425
|
+
# Non-destructive form of #bind_rows!: clones this frame, appends the rows of
# +dataframe+ to the clone and returns it.
#
# @return [DataFrame] the clone with the rows appended
def bind_rows(dataframe)
  self.clone.tap { |copy| copy.bind_rows!(dataframe) }
end
|
430
|
+
alias :rbind :bind_rows
|
431
|
+
|
432
|
+
# Non-destructive form of #bind_columns!: clones this frame, appends the
# columns of +dataframe+ to the clone and returns it.
#
# @return [DataFrame] the clone with the columns appended
def bind_columns(dataframe)
  self.clone.tap { |copy| copy.bind_columns!(dataframe) }
end
|
437
|
+
alias :cbind :bind_columns
|
438
|
+
|
439
|
+
# Returns an independent copy of this DataFrame.
#
# Each column array is copied as well. The constructor only makes a shallow
# copy of the hash it is given, so passing @data directly left the copy and
# the original sharing the same column arrays.
# NOTE(review): the shared arrays presumably made row-appending on a copy
# (e.g. via bind_rows) alter the original too; add_row is not fully visible
# here, so confirm.
#
# @return [DataFrame]
def clone
  DataFrame.new(@data.map { |label, values| [label, values.clone] }.to_h)
end
|
306
442
|
end
|
307
443
|
|
308
444
|
class Matrix < RustDatatype
|
@@ -344,36 +480,104 @@ module Rust
|
|
344
480
|
end
|
345
481
|
end
|
346
482
|
|
347
|
-
class
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
483
|
+
# An arithmetic sequence from +min+ to +max+ with a given step, iterable in
# Ruby and renderable as an R seq() call.
class Sequence
  attr_reader :min, :max

  # @param min lower bound
  # @param max upper bound
  # @param step distance between consecutive values
  def initialize(min, max, step=1)
    @min, @max, @step = min, max, step
  end

  # Changes the step; returns the new step.
  def step(step)
    @step = step
  end

  # Yields every value of the sequence, walking the range min..max by the
  # current step.
  def each
    (@min..@max).step(@step) { |v| yield v }
  end

  # Collects the values produced by #each into an Array.
  def to_a
    [].tap { |values| each { |v| values << v } }
  end

  # R source for the same sequence.
  def to_R
    "seq(from=#{@min}, to=#{@max}, by=#{@step})"
  end
end
|
515
|
+
end
|
516
|
+
|
517
|
+
# Core extension: +true+ renders as R's logical constant.
class TrueClass
  def to_R
    return "TRUE"
  end
end
|
522
|
+
|
523
|
+
# Core extension: +false+ renders as R's logical constant.
class FalseClass
  def to_R
    return "FALSE"
  end
end
|
528
|
+
|
529
|
+
# Core extension: the fallback #to_R for objects with no R representation
# of their own. It always raises.
class Object
  # @raise [TypeError] always, naming the receiver's class
  def to_R
    message = "Unsupported type for #{self.class}"
    raise TypeError, message
  end
end
|
534
|
+
|
535
|
+
# Core extension: +nil+ renders as R's NULL.
class NilClass
  def to_R
    "NULL"
  end
end
|
540
|
+
|
541
|
+
# Core extension: numbers render as their Ruby #inspect text.
class Numeric
  def to_R
    return inspect
  end
end
|
546
|
+
|
547
|
+
# Core extension: floats render as R numeric literals.
class Float
  # NaN becomes "NA" (unchanged behaviour). Infinite values become "Inf" or
  # "-Inf", the spelling R uses, instead of Ruby's "Infinity". Finite values
  # are left to Numeric#to_R.
  #
  # @return [String]
  def to_R
    return "NA" if nan?
    return (self > 0 ? "Inf" : "-Inf") if infinite?

    super
  end
end
|
552
|
+
|
553
|
+
# Core extension: arrays render as an R c(...) vector of their elements.
class Array
  # Elements are rendered with their own #to_R and joined with "," (no space).
  def to_R
    elements = map(&:to_R).join(",")
    "c(#{elements})"
  end
end
|
558
|
+
|
559
|
+
# Core extension: strings render as their Ruby #inspect text, i.e. wrapped in
# double quotes with Ruby's escaping.
class String
  def to_R
    inspect
  end
end
|
564
|
+
|
565
|
+
# Core extension: a range renders as the two-element R vector of its
# minimum and maximum.
class Range
  # Uses the receiver's own #min and #max. The previous body referenced an
  # undefined local +range+ and raised NameError on every call.
  #
  # @return [String] e.g. "c(1,5)" for 1..5
  def to_R
    [min, max].to_R
  end
end
|
570
|
+
|
571
|
+
# R-flavoured helpers for CSV input/output and DataFrame construction.
module Rust::RBindings
  # Delegates to Rust::CSV.read with the same filename and options.
  def read_csv(filename, **options)
    return Rust::CSV.read(filename, **options)
  end

  # Delegates to Rust::CSV.write with the same filename, frame and options.
  def write_csv(filename, dataframe, **options)
    return Rust::CSV.write(filename, dataframe, **options)
  end

  # Builds a Rust::DataFrame from the given constructor arguments.
  def data_frame(*args)
    return Rust::DataFrame.new(*args)
  end
end
|