rust 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rust-core.rb +337 -24
- data/lib/rust-tests.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a39315e623da717c4f035a11fbe7363bf3aae0d8885f922c4f0fca689bc7b90a
|
4
|
+
data.tar.gz: e0a1ec7e485a0f9521a42191f6a97d833df8eb35c961d31c9ba67f0a6f0c0c22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaffc66a8a3250f7f687bdf1dec96421681f25ff394010f0c15d7e65b5ed87a9deb25111c7f56baa60b19a53a4849bd92a665bfe507f9ba9b096b722cddeed53
|
7
|
+
data.tar.gz: c5fc8ce8b55347ca402783ab352310719fcdd008eaa2f1ed3b00ca93137736ca6a2d8c9d9b2169865212e7a09ca955e5909d7983b0461bda0c539b51f1c0e379
|
data/lib/rust-core.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'code-assertions'
|
2
2
|
require 'stringio'
|
3
3
|
require 'rinruby'
|
4
|
+
require 'csv'
|
4
5
|
|
5
6
|
module Rust
|
6
7
|
CLIENT_MUTEX = Mutex.new
|
@@ -11,27 +12,77 @@ module Rust
|
|
11
12
|
@@in_client_mutex = false
|
12
13
|
|
13
14
|
# Runs the given block while holding CLIENT_MUTEX and returns the block's value.
#
# While the block runs, @@in_client_mutex is true; _rexec asserts on that flag
# before sending anything to R.
#
# The flag is lowered in an `ensure` clause: if the block raises, the mutex is
# released by `synchronize`, so the flag must not be left stuck at true.
def self.exclusive
  CLIENT_MUTEX.synchronize do
    @@in_client_mutex = true
    begin
      # The value of the block becomes the value of `synchronize`, and
      # therefore the return value of this method.
      yield
    ensure
      @@in_client_mutex = false
    end
  end
end
|
23
|
+
|
24
|
+
# Assigns +value+ to the R variable named +variable+.
#
# * A RustDatatype loads itself through its own load_in_r_as, which receives
#   the variable name as a String.
# * A String, Numeric or Array is handed to R_ENGINE.assign unchanged.
#
# Raises RuntimeError for any other kind of value.
def self.[]=(variable, value)
  case value
  when RustDatatype
    value.load_in_r_as(variable.to_s)
  when String, Numeric, Array
    R_ENGINE.assign(variable, value)
  else
    # Report the class of the rejected value, not of the variable name.
    raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
  end
end
|
34
|
+
|
35
|
+
# Reads the R variable named +variable+ back into Ruby.
#
# The conversion is delegated to +type+, which must respond to
# pull_variable; it defaults to RustDatatype.
def self.[](variable, type=RustDatatype)
  type.pull_variable(variable)
end
|
38
|
+
|
39
|
+
# Evaluates a large R script by sending it to R_ENGINE.eval in pieces.
#
# r_command       - a String, or an Array of Strings that is joined with "\n".
# return_warnings - forwarded to _rexec.
#
# The script is split into lines, and consecutive lines are grouped into
# chunks shorter than 10000 characters; each chunk is evaluated separately.
# The value produced inside _rexec is true only if every chunk evaluated to
# a truthy result.
#
# A chunk always contains at least one line. A single line of 10000
# characters or more is therefore sent on its own; otherwise it could never
# be consumed and the loop would not terminate.
#
# NOTE(review): chunk boundaries fall between lines without regard to R
# syntax, so a multi-line R statement may be split across two eval calls.
def self._eval_big(r_command, return_warnings = false)
  r_command = r_command.join("\n") if r_command.is_a?(Array)
  max_chunk_length = 10000

  self._rexec(r_command, return_warnings) do |cmd|
    pending = cmd.lines
    result = true

    until pending.empty?
      chunk = pending.shift
      chunk << pending.shift while !pending.empty? && (chunk.length + pending.first.length < max_chunk_length)

      result &= R_ENGINE.eval(chunk)
    end

    result
  end
end
|
20
59
|
|
21
60
|
# Runs +r_command+ through R_ENGINE.pull inside _rexec and returns whatever
# _rexec returns for it. +return_warnings+ is forwarded to _rexec untouched.
def self._pull(r_command, return_warnings = false)
  self._rexec(r_command, return_warnings) do |command|
    R_ENGINE.pull(command)
  end
end
|
63
|
+
|
64
|
+
# Runs +r_command+ through R_ENGINE.eval inside _rexec and returns whatever
# _rexec returns for it. +return_warnings+ is forwarded to _rexec untouched.
def self._eval(r_command, return_warnings = false)
  self._rexec(r_command, return_warnings) do |command|
    R_ENGINE.eval(command)
  end
end
|
67
|
+
|
68
|
+
def self._rexec(r_command, return_warnings = false)
|
69
|
+
R_MUTEX.synchronize do
|
23
70
|
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
24
71
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
72
|
+
result = nil
|
73
|
+
begin
|
74
|
+
$stdout = StringIO.new
|
75
|
+
if return_warnings
|
76
|
+
R_ENGINE.echo(true, true)
|
77
|
+
else
|
78
|
+
R_ENGINE.echo(false, false)
|
79
|
+
end
|
80
|
+
result = yield(r_command)
|
81
|
+
ensure
|
29
82
|
R_ENGINE.echo(false, false)
|
83
|
+
warnings = $stdout.string
|
84
|
+
$stdout = STDOUT
|
30
85
|
end
|
31
|
-
result = R_ENGINE.pull(r_command)
|
32
|
-
R_ENGINE.echo(false, false)
|
33
|
-
warnings = $stdout.string
|
34
|
-
$stdout = STDOUT
|
35
86
|
|
36
87
|
if return_warnings
|
37
88
|
return result, warnings.lines.map { |w| w.strip.chomp }
|
@@ -41,26 +92,288 @@ module Rust
|
|
41
92
|
end
|
42
93
|
end
|
43
94
|
|
44
|
-
|
45
|
-
|
46
|
-
|
95
|
+
# Base class for Ruby values that can be moved to and from R.
class RustDatatype
  # Pulls the R variable named +variable+ through Rust._pull and returns the
  # result as is.
  def self.pull_variable(variable)
    Rust._pull(variable)
  end

  # Loads this object in R under a given variable name. Subclasses must
  # override it; this base implementation always raises RuntimeError.
  #
  # Rust.[]= and the subclasses in this file use the one-argument form
  # (variable_name). Any argument list is accepted here, so that a subclass
  # which forgot to override the method fails with "Not implemented" rather
  # than with an ArgumentError about the argument count. The historical
  # two-argument form (r_instance, variable_name) keeps working as well.
  def load_in_r_as(*_args)
    raise "Not implemented"
  end
end
|
104
|
+
|
105
|
+
# In-memory table with named columns that can be pushed to R as a data.frame.
#
# Columns are stored in @data as label (String) => Array of values; @labels
# keeps the labels in column order.
class DataFrame < RustDatatype
  # Builds a DataFrame from the R variable named +variable+ by pulling its
  # column names and then each column through `variable$column`.
  #
  # NOTE(review): the pulled values are wrapped with Array() on the
  # assumption that the R bridge may hand back a bare scalar for a
  # length-one vector (one column, or one row) — confirm against rinruby.
  def self.pull_variable(variable)
    hash = {}
    Array(Rust._pull("colnames(#{variable})")).each do |col|
      hash[col] = Array(Rust._pull("#{variable}$#{col}"))
    end
    DataFrame.new(hash)
  end

  # labels_or_data - either an Array of column labels (creates an empty
  #                  frame) or a Hash of label => Array of values.
  #
  # For a Hash, the number of rows is the size of the first column; an empty
  # Hash produces a frame with no columns and no rows.
  #
  # Raises TypeError for anything that is neither an Array nor a Hash.
  def initialize(labels_or_data)
    @data = {}

    case labels_or_data
    when Array
      @labels = labels_or_data.map(&:to_s)
      @labels.each { |label| @data[label] = [] }
    when Hash
      @labels = labels_or_data.keys.map(&:to_s)
      @labels.each { |label| @data[label] = [] }

      first_column = labels_or_data.values.first
      row_count = first_column ? first_column.size : 0
      row_count.times do |i|
        add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
      end
    else
      raise TypeError, "Expected an Array of labels or a Hash of columns"
    end
  end

  # Returns row +i+ as a Hash of label => value.
  def row(i)
    @data.map { |label, values| [label, values[i]] }.to_h
  end
  alias :[] :row

  # Returns the Array holding the values of column +name+ (nil if there is
  # no such column). The name is converted with to_s, like the labels are.
  def column(name)
    @data[name.to_s]
  end

  # Replaces every value of +column+ with the result of the block, in place.
  def transform_column!(column)
    @data[column.to_s].map! { |e| yield e }
  end

  # Returns a new DataFrame with the rows for which the block is truthy.
  def select_rows
    result = DataFrame.new(self.column_names)
    self.each do |row|
      result << row if yield row
    end
    result
  end

  # Returns a new DataFrame with the columns whose label makes the block
  # truthy. The receiver is left untouched: the result is built from copies
  # of the selected columns rather than from a shallow clone that would
  # share @labels and @data with the receiver.
  def select_cols
    kept = @labels.select { |label| yield label }
    DataFrame.new(kept.map { |label| [label, @data[label]] }.to_h)
  end

  # Removes +column+ from this DataFrame, in place.
  def delete_column(column)
    @labels.delete(column.to_s)
    @data.delete(column.to_s)
  end

  # Returns the column labels as Strings.
  def column_names
    @data.keys.map(&:to_s)
  end
  alias :colnames :column_names

  # Joins this DataFrame with +other+ on the columns listed in +by+.
  #
  # Only rows of +other+ whose key also appears in this frame are kept. When
  # several rows of this frame share a key, the last one is used.
  # Columns that are not part of +by+ are prefixed with "first_alias." (this
  # frame) or "second_alias." (+other+); an empty alias adds no prefix.
  #
  # Raises TypeError if +other+ is not a DataFrame or +by+ is not an Array
  # of Strings, and RuntimeError if a +by+ column is missing from either
  # frame or the two aliases are equal.
  def merge(other, by, first_alias = "x", second_alias = "y")
    raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
    raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
    raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
    raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
    raise "The aliases can not have the same value" if first_alias == second_alias

    key_of = ->(row) { by.map { |colname| row[colname] } }

    # Index of this frame's rows by join key.
    my_keys = {}
    self.each_with_index { |row, i| my_keys[key_of.call(row)] = i }

    merged_column_self = self.column_names - by
    merged_column_other = other.column_names - by

    first_prefix = first_alias.length > 0 ? "#{first_alias}." : first_alias
    second_prefix = second_alias.length > 0 ? "#{second_alias}." : second_alias

    merged_columns = merged_column_self.map { |colname| "#{first_prefix}#{colname}" } +
                     merged_column_other.map { |colname| "#{second_prefix}#{colname}" }
    result = DataFrame.new(by + merged_columns)

    other.each do |other_row|
      my_row_index = my_keys[key_of.call(other_row)]
      next unless my_row_index

      my_row = self.row(my_row_index)

      to_add = {}
      by.each { |colname| to_add[colname] = my_row[colname] }
      merged_column_self.each { |colname| to_add["#{first_prefix}#{colname}"] = my_row[colname] }
      merged_column_other.each { |colname| to_add["#{second_prefix}#{colname}"] = other_row[colname] }

      result << to_add
    end

    result
  end

  # Returns the number of rows (0 when the frame has no columns).
  def rows
    first_column = @data.values.first
    first_column ? first_column.size : 0
  end

  # Appends a row and returns true.
  #
  # row - an Array with one value per column, in column order, or a Hash
  #       whose keys (compared after to_s) are exactly the column labels.
  #
  # Raises RuntimeError when the size or the keys do not match the columns,
  # and TypeError when +row+ is neither an Array nor a Hash.
  def add_row(row)
    if row.is_a?(Array)
      raise "Expected an array of size #{@data.size}" unless row.size == @data.size

      @labels.each_with_index do |label, i|
        @data[label] << row[i]
      end

      true
    elsif row.is_a?(Hash)
      raise "Expected a hash with the following keys: #{@data.keys}" unless row.keys.map(&:to_s).sort == @data.keys.sort

      row.each do |key, value|
        @data[key.to_s] << value
      end

      true
    else
      raise TypeError, "Expected an Array or a Hash"
    end
  end
  alias :<< :add_row

  # Yields every row as a Hash of label => value and returns self.
  # Without a block, returns an Enumerator.
  def each
    return enum_for(:each) unless block_given?

    self.each_with_index do |element, _i|
      yield element
    end

    self
  end

  # Yields every row (Hash of label => value) with its zero-based index and
  # returns self. Without a block, returns an Enumerator.
  def each_with_index
    return enum_for(:each_with_index) unless block_given?

    self.rows.times do |i|
      element = {}
      @labels.each do |label|
        element[label] = @data[label][i]
      end

      yield element, i
    end

    self
  end

  # Creates an R data.frame named +variable_name+ holding this frame's
  # content. One R assignment is generated per row (R indexes rows from 1)
  # and the whole script is sent through Rust._eval_big.
  def load_in_r_as(variable_name)
    command = ["#{variable_name} <- data.frame()"]

    self.each_with_index do |row, i|
      keys = row.keys.map(&:inspect).join(",")
      values = row.values.map { |v| r_literal(v) }.join(",")
      command << "#{variable_name}[#{i + 1}, c(#{keys})] <- c(#{values})"
    end

    Rust._eval_big(command)
  end

  # Renders the frame as a text table: a header with right-aligned column
  # names, then one "[index] values" line per row, framed by dashed rules.
  def inspect
    separator = " | "
    col_widths = self.column_names.map { |colname| [colname, ([colname.length] + @data[colname].map { |e| e.inspect.length }).max] }.to_h
    col_widths[:rowscol] = self.rows.inspect.length + 3

    rule = "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
    header = (" " * col_widths[:rowscol]) + self.column_names.map { |colname| (" " * (col_widths[colname] - colname.length)) + colname }.join(separator)

    lines = [rule, header, rule]
    self.each_with_index do |row, i|
      lines << "[#{i}] " + row.map { |colname, value| (" " * (col_widths[colname] - value.inspect.length)) + value.inspect }.join(separator)
    end
    lines << rule

    lines.join("\n")
  end

  private

  # Returns the R source text for a Ruby value. nil, true and false have no
  # meaning in R when written the Ruby way, so they become NA, TRUE and
  # FALSE; every other value is written with its Ruby `inspect` form.
  def r_literal(value)
    case value
    when nil then "NA"
    when true then "TRUE"
    when false then "FALSE"
    else value.inspect
    end
  end
end
|
307
|
+
|
308
|
+
# Numeric matrix stored as an Array of rows, loadable in R as a matrix.
class Matrix < RustDatatype
  # Pulls the R variable named +variable+ through Rust._pull and returns the
  # result as is.
  def self.pull_variable(variable)
    Rust._pull(variable)
  end

  # data - a non-empty Array of equally sized Arrays of Numerics.
  #
  # Raises TypeError when +data+ is not an Array of Arrays or contains a
  # non-numeric element, and RuntimeError when it holds no element or its
  # rows differ in size.
  #
  # The container is type-checked before `flatten` is called, so a non-Array
  # argument yields the documented TypeError instead of a NoMethodError, and
  # every row is checked, not only the first one.
  def initialize(data)
    raise TypeError, "Expected array of array" unless data.is_a?(Array)
    raise "Empty matrices are not allowed" if data.flatten.empty?
    raise TypeError, "Expected array of array" unless data.all? { |row| row.is_a?(Array) }
    raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
    raise "All the rows must have the same size" unless data.map(&:size).uniq.size == 1

    # Copy each row as well: a shallow copy of the outer Array would let
    # []= write into the caller's row Arrays.
    @data = data.map(&:dup)
  end

  # Returns the element at row +i+, column +j+ (zero-based).
  def [](i, j)
    @data[i][j]
  end

  # Returns the number of rows.
  def rows
    @data.size
  end

  # Returns the number of columns.
  def cols
    @data[0].size
  end

  # Stores +value+ at row +i+, column +j+ (zero-based).
  # Raises RuntimeError when an index is out of range.
  def []=(i, j, value)
    raise "Wrong i" unless i.between?(0, @data.size - 1)
    raise "Wrong j" unless j.between?(0, @data[0].size - 1)
    @data[i][j] = value
  end

  # Creates an R matrix named +variable_name+ with this matrix's content,
  # filled row by row. `TRUE` is spelled out because `T` is an ordinary R
  # variable that a script may have reassigned.
  def load_in_r_as(variable_name)
    Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=TRUE)")
  end
end
|
346
|
+
|
347
|
+
# CSV import/export for Rust::DataFrame, built on Ruby's standard CSV library.
class CSV
  # Reads +filename+ into a Rust::DataFrame.
  #
  # options - forwarded to ::CSV.parse.
  #
  # When the parser yields rows that carry headers (e.g. `headers: true`),
  # the headers become the column labels. When it yields plain Arrays, the
  # columns are labelled "X1", "X2", ... after the size of the first row and
  # values are taken by position.
  def self.read(filename, **options)
    hash = {}
    labels = nil

    ::CSV.parse(File.read(filename), **options) do |row|
      if row.respond_to?(:headers)
        labels ||= row.headers
        labels.each { |label| (hash[label] ||= []) << row[label] }
      else
        # Plain Array rows have no headers and cannot be indexed by label.
        labels ||= (1..row.size).map { |position| "X#{position}" }
        labels.each_with_index { |label, index| (hash[label] ||= []) << row[index] }
      end
    end

    Rust::DataFrame.new(hash)
  end

  # Writes +dataframe+ to +filename+ and returns true.
  #
  # options - forwarded to ::CSV.open. If options[:headers] is truthy, it is
  #           replaced by the DataFrame's column names and a header line is
  #           written; otherwise no header line is written (unless the
  #           caller passes :write_headers explicitly).
  #
  # Raises TypeError unless +dataframe+ is a Rust::DataFrame.
  def self.write(filename, dataframe, **options)
    raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)

    column_names = dataframe.column_names
    write_headers = options[:headers] ? true : false
    options[:headers] = column_names if write_headers

    # Options given by the caller take precedence over the derived default.
    csv_options = { write_headers: write_headers }.merge(options)

    ::CSV.open(filename, 'w', **csv_options) do |csv|
      dataframe.each do |row|
        # Rows are Hashes; emit their values as an Array in column order so
        # that writing also works when the CSV writer has no headers.
        csv << column_names.map { |name| row[name] }
      end
    end

    true
  end
end
|
66
379
|
end
|
data/lib/rust-tests.rb
CHANGED
@@ -66,7 +66,7 @@ module Rust::StatisticalTests::Wilcoxon
|
|
66
66
|
|
67
67
|
_, warnings = Rust._eval("result = wilcox.test(a, b, alternative='two.sided', paired=F)", true)
|
68
68
|
result = Rust::StatisticalTests::Result.new
|
69
|
-
result.name = "Mann–Whitney U test
|
69
|
+
result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
|
70
70
|
result.pvalue = Rust._pull("result$p.value")
|
71
71
|
result[:w] = Rust._pull("result$statistic")
|
72
72
|
result.exact = !warnings.include?("cannot compute exact p-value with ties")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.1'
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rinruby
|