rust 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rust-core.rb +337 -24
- data/lib/rust-tests.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a39315e623da717c4f035a11fbe7363bf3aae0d8885f922c4f0fca689bc7b90a
|
4
|
+
data.tar.gz: e0a1ec7e485a0f9521a42191f6a97d833df8eb35c961d31c9ba67f0a6f0c0c22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaffc66a8a3250f7f687bdf1dec96421681f25ff394010f0c15d7e65b5ed87a9deb25111c7f56baa60b19a53a4849bd92a665bfe507f9ba9b096b722cddeed53
|
7
|
+
data.tar.gz: c5fc8ce8b55347ca402783ab352310719fcdd008eaa2f1ed3b00ca93137736ca6a2d8c9d9b2169865212e7a09ca955e5909d7983b0461bda0c539b51f1c0e379
|
data/lib/rust-core.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'code-assertions'
|
2
2
|
require 'stringio'
|
3
3
|
require 'rinruby'
|
4
|
+
require 'csv'
|
4
5
|
|
5
6
|
module Rust
|
6
7
|
CLIENT_MUTEX = Mutex.new
|
@@ -11,27 +12,77 @@ module Rust
|
|
11
12
|
@@in_client_mutex = false
|
12
13
|
|
13
14
|
def self.exclusive
|
15
|
+
result = nil
|
14
16
|
CLIENT_MUTEX.synchronize do
|
15
17
|
@@in_client_mutex = true
|
16
|
-
yield
|
18
|
+
result = yield
|
17
19
|
@@in_client_mutex = false
|
18
20
|
end
|
21
|
+
return result
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.[]=(variable, value)
|
25
|
+
if value.is_a?(RustDatatype)
|
26
|
+
value.load_in_r_as(variable.to_s)
|
27
|
+
elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
|
28
|
+
R_ENGINE.assign(variable, value)
|
29
|
+
else
|
30
|
+
raise "Given #{variable.class}, expected RustDatatype, String, Numeric, or Array"
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.[](variable, type=RustDatatype)
|
36
|
+
return type.pull_variable(variable)
|
37
|
+
end
|
38
|
+
|
39
|
+
def self._eval_big(r_command, return_warnings = false)
|
40
|
+
r_command = r_command.join("\n") if r_command.is_a?(Array)
|
41
|
+
|
42
|
+
self._rexec(r_command, return_warnings) do |cmd|
|
43
|
+
result = true
|
44
|
+
instructions = cmd.lines
|
45
|
+
|
46
|
+
while instructions.size > 0
|
47
|
+
current_command = ""
|
48
|
+
|
49
|
+
while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
|
50
|
+
current_command << instructions.shift
|
51
|
+
end
|
52
|
+
|
53
|
+
result &= R_ENGINE.eval(current_command)
|
54
|
+
end
|
55
|
+
|
56
|
+
result
|
57
|
+
end
|
19
58
|
end
|
20
59
|
|
21
60
|
def self._pull(r_command, return_warnings = false)
|
22
|
-
|
61
|
+
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
|
62
|
+
end
|
63
|
+
|
64
|
+
def self._eval(r_command, return_warnings = false)
|
65
|
+
self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
|
66
|
+
end
|
67
|
+
|
68
|
+
def self._rexec(r_command, return_warnings = false)
|
69
|
+
R_MUTEX.synchronize do
|
23
70
|
assert("This command must be executed in an exclusive block") { @@in_client_mutex }
|
24
71
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
72
|
+
result = nil
|
73
|
+
begin
|
74
|
+
$stdout = StringIO.new
|
75
|
+
if return_warnings
|
76
|
+
R_ENGINE.echo(true, true)
|
77
|
+
else
|
78
|
+
R_ENGINE.echo(false, false)
|
79
|
+
end
|
80
|
+
result = yield(r_command)
|
81
|
+
ensure
|
29
82
|
R_ENGINE.echo(false, false)
|
83
|
+
warnings = $stdout.string
|
84
|
+
$stdout = STDOUT
|
30
85
|
end
|
31
|
-
result = R_ENGINE.pull(r_command)
|
32
|
-
R_ENGINE.echo(false, false)
|
33
|
-
warnings = $stdout.string
|
34
|
-
$stdout = STDOUT
|
35
86
|
|
36
87
|
if return_warnings
|
37
88
|
return result, warnings.lines.map { |w| w.strip.chomp }
|
@@ -41,26 +92,288 @@ module Rust
|
|
41
92
|
end
|
42
93
|
end
|
43
94
|
|
44
|
-
|
45
|
-
|
46
|
-
|
95
|
+
class RustDatatype
|
96
|
+
def self.pull_variable(variable)
|
97
|
+
return Rust._pull(variable)
|
98
|
+
end
|
99
|
+
|
100
|
+
def load_in_r_as(r_instance, variable_name)
|
101
|
+
raise "Not implemented"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
class DataFrame < RustDatatype
|
106
|
+
def self.pull_variable(variable)
|
107
|
+
hash = {}
|
108
|
+
colnames = Rust._pull("colnames(#{variable})")
|
109
|
+
colnames.each do |col|
|
110
|
+
hash[col] = Rust._pull("#{variable}$#{col}")
|
111
|
+
end
|
112
|
+
return DataFrame.new(hash)
|
113
|
+
end
|
114
|
+
|
115
|
+
def initialize(labels_or_data)
|
116
|
+
@data = {}
|
47
117
|
|
48
|
-
|
49
|
-
|
50
|
-
|
118
|
+
if labels_or_data.is_a? Array
|
119
|
+
@labels = labels_or_data.map { |l| l.to_s }
|
120
|
+
@labels.each { |label| @data[label] = [] }
|
121
|
+
elsif labels_or_data.is_a? Hash
|
122
|
+
@labels = labels_or_data.keys.map { |l| l.to_s }
|
123
|
+
@labels.each { |label| @data[label] = [] }
|
124
|
+
for i in 0...labels_or_data.values[0].size
|
125
|
+
self.add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def row(i)
|
131
|
+
return @data.map { |label, values| [label, values[i]] }.to_h
|
132
|
+
end
|
133
|
+
alias :[] :row
|
134
|
+
|
135
|
+
def column(name)
|
136
|
+
return @data[name]
|
137
|
+
end
|
138
|
+
|
139
|
+
def transform_column!(column)
|
140
|
+
@data[column].map! { |e| yield e }
|
141
|
+
end
|
142
|
+
|
143
|
+
def select_rows
|
144
|
+
result = DataFrame.new(self.column_names)
|
145
|
+
self.each do |row|
|
146
|
+
result << row if yield row
|
147
|
+
end
|
148
|
+
return result
|
149
|
+
end
|
150
|
+
|
151
|
+
def select_cols
|
152
|
+
result = self.clone
|
153
|
+
@labels.each do |label|
|
154
|
+
result.delete_column(label) unless yield label
|
155
|
+
end
|
156
|
+
return result
|
157
|
+
end
|
158
|
+
|
159
|
+
def delete_column(column)
|
160
|
+
@labels.delete(column)
|
161
|
+
@data.delete(column)
|
162
|
+
end
|
163
|
+
|
164
|
+
def column_names
|
165
|
+
return @data.keys.map { |k| k.to_s }
|
166
|
+
end
|
167
|
+
alias :colnames :column_names
|
168
|
+
|
169
|
+
def merge(other, by, first_alias = "x", second_alias = "y")
|
170
|
+
raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
|
171
|
+
raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
|
172
|
+
raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
|
173
|
+
raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
|
174
|
+
raise "The aliases can not have the same value" if first_alias == second_alias
|
175
|
+
|
176
|
+
my_keys = {}
|
177
|
+
self.each_with_index do |row, i|
|
178
|
+
key = []
|
179
|
+
by.each do |colname|
|
180
|
+
key << row[colname]
|
181
|
+
end
|
182
|
+
|
183
|
+
my_keys[key] = i
|
184
|
+
end
|
185
|
+
|
186
|
+
merged_column_self = (self.column_names - by)
|
187
|
+
merged_column_other = (other.column_names - by)
|
188
|
+
|
189
|
+
first_alias = first_alias + "." if first_alias.length > 0
|
190
|
+
second_alias = second_alias + "." if second_alias.length > 0
|
191
|
+
|
192
|
+
merged_columns = merged_column_self.map { |colname| "#{first_alias}#{colname}" } + merged_column_other.map { |colname| "#{second_alias}#{colname}" }
|
193
|
+
columns = by + merged_columns
|
194
|
+
result = DataFrame.new(columns)
|
195
|
+
other.each do |other_row|
|
196
|
+
key = []
|
197
|
+
by.each do |colname|
|
198
|
+
key << other_row[colname]
|
199
|
+
end
|
200
|
+
|
201
|
+
my_row_index = my_keys[key]
|
202
|
+
if my_row_index
|
203
|
+
my_row = self[my_row_index]
|
204
|
+
|
205
|
+
to_add = {}
|
206
|
+
by.each do |colname|
|
207
|
+
to_add[colname] = my_row[colname]
|
208
|
+
end
|
209
|
+
|
210
|
+
merged_column_self.each do |colname|
|
211
|
+
to_add["#{first_alias}#{colname}"] = my_row[colname]
|
212
|
+
end
|
213
|
+
|
214
|
+
merged_column_other.each do |colname|
|
215
|
+
to_add["#{second_alias}#{colname}"] = other_row[colname]
|
216
|
+
end
|
217
|
+
|
218
|
+
result << to_add
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
return result
|
223
|
+
end
|
224
|
+
|
225
|
+
def rows
|
226
|
+
@data.values[0].size
|
227
|
+
end
|
228
|
+
|
229
|
+
def add_row(row)
|
230
|
+
if row.is_a?(Array)
|
231
|
+
raise "Expected an array of size #{@data.size}" unless row.size == @data.size
|
232
|
+
|
233
|
+
@labels.each_with_index do |label, i|
|
234
|
+
@data[label] << row[i]
|
235
|
+
end
|
236
|
+
|
237
|
+
return true
|
238
|
+
elsif row.is_a?(Hash)
|
239
|
+
raise "Expected a hash with the following keys: #{@data.keys}" unless row.keys.map { |l| l.to_s }.sort == @data.keys.sort
|
240
|
+
|
241
|
+
row.each do |key, value|
|
242
|
+
@data[key.to_s] << value
|
243
|
+
end
|
244
|
+
#
|
245
|
+
return true
|
51
246
|
else
|
52
|
-
|
247
|
+
raise TypeError, "Expected an Array or a Hash"
|
248
|
+
end
|
249
|
+
end
|
250
|
+
alias :<< :add_row
|
251
|
+
|
252
|
+
def each
|
253
|
+
self.each_with_index do |element, i|
|
254
|
+
yield element
|
53
255
|
end
|
54
|
-
result = R_ENGINE.eval(r_command)
|
55
|
-
R_ENGINE.echo(false, false)
|
56
|
-
warnings = $stdout.string
|
57
|
-
$stdout = STDOUT
|
58
256
|
|
59
|
-
|
60
|
-
|
257
|
+
return self
|
258
|
+
end
|
259
|
+
|
260
|
+
def each_with_index
|
261
|
+
for i in 0...self.rows
|
262
|
+
element = {}
|
263
|
+
@labels.each do |label|
|
264
|
+
element[label] = @data[label][i]
|
265
|
+
end
|
266
|
+
|
267
|
+
yield element, i
|
268
|
+
end
|
269
|
+
|
270
|
+
return self
|
271
|
+
end
|
272
|
+
|
273
|
+
def load_in_r_as(variable_name)
|
274
|
+
command = []
|
275
|
+
|
276
|
+
command << "#{variable_name} <- data.frame()"
|
277
|
+
row_index = 1
|
278
|
+
self.each do |row|
|
279
|
+
keys = row.keys.map { |v| v.inspect }.join(",")
|
280
|
+
values = row.values.map { |v| v.inspect }.join(",")
|
281
|
+
command << "#{variable_name}[#{row_index}, c(#{keys})] <- c(#{values})"
|
282
|
+
|
283
|
+
row_index += 1
|
284
|
+
end
|
285
|
+
|
286
|
+
Rust._eval_big(command)
|
287
|
+
end
|
288
|
+
|
289
|
+
def inspect
|
290
|
+
separator = " | "
|
291
|
+
col_widths = self.column_names.map { |colname| [colname, ([colname.length] + @data[colname].map { |e| e.inspect.length }).max] }.to_h
|
292
|
+
col_widths[:rowscol] = self.rows.inspect.length + 3
|
293
|
+
|
294
|
+
result = ""
|
295
|
+
result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length)) + "\n"
|
296
|
+
result << (" " * col_widths[:rowscol]) + self.column_names.map { |colname| (" " * (col_widths[colname] - colname.length)) + colname }.join(separator) + "\n"
|
297
|
+
result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length)) + "\n"
|
298
|
+
self.each_with_index do |row, i|
|
299
|
+
result << "[#{i}] " + row.map { |colname, value| (" " * (col_widths[colname] - value.inspect.length)) + value.inspect }.join(separator) + "\n"
|
300
|
+
end
|
301
|
+
|
302
|
+
result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
|
303
|
+
|
304
|
+
return result
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
class Matrix < RustDatatype
|
309
|
+
def self.pull_variable(variable)
|
310
|
+
return Rust._pull(variable)
|
311
|
+
end
|
312
|
+
|
313
|
+
def initialize(data)
|
314
|
+
if data.flatten.size == 0
|
315
|
+
raise "Empty matrices are not allowed"
|
61
316
|
else
|
62
|
-
|
317
|
+
raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
|
318
|
+
raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
|
319
|
+
raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
|
320
|
+
@data = data.clone
|
63
321
|
end
|
64
322
|
end
|
323
|
+
|
324
|
+
def [](i, j)
|
325
|
+
return @data[i][j]
|
326
|
+
end
|
327
|
+
|
328
|
+
def rows
|
329
|
+
@data.size
|
330
|
+
end
|
331
|
+
|
332
|
+
def cols
|
333
|
+
@data[0].size
|
334
|
+
end
|
335
|
+
|
336
|
+
def []=(i, j, value)
|
337
|
+
raise "Wrong i" unless i.between?(0, @data.size - 1)
|
338
|
+
raise "Wrong j" unless j.between?(0, @data[0].size - 1)
|
339
|
+
@data[i][j] = value
|
340
|
+
end
|
341
|
+
|
342
|
+
def load_in_r_as(variable_name)
|
343
|
+
Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=T)")
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
class CSV
|
348
|
+
def self.read(filename, **options)
|
349
|
+
hash = {}
|
350
|
+
labels = nil
|
351
|
+
::CSV.parse(File.read(filename), **options) do |row|
|
352
|
+
labels = row.headers || (1..row.size).to_a.map { |e| "X#{e}" } unless labels
|
353
|
+
|
354
|
+
labels.each do |label|
|
355
|
+
hash[label] = [] unless hash[label]
|
356
|
+
hash[label] << row[label]
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
return Rust::DataFrame.new(hash)
|
361
|
+
end
|
362
|
+
|
363
|
+
def self.write(filename, dataframe, **options)
|
364
|
+
raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
|
365
|
+
|
366
|
+
x[:headers] = dataframe.column_names if x[:headers]
|
367
|
+
|
368
|
+
hash = {}
|
369
|
+
labels = nil
|
370
|
+
::CSV.open(filename, 'w', write_headers: (x[:headers] ? true : false), **options) do |csv|
|
371
|
+
dataframe.each do |row|
|
372
|
+
csv << row
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
return true
|
377
|
+
end
|
65
378
|
end
|
66
379
|
end
|
data/lib/rust-tests.rb
CHANGED
@@ -66,7 +66,7 @@ module Rust::StatisticalTests::Wilcoxon
|
|
66
66
|
|
67
67
|
_, warnings = Rust._eval("result = wilcox.test(a, b, alternative='two.sided', paired=F)", true)
|
68
68
|
result = Rust::StatisticalTests::Result.new
|
69
|
-
result.name = "Mann–Whitney U test"
|
69
|
+
result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
|
70
70
|
result.pvalue = Rust._pull("result$p.value")
|
71
71
|
result[:w] = Rust._pull("result$statistic")
|
72
72
|
result.exact = !warnings.include?("cannot compute exact p-value with ties")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.1'
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rinruby
|