rust 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rust-core.rb +337 -24
  3. data/lib/rust-tests.rb +1 -1
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fca0e67fdbbd71271d8baf0ab5c1b7cee895dd6f546699ca420bd0fa9ceb8a83
4
- data.tar.gz: d0c30431caacfcd06744d8973ee2938d3e8b44297624d6dcb514fde1280a43cd
3
+ metadata.gz: a39315e623da717c4f035a11fbe7363bf3aae0d8885f922c4f0fca689bc7b90a
4
+ data.tar.gz: e0a1ec7e485a0f9521a42191f6a97d833df8eb35c961d31c9ba67f0a6f0c0c22
5
5
  SHA512:
6
- metadata.gz: 5004a2514605db69f616eeb25c3d89eb7ac6b6702be97a15202c0b4bc9731e4e0b55347a256dc7123c85c7342be3af096689a88a07b1c0489f88a86aacfef982
7
- data.tar.gz: 1e969dca0511a2ac803c1e4369b6471641af4850b53663293dd69cd1009ecec352570173a484fe91b9ed6c822052592fde7a6cb165dc01eaed6efabea3a693cd
6
+ metadata.gz: eaffc66a8a3250f7f687bdf1dec96421681f25ff394010f0c15d7e65b5ed87a9deb25111c7f56baa60b19a53a4849bd92a665bfe507f9ba9b096b722cddeed53
7
+ data.tar.gz: c5fc8ce8b55347ca402783ab352310719fcdd008eaa2f1ed3b00ca93137736ca6a2d8c9d9b2169865212e7a09ca955e5909d7983b0461bda0c539b51f1c0e379
@@ -1,6 +1,7 @@
1
1
  require 'code-assertions'
2
2
  require 'stringio'
3
3
  require 'rinruby'
4
+ require 'csv'
4
5
 
5
6
  module Rust
6
7
  CLIENT_MUTEX = Mutex.new
@@ -11,27 +12,77 @@ module Rust
11
12
  @@in_client_mutex = false
12
13
 
13
14
  def self.exclusive
15
+ result = nil
14
16
  CLIENT_MUTEX.synchronize do
15
17
  @@in_client_mutex = true
16
- yield
18
+ result = yield
17
19
  @@in_client_mutex = false
18
20
  end
21
+ return result
22
+ end
23
+
24
+ def self.[]=(variable, value)
25
+ if value.is_a?(RustDatatype)
26
+ value.load_in_r_as(variable.to_s)
27
+ elsif value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Array)
28
+ R_ENGINE.assign(variable, value)
29
+ else
30
+ raise "Given #{variable.class}, expected RustDatatype, String, Numeric, or Array"
31
+ end
32
+
33
+ end
34
+
35
+ def self.[](variable, type=RustDatatype)
36
+ return type.pull_variable(variable)
37
+ end
38
+
39
+ def self._eval_big(r_command, return_warnings = false)
40
+ r_command = r_command.join("\n") if r_command.is_a?(Array)
41
+
42
+ self._rexec(r_command, return_warnings) do |cmd|
43
+ result = true
44
+ instructions = cmd.lines
45
+
46
+ while instructions.size > 0
47
+ current_command = ""
48
+
49
+ while (instructions.size > 0) && (current_command.length + instructions.first.length < 10000)
50
+ current_command << instructions.shift
51
+ end
52
+
53
+ result &= R_ENGINE.eval(current_command)
54
+ end
55
+
56
+ result
57
+ end
19
58
  end
20
59
 
21
60
  def self._pull(r_command, return_warnings = false)
22
- R_MUTEX.synchronize do
61
+ self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
62
+ end
63
+
64
+ def self._eval(r_command, return_warnings = false)
65
+ self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.eval(cmd) }
66
+ end
67
+
68
+ def self._rexec(r_command, return_warnings = false)
69
+ R_MUTEX.synchronize do
23
70
  assert("This command must be executed in an exclusive block") { @@in_client_mutex }
24
71
 
25
- $stdout = StringIO.new
26
- if return_warnings
27
- R_ENGINE.echo(true, true)
28
- else
72
+ result = nil
73
+ begin
74
+ $stdout = StringIO.new
75
+ if return_warnings
76
+ R_ENGINE.echo(true, true)
77
+ else
78
+ R_ENGINE.echo(false, false)
79
+ end
80
+ result = yield(r_command)
81
+ ensure
29
82
  R_ENGINE.echo(false, false)
83
+ warnings = $stdout.string
84
+ $stdout = STDOUT
30
85
  end
31
- result = R_ENGINE.pull(r_command)
32
- R_ENGINE.echo(false, false)
33
- warnings = $stdout.string
34
- $stdout = STDOUT
35
86
 
36
87
  if return_warnings
37
88
  return result, warnings.lines.map { |w| w.strip.chomp }
@@ -41,26 +92,288 @@ module Rust
41
92
  end
42
93
  end
43
94
 
44
- def self._eval(r_command, return_warnings = false)
45
- R_MUTEX.synchronize do
46
- assert("This command must be executed in an exclusive block") { @@in_client_mutex }
95
+ class RustDatatype
96
+ def self.pull_variable(variable)
97
+ return Rust._pull(variable)
98
+ end
99
+
100
+ def load_in_r_as(r_instance, variable_name)
101
+ raise "Not implemented"
102
+ end
103
+ end
104
+
105
+ class DataFrame < RustDatatype
106
+ def self.pull_variable(variable)
107
+ hash = {}
108
+ colnames = Rust._pull("colnames(#{variable})")
109
+ colnames.each do |col|
110
+ hash[col] = Rust._pull("#{variable}$#{col}")
111
+ end
112
+ return DataFrame.new(hash)
113
+ end
114
+
115
+ def initialize(labels_or_data)
116
+ @data = {}
47
117
 
48
- $stdout = StringIO.new
49
- if return_warnings
50
- R_ENGINE.echo(true, true)
118
+ if labels_or_data.is_a? Array
119
+ @labels = labels_or_data.map { |l| l.to_s }
120
+ @labels.each { |label| @data[label] = [] }
121
+ elsif labels_or_data.is_a? Hash
122
+ @labels = labels_or_data.keys.map { |l| l.to_s }
123
+ @labels.each { |label| @data[label] = [] }
124
+ for i in 0...labels_or_data.values[0].size
125
+ self.add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
126
+ end
127
+ end
128
+ end
129
+
130
+ def row(i)
131
+ return @data.map { |label, values| [label, values[i]] }.to_h
132
+ end
133
+ alias :[] :row
134
+
135
+ def column(name)
136
+ return @data[name]
137
+ end
138
+
139
+ def transform_column!(column)
140
+ @data[column].map! { |e| yield e }
141
+ end
142
+
143
+ def select_rows
144
+ result = DataFrame.new(self.column_names)
145
+ self.each do |row|
146
+ result << row if yield row
147
+ end
148
+ return result
149
+ end
150
+
151
+ def select_cols
152
+ result = self.clone
153
+ @labels.each do |label|
154
+ result.delete_column(label) unless yield label
155
+ end
156
+ return result
157
+ end
158
+
159
+ def delete_column(column)
160
+ @labels.delete(column)
161
+ @data.delete(column)
162
+ end
163
+
164
+ def column_names
165
+ return @data.keys.map { |k| k.to_s }
166
+ end
167
+ alias :colnames :column_names
168
+
169
+ def merge(other, by, first_alias = "x", second_alias = "y")
170
+ raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
171
+ raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
172
+ raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
173
+ raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
174
+ raise "The aliases can not have the same value" if first_alias == second_alias
175
+
176
+ my_keys = {}
177
+ self.each_with_index do |row, i|
178
+ key = []
179
+ by.each do |colname|
180
+ key << row[colname]
181
+ end
182
+
183
+ my_keys[key] = i
184
+ end
185
+
186
+ merged_column_self = (self.column_names - by)
187
+ merged_column_other = (other.column_names - by)
188
+
189
+ first_alias = first_alias + "." if first_alias.length > 0
190
+ second_alias = second_alias + "." if second_alias.length > 0
191
+
192
+ merged_columns = merged_column_self.map { |colname| "#{first_alias}#{colname}" } + merged_column_other.map { |colname| "#{second_alias}#{colname}" }
193
+ columns = by + merged_columns
194
+ result = DataFrame.new(columns)
195
+ other.each do |other_row|
196
+ key = []
197
+ by.each do |colname|
198
+ key << other_row[colname]
199
+ end
200
+
201
+ my_row_index = my_keys[key]
202
+ if my_row_index
203
+ my_row = self[my_row_index]
204
+
205
+ to_add = {}
206
+ by.each do |colname|
207
+ to_add[colname] = my_row[colname]
208
+ end
209
+
210
+ merged_column_self.each do |colname|
211
+ to_add["#{first_alias}#{colname}"] = my_row[colname]
212
+ end
213
+
214
+ merged_column_other.each do |colname|
215
+ to_add["#{second_alias}#{colname}"] = other_row[colname]
216
+ end
217
+
218
+ result << to_add
219
+ end
220
+ end
221
+
222
+ return result
223
+ end
224
+
225
+ def rows
226
+ @data.values[0].size
227
+ end
228
+
229
+ def add_row(row)
230
+ if row.is_a?(Array)
231
+ raise "Expected an array of size #{@data.size}" unless row.size == @data.size
232
+
233
+ @labels.each_with_index do |label, i|
234
+ @data[label] << row[i]
235
+ end
236
+
237
+ return true
238
+ elsif row.is_a?(Hash)
239
+ raise "Expected a hash with the following keys: #{@data.keys}" unless row.keys.map { |l| l.to_s }.sort == @data.keys.sort
240
+
241
+ row.each do |key, value|
242
+ @data[key.to_s] << value
243
+ end
244
+ #
245
+ return true
51
246
  else
52
- R_ENGINE.echo(false, false)
247
+ raise TypeError, "Expected an Array or a Hash"
248
+ end
249
+ end
250
+ alias :<< :add_row
251
+
252
+ def each
253
+ self.each_with_index do |element, i|
254
+ yield element
53
255
  end
54
- result = R_ENGINE.eval(r_command)
55
- R_ENGINE.echo(false, false)
56
- warnings = $stdout.string
57
- $stdout = STDOUT
58
256
 
59
- if return_warnings
60
- return result, warnings.lines.map { |w| w.strip.chomp }
257
+ return self
258
+ end
259
+
260
+ def each_with_index
261
+ for i in 0...self.rows
262
+ element = {}
263
+ @labels.each do |label|
264
+ element[label] = @data[label][i]
265
+ end
266
+
267
+ yield element, i
268
+ end
269
+
270
+ return self
271
+ end
272
+
273
+ def load_in_r_as(variable_name)
274
+ command = []
275
+
276
+ command << "#{variable_name} <- data.frame()"
277
+ row_index = 1
278
+ self.each do |row|
279
+ keys = row.keys.map { |v| v.inspect }.join(",")
280
+ values = row.values.map { |v| v.inspect }.join(",")
281
+ command << "#{variable_name}[#{row_index}, c(#{keys})] <- c(#{values})"
282
+
283
+ row_index += 1
284
+ end
285
+
286
+ Rust._eval_big(command)
287
+ end
288
+
289
+ def inspect
290
+ separator = " | "
291
+ col_widths = self.column_names.map { |colname| [colname, ([colname.length] + @data[colname].map { |e| e.inspect.length }).max] }.to_h
292
+ col_widths[:rowscol] = self.rows.inspect.length + 3
293
+
294
+ result = ""
295
+ result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length)) + "\n"
296
+ result << (" " * col_widths[:rowscol]) + self.column_names.map { |colname| (" " * (col_widths[colname] - colname.length)) + colname }.join(separator) + "\n"
297
+ result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length)) + "\n"
298
+ self.each_with_index do |row, i|
299
+ result << "[#{i}] " + row.map { |colname, value| (" " * (col_widths[colname] - value.inspect.length)) + value.inspect }.join(separator) + "\n"
300
+ end
301
+
302
+ result << "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
303
+
304
+ return result
305
+ end
306
+ end
307
+
308
+ class Matrix < RustDatatype
309
+ def self.pull_variable(variable)
310
+ return Rust._pull(variable)
311
+ end
312
+
313
+ def initialize(data)
314
+ if data.flatten.size == 0
315
+ raise "Empty matrices are not allowed"
61
316
  else
62
- return result
317
+ raise TypeError, "Expected array of array" unless data.is_a?(Array) && data[0].is_a?(Array)
318
+ raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
319
+ raise "All the rows must have the same size" unless data.map { |row| row.size }.uniq.size == 1
320
+ @data = data.clone
63
321
  end
64
322
  end
323
+
324
+ def [](i, j)
325
+ return @data[i][j]
326
+ end
327
+
328
+ def rows
329
+ @data.size
330
+ end
331
+
332
+ def cols
333
+ @data[0].size
334
+ end
335
+
336
+ def []=(i, j, value)
337
+ raise "Wrong i" unless i.between?(0, @data.size - 1)
338
+ raise "Wrong j" unless j.between?(0, @data[0].size - 1)
339
+ @data[i][j] = value
340
+ end
341
+
342
+ def load_in_r_as(variable_name)
343
+ Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{self.rows}, ncol=#{self.cols}, byrow=T)")
344
+ end
345
+ end
346
+
347
+ class CSV
348
+ def self.read(filename, **options)
349
+ hash = {}
350
+ labels = nil
351
+ ::CSV.parse(File.read(filename), **options) do |row|
352
+ labels = row.headers || (1..row.size).to_a.map { |e| "X#{e}" } unless labels
353
+
354
+ labels.each do |label|
355
+ hash[label] = [] unless hash[label]
356
+ hash[label] << row[label]
357
+ end
358
+ end
359
+
360
+ return Rust::DataFrame.new(hash)
361
+ end
362
+
363
+ def self.write(filename, dataframe, **options)
364
+ raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
365
+
366
+ x[:headers] = dataframe.column_names if x[:headers]
367
+
368
+ hash = {}
369
+ labels = nil
370
+ ::CSV.open(filename, 'w', write_headers: (x[:headers] ? true : false), **options) do |csv|
371
+ dataframe.each do |row|
372
+ csv << row
373
+ end
374
+ end
375
+
376
+ return true
377
+ end
65
378
  end
66
379
  end
@@ -66,7 +66,7 @@ module Rust::StatisticalTests::Wilcoxon
66
66
 
67
67
  _, warnings = Rust._eval("result = wilcox.test(a, b, alternative='two.sided', paired=F)", true)
68
68
  result = Rust::StatisticalTests::Result.new
69
- result.name = "Mann–Whitney U test, Wilcoxon Ranked-Sum test"
69
+ result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
70
70
  result.pvalue = Rust._pull("result$p.value")
71
71
  result[:w] = Rust._pull("result$statistic")
72
72
  result.exact = !warnings.include?("cannot compute exact p-value with ties")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.2'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-28 00:00:00.000000000 Z
11
+ date: 2020-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby