rust 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/rust-core.rb +337 -24
  3. data/lib/rust-tests.rb +1 -1
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fca0e67fdbbd71271d8baf0ab5c1b7cee895dd6f546699ca420bd0fa9ceb8a83
4
- data.tar.gz: d0c30431caacfcd06744d8973ee2938d3e8b44297624d6dcb514fde1280a43cd
3
+ metadata.gz: a39315e623da717c4f035a11fbe7363bf3aae0d8885f922c4f0fca689bc7b90a
4
+ data.tar.gz: e0a1ec7e485a0f9521a42191f6a97d833df8eb35c961d31c9ba67f0a6f0c0c22
5
5
  SHA512:
6
- metadata.gz: 5004a2514605db69f616eeb25c3d89eb7ac6b6702be97a15202c0b4bc9731e4e0b55347a256dc7123c85c7342be3af096689a88a07b1c0489f88a86aacfef982
7
- data.tar.gz: 1e969dca0511a2ac803c1e4369b6471641af4850b53663293dd69cd1009ecec352570173a484fe91b9ed6c822052592fde7a6cb165dc01eaed6efabea3a693cd
6
+ metadata.gz: eaffc66a8a3250f7f687bdf1dec96421681f25ff394010f0c15d7e65b5ed87a9deb25111c7f56baa60b19a53a4849bd92a665bfe507f9ba9b096b722cddeed53
7
+ data.tar.gz: c5fc8ce8b55347ca402783ab352310719fcdd008eaa2f1ed3b00ca93137736ca6a2d8c9d9b2169865212e7a09ca955e5909d7983b0461bda0c539b51f1c0e379
@@ -1,6 +1,7 @@
1
1
  require 'code-assertions'
2
2
  require 'stringio'
3
3
  require 'rinruby'
4
+ require 'csv'
4
5
 
5
6
  module Rust
6
7
  CLIENT_MUTEX = Mutex.new
@@ -11,27 +12,77 @@ module Rust
11
12
  @@in_client_mutex = false
12
13
 
13
14
  def self.exclusive
15
+ result = nil
14
16
  CLIENT_MUTEX.synchronize do
15
17
  @@in_client_mutex = true
16
- yield
18
+ result = yield
17
19
  @@in_client_mutex = false
18
20
  end
21
+ return result
22
+ end
23
+
24
# Binds +value+ to the R variable named +variable+.
#
# RustDatatype instances load themselves through load_in_r_as; Strings,
# Numerics and Arrays are handed to R_ENGINE.assign. The variable name is
# converted with to_s in both branches.
#
# Raises RuntimeError for any other kind of value; the message names the class
# of the rejected value.
def self.[]=(variable, value)
  case value
  when RustDatatype
    value.load_in_r_as(variable.to_s)
  when String, Numeric, Array
    R_ENGINE.assign(variable.to_s, value)
  else
    raise "Given #{value.class}, expected RustDatatype, String, Numeric, or Array"
  end
end
34
+
35
# Reads the R variable +variable+ by delegating to +type+.pull_variable.
# +type+ defaults to RustDatatype.
def self.[](variable, type = RustDatatype)
  type.pull_variable(variable)
end
38
+
39
# Evaluates a long R script in chunks of consecutive lines.
#
# r_command may be a String or an Array of lines (joined with "\n"). Lines are
# appended to the current chunk while its length stays below 10000 characters;
# each chunk is passed to R_ENGINE.eval inside _rexec. The block result is true
# only if every chunk evaluated to a truthy value.
#
# Every chunk takes at least one line, so a single line of 10000 characters or
# more is evaluated on its own instead of stalling the loop.
#
# NOTE(review): chunks are cut on line boundaries only, so a multi-line R
# statement may end up split across two eval calls - confirm callers emit one
# statement per line.
def self._eval_big(r_command, return_warnings = false)
  r_command = r_command.join("\n") if r_command.is_a?(Array)

  self._rexec(r_command, return_warnings) do |cmd|
    result = true
    instructions = cmd.lines

    until instructions.empty?
      # Unconditionally consume one line so the loop always makes progress.
      current_command = instructions.shift

      while !instructions.empty? && (current_command.length + instructions.first.length < 10000)
        current_command << instructions.shift
      end

      result &= R_ENGINE.eval(current_command)
    end

    result
  end
end
20
59
 
21
60
  def self._pull(r_command, return_warnings = false)
22
- R_MUTEX.synchronize do
61
+ self._rexec(r_command, return_warnings) { |cmd| R_ENGINE.pull(cmd) }
62
+ end
63
+
64
# Evaluates the R code +r_command+.
#
# The work is delegated to _rexec, with R_ENGINE.eval as the operation run on
# the command; the return value is whatever _rexec returns (when
# return_warnings is true, that is the result together with the captured
# output lines).
def self._eval(r_command, return_warnings = false)
  _rexec(r_command, return_warnings) do |command|
    R_ENGINE.eval(command)
  end
end
67
+
68
+ def self._rexec(r_command, return_warnings = false)
69
+ R_MUTEX.synchronize do
23
70
  assert("This command must be executed in an exclusive block") { @@in_client_mutex }
24
71
 
25
- $stdout = StringIO.new
26
- if return_warnings
27
- R_ENGINE.echo(true, true)
28
- else
72
+ result = nil
73
+ begin
74
+ $stdout = StringIO.new
75
+ if return_warnings
76
+ R_ENGINE.echo(true, true)
77
+ else
78
+ R_ENGINE.echo(false, false)
79
+ end
80
+ result = yield(r_command)
81
+ ensure
29
82
  R_ENGINE.echo(false, false)
83
+ warnings = $stdout.string
84
+ $stdout = STDOUT
30
85
  end
31
- result = R_ENGINE.pull(r_command)
32
- R_ENGINE.echo(false, false)
33
- warnings = $stdout.string
34
- $stdout = STDOUT
35
86
 
36
87
  if return_warnings
37
88
  return result, warnings.lines.map { |w| w.strip.chomp }
@@ -41,26 +92,288 @@ module Rust
41
92
  end
42
93
  end
43
94
 
44
- def self._eval(r_command, return_warnings = false)
45
- R_MUTEX.synchronize do
46
- assert("This command must be executed in an exclusive block") { @@in_client_mutex }
95
# Base class for Ruby-side values that can be exchanged with R.
class RustDatatype
  # Reads the R variable +variable+ and returns whatever Rust._pull yields.
  def self.pull_variable(variable)
    Rust._pull(variable)
  end

  # Abstract hook: subclasses define how the value is created in R under
  # +variable_name+.
  #
  # The subclasses in this file and Rust.[]= all use a single argument (the
  # variable name), so the base signature accepts that form. Extra positional
  # arguments are tolerated so that calls written against the previous
  # two-argument signature still reach the "Not implemented" error instead of
  # failing with ArgumentError.
  #
  # Raises RuntimeError ("Not implemented") unless overridden.
  def load_in_r_as(variable_name, *_legacy_args)
    raise "Not implemented"
  end
end
104
+
105
# Table with named columns, stored column-wise (@data maps each label to an
# Array of values, @labels keeps the column order). Rows are exposed as Hashes
# from label to value.
class DataFrame < RustDatatype
  # Builds a DataFrame from the R data frame bound to +variable+: the column
  # names are pulled first, then each column.
  #
  # Pulled values are wrapped with Array() because RinRuby's pull sheds the
  # vector for length-one results by default, which would otherwise turn a
  # one-column or one-row frame into bare scalars.
  def self.pull_variable(variable)
    hash = {}
    colnames = Array(Rust._pull("colnames(#{variable})"))
    colnames.each do |col|
      hash[col] = Array(Rust._pull("#{variable}$#{col}"))
    end
    DataFrame.new(hash)
  end

  # labels_or_data is either an Array of column labels (creates an empty frame)
  # or a Hash from label to an Array of column values. Labels are stored as
  # Strings. With a Hash, the number of rows is the size of the first column.
  #
  # Raises TypeError for any other argument, rather than building an object
  # whose later method calls would fail.
  def initialize(labels_or_data)
    @data = {}

    case labels_or_data
    when Array
      @labels = labels_or_data.map(&:to_s)
      @labels.each { |label| @data[label] = [] }
    when Hash
      @labels = labels_or_data.keys.map(&:to_s)
      @labels.each { |label| @data[label] = [] }

      # An empty Hash has no first column; treat it as a frame with no rows.
      first_column = labels_or_data.values.first
      row_count = first_column ? first_column.size : 0
      row_count.times do |i|
        add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
      end
    else
      raise TypeError, "Expected an Array of labels or a Hash of columns"
    end
  end

  # Returns row +i+ as a Hash from label to value.
  def row(i)
    @data.map { |label, values| [label, values[i]] }.to_h
  end
  alias :[] :row

  # Returns the Array of values stored for column +name+ (nil if absent).
  def column(name)
    @data[name.to_s]
  end

  # Replaces every value of +column+ in place with the block's result.
  def transform_column!(column)
    @data[column.to_s].map! { |e| yield e }
  end

  # Returns a new DataFrame holding the rows for which the block is truthy.
  def select_rows
    result = DataFrame.new(column_names)
    each do |row|
      result << row if yield row
    end
    result
  end

  # Returns a new DataFrame holding the columns whose label makes the block
  # truthy. The result gets its own copies of the column arrays; the receiver
  # is left untouched. (A shallow clone followed by delete_column would remove
  # the columns from the receiver as well, since both objects would share
  # @labels and @data.)
  def select_cols
    kept = @labels.select { |label| yield label }
    DataFrame.new(kept.map { |label| [label, @data[label].dup] }.to_h)
  end

  # Removes +column+ from this frame.
  def delete_column(column)
    @labels.delete(column.to_s)
    @data.delete(column.to_s)
  end

  # Returns the column labels as Strings.
  def column_names
    @data.keys.map(&:to_s)
  end
  alias :colnames :column_names

  # Joins this frame with +other+ on the columns listed in +by+ and returns a
  # new DataFrame. Only rows of +other+ whose key also appears in this frame
  # are kept. Columns not in +by+ are prefixed with "<first_alias>." (this
  # frame) or "<second_alias>." (other); an empty alias adds no prefix.
  #
  # When several rows of this frame share the same key, the last one is used.
  #
  # Raises TypeError if +other+ is not a DataFrame or +by+ is not an Array of
  # Strings; RuntimeError if a +by+ column is missing from either frame or the
  # two aliases are equal.
  def merge(other, by, first_alias = "x", second_alias = "y")
    raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
    raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
    raise "This dataset should have all the columns in #{by}" unless (by & column_names).size == by.size
    raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
    raise "The aliases can not have the same value" if first_alias == second_alias

    # Index this frame's rows by their key so each row of +other+ is matched
    # with a hash lookup.
    my_keys = {}
    each_with_index do |row, i|
      my_keys[by.map { |colname| row[colname] }] = i
    end

    merged_column_self = column_names - by
    merged_column_other = other.column_names - by

    first_alias = first_alias + "." if first_alias.length > 0
    second_alias = second_alias + "." if second_alias.length > 0

    merged_columns = merged_column_self.map { |colname| "#{first_alias}#{colname}" } +
                     merged_column_other.map { |colname| "#{second_alias}#{colname}" }
    result = DataFrame.new(by + merged_columns)

    other.each do |other_row|
      my_row_index = my_keys[by.map { |colname| other_row[colname] }]
      next unless my_row_index

      my_row = self[my_row_index]

      to_add = {}
      by.each { |colname| to_add[colname] = my_row[colname] }
      merged_column_self.each { |colname| to_add["#{first_alias}#{colname}"] = my_row[colname] }
      merged_column_other.each { |colname| to_add["#{second_alias}#{colname}"] = other_row[colname] }

      result << to_add
    end

    result
  end

  # Number of rows, taken from the first column; 0 when there are no columns.
  def rows
    first_column = @data.values.first
    first_column ? first_column.size : 0
  end

  # Appends a row and returns true. An Array is matched to the columns by
  # position; a Hash must have exactly the frame's labels as keys (compared
  # after to_s).
  #
  # Raises RuntimeError on a size or key mismatch and TypeError for any other
  # kind of argument.
  def add_row(row)
    if row.is_a?(Array)
      raise "Expected an array of size #{@data.size}" unless row.size == @data.size

      @labels.each_with_index do |label, i|
        @data[label] << row[i]
      end

      true
    elsif row.is_a?(Hash)
      raise "Expected a hash with the following keys: #{@data.keys}" unless row.keys.map(&:to_s).sort == @data.keys.sort

      row.each do |key, value|
        @data[key.to_s] << value
      end

      true
    else
      raise TypeError, "Expected an Array or a Hash"
    end
  end
  alias :<< :add_row

  # Yields each row as a Hash from label to value; returns self.
  def each
    each_with_index { |element, _index| yield element }
    self
  end

  # Yields each row (Hash from label to value) with its 0-based index;
  # returns self.
  def each_with_index
    rows.times do |i|
      element = {}
      @labels.each do |label|
        element[label] = @data[label][i]
      end

      yield element, i
    end

    self
  end

  # Creates the frame in R under +variable_name+: an empty data.frame() is
  # assigned first, then one assignment per row (R rows are 1-based). Labels
  # and values are rendered with #inspect. The script is run with
  # Rust._eval_big.
  def load_in_r_as(variable_name)
    command = []

    command << "#{variable_name} <- data.frame()"
    row_index = 1
    each do |row|
      keys = row.keys.map { |v| v.inspect }.join(",")
      values = row.values.map { |v| v.inspect }.join(",")
      command << "#{variable_name}[#{row_index}, c(#{keys})] <- c(#{values})"

      row_index += 1
    end

    Rust._eval_big(command)
  end

  # Renders the frame as a text table: a header with right-aligned column
  # names, one "[i] ..." line per row, and dashed rules around both.
  def inspect
    separator = " | "
    col_widths = column_names.map do |colname|
      [colname, ([colname.length] + @data[colname].map { |e| e.inspect.length }).max]
    end.to_h
    col_widths[:rowscol] = rows.inspect.length + 3

    rule = "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))

    lines = [rule]
    lines << (" " * col_widths[:rowscol]) +
             column_names.map { |colname| (" " * (col_widths[colname] - colname.length)) + colname }.join(separator)
    lines << rule
    each_with_index do |row, i|
      lines << "[#{i}] " +
               row.map { |colname, value| (" " * (col_widths[colname] - value.inspect.length)) + value.inspect }.join(separator)
    end
    lines << rule

    lines.join("\n")
  end
end
307
+
308
# Non-empty, rectangular matrix of Numerics stored as an Array of row Arrays.
class Matrix < RustDatatype
  # Reads the R variable +variable+ and returns whatever Rust._pull yields.
  def self.pull_variable(variable)
    Rust._pull(variable)
  end

  # data must be a non-empty Array of equally sized Arrays of Numerics.
  #
  # The type check runs before anything is called on +data+, and every row
  # (not only the first) must be an Array. Rows are copied so that later
  # changes to the caller's arrays do not alter the matrix.
  #
  # Raises TypeError when +data+ is not an Array of Arrays or holds a
  # non-numeric element; RuntimeError when it has no elements or the rows
  # differ in size.
  def initialize(data)
    raise TypeError, "Expected array of array" unless data.is_a?(Array)
    raise "Empty matrices are not allowed" if data.flatten.empty?
    raise TypeError, "Expected array of array" unless data.all? { |row| row.is_a?(Array) }
    raise TypeError, "Only numeric matrices are supported" unless data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
    raise "All the rows must have the same size" unless data.map(&:size).uniq.size == 1

    @data = data.map(&:dup)
  end

  # Returns the element at row +i+, column +j+ (0-based).
  def [](i, j)
    @data[i][j]
  end

  # Number of rows.
  def rows
    @data.size
  end

  # Number of columns, taken from the first row.
  def cols
    @data[0].size
  end

  # Stores +value+ at row +i+, column +j+ (0-based).
  #
  # Raises RuntimeError when an index is out of range and TypeError when
  # +value+ is not Numeric, which keeps the constructor's numeric-only rule
  # (load_in_r_as interpolates the elements directly into R code).
  def []=(i, j, value)
    raise "Wrong i" unless i.between?(0, @data.size - 1)
    raise "Wrong j" unless j.between?(0, @data[0].size - 1)
    raise TypeError, "Only numeric matrices are supported" unless value.is_a?(Numeric)

    @data[i][j] = value
  end

  # Creates the matrix in R under +variable_name+, passing the elements in
  # row-major order (byrow=T) through Rust._eval.
  def load_in_r_as(variable_name)
    Rust._eval("#{variable_name} <- matrix(c(#{@data.flatten.join(",")}), nrow=#{rows}, ncol=#{cols}, byrow=T)")
  end
end
346
+
347
+ class CSV
348
+ def self.read(filename, **options)
349
+ hash = {}
350
+ labels = nil
351
+ ::CSV.parse(File.read(filename), **options) do |row|
352
+ labels = row.headers || (1..row.size).to_a.map { |e| "X#{e}" } unless labels
353
+
354
+ labels.each do |label|
355
+ hash[label] = [] unless hash[label]
356
+ hash[label] << row[label]
357
+ end
358
+ end
359
+
360
+ return Rust::DataFrame.new(hash)
361
+ end
362
+
363
+ def self.write(filename, dataframe, **options)
364
+ raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
365
+
366
+ x[:headers] = dataframe.column_names if x[:headers]
367
+
368
+ hash = {}
369
+ labels = nil
370
+ ::CSV.open(filename, 'w', write_headers: (x[:headers] ? true : false), **options) do |csv|
371
+ dataframe.each do |row|
372
+ csv << row
373
+ end
374
+ end
375
+
376
+ return true
377
+ end
65
378
  end
66
379
  end
@@ -66,7 +66,7 @@ module Rust::StatisticalTests::Wilcoxon
66
66
 
67
67
  _, warnings = Rust._eval("result = wilcox.test(a, b, alternative='two.sided', paired=F)", true)
68
68
  result = Rust::StatisticalTests::Result.new
69
- result.name = "Mann–Whitney U test, Wilcoxon Ranked-Sum test"
69
+ result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
70
70
  result.pvalue = Rust._pull("result$p.value")
71
71
  result[:w] = Rust._pull("result$statistic")
72
72
  result.exact = !warnings.include?("cannot compute exact p-value with ties")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.2'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-28 00:00:00.000000000 Z
11
+ date: 2020-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby