rust 0.4 → 0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca9e5aaa6bcfff9d1b261c5a3ced5cf74b7731085b4b094dba55df72884aca9a
4
- data.tar.gz: 8c90363b44a0c95abc9610fb78c3c632c6ae4265ad2260ff7e9740777245f63e
3
+ metadata.gz: 35c41ec98f5b286ef597096152249f19825a27bc33030e85ea196df8b778d3b1
4
+ data.tar.gz: 48b2e61f707ebcd05fa5f593016c8c0fb4b9d1e58751dd5deae83715583bd978
5
5
  SHA512:
6
- metadata.gz: 0ca17e2c0dda2188138f11e1ae4becaa8a5c4b0d2cc12273775b9ade1fefbc860f3ccee2251fbe353b9dbde55eded960e8cf26642af042070d979ed192b332e3
7
- data.tar.gz: 28dacd36f814acf51d222c8e746f65b94ef53ab5f24902ba6f654b05267c926e8db03cca82a876ca1183f55293da96d60ec862c5bfa26f6abd430d1f5e998709
6
+ metadata.gz: b0cc1289721d52cd376e0a242c2be419ea511f03a0fdd42c4c7a1973cde24e8dfc467e9d355693f85632cccb98af6986777aa020b715db5f06aa88a63d154ea0
7
+ data.tar.gz: 4c91c808773dcd04913a594cfcaa7956318908e6d590ebec1d9a04beef18986adc70a3c5e480c39bdc9f753b5edf920a1939208b8b530ccfc67124ac153a4804
data/lib/rust-basics.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require_relative 'rust-core'
2
2
 
3
- module Rust:: Correlation
3
+ module Rust::Correlation
4
4
  class Pearson
5
5
  def self.test(d1, d2)
6
6
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
data/lib/rust-calls.rb CHANGED
@@ -34,6 +34,17 @@ module Rust
34
34
  end
35
35
  end
36
36
 
37
# A two-sided R formula rendered as "dependent ~ independent".
class SimpleFormula
  # dependent::   term placed on the left of the tilde
  # independent:: term placed on the right of the tilde
  def initialize(dependent, independent)
    @dependent, @independent = dependent, independent
  end

  # Returns the formula as R source text.
  def to_R
    "#{@dependent} ~ #{@independent}"
  end
end
47
+
37
48
  class Variable
38
49
  def initialize(name)
39
50
  @name = name
data/lib/rust-core.rb CHANGED
@@ -130,7 +130,10 @@ module Rust
130
130
  @labels.each { |label| @data[label] = [] }
131
131
  elsif labels_or_data.is_a? Hash
132
132
  @labels = labels_or_data.keys.map { |l| l.to_s }
133
- @data = labels_or_data.clone
133
+
134
+ labels_or_data.each do |key, value|
135
+ @data[key.to_s] = value.clone
136
+ end
134
137
  end
135
138
  end
136
139
 
@@ -142,6 +145,14 @@ module Rust
142
145
  end
143
146
  end
144
147
 
148
# Returns the i-th row as a plain Array whose values follow the order of
# @labels, or nil when i falls outside 0...rows.
def fast_row(i)
  return nil unless i >= 0 && i < self.rows

  @labels.map { |label| @data[label][i] }
end
155
+
145
156
  def shuffle(*args)
146
157
  result = DataFrame.new(@labels)
147
158
 
@@ -174,6 +185,7 @@ module Rust
174
185
  def column(name)
175
186
  return @data[name]
176
187
  end
188
+ alias :| :column
177
189
 
178
190
  def rename_column!(old_name, new_name)
179
191
  raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
@@ -195,6 +207,13 @@ module Rust
195
207
  return result
196
208
  end
197
209
 
210
# Tells whether the block returns a truthy value for at least one row.
# The block receives each row and its index (as produced by each_with_index);
# iteration stops at the first match.
def has_row?
  found = false
  self.each_with_index do |row, index|
    next unless yield(row, index)

    found = true
    break
  end
  found
end
216
+
198
217
  def select_columns(cols=nil)
199
218
  raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
200
219
 
@@ -215,6 +234,40 @@ module Rust
215
234
  @data.delete(column)
216
235
  end
217
236
 
237
# Removes the element at position i from every column stored in @data.
def delete_row(i)
  @data.each_value { |values| values.delete_at(i) }
end
242
+
243
# Non-destructive counterpart of uniq_by!: applies uniq_by!(by) to a clone of
# this object and returns that clone.
def uniq_by(by)
  self.clone.tap { |copy| copy.uniq_by!(by) }
end
248
+
249
# Drops, in place, every row whose values for the columns listed in `by`
# were already seen in an earlier row; the first occurrence is kept.
# Returns self.
def uniq_by!(by)
  seen = {}
  doomed = []

  self.each_with_index do |row, i|
    key = by.map { |colname| row[colname] }
    if seen[key]
      # Earlier deletions shift the following rows up, so the stored index
      # is corrected by the number of rows already scheduled for removal.
      doomed << (i - doomed.size)
    else
      seen[key] = i
    end
  end

  doomed.each { |position| self.delete_row(position) }

  self
end
270
+
218
271
  def column_names
219
272
  return @labels.map { |k| k.to_s }
220
273
  end
@@ -243,7 +296,7 @@ module Rust
243
296
  row.each do |key, value|
244
297
  @data[key.to_s] << value
245
298
  end
246
- #
299
+
247
300
  return true
248
301
  else
249
302
  raise TypeError, "Expected an Array or a Hash"
@@ -275,6 +328,14 @@ module Rust
275
328
  return self
276
329
  end
277
330
 
331
# Yields every row produced by fast_each_with_index, discarding the index.
# Returns self.
def fast_each
  self.fast_each_with_index { |element, _index| yield element }

  self
end
338
+
278
339
  def each_with_index
279
340
  for i in 0...self.rows
280
341
  element = {}
@@ -288,6 +349,19 @@ module Rust
288
349
  return self
289
350
  end
290
351
 
352
# Yields each row as a plain Array (values ordered like @labels) together
# with its index. Returns self.
def fast_each_with_index
  self.rows.times do |i|
    element = @labels.map { |label| @data[label][i] }
    yield element, i
  end

  self
end
364
+
291
365
  def load_in_r_as(variable_name)
292
366
  command = []
293
367
 
@@ -397,6 +471,77 @@ module Rust
397
471
  return result
398
472
  end
399
473
 
474
# Groups the rows by the value of column `by` and collapses every group into
# a single row of a new Rust::DataFrame.
#
# by::          name (String) of the grouping column
# aggregators:: optional per-column Procs (keyed by column name); each one
#               receives the group's column and returns the aggregated value
# block::       default aggregator, used for every column without a Proc
#
# Raises TypeError when `by` is not a String or an aggregator is not a Proc,
# and RuntimeError when no block is given.
def aggregate(by, **aggregators)
  raise TypeError, "Expected a string" unless by.is_a?(String)
  raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
  raise "Expected a block for default aggregator" unless block_given?

  aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h

  sorted = self.sort_by(by)

  # Split the sorted rows into runs sharing the same value of `by`.
  partials = []
  partial = nil
  current_value = nil
  sorted.column(by).each_with_index do |value, index|
    # `partial.nil?` opens the first group even when the first value is nil,
    # which a plain comparison with the initial nil marker would miss.
    if partial.nil? || current_value != value
      current_value = value
      partial = Rust::DataFrame.new(self.column_names)
      partials << partial
    end
    partial << sorted.fast_row(index)
  end

  # With no rows there are no groups, so the result is simply empty.
  result = Rust::DataFrame.new(self.column_names)
  partials.each do |group|
    aggregated_row = { by => group.column(by)[0] }
    (self.column_names - [by]).each do |column|
      values = group.column(column)
      aggregated_row[column] = aggregators[column] ? aggregators[column].call(values) : yield(values)
    end

    result << aggregated_row
  end

  return result
end
513
+
514
# Non-destructive counterpart of sort_by!: applies sort_by!(column) to a clone
# of this object and returns that clone.
def sort_by(column)
  self.clone.tap { |copy| copy.sort_by!(column) }
end
519
+
520
# Reorders every column in place so that column `by` is in ascending order.
# Rows with equal keys keep their original relative order.
#
# Returns the (now sorted) `by` column.
# Raises RuntimeError when no column named `by` exists.
def sort_by!(by)
  raise "This DataFrame does not contain a column named #{by}" unless @data[by]

  keys = @data[by]

  # order[k] is the original index of the row that ends up at position k.
  # The index is used as tie-breaker to make the ordering stable; this
  # replaces a quadratic Array#index scan per element.
  order = keys.each_index.sort_by { |i| [keys[i], i] }

  (self.column_names - [by]).each do |column_name|
    column = self.column(column_name)
    @data[column_name] = order.map { |i| column[i] }
  end

  # The key column is updated in place, as the previous sort! did.
  keys.replace(order.map { |i| keys[i] })
end
544
+
400
545
  def bind_rows!(dataframe)
401
546
  raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
402
547
  raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -480,7 +625,7 @@ module Rust
480
625
  end
481
626
  end
482
627
 
483
- class Sequence
628
+ class Sequence < RustDatatype
484
629
  attr_reader :min
485
630
  attr_reader :max
486
631
 
@@ -511,6 +656,103 @@ module Rust
511
656
  def to_R
512
657
  "seq(from=#@min, to=#@max, by=#@step)"
513
658
  end
659
+
660
# Evaluates, through Rust._eval, an R assignment of this object's to_R
# expression to the given variable name.
def load_in_r_as(variable_name)
  assignment = [variable_name, self.to_R].join(" <- ")
  Rust._eval(assignment)
end
663
+ end
664
+
665
# Array whose elements can be merged into one object with bind_all.
class DataFrameArray < Array
  # Clones the first element and calls bind_rows! on the clone with every
  # following element, in order. Returns the clone, or nil when empty.
  def bind_all
    return nil if self.empty?

    merged = self.first.clone
    self.drop(1).each { |frame| merged.bind_rows!(frame) }

    merged
  end
end
678
+
679
# Hash whose values can be merged into one object with bind_all.
class DataFrameHash < Hash
  # Clones the first value and calls bind_rows! on the clone with every
  # following value, in insertion order. Returns the clone, or nil when the
  # hash has no values.
  def bind_all
    frames = self.values
    return nil if frames.empty?

    merged = frames.first.clone
    frames.drop(1).each { |frame| merged.bind_rows!(frame) }

    merged
  end
end
692
+
693
# Array with element-wise arithmetic. Every operator returns a new MathArray
# and leaves the receiver untouched.
#
# -, *, + and / accept either a Numeric (applied to every element) or an
# Array of the same size (applied pairwise); ** accepts a Numeric only.
# Each operation uses the elements' own operator, so e.g. Integer elements
# divide as Integers.
class MathArray < Array
  def -(other)
    elementwise(other) { |a, b| a - b }
  end

  def *(other)
    elementwise(other) { |a, b| a * b }
  end

  def +(other)
    elementwise(other) { |a, b| a + b }
  end

  def /(other)
    elementwise(other) { |a, b| a / b }
  end

  def **(other)
    raise ArgumentError, "Expected numeric" if !other.is_a?(Numeric)

    elementwise(other) { |a, b| a ** b }
  end

  private

  # Validates `other`, expands a scalar to one operand per element and
  # stores the block's result for every (element, operand) pair in a clone.
  def elementwise(other)
    raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
    raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size

    operands = other.is_a?(Numeric) ? ::Array.new(self.size, other) : other
    result = self.clone
    result.each_index { |i| result[i] = yield(result[i], operands[i]) }

    return result
  end
end
515
757
  end
516
758
 
@@ -554,6 +796,14 @@ class Array
554
796
  def to_R
555
797
  return "c(#{self.map { |e| e.to_R }.join(",")})"
556
798
  end
799
+
800
# Counts the occurrences of every distinct element.
# Returns a Hash mapping each element to the number of times it appears.
def distribution
  self.each_with_object({}) do |value, counts|
    counts[value] = counts[value].to_i + 1
  end
end
557
807
  end
558
808
 
559
809
  class String
@@ -569,19 +819,21 @@ class Range
569
819
  end
570
820
 
571
821
  module Rust::RBindings
572
- def read_csv(filename, **options)
573
- Rust::CSV.read(filename, **options)
574
- end
575
-
576
- def write_csv(filename, dataframe, **options)
577
- Rust::CSV.write(filename, dataframe, **options)
578
- end
579
-
580
822
  def data_frame(*args)
581
823
  Rust::DataFrame.new(*args)
582
824
  end
583
825
  end
584
826
 
827
module Rust::TestCases
  # Builds a Rust::DataFrame with the given columns and `size` rows.
  # The block is called with (row index, column) for every cell and
  # returns the value to store there.
  def self.sample_dataframe(columns, size=100)
    frame = Rust::DataFrame.new(columns)
    size.times { |i| frame << columns.map { |c| yield i, c } }
    frame
  end
end
836
+
585
837
  def bind_r!
586
838
  include Rust::RBindings
587
839
  end
data/lib/rust-csv.rb CHANGED
@@ -3,7 +3,7 @@ require_relative 'rust-core'
3
3
  module Rust
4
4
  class CSV
5
5
  def self.read_all(pattern, **options)
6
- result = {}
6
+ result = DataFrameHash.new
7
7
  Dir.glob(pattern).each do |filename|
8
8
  result[filename] = CSV.read(filename, **options)
9
9
  end
@@ -49,10 +49,9 @@ module Rust
49
49
  raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
50
50
 
51
51
  write_headers = options[:headers] != false
52
- options[:headers] = dataframe.column_names if options[:headers] == nil
52
+ options[:headers] = dataframe.column_names unless options[:headers]
53
53
 
54
54
  hash = {}
55
- labels = nil
56
55
  ::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
57
56
  dataframe.each do |row|
58
57
  csv << row
@@ -93,3 +92,13 @@ module Rust
93
92
  end
94
93
  end
95
94
  end
95
+
96
# R-like CSV shortcuts; both simply forward to Rust::CSV.
module Rust::RBindings
  # Forwards to Rust::CSV.read with the same filename and options.
  def read_csv(filename, **options)
    return Rust::CSV.read(filename, **options)
  end

  # Forwards to Rust::CSV.write with the same filename, dataframe and options.
  def write_csv(filename, dataframe, **options)
    return Rust::CSV.write(filename, dataframe, **options)
  end
end
@@ -50,18 +50,59 @@ module Rust::Descriptive
50
50
  def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
51
51
  raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
52
52
  raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
53
- raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
53
+ raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
54
54
 
55
- Rust.exclusive do
56
- Rust['descriptive.data'] = data
57
- Rust['descriptive.percs'] = percentiles
58
-
59
- call_result = Rust._pull("quantile(descriptive.data, descriptive.percs)")
60
- assert { call_result.is_a?(Array) }
61
- assert { call_result.size == percentiles.size }
62
-
63
- return percentiles.zip(call_result).to_h
55
+ n = data.size
56
+ quantiles = percentiles.size
57
+ percentiles = percentiles.map { |x| x > 1.0 ? 1.0 : (x < 0.0 ? 0.0 : x) }
58
+
59
+ rough_indices = percentiles.map { |x| 1 + [n - 1, 0].max * x - 1 }
60
+ floor_indices = rough_indices.map { |i| i.floor }
61
+ ceil_indices = rough_indices.map { |i| i.ceil }
62
+
63
+ data = data.sort
64
+ result = floor_indices.map { |i| data[i] }
65
+ result_ceil = ceil_indices.map { |i| data[i] }
66
+
67
+ indices_to_fix = (0...quantiles).select { |i| rough_indices[i] > floor_indices[i] && result_ceil[i] != result[i] }
68
+ index_approximation_errors = indices_to_fix.map { |i| rough_indices[i] - floor_indices[i] }
69
+ reduced_index_approximation_errors = index_approximation_errors.map { |i| (1 - i) }
70
+ hi_indices = indices_to_fix.map { |i| ceil_indices[i] }
71
+ data_hi_indices = hi_indices.map { |i| data[i] }
72
+
73
+ j = 0
74
+ indices_to_fix.each do |i|
75
+ result[i] = reduced_index_approximation_errors[j] * result[i] + index_approximation_errors[j] * data_hi_indices[j]
76
+ j += 1
64
77
  end
78
+
79
+ return percentiles.zip(result).to_h
80
+ end
81
+
82
# Outliers of `data` measured against its own distribution; forwards to
# outliers_according_to with `data` used as both sample and reference.
def outliers(data, k=1.5, **opts)
  return outliers_according_to(data, data, k, **opts)
end
85
+
86
# Selects the elements of `data` that lie more than k interquartile ranges
# outside the quartiles of `data_distribution`.
#
# data::              values to test
# data_distribution:: values used to compute Q1, Q3 and the IQR
# k::                 IQR multiplier (default 1.5)
# opts[:side]::       :positive / :pos / :p / :+ keeps only values above
#                     Q3 + k*IQR; :negative / :neg / :n / :- keeps only values
#                     below Q1 - k*IQR; anything else (or nothing) keeps both
#
# Returns an Array with the negative outliers followed by the positive ones.
def outliers_according_to(data, data_distribution, k=1.5, **opts)
  quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
  q1 = quantiles[0.25]
  q3 = quantiles[0.75]
  iqr = q3 - q1

  positive_outliers = data.select { |d| d > q3 + iqr * k }
  negative_outliers = data.select { |d| d < q1 - iqr * k }

  outliers = negative_outliers + positive_outliers
  if opts[:side]
    # The short aliases used to be crossed (:neg/:n selected the positive
    # side and :pos/:p the negative one); each alias now matches its name.
    case opts[:side].to_sym
    when :positive, :pos, :p, :+
      outliers = positive_outliers
    when :negative, :neg, :n, :-
      outliers = negative_outliers
    end
  end

  return outliers
end
66
107
  end
67
108
  end
data/lib/rust-effsize.rb CHANGED
@@ -24,6 +24,10 @@ module Rust::EffectSize::CliffDelta
24
24
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
25
25
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
26
26
 
27
+ if d1.size <= 1 || d2.size <= 1
28
+ return Rust::EffectSize::Result.new
29
+ end
30
+
27
31
  Rust.exclusive do
28
32
  Rust['effsize.a'] = d1
29
33
  Rust['effsize.b'] = d2
@@ -32,10 +36,10 @@ module Rust::EffectSize::CliffDelta
32
36
 
33
37
  result = Rust::EffectSize::Result.new
34
38
  result.name = "Cliff's delta"
35
- result.estimate = Rust._pull("effsize.result$estimate")
36
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
37
- result.confidence_level = Rust._pull("effsize.result$conf.level")
38
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
39
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
40
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
41
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
42
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
39
43
 
40
44
  return result
41
45
  end
@@ -49,6 +53,10 @@ module Rust::EffectSize::CohenD
49
53
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
50
54
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
51
55
 
56
+ if d1.size <= 1 || d2.size <= 1
57
+ return Rust::EffectSize::Result.new
58
+ end
59
+
52
60
  Rust.exclusive do
53
61
  Rust['effsize.a'] = d1
54
62
  Rust['effsize.b'] = d2
@@ -57,10 +65,10 @@ module Rust::EffectSize::CohenD
57
65
 
58
66
  result = Rust::EffectSize::Result.new
59
67
  result.name = "Cohen's d"
60
- result.estimate = Rust._pull("effsize.result$estimate")
61
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
62
- result.confidence_level = Rust._pull("effsize.result$conf.level")
63
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
68
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
69
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
70
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
71
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
64
72
 
65
73
  return result
66
74
  end
data/lib/rust-plots.rb CHANGED
@@ -21,6 +21,14 @@ module Rust::Plots
21
21
  return self
22
22
  end
23
23
 
24
# Returns the colours to use for `size` series: plain black for at most one
# series, otherwise whatever R's hcl.colors(n=size) yields via Rust._pull.
def palette(size)
  return ['black'] if size <= 1

  Rust._pull("hcl.colors(n=#{size})")
end
31
+
24
32
  def x_range(range)
25
33
  @options['xlim'] = range
26
34
 
@@ -127,10 +135,18 @@ module Rust::Plots
127
135
  end
128
136
 
129
137
  class ScatterPlot < BasePlot
130
# Creates a scatter plot, optionally registering a first series.
#
# x, y::    coordinates of the first series; the series is added only when
#           both are given
# options:: per-series options, forwarded to #series
def initialize(x = nil, y = nil, **options)
  super()
  @series = []
  # The options must be re-splatted: #series takes keywords only, so passing
  # the Hash positionally raises ArgumentError on Ruby 3.
  self.series(x, y, **options) if x && y
end
145
+
146
# Registers one more series (x values, y values, per-series options).
# Returns self so that calls can be chained.
def series(x, y, **options)
  @series.push([x, y, options])

  self
end
135
151
 
136
152
  def thickness(t)
@@ -159,13 +175,66 @@ module Rust::Plots
159
175
 
160
176
  protected
161
177
  def _show()
162
- Rust["plotter.x"] = @x
163
- Rust["plotter.y"] = @y
178
+ first = true
179
+ palette = self.palette(@series.size)
180
+ i = 0
181
+
182
+ base_options = {}
183
+ unless @options['xlim']
184
+ x_values = @series.map { |v| v[0] }.flatten
185
+ y_values = @series.map { |v| v[1] }.flatten
186
+
187
+ base_options[:xlim] = [x_values.min, x_values.max]
188
+ base_options[:ylim] = [y_values.min, y_values.max]
189
+ end
190
+
191
+ @series.each do |x, y, options|
192
+ options = options.merge(base_options)
193
+ Rust["plotter.x"] = x
194
+ Rust["plotter.y"] = y
195
+
196
+ function = nil
197
+ if first
198
+ function = Rust::Function.new("plot")
199
+ first = false
200
+ else
201
+ function = Rust::Function.new("lines")
202
+ end
203
+
204
+ augmented_options = {}
205
+ augmented_options['col'] = options[:color] || palette[i]
206
+ augmented_options['xlim'] = options[:xlim] if options[:xlim]
207
+ augmented_options['ylim'] = options[:ylim] if options[:ylim]
208
+
209
+ function.options = self._augmented_options(augmented_options)
210
+ function.arguments << Rust::Variable.new("plotter.x")
211
+ function.arguments << Rust::Variable.new("plotter.y")
212
+
213
+ function.call
214
+
215
+ i += 1
216
+ end
217
+
218
+ return self
219
+ end
220
+ end
221
+
222
+ class BarPlot < BasePlot
223
+ def initialize(bars)
224
+ super()
225
+ @bars = bars
226
+ end
227
+
228
+ protected
229
+ def _show()
230
+ Rust["plotter.bars"] = @bars.values
231
+ Rust["plotter.labels"] = @bars.keys
232
+
233
+ Rust._eval("names(plotter.bars) <- plotter.labels")
164
234
 
165
- function = Rust::Function.new("plot")
235
+ function = Rust::Function.new("barplot")
166
236
  function.options = self._augmented_options
167
- function.arguments << Rust::Variable.new("plotter.x")
168
- function.arguments << Rust::Variable.new("plotter.y")
237
+ function.arguments << Rust::Variable.new("plotter.bars")
169
238
 
170
239
  function.call
171
240
 
@@ -0,0 +1,248 @@
1
+ require_relative 'rust-core'
2
+
3
class Numeric
  # Absolute difference between this number and `other`.
  # Raises TypeError when `other` is not a Numeric.
  def distance(other)
    unless other.is_a?(Numeric)
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    (self - other).abs
  end
end
10
+
11
class Array
  # Sum of the element-wise distances between the two arrays. Elements are
  # converted with to_i first, and positions missing from the shorter array
  # therefore count as 0.
  # Raises TypeError when `other` is not an Array.
  def distance(other)
    raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array

    longest, shortest = self.size > other.size ? [self, other] : [other, self]

    longest.each_with_index.inject(0) do |total, (value, i)|
      total + value.to_i.distance(shortest[i].to_i)
    end
  end
end
25
+
26
class String
  # Distance between the byte arrays of the two strings (Array#distance).
  # Raises TypeError when `other` is not a String.
  def distance(other)
    raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String

    own_bytes, other_bytes = self.bytes, other.bytes
    own_bytes.distance(other_bytes)
  end
end
33
+
34
+ module Rust
35
# A subset of the outcomes of a random variable: a Hash mapping each outcome
# to its probability. Comparison operators do not return booleans; they
# return the sub-slice of outcomes satisfying the comparison.
class RandomVariableSlice
  # values:: Hash of outcome => probability. Raises TypeError otherwise.
  def initialize(values)
    raise TypeError, "Expected Hash" unless values.is_a?(Hash)

    @values = values
  end

  # Without argument (or with a falsy one): total probability of the slice.
  # With an outcome: the probability stored for that outcome.
  def probability(v=nil)
    return @values[v] if v

    @values.values.sum
  end

  # Outcome with the highest probability.
  def ml
    @values.max_by { |_outcome, likelihood| likelihood }.first
  end

  # Sum of outcome * probability over the slice.
  def expected
    weighted = @values.map { |outcome, likelihood| outcome * likelihood }
    weighted.sum
  end

  def >(n)
    self.so_that { |outcome| outcome > n }
  end

  def >=(n)
    self.so_that { |outcome| outcome >= n }
  end

  def <(n)
    self.so_that { |outcome| outcome < n }
  end

  def <=(n)
    self.so_that { |outcome| outcome <= n }
  end

  def ==(n)
    self.so_that { |outcome| outcome == n }
  end

  # Sub-slice with the outcomes for which the block returns a truthy value.
  def so_that
    kept = @values.select { |outcome, _likelihood| yield(outcome) }
    RandomVariableSlice.new(kept)
  end

  # Sub-slice with the outcomes lying between a and b (both included).
  def between(a, b)
    kept = @values.select { |outcome, _likelihood| outcome.between? a, b }
    RandomVariableSlice.new(kept)
  end
end
86
+
87
# A discrete random variable: a Hash mapping every outcome to its
# probability, where the probabilities add up to 1 (within EPSILON).
class RandomVariable < RandomVariableSlice
  # Tolerance used for the total probability and for pruning outcomes.
  EPSILON = 1e-7

  attr_reader :values

  # values:: Hash of outcome => probability (default: the constant 0)
  # exact::  when false, outcomes with probability <= EPSILON are merged
  #          into their nearest neighbour (see approx!)
  #
  # Raises RuntimeError when a probability is outside [0, 1] or the total
  # differs from 1 by more than EPSILON.
  def initialize(values = {0 => 1.0}, exact = false)
    @values = values
    @exact = exact

    raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
    raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

    approx!
  end

  # Probability of outcome v (0.0 when v is not a known outcome).
  def probability(v)
    return @values[v].to_f
  end

  # Distribution of the sum of this variable and `other`; the probability of
  # each pair of outcomes is the product of the two probabilities.
  def +(other)
    new_hash = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        sum_key = my_key + other_key

        new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
      end
    end

    return RandomVariable.new(new_hash, @exact)
  end

  # With an Integer: sum of that many copies of this variable (see rep).
  # With a RandomVariable: distribution of the product (see mul).
  def *(times)
    if times.is_a? Integer
      return rep(times)
    elsif times.is_a? RandomVariable
      return mul(times)
    else
      raise "The argument must be an Integer or a RandomVariable"
    end
  end

  # Distribution of the product of this variable and `other`.
  def mul(other)
    new_hash = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        mul_key = my_key * other_key

        new_hash[mul_key] = new_hash[mul_key].to_f + (my_value * other_value)
      end
    end

    return RandomVariable.new(new_hash, @exact)
  end

  # Adds this variable to itself times-1 times.
  def rep(times)
    rv = self
    (times-1).times do
      rv += self
    end

    return rv
  end

  # Disables the pruning performed by approx! from now on.
  def exact!
    @exact = true
  end

  # Removes every outcome whose probability is <= EPSILON and adds that
  # probability to the remaining outcome closest to it (according to the
  # outcomes' `distance` method). Does nothing in exact mode.
  def approx!
    return if @exact

    to_delete = []
    @values.each do |v, probability|
      to_delete.push v if probability <= EPSILON
    end

    to_delete.each do |v|
      probability = @values.delete v
      nearest = @values.keys.min_by { |k| k.distance v }
      @values[nearest] += probability
    end
  end

  # Draws one outcome at random according to the stored probabilities.
  def extract
    v = rand

    cumulative = 0
    @values.each do |key, prob|
      cumulative += prob

      return key if cumulative >= v
    end

    # The total is only guaranteed to be within EPSILON of 1, so rounding
    # can leave the cumulative sum just below the drawn number; the last
    # outcome is returned then instead of falling through with the Hash.
    return @values.keys.last
  end

  # Stores under `key` the probability needed to bring the total of `hash`
  # to 1 and builds a RandomVariable from it. NOTE: `hash` itself is modified.
  def self.complete(hash, key=0)
    hash[key] = 1 - hash.values.sum
    return RandomVariable.new(hash)
  end
end
188
+
189
# Random variable giving the same probability to each of the given outcomes.
class UniformRandomVariable < RandomVariable
  # values:: list of outcomes
  # exact::  forwarded unchanged to RandomVariable#initialize
  def initialize(values, exact = false)
    share = 1.0 / values.size
    super(values.map { |outcome| [outcome, share] }.to_h, exact)
  end
end
194
+
195
# Mathematical-looking shortcuts for slices of random variables.
module Probabilities
  # Total probability of a slice, e.g. P(dice > 4).
  # Raises RuntimeError for a whole RandomVariable and for any object that
  # is not a RandomVariableSlice.
  def P(v)
    if v.is_a? RandomVariableSlice
      raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
      return v.probability
    else
      # This message used to mention the "expected value" (copied from E).
      raise "Cannot compute the probability of a #{v.class}"
    end
  end

  # Expected value of a slice or of a whole random variable.
  # Raises RuntimeError for any object that is not a RandomVariableSlice.
  def E(v)
    if v.is_a? RandomVariableSlice
      return v.expected
    else
      raise "Cannot compute the expected value of a #{v.class}"
    end
  end
end
213
+
214
# Ready-made random variables.
class RandomVariable
  # Probability of each lowercase letter of the English alphabet.
  ENGLISH_ALPHABET = RandomVariable.new({
    "a" => 0.08167, "b" => 0.01492, "c" => 0.02782, "d" => 0.04253,
    "e" => 0.12703, "f" => 0.02228, "g" => 0.02015, "h" => 0.06094,
    "i" => 0.06966, "j" => 0.00153, "k" => 0.00772, "l" => 0.04025,
    "m" => 0.02406, "n" => 0.06749, "o" => 0.07507, "p" => 0.01929,
    "q" => 0.00095, "r" => 0.05987, "s" => 0.06327, "t" => 0.09056,
    "u" => 0.02758, "v" => 0.00978, "w" => 0.02360, "x" => 0.00150,
    "y" => 0.01974, "z" => 0.00074
  })

  # Six outcomes (1 to 6), all equally likely.
  DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])

  # Two equally likely outcomes, "h" and "t".
  COIN = UniformRandomVariable.new(["h", "t"])
end
248
+ end
data/lib/rust-tests.rb CHANGED
@@ -7,6 +7,7 @@ module Rust::StatisticalTests
7
7
  attr_accessor :pvalue
8
8
  attr_accessor :exact
9
9
  attr_accessor :alpha
10
+ attr_accessor :hypothesis
10
11
 
11
12
  def initialize
12
13
  @statistics = {}
@@ -20,6 +21,16 @@ module Rust::StatisticalTests
20
21
  @statistics[name.to_sym] = value
21
22
  end
22
23
 
24
# P-value of this result corrected for the other results of the same
# hypothesis, as computed by the hypothesis' adjusted_pvalue_for.
# Returns 1 when no hypothesis is attached.
def adjusted_pvalue(method='bonferroni')
  if @hypothesis
    @hypothesis.adjusted_pvalue_for(self, method)
  else
    1
  end
end
28
+
29
# Attaches this result to a hypothesis and registers the result with it
# (through the hypothesis' `add`). Assigning nil only detaches the result:
# a nil hypothesis is a valid state (adjusted_pvalue handles it), so there
# is nothing to register with.
def hypothesis=(value)
  @hypothesis = value
  @hypothesis.add(self) if @hypothesis
end
33
+
23
34
  def significant
24
35
  pvalue < alpha
25
36
  end
@@ -31,32 +42,150 @@ module Rust::StatisticalTests
31
42
  (!exact ? " P-value is not exact." : "")
32
43
  end
33
44
  end
45
+
46
# A named group of test results whose p-values are adjusted together.
class Hypothesis
  attr_reader :results
  attr_reader :title

  # Resolves the argument to a Hypothesis:
  # * nil        -> a new untitled hypothesis
  # * String     -> the live instance with that title found through
  #                 ObjectSpace, or a new one with that title
  # * Hypothesis -> the instance itself
  # Raises TypeError for anything else.
  def self.find(title_or_instance)
    case title_or_instance
    when nil
      Hypothesis.new(nil)
    when String
      ObjectSpace.each_object(Hypothesis) do |candidate|
        return candidate if candidate.title == title_or_instance
      end

      Hypothesis.new(title_or_instance)
    when Hypothesis
      title_or_instance
    else
      raise TypeError, "Expected nil, String or Hypothesis"
    end
  end

  def initialize(title)
    @title = title
    @results = []
  end

  # Appends a result to this hypothesis.
  def add(result)
    @results << result
  end

  # Adjusts the p-values of all the results with the named method and
  # returns the one belonging to `instance`. When the adjustment yields a
  # single Numeric, that value is returned as is.
  def adjusted_pvalue_for(instance, method)
    position = @results.index(instance)
    adjuster = Rust::StatisticalTests::PValueAdjustment.method(method)
    adjusted = adjuster.adjust(*@results.map { |r| r.pvalue })

    adjusted.is_a?(Numeric) ? adjusted : adjusted[position]
  end
end
88
+ end
89
+
90
# P-value adjustment strategies, all delegating to R's p.adjust.
module Rust::StatisticalTests::PValueAdjustment
  # Maps a method name (case-insensitive String or Symbol, long or short
  # form) to the class implementing it. Returns nil for an unknown name.
  def self.method(name)
    case name.to_s.downcase
    when "bonferroni", "b"
      Bonferroni
    when "holm", "h"
      Holm
    when "hochberg"
      Hochberg
    when "hommel"
      Hommel
    when "benjaminihochberg", "bh"
      BenjaminiHochberg
    when "benjaminiyekutieli", "by"
      BenjaminiYekutieli
    end
  end

  # Loads the p-values into R as adjustment.p and pulls the result of
  # p.adjust with the given R method name.
  def self.adjust_with(r_method, p_values)
    Rust.exclusive do
      Rust['adjustment.p'] = p_values
      return Rust._pull("p.adjust(adjustment.p, method=\"#{r_method}\")")
    end
  end

  class Bonferroni
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("bonferroni", p_values)
    end
  end

  class Holm
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("holm", p_values)
    end
  end

  class Hochberg
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("hochberg", p_values)
    end
  end

  class Hommel
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("hommel", p_values)
    end
  end

  class BenjaminiHochberg
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("BH", p_values)
    end
  end

  class BenjaminiYekutieli
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("BY", p_values)
    end
  end
end
35
163
 
36
164
  module Rust::StatisticalTests::Wilcoxon
37
165
  class << self
38
- def paired(d1, d2, alpha = 0.05)
166
+ def paired(d1, d2, alpha = 0.05, **options)
39
167
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
40
168
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
41
169
  raise "The two distributions have different size" if d1.size != d2.size
42
-
170
+
43
171
  Rust.exclusive do
44
172
  Rust["wilcox.a"] = d1
45
173
  Rust["wilcox.b"] = d2
46
174
 
47
175
  _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=T)", true)
48
176
  result = Rust::StatisticalTests::Result.new
49
- result.name = "Wilcoxon Signed-Rank test"
50
- result.pvalue = Rust._pull("wilcox.result$p.value")
51
- result[:w] = Rust._pull("wilcox.result$statistic")
52
- result.exact = !warnings.include?("cannot compute exact p-value with zeroes")
53
- result.alpha = alpha
177
+ result.name = "Wilcoxon Signed-Rank test"
178
+ result.pvalue = Rust._pull("wilcox.result$p.value")
179
+ result[:w] = Rust._pull("wilcox.result$statistic")
180
+ result.exact = !warnings.include?("cannot compute exact p-value with zeroes")
181
+ result.alpha = alpha
182
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
54
183
 
55
184
  return result
56
185
  end
57
186
  end
58
187
 
59
- def unpaired(d1, d2, alpha = 0.05)
188
+ def unpaired(d1, d2, alpha = 0.05, **options)
60
189
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
61
190
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
62
191
 
@@ -66,11 +195,12 @@ module Rust::StatisticalTests::Wilcoxon
66
195
 
67
196
  _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=F)", true)
68
197
  result = Rust::StatisticalTests::Result.new
69
- result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
70
- result.pvalue = Rust._pull("wilcox.result$p.value")
71
- result[:w] = Rust._pull("wilcox.result$statistic")
72
- result.exact = !warnings.include?("cannot compute exact p-value with ties")
73
- result.alpha = alpha
198
+ result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
199
+ result.pvalue = Rust._pull("wilcox.result$p.value")
200
+ result[:w] = Rust._pull("wilcox.result$statistic")
201
+ result.exact = !warnings.include?("cannot compute exact p-value with ties")
202
+ result.alpha = alpha
203
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
74
204
 
75
205
  return result
76
206
  end
@@ -80,7 +210,7 @@ end
80
210
 
81
211
  module Rust::StatisticalTests::T
82
212
  class << self
83
- def paired(d1, d2, alpha = 0.05)
213
+ def paired(d1, d2, alpha = 0.05, **options)
84
214
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
85
215
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
86
216
  raise "The two distributions have different size" if d1.size != d2.size
@@ -91,17 +221,18 @@ module Rust::StatisticalTests::T
91
221
 
92
222
  warnings = Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=T)")
93
223
  result = Rust::StatisticalTests::Result.new
94
- result.name = "Paired t-test"
95
- result.pvalue = Rust._pull("t.result$p.value")
96
- result[:t] = Rust._pull("t.result$statistic")
97
- result.exact = true
98
- result.alpha = alpha
224
+ result.name = "Paired t-test"
225
+ result.pvalue = Rust._pull("t.result$p.value")
226
+ result[:t] = Rust._pull("t.result$statistic")
227
+ result.exact = true
228
+ result.alpha = alpha
229
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
99
230
 
100
231
  return result
101
232
  end
102
233
  end
103
234
 
104
- def unpaired(d1, d2, alpha = 0.05)
235
+ def unpaired(d1, d2, alpha = 0.05, **options)
105
236
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
106
237
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
107
238
 
@@ -111,11 +242,12 @@ module Rust::StatisticalTests::T
111
242
 
112
243
  Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=F)")
113
244
  result = Rust::StatisticalTests::Result.new
114
- result.name = "Welch Two Sample t-test"
115
- result.pvalue = Rust._pull("t.result$p.value")
116
- result[:t] = Rust._pull("t.result$statistic")
117
- result.exact = true
118
- result.alpha = alpha
245
+ result.name = "Welch Two Sample t-test"
246
+ result.pvalue = Rust._pull("t.result$p.value")
247
+ result[:t] = Rust._pull("t.result$statistic")
248
+ result.exact = true
249
+ result.alpha = alpha
250
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
119
251
 
120
252
  return result
121
253
  end
@@ -125,18 +257,19 @@ end
125
257
 
126
258
  module Rust::StatisticalTests::Shapiro
127
259
  class << self
128
- def compute(vector, alpha = 0.05)
260
+ def compute(vector, alpha = 0.05, **options)
129
261
  raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
130
262
  Rust.exclusive do
131
263
  Rust['shapiro.v'] = vector
132
264
 
133
265
  Rust._eval("shapiro.result = shapiro.test(shapiro.v)")
134
266
  result = Rust::StatisticalTests::Result.new
135
- result.name = "Shapiro-Wilk normality test"
136
- result.pvalue = Rust._pull("shapiro.result$p.value")
137
- result[:W] = Rust._pull("shapiro.result$statistic")
138
- result.exact = true
139
- result.alpha = alpha
267
+ result.name = "Shapiro-Wilk normality test"
268
+ result.pvalue = Rust._pull("shapiro.result$p.value")
269
+ result[:W] = Rust._pull("shapiro.result$statistic")
270
+ result.exact = true
271
+ result.alpha = alpha
272
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
140
273
 
141
274
  return result
142
275
  end
data/lib/rust.rb CHANGED
@@ -6,3 +6,4 @@ require_relative 'rust-effsize'
6
6
  require_relative 'rust-descriptive'
7
7
  require_relative 'rust-plots'
8
8
  require_relative 'rust-calls'
9
+ require_relative 'rust-probabilities'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.7'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-15 00:00:00.000000000 Z
11
+ date: 2021-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby
@@ -63,6 +63,7 @@ files:
63
63
  - lib/rust-descriptive.rb
64
64
  - lib/rust-effsize.rb
65
65
  - lib/rust-plots.rb
66
+ - lib/rust-probabilities.rb
66
67
  - lib/rust-tests.rb
67
68
  - lib/rust.rb
68
69
  homepage: https://github.com/intersimone999/ruby-rust
@@ -84,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
85
  - !ruby/object:Gem::Version
85
86
  version: '0'
86
87
  requirements: []
87
- rubygems_version: 3.1.4
88
+ rubygems_version: 3.2.7
88
89
  signing_key:
89
90
  specification_version: 4
90
91
  summary: Ruby advanced statistical library