rust 0.4 → 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca9e5aaa6bcfff9d1b261c5a3ced5cf74b7731085b4b094dba55df72884aca9a
4
- data.tar.gz: 8c90363b44a0c95abc9610fb78c3c632c6ae4265ad2260ff7e9740777245f63e
3
+ metadata.gz: 35c41ec98f5b286ef597096152249f19825a27bc33030e85ea196df8b778d3b1
4
+ data.tar.gz: 48b2e61f707ebcd05fa5f593016c8c0fb4b9d1e58751dd5deae83715583bd978
5
5
  SHA512:
6
- metadata.gz: 0ca17e2c0dda2188138f11e1ae4becaa8a5c4b0d2cc12273775b9ade1fefbc860f3ccee2251fbe353b9dbde55eded960e8cf26642af042070d979ed192b332e3
7
- data.tar.gz: 28dacd36f814acf51d222c8e746f65b94ef53ab5f24902ba6f654b05267c926e8db03cca82a876ca1183f55293da96d60ec862c5bfa26f6abd430d1f5e998709
6
+ metadata.gz: b0cc1289721d52cd376e0a242c2be419ea511f03a0fdd42c4c7a1973cde24e8dfc467e9d355693f85632cccb98af6986777aa020b715db5f06aa88a63d154ea0
7
+ data.tar.gz: 4c91c808773dcd04913a594cfcaa7956318908e6d590ebec1d9a04beef18986adc70a3c5e480c39bdc9f753b5edf920a1939208b8b530ccfc67124ac153a4804
data/lib/rust-basics.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require_relative 'rust-core'
2
2
 
3
- module Rust:: Correlation
3
+ module Rust::Correlation
4
4
  class Pearson
5
5
  def self.test(d1, d2)
6
6
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
data/lib/rust-calls.rb CHANGED
@@ -34,6 +34,17 @@ module Rust
34
34
  end
35
35
  end
36
36
 
37
+ class SimpleFormula
38
+ def initialize(dependent, independent)
39
+ @dependent = dependent
40
+ @independent = independent
41
+ end
42
+
43
+ def to_R
44
+ return "#@dependent ~ #@independent"
45
+ end
46
+ end
47
+
37
48
  class Variable
38
49
  def initialize(name)
39
50
  @name = name
data/lib/rust-core.rb CHANGED
@@ -130,7 +130,10 @@ module Rust
130
130
  @labels.each { |label| @data[label] = [] }
131
131
  elsif labels_or_data.is_a? Hash
132
132
  @labels = labels_or_data.keys.map { |l| l.to_s }
133
- @data = labels_or_data.clone
133
+
134
+ labels_or_data.each do |key, value|
135
+ @data[key.to_s] = value.clone
136
+ end
134
137
  end
135
138
  end
136
139
 
@@ -142,6 +145,14 @@ module Rust
142
145
  end
143
146
  end
144
147
 
148
+ def fast_row(i)
149
+ if i < 0 || i >= self.rows
150
+ return nil
151
+ else
152
+ return @labels.map { |label| @data[label][i] }
153
+ end
154
+ end
155
+
145
156
  def shuffle(*args)
146
157
  result = DataFrame.new(@labels)
147
158
 
@@ -174,6 +185,7 @@ module Rust
174
185
  def column(name)
175
186
  return @data[name]
176
187
  end
188
+ alias :| :column
177
189
 
178
190
  def rename_column!(old_name, new_name)
179
191
  raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
@@ -195,6 +207,13 @@ module Rust
195
207
  return result
196
208
  end
197
209
 
210
+ def has_row?
211
+ self.each_with_index do |row, i|
212
+ return true if yield row, i
213
+ end
214
+ return false
215
+ end
216
+
198
217
  def select_columns(cols=nil)
199
218
  raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
200
219
 
@@ -215,6 +234,40 @@ module Rust
215
234
  @data.delete(column)
216
235
  end
217
236
 
237
+ def delete_row(i)
238
+ @data.each do |label, column|
239
+ column.delete_at(i)
240
+ end
241
+ end
242
+
243
+ def uniq_by(by)
244
+ result = self.clone
245
+ result.uniq_by!(by)
246
+ return result
247
+ end
248
+
249
+ def uniq_by!(by)
250
+ my_keys = {}
251
+ to_delete = []
252
+ self.each_with_index do |row, i|
253
+ key = []
254
+ by.each do |colname|
255
+ key << row[colname]
256
+ end
257
+ unless my_keys[key]
258
+ my_keys[key] = i
259
+ else
260
+ to_delete << (i-to_delete.size)
261
+ end
262
+ end
263
+
264
+ to_delete.each do |i|
265
+ self.delete_row(i)
266
+ end
267
+
268
+ return self
269
+ end
270
+
218
271
  def column_names
219
272
  return @labels.map { |k| k.to_s }
220
273
  end
@@ -243,7 +296,7 @@ module Rust
243
296
  row.each do |key, value|
244
297
  @data[key.to_s] << value
245
298
  end
246
- #
299
+
247
300
  return true
248
301
  else
249
302
  raise TypeError, "Expected an Array or a Hash"
@@ -275,6 +328,14 @@ module Rust
275
328
  return self
276
329
  end
277
330
 
331
+ def fast_each
332
+ self.fast_each_with_index do |element, i|
333
+ yield element
334
+ end
335
+
336
+ return self
337
+ end
338
+
278
339
  def each_with_index
279
340
  for i in 0...self.rows
280
341
  element = {}
@@ -288,6 +349,19 @@ module Rust
288
349
  return self
289
350
  end
290
351
 
352
+ def fast_each_with_index
353
+ for i in 0...self.rows
354
+ element = []
355
+ @labels.each do |label|
356
+ element << @data[label][i]
357
+ end
358
+
359
+ yield element, i
360
+ end
361
+
362
+ return self
363
+ end
364
+
291
365
  def load_in_r_as(variable_name)
292
366
  command = []
293
367
 
@@ -397,6 +471,77 @@ module Rust
397
471
  return result
398
472
  end
399
473
 
474
# Groups the rows of this data frame by the values of column `by` and
# collapses every group into a single row.
#
# by          - name (String) of the grouping column.
# aggregators - per-column callables (Proc) keyed by column name; each one
#               receives the Array of the group's values for that column.
# block       - default aggregator, used for every column that has no
#               dedicated aggregator; receives the same kind of Array.
#
# Returns a new Rust::DataFrame with one row per run of equal `by` values
# in the sorted data (no rows at all when this data frame is empty).
# Raises TypeError when `by` is not a String or an aggregator is not a Proc,
# and RuntimeError when no block is given.
def aggregate(by, **aggregators)
  raise TypeError, "Expected a string" unless by.is_a?(String)
  raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
  raise "Expected a block for default aggregator" unless block_given?

  aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h

  names = self.column_names
  sorted = self.sort_by(by)
  columns = names.map { |name| [name, sorted.column(name)] }.to_h
  keys = columns[by]

  result = Rust::DataFrame.new(names)

  # Split the sorted row indices wherever the key changes. Working on indices
  # (instead of tracking a "current value") also copes with an empty frame
  # and with a first key that is nil.
  groups = (0...keys.size).slice_when { |i, j| keys[i] != keys[j] }
  groups.each do |indices|
    aggregated_row = { by => keys[indices.first] }
    (names - [by]).each do |name|
      values = indices.map { |i| columns[name][i] }
      aggregated_row[name] = aggregators[name] ? aggregators[name].call(values) : yield(values)
    end

    result << aggregated_row
  end

  return result
end
513
+
514
+ def sort_by(column)
515
+ result = self.clone
516
+ result.sort_by!(column)
517
+ return result
518
+ end
519
+
520
# Sorts the rows of this data frame in place by the values of column `by`.
# Rows with equal keys keep their original relative order.
#
# by - name of the column used as sort key.
#
# Returns self.
# Raises RuntimeError when `by` is not a column of this data frame.
def sort_by!(by)
  keys = @data[by]
  raise "This DataFrame does not contain a column named #{by}" unless keys

  # Compute the row permutation once (ties broken by original position),
  # then apply it to every column, the key column included.
  order = keys.each_index.sort_by { |i| [keys[i], i] }
  @data.each_key do |name|
    column = @data[name]
    @data[name] = order.map { |i| column[i] }
  end

  return self
end
544
+
400
545
  def bind_rows!(dataframe)
401
546
  raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
402
547
  raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
@@ -480,7 +625,7 @@ module Rust
480
625
  end
481
626
  end
482
627
 
483
- class Sequence
628
+ class Sequence < RustDatatype
484
629
  attr_reader :min
485
630
  attr_reader :max
486
631
 
@@ -511,6 +656,103 @@ module Rust
511
656
  def to_R
512
657
  "seq(from=#@min, to=#@max, by=#@step)"
513
658
  end
659
+
660
+ def load_in_r_as(variable_name)
661
+ Rust._eval("#{variable_name} <- #{self.to_R}")
662
+ end
663
+ end
664
+
665
+ class DataFrameArray < Array
666
+ def bind_all
667
+ return nil if self.size == 0
668
+
669
+ result = self.first.clone
670
+
671
+ for i in 1...self.size
672
+ result .bind_rows!(self[i])
673
+ end
674
+
675
+ return result
676
+ end
677
+ end
678
+
679
+ class DataFrameHash < Hash
680
+ def bind_all
681
+ return nil if self.values.size == 0
682
+
683
+ result = self.values.first.clone
684
+
685
+ for i in 1...self.values.size
686
+ result .bind_rows!(self.values[i])
687
+ end
688
+
689
+ return result
690
+ end
691
+ end
692
+
693
# Array whose arithmetic operators work element by element.
#
# `-`, `*`, `+` and `/` accept either a Numeric (applied to every element)
# or an Array of the same size (applied pairwise). `**` accepts a Numeric
# exponent only. Every operator returns a new MathArray and leaves the
# receiver untouched.
class MathArray < Array
  # Element-wise subtraction.
  def -(other)
    elementwise(other) { |a, b| a - b }
  end

  # Element-wise multiplication.
  def *(other)
    elementwise(other) { |a, b| a * b }
  end

  # Element-wise addition.
  def +(other)
    elementwise(other) { |a, b| a + b }
  end

  # Element-wise division (integer division when both operands are Integers).
  def /(other)
    elementwise(other) { |a, b| a / b }
  end

  # Raises every element to the power `other`.
  #
  # Raises ArgumentError unless `other` is a Numeric.
  def **(other)
    raise ArgumentError, "Expected numeric" if !other.is_a?(Numeric)

    result = self.clone
    each_index { |i| result[i] = self[i] ** other }
    return result
  end

  private

  # Shared implementation of the binary operators: validates `other`, then
  # yields each element together with its counterpart (the scalar itself, or
  # the element of `other` at the same index) and collects the block results.
  #
  # Raises ArgumentError when `other` is neither an Array nor a Numeric, or
  # when it is an Array of a different size.
  def elementwise(other)
    raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
    raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size

    scalar = other.is_a?(Numeric)
    result = self.clone
    each_index { |i| result[i] = yield(self[i], scalar ? other : other[i]) }
    return result
  end
end
515
757
  end
516
758
 
@@ -554,6 +796,14 @@ class Array
554
796
  def to_R
555
797
  return "c(#{self.map { |e| e.to_R }.join(",")})"
556
798
  end
799
+
800
+ def distribution
801
+ result = {}
802
+ self.each do |value|
803
+ result[value] = result[value].to_i + 1
804
+ end
805
+ return result
806
+ end
557
807
  end
558
808
 
559
809
  class String
@@ -569,19 +819,21 @@ class Range
569
819
  end
570
820
 
571
821
  module Rust::RBindings
572
- def read_csv(filename, **options)
573
- Rust::CSV.read(filename, **options)
574
- end
575
-
576
- def write_csv(filename, dataframe, **options)
577
- Rust::CSV.write(filename, dataframe, **options)
578
- end
579
-
580
822
  def data_frame(*args)
581
823
  Rust::DataFrame.new(*args)
582
824
  end
583
825
  end
584
826
 
827
+ module Rust::TestCases
828
+ def self.sample_dataframe(columns, size=100)
829
+ result = Rust::DataFrame.new(columns)
830
+ size.times do |i|
831
+ result << columns.map { |c| yield i, c }
832
+ end
833
+ return result
834
+ end
835
+ end
836
+
585
837
  def bind_r!
586
838
  include Rust::RBindings
587
839
  end
data/lib/rust-csv.rb CHANGED
@@ -3,7 +3,7 @@ require_relative 'rust-core'
3
3
  module Rust
4
4
  class CSV
5
5
  def self.read_all(pattern, **options)
6
- result = {}
6
+ result = DataFrameHash.new
7
7
  Dir.glob(pattern).each do |filename|
8
8
  result[filename] = CSV.read(filename, **options)
9
9
  end
@@ -49,10 +49,9 @@ module Rust
49
49
  raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
50
50
 
51
51
  write_headers = options[:headers] != false
52
- options[:headers] = dataframe.column_names if options[:headers] == nil
52
+ options[:headers] = dataframe.column_names unless options[:headers]
53
53
 
54
54
  hash = {}
55
- labels = nil
56
55
  ::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
57
56
  dataframe.each do |row|
58
57
  csv << row
@@ -93,3 +92,13 @@ module Rust
93
92
  end
94
93
  end
95
94
  end
95
+
96
+ module Rust::RBindings
97
+ def read_csv(filename, **options)
98
+ Rust::CSV.read(filename, **options)
99
+ end
100
+
101
+ def write_csv(filename, dataframe, **options)
102
+ Rust::CSV.write(filename, dataframe, **options)
103
+ end
104
+ end
@@ -50,18 +50,59 @@ module Rust::Descriptive
50
50
  def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
51
51
  raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
52
52
  raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
53
- raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
53
+ raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }
54
54
 
55
- Rust.exclusive do
56
- Rust['descriptive.data'] = data
57
- Rust['descriptive.percs'] = percentiles
58
-
59
- call_result = Rust._pull("quantile(descriptive.data, descriptive.percs)")
60
- assert { call_result.is_a?(Array) }
61
- assert { call_result.size == percentiles.size }
62
-
63
- return percentiles.zip(call_result).to_h
55
+ n = data.size
56
+ quantiles = percentiles.size
57
+ percentiles = percentiles.map { |x| x > 1.0 ? 1.0 : (x < 0.0 ? 0.0 : x) }
58
+
59
+ rough_indices = percentiles.map { |x| 1 + [n - 1, 0].max * x - 1 }
60
+ floor_indices = rough_indices.map { |i| i.floor }
61
+ ceil_indices = rough_indices.map { |i| i.ceil }
62
+
63
+ data = data.sort
64
+ result = floor_indices.map { |i| data[i] }
65
+ result_ceil = ceil_indices.map { |i| data[i] }
66
+
67
+ indices_to_fix = (0...quantiles).select { |i| rough_indices[i] > floor_indices[i] && result_ceil[i] != result[i] }
68
+ index_approximation_errors = indices_to_fix.map { |i| rough_indices[i] - floor_indices[i] }
69
+ reduced_index_approximation_errors = index_approximation_errors.map { |i| (1 - i) }
70
+ hi_indices = indices_to_fix.map { |i| ceil_indices[i] }
71
+ data_hi_indices = hi_indices.map { |i| data[i] }
72
+
73
+ j = 0
74
+ indices_to_fix.each do |i|
75
+ result[i] = reduced_index_approximation_errors[j] * result[i] + index_approximation_errors[j] * data_hi_indices[j]
76
+ j += 1
64
77
  end
78
+
79
+ return percentiles.zip(result).to_h
80
+ end
81
+
82
+ def outliers(data, k=1.5, **opts)
83
+ outliers_according_to(data, data, k, **opts)
84
+ end
85
+
86
# Returns the elements of `data` lying outside the Tukey fences computed
# from `data_distribution`.
#
# data              - values to be checked.
# data_distribution - values from which the quartiles are taken (through
#                     Rust::Descriptive.quantile).
# k                 - multiplier applied to the interquartile range.
# opts[:side]       - :positive/:pos/:p/:+ keeps only the values above
#                     q3 + k*iqr; :negative/:neg/:n/:- keeps only the values
#                     below q1 - k*iqr; any other value (or none) keeps both.
#
# Returns an Array, negative outliers first when both sides are kept.
def outliers_according_to(data, data_distribution, k=1.5, **opts)
  quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
  q1 = quantiles[0.25]
  q3 = quantiles[0.75]
  iqr = q3 - q1

  positive_outliers = data.select { |d| d > q3 + iqr * k }
  negative_outliers = data.select { |d| d < q1 - iqr * k }

  outliers = negative_outliers + positive_outliers
  if opts[:side]
    # Each abbreviation goes with the side it abbreviates.
    case opts[:side].to_sym
    when :positive, :pos, :p, :+
      outliers = positive_outliers
    when :negative, :neg, :n, :-
      outliers = negative_outliers
    end
  end

  return outliers
end
66
107
  end
67
108
  end
data/lib/rust-effsize.rb CHANGED
@@ -24,6 +24,10 @@ module Rust::EffectSize::CliffDelta
24
24
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
25
25
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
26
26
 
27
+ if d1.size <= 1 || d2.size <= 1
28
+ return Rust::EffectSize::Result.new
29
+ end
30
+
27
31
  Rust.exclusive do
28
32
  Rust['effsize.a'] = d1
29
33
  Rust['effsize.b'] = d2
@@ -32,10 +36,10 @@ module Rust::EffectSize::CliffDelta
32
36
 
33
37
  result = Rust::EffectSize::Result.new
34
38
  result.name = "Cliff's delta"
35
- result.estimate = Rust._pull("effsize.result$estimate")
36
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
37
- result.confidence_level = Rust._pull("effsize.result$conf.level")
38
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
39
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
40
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
41
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
42
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
39
43
 
40
44
  return result
41
45
  end
@@ -49,6 +53,10 @@ module Rust::EffectSize::CohenD
49
53
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
50
54
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
51
55
 
56
+ if d1.size <= 1 || d2.size <= 1
57
+ return Rust::EffectSize::Result.new
58
+ end
59
+
52
60
  Rust.exclusive do
53
61
  Rust['effsize.a'] = d1
54
62
  Rust['effsize.b'] = d2
@@ -57,10 +65,10 @@ module Rust::EffectSize::CohenD
57
65
 
58
66
  result = Rust::EffectSize::Result.new
59
67
  result.name = "Cohen's d"
60
- result.estimate = Rust._pull("effsize.result$estimate")
61
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
62
- result.confidence_level = Rust._pull("effsize.result$conf.level")
63
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
68
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
69
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
70
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
71
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
64
72
 
65
73
  return result
66
74
  end
data/lib/rust-plots.rb CHANGED
@@ -21,6 +21,14 @@ module Rust::Plots
21
21
  return self
22
22
  end
23
23
 
24
+ def palette(size)
25
+ if size <= 1
26
+ return ['black']
27
+ else
28
+ return Rust._pull("hcl.colors(n=#{size})")
29
+ end
30
+ end
31
+
24
32
  def x_range(range)
25
33
  @options['xlim'] = range
26
34
 
@@ -127,10 +135,18 @@ module Rust::Plots
127
135
  end
128
136
 
129
137
  class ScatterPlot < BasePlot
130
- def initialize(x, y)
138
# Creates a scatter plot, optionally with a first series.
#
# x, y    - coordinates of the first series; it is added only when both
#           are given.
# options - per-series options stored together with that series.
def initialize(x = nil, y = nil, **options)
  super()
  @series = []
  # Forward the options as keywords: handing the Hash over positionally
  # raises ArgumentError on Ruby 3, because #series accepts only two
  # positional arguments.
  series(x, y, **options) if x && y
end

# Appends a series to the plot.
#
# x, y    - coordinates of the series.
# options - per-series options, stored as given.
#
# Returns self, so that calls can be chained.
def series(x, y, **options)
  @series << [x, y, options]

  return self
end
135
151
 
136
152
  def thickness(t)
@@ -159,13 +175,66 @@ module Rust::Plots
159
175
 
160
176
  protected
161
177
  def _show()
162
- Rust["plotter.x"] = @x
163
- Rust["plotter.y"] = @y
178
+ first = true
179
+ palette = self.palette(@series.size)
180
+ i = 0
181
+
182
+ base_options = {}
183
+ unless @options['xlim']
184
+ x_values = @series.map { |v| v[0] }.flatten
185
+ y_values = @series.map { |v| v[1] }.flatten
186
+
187
+ base_options[:xlim] = [x_values.min, x_values.max]
188
+ base_options[:ylim] = [y_values.min, y_values.max]
189
+ end
190
+
191
+ @series.each do |x, y, options|
192
+ options = options.merge(base_options)
193
+ Rust["plotter.x"] = x
194
+ Rust["plotter.y"] = y
195
+
196
+ function = nil
197
+ if first
198
+ function = Rust::Function.new("plot")
199
+ first = false
200
+ else
201
+ function = Rust::Function.new("lines")
202
+ end
203
+
204
+ augmented_options = {}
205
+ augmented_options['col'] = options[:color] || palette[i]
206
+ augmented_options['xlim'] = options[:xlim] if options[:xlim]
207
+ augmented_options['ylim'] = options[:ylim] if options[:ylim]
208
+
209
+ function.options = self._augmented_options(augmented_options)
210
+ function.arguments << Rust::Variable.new("plotter.x")
211
+ function.arguments << Rust::Variable.new("plotter.y")
212
+
213
+ function.call
214
+
215
+ i += 1
216
+ end
217
+
218
+ return self
219
+ end
220
+ end
221
+
222
+ class BarPlot < BasePlot
223
+ def initialize(bars)
224
+ super()
225
+ @bars = bars
226
+ end
227
+
228
+ protected
229
+ def _show()
230
+ Rust["plotter.bars"] = @bars.values
231
+ Rust["plotter.labels"] = @bars.keys
232
+
233
+ Rust._eval("names(plotter.bars) <- plotter.labels")
164
234
 
165
- function = Rust::Function.new("plot")
235
+ function = Rust::Function.new("barplot")
166
236
  function.options = self._augmented_options
167
- function.arguments << Rust::Variable.new("plotter.x")
168
- function.arguments << Rust::Variable.new("plotter.y")
237
+ function.arguments << Rust::Variable.new("plotter.bars")
169
238
 
170
239
  function.call
171
240
 
@@ -0,0 +1,248 @@
1
+ require_relative 'rust-core'
2
+
3
+ class Numeric
4
+ def distance(other)
5
+ raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
6
+
7
+ return (self - other).abs
8
+ end
9
+ end
10
+
11
+ class Array
12
+ def distance(other)
13
+ raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
14
+
15
+ longest, shortest = self.size > other.size ? [self, other] : [other, self]
16
+
17
+ distance = 0
18
+ for i in 0...longest.size
19
+ distance += longest[i].to_i.distance(shortest[i].to_i)
20
+ end
21
+
22
+ return distance
23
+ end
24
+ end
25
+
26
+ class String
27
+ def distance(other)
28
+ raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
29
+
30
+ return self.bytes.distance other.bytes
31
+ end
32
+ end
33
+
34
+ module Rust
35
+ class RandomVariableSlice
36
+ def initialize(values)
37
+ raise TypeError, "Expected Hash" unless values.is_a?(Hash)
38
+
39
+ @values = values
40
+ end
41
+
42
+ def probability(v=nil)
43
+ unless v
44
+ return @values.values.sum
45
+ else
46
+ return @values[v]
47
+ end
48
+ end
49
+
50
+ def ml
51
+ @values.max_by { |k, v| v }[0]
52
+ end
53
+
54
+ def expected
55
+ @values.map { |k, v| k*v }.sum
56
+ end
57
+
58
+ def >(n)
59
+ self.so_that { |k| k > n}
60
+ end
61
+
62
+ def >=(n)
63
+ self.so_that { |k| k >= n}
64
+ end
65
+
66
+ def <(n)
67
+ self.so_that { |k| k < n}
68
+ end
69
+
70
+ def <=(n)
71
+ self.so_that { |k| k <= n}
72
+ end
73
+
74
+ def ==(n)
75
+ self.so_that { |k| k == n}
76
+ end
77
+
78
+ def so_that
79
+ RandomVariableSlice.new(@values.select { |k, v| yield(k) })
80
+ end
81
+
82
+ def between(a, b)
83
+ RandomVariableSlice.new(@values.select { |k, v| k.between? a, b })
84
+ end
85
+ end
86
+
87
+ class RandomVariable < RandomVariableSlice
88
+ EPSILON = 1e-7
89
+
90
+ attr_reader :values
91
+
92
+ def initialize(values = {0 => 1.0}, exact = false)
93
+ @values = values
94
+ @exact = exact
95
+
96
+ raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
97
+ raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON
98
+
99
+ approx!
100
+ end
101
+
102
+ def probability(v)
103
+ return @values[v].to_f
104
+ end
105
+
106
+ def +(other)
107
+ new_hash = {}
108
+
109
+ @values.each do |my_key, my_value|
110
+ other.values.each do |other_key, other_value|
111
+ sum_key = my_key + other_key
112
+
113
+ new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
114
+ end
115
+ end
116
+
117
+ return RandomVariable.new(new_hash, @exact)
118
+ end
119
+
120
+ def *(times)
121
+ if times.is_a? Integer
122
+ return rep(times)
123
+ elsif times.is_a? RandomVariable
124
+ return mul(times)
125
+ else
126
+ raise "The argument must be an Integer or a RandomVariable"
127
+ end
128
+ end
129
+
130
+ def mul(other)
131
+ new_hash = {}
132
+
133
+ @values.each do |my_key, my_value|
134
+ other.values.each do |other_key, other_value|
135
+ mul_key = my_key * other_key
136
+
137
+ new_hash[mul_key] = new_hash[mul_key].to_f + (my_value * other_value)
138
+ end
139
+ end
140
+
141
+ return RandomVariable.new(new_hash, @exact)
142
+ end
143
+
144
+ def rep(times)
145
+ rv = self
146
+ (times-1).times do
147
+ rv += self
148
+ end
149
+
150
+ return rv
151
+ end
152
+
153
+ def exact!
154
+ @exact = true
155
+ end
156
+
157
+ def approx!
158
+ return if @exact
159
+
160
+ to_delete = []
161
+ @values.each do |v, probability|
162
+ to_delete.push v if probability <= EPSILON
163
+ end
164
+
165
+ to_delete.each do |v|
166
+ probability = @values.delete v
167
+ nearest = @values.keys.min_by { |k| k.distance v }
168
+ @values[nearest] += probability
169
+ end
170
+ end
171
+
172
# Draws one outcome at random according to the stored probabilities.
#
# Walks the outcomes in insertion order, accumulating their probabilities,
# and returns the first one whose cumulative probability reaches a uniform
# random number taken from `rand`.
def extract
  v = rand

  cumulative = 0
  @values.each do |key, prob|
    cumulative += prob

    return key if cumulative >= v
  end

  # The probabilities may add up to slightly less than 1 (the constructor
  # tolerates EPSILON), so the loop can end without a hit; fall back to the
  # last outcome instead of leaking the whole Hash returned by #each.
  return @values.keys.last
end
182
+
183
# Builds a RandomVariable from a partial distribution, assigning the
# missing probability mass (1 minus the sum of the given probabilities)
# to `key`.
#
# hash - outcome => probability pairs; left unmodified.
# key  - outcome receiving the remaining probability (0 by default).
#
# Returns the new RandomVariable.
def self.complete(hash, key=0)
  # Work on a copy so that the caller's Hash is neither changed here nor
  # shared with the new instance.
  completed = hash.dup
  completed[key] = 1 - hash.values.sum
  return RandomVariable.new(completed)
end
187
+ end
188
+
189
+ class UniformRandomVariable < RandomVariable
190
+ def initialize(values, exact = false)
191
+ super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
192
+ end
193
+ end
194
+
195
# Mathematical-notation shortcuts for random variable slices.
module Probabilities
  # Probability of an event, i.e. of a RandomVariableSlice such as the one
  # returned by the comparison operators of a random variable.
  #
  # Returns whatever v.probability returns.
  # Raises RuntimeError when v is a whole RandomVariable or is not a
  # RandomVariableSlice at all.
  def P(v)
    if v.is_a? RandomVariableSlice
      raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
      return v.probability
    else
      raise "Cannot compute the probability of a #{v.class}"
    end
  end

  # Expected value of a RandomVariableSlice (whole random variables included).
  #
  # Returns whatever v.expected returns.
  # Raises RuntimeError when v is not a RandomVariableSlice.
  def E(v)
    if v.is_a? RandomVariableSlice
      return v.expected
    else
      raise "Cannot compute the expected value of a #{v.class}"
    end
  end
end
213
+
214
+ class RandomVariable
215
+ ENGLISH_ALPHABET = RandomVariable.new({
216
+ "a" => 0.08167,
217
+ "b" => 0.01492,
218
+ "c" => 0.02782,
219
+ "d" => 0.04253,
220
+ "e" => 0.12703,
221
+ "f" => 0.02228,
222
+ "g" => 0.02015,
223
+ "h" => 0.06094,
224
+ "i" => 0.06966,
225
+ "j" => 0.00153,
226
+ "k" => 0.00772,
227
+ "l" => 0.04025,
228
+ "m" => 0.02406,
229
+ "n" => 0.06749,
230
+ "o" => 0.07507,
231
+ "p" => 0.01929,
232
+ "q" => 0.00095,
233
+ "r" => 0.05987,
234
+ "s" => 0.06327,
235
+ "t" => 0.09056,
236
+ "u" => 0.02758,
237
+ "v" => 0.00978,
238
+ "w" => 0.02360,
239
+ "x" => 0.00150,
240
+ "y" => 0.01974,
241
+ "z" => 0.00074
242
+ })
243
+
244
+ DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
245
+
246
+ COIN = UniformRandomVariable.new(["h", "t"])
247
+ end
248
+ end
data/lib/rust-tests.rb CHANGED
@@ -7,6 +7,7 @@ module Rust::StatisticalTests
7
7
  attr_accessor :pvalue
8
8
  attr_accessor :exact
9
9
  attr_accessor :alpha
10
+ attr_accessor :hypothesis
10
11
 
11
12
  def initialize
12
13
  @statistics = {}
@@ -20,6 +21,16 @@ module Rust::StatisticalTests
20
21
  @statistics[name.to_sym] = value
21
22
  end
22
23
 
24
+ def adjusted_pvalue(method='bonferroni')
25
+ return 1 unless @hypothesis
26
+ @hypothesis.adjusted_pvalue_for(self, method)
27
+ end
28
+
29
+ def hypothesis=(value)
30
+ @hypothesis = value
31
+ @hypothesis.add(self)
32
+ end
33
+
23
34
  def significant
24
35
  pvalue < alpha
25
36
  end
@@ -31,32 +42,150 @@ module Rust::StatisticalTests
31
42
  (!exact ? " P-value is not exact." : "")
32
43
  end
33
44
  end
45
+
46
+ class Hypothesis
47
+ def self.find(title_or_instance)
48
+ return Hypothesis.new(nil) if title_or_instance == nil
49
+
50
+ if title_or_instance.is_a?(String)
51
+ ObjectSpace.each_object(Hypothesis) do |instance|
52
+ return instance if instance.title == title_or_instance
53
+ end
54
+
55
+ return Hypothesis.new(title_or_instance)
56
+ elsif title_or_instance.is_a?(Hypothesis)
57
+ return title_or_instance
58
+ end
59
+
60
+ raise TypeError, "Expected nil, String or Hypothesis"
61
+ end
62
+
63
+ attr_reader :results
64
+ attr_reader :title
65
+
66
+ def initialize(title)
67
+ @title = title
68
+ @results = []
69
+ end
70
+
71
+ def add(result)
72
+ @results << result
73
+ end
74
+
75
+ def adjusted_pvalue_for(instance, method)
76
+ p_values = @results.map { |r| r.pvalue }
77
+ index = @results.index(instance)
78
+
79
+ adjusted_pvalues = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)
80
+
81
+ if adjusted_pvalues.is_a?(Numeric)
82
+ return adjusted_pvalues
83
+ else
84
+ return adjusted_pvalues[index]
85
+ end
86
+ end
87
+ end
88
+ end
89
+
90
+ module Rust::StatisticalTests::PValueAdjustment
91
# Looks up a p-value adjustment class by name.
#
# name - String or Symbol, case-insensitive: "bonferroni"/"b", "holm"/"h",
#        "hochberg", "hommel", "benjaminihochberg"/"bh",
#        "benjaminiyekutieli"/"by".
#
# Returns the matching adjustment class.
# Raises ArgumentError for an unknown name, instead of returning nil and
# letting the caller fail later on `nil.adjust`.
def self.method(name)
  case name.to_s.downcase
  when "bonferroni", "b"
    Bonferroni
  when "holm", "h"
    Holm
  when "hochberg"
    Hochberg
  when "hommel"
    Hommel
  when "benjaminihochberg", "bh"
    BenjaminiHochberg
  when "benjaminiyekutieli", "by"
    BenjaminiYekutieli
  else
    raise ArgumentError, "Unknown p-value adjustment method: #{name}"
  end
end
108
+
109
+ class Bonferroni
110
+ def self.adjust(*p_values)
111
+ Rust.exclusive do
112
+ Rust['adjustment.p'] = p_values
113
+ return Rust._pull("p.adjust(adjustment.p, method=\"bonferroni\")")
114
+ end
115
+ end
116
+ end
117
+
118
+ class Holm
119
+ def self.adjust(*p_values)
120
+ Rust.exclusive do
121
+ Rust['adjustment.p'] = p_values
122
+ return Rust._pull("p.adjust(adjustment.p, method=\"holm\")")
123
+ end
124
+ end
125
+ end
126
+
127
+ class Hochberg
128
+ def self.adjust(*p_values)
129
+ Rust.exclusive do
130
+ Rust['adjustment.p'] = p_values
131
+ return Rust._pull("p.adjust(adjustment.p, method=\"hochberg\")")
132
+ end
133
+ end
134
+ end
135
+
136
+ class Hommel
137
+ def self.adjust(*p_values)
138
+ Rust.exclusive do
139
+ Rust['adjustment.p'] = p_values
140
+ return Rust._pull("p.adjust(adjustment.p, method=\"hommel\")")
141
+ end
142
+ end
143
+ end
144
+
145
+ class BenjaminiHochberg
146
+ def self.adjust(*p_values)
147
+ Rust.exclusive do
148
+ Rust['adjustment.p'] = p_values
149
+ return Rust._pull("p.adjust(adjustment.p, method=\"BH\")")
150
+ end
151
+ end
152
+ end
153
+
154
+ class BenjaminiYekutieli
155
+ def self.adjust(*p_values)
156
+ Rust.exclusive do
157
+ Rust['adjustment.p'] = p_values
158
+ return Rust._pull("p.adjust(adjustment.p, method=\"BY\")")
159
+ end
160
+ end
161
+ end
34
162
  end
35
163
 
36
164
  module Rust::StatisticalTests::Wilcoxon
37
165
  class << self
38
- def paired(d1, d2, alpha = 0.05)
166
+ def paired(d1, d2, alpha = 0.05, **options)
39
167
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
40
168
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
41
169
  raise "The two distributions have different size" if d1.size != d2.size
42
-
170
+
43
171
  Rust.exclusive do
44
172
  Rust["wilcox.a"] = d1
45
173
  Rust["wilcox.b"] = d2
46
174
 
47
175
  _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=T)", true)
48
176
  result = Rust::StatisticalTests::Result.new
49
- result.name = "Wilcoxon Signed-Rank test"
50
- result.pvalue = Rust._pull("wilcox.result$p.value")
51
- result[:w] = Rust._pull("wilcox.result$statistic")
52
- result.exact = !warnings.include?("cannot compute exact p-value with zeroes")
53
- result.alpha = alpha
177
+ result.name = "Wilcoxon Signed-Rank test"
178
+ result.pvalue = Rust._pull("wilcox.result$p.value")
179
+ result[:w] = Rust._pull("wilcox.result$statistic")
180
+ result.exact = !warnings.include?("cannot compute exact p-value with zeroes")
181
+ result.alpha = alpha
182
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
54
183
 
55
184
  return result
56
185
  end
57
186
  end
58
187
 
59
- def unpaired(d1, d2, alpha = 0.05)
188
+ def unpaired(d1, d2, alpha = 0.05, **options)
60
189
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
61
190
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
62
191
 
@@ -66,11 +195,12 @@ module Rust::StatisticalTests::Wilcoxon
66
195
 
67
196
  _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=F)", true)
68
197
  result = Rust::StatisticalTests::Result.new
69
- result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
70
- result.pvalue = Rust._pull("wilcox.result$p.value")
71
- result[:w] = Rust._pull("wilcox.result$statistic")
72
- result.exact = !warnings.include?("cannot compute exact p-value with ties")
73
- result.alpha = alpha
198
+ result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
199
+ result.pvalue = Rust._pull("wilcox.result$p.value")
200
+ result[:w] = Rust._pull("wilcox.result$statistic")
201
+ result.exact = !warnings.include?("cannot compute exact p-value with ties")
202
+ result.alpha = alpha
203
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
74
204
 
75
205
  return result
76
206
  end
@@ -80,7 +210,7 @@ end
80
210
 
81
211
  module Rust::StatisticalTests::T
82
212
  class << self
83
- def paired(d1, d2, alpha = 0.05)
213
+ def paired(d1, d2, alpha = 0.05, **options)
84
214
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
85
215
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
86
216
  raise "The two distributions have different size" if d1.size != d2.size
@@ -91,17 +221,18 @@ module Rust::StatisticalTests::T
91
221
 
92
222
  warnings = Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=T)")
93
223
  result = Rust::StatisticalTests::Result.new
94
- result.name = "Paired t-test"
95
- result.pvalue = Rust._pull("t.result$p.value")
96
- result[:t] = Rust._pull("t.result$statistic")
97
- result.exact = true
98
- result.alpha = alpha
224
+ result.name = "Paired t-test"
225
+ result.pvalue = Rust._pull("t.result$p.value")
226
+ result[:t] = Rust._pull("t.result$statistic")
227
+ result.exact = true
228
+ result.alpha = alpha
229
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
99
230
 
100
231
  return result
101
232
  end
102
233
  end
103
234
 
104
- def unpaired(d1, d2, alpha = 0.05)
235
+ def unpaired(d1, d2, alpha = 0.05, **options)
105
236
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
106
237
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
107
238
 
@@ -111,11 +242,12 @@ module Rust::StatisticalTests::T
111
242
 
112
243
  Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=F)")
113
244
  result = Rust::StatisticalTests::Result.new
114
- result.name = "Welch Two Sample t-test"
115
- result.pvalue = Rust._pull("t.result$p.value")
116
- result[:t] = Rust._pull("t.result$statistic")
117
- result.exact = true
118
- result.alpha = alpha
245
+ result.name = "Welch Two Sample t-test"
246
+ result.pvalue = Rust._pull("t.result$p.value")
247
+ result[:t] = Rust._pull("t.result$statistic")
248
+ result.exact = true
249
+ result.alpha = alpha
250
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
119
251
 
120
252
  return result
121
253
  end
@@ -125,18 +257,19 @@ end
125
257
 
126
258
  module Rust::StatisticalTests::Shapiro
127
259
  class << self
128
- def compute(vector, alpha = 0.05)
260
+ def compute(vector, alpha = 0.05, **options)
129
261
  raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
130
262
  Rust.exclusive do
131
263
  Rust['shapiro.v'] = vector
132
264
 
133
265
  Rust._eval("shapiro.result = shapiro.test(shapiro.v)")
134
266
  result = Rust::StatisticalTests::Result.new
135
- result.name = "Shapiro-Wilk normality test"
136
- result.pvalue = Rust._pull("shapiro.result$p.value")
137
- result[:W] = Rust._pull("shapiro.result$statistic")
138
- result.exact = true
139
- result.alpha = alpha
267
+ result.name = "Shapiro-Wilk normality test"
268
+ result.pvalue = Rust._pull("shapiro.result$p.value")
269
+ result[:W] = Rust._pull("shapiro.result$statistic")
270
+ result.exact = true
271
+ result.alpha = alpha
272
+ result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
140
273
 
141
274
  return result
142
275
  end
data/lib/rust.rb CHANGED
@@ -6,3 +6,4 @@ require_relative 'rust-effsize'
6
6
  require_relative 'rust-descriptive'
7
7
  require_relative 'rust-plots'
8
8
  require_relative 'rust-calls'
9
+ require_relative 'rust-probabilities'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.7'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-15 00:00:00.000000000 Z
11
+ date: 2021-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby
@@ -63,6 +63,7 @@ files:
63
63
  - lib/rust-descriptive.rb
64
64
  - lib/rust-effsize.rb
65
65
  - lib/rust-plots.rb
66
+ - lib/rust-probabilities.rb
66
67
  - lib/rust-tests.rb
67
68
  - lib/rust.rb
68
69
  homepage: https://github.com/intersimone999/ruby-rust
@@ -84,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
85
  - !ruby/object:Gem::Version
85
86
  version: '0'
86
87
  requirements: []
87
- rubygems_version: 3.1.4
88
+ rubygems_version: 3.2.7
88
89
  signing_key:
89
90
  specification_version: 4
90
91
  summary: Ruby advanced statistical library