rust 0.4 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rust-basics.rb +1 -1
- data/lib/rust-calls.rb +11 -0
- data/lib/rust-core.rb +263 -11
- data/lib/rust-csv.rb +12 -3
- data/lib/rust-descriptive.rb +51 -10
- data/lib/rust-effsize.rb +16 -8
- data/lib/rust-plots.rb +77 -8
- data/lib/rust-probabilities.rb +248 -0
- data/lib/rust-tests.rb +164 -31
- data/lib/rust.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35c41ec98f5b286ef597096152249f19825a27bc33030e85ea196df8b778d3b1
|
4
|
+
data.tar.gz: 48b2e61f707ebcd05fa5f593016c8c0fb4b9d1e58751dd5deae83715583bd978
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0cc1289721d52cd376e0a242c2be419ea511f03a0fdd42c4c7a1973cde24e8dfc467e9d355693f85632cccb98af6986777aa020b715db5f06aa88a63d154ea0
|
7
|
+
data.tar.gz: 4c91c808773dcd04913a594cfcaa7956318908e6d590ebec1d9a04beef18986adc70a3c5e480c39bdc9f753b5edf920a1939208b8b530ccfc67124ac153a4804
|
data/lib/rust-basics.rb
CHANGED
data/lib/rust-calls.rb
CHANGED
@@ -34,6 +34,17 @@ module Rust
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
# Two-sided R formula rendered as "<dependent> ~ <independent>".
class SimpleFormula
    # dependent:   left-hand side of the formula (interpolated into the text)
    # independent: right-hand side of the formula (interpolated into the text)
    def initialize(dependent, independent)
        @dependent, @independent = dependent, independent
    end

    # Returns the formula as R source text.
    def to_R
        "#{@dependent} ~ #{@independent}"
    end
end
|
47
|
+
|
37
48
|
class Variable
|
38
49
|
def initialize(name)
|
39
50
|
@name = name
|
data/lib/rust-core.rb
CHANGED
@@ -130,7 +130,10 @@ module Rust
|
|
130
130
|
@labels.each { |label| @data[label] = [] }
|
131
131
|
elsif labels_or_data.is_a? Hash
|
132
132
|
@labels = labels_or_data.keys.map { |l| l.to_s }
|
133
|
-
|
133
|
+
|
134
|
+
labels_or_data.each do |key, value|
|
135
|
+
@data[key.to_s] = value.clone
|
136
|
+
end
|
134
137
|
end
|
135
138
|
end
|
136
139
|
|
@@ -142,6 +145,14 @@ module Rust
|
|
142
145
|
end
|
143
146
|
end
|
144
147
|
|
148
|
+
# Returns row i as a plain Array whose values follow the order of @labels,
# or nil when i lies outside 0...rows.
def fast_row(i)
    return nil if i < 0 || i >= rows

    @labels.map { |label| @data[label][i] }
end
|
155
|
+
|
145
156
|
def shuffle(*args)
|
146
157
|
result = DataFrame.new(@labels)
|
147
158
|
|
@@ -174,6 +185,7 @@ module Rust
|
|
174
185
|
def column(name)
|
175
186
|
return @data[name]
|
176
187
|
end
|
188
|
+
alias :| :column
|
177
189
|
|
178
190
|
def rename_column!(old_name, new_name)
|
179
191
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
@@ -195,6 +207,13 @@ module Rust
|
|
195
207
|
return result
|
196
208
|
end
|
197
209
|
|
210
|
+
# Yields every (row, index) pair produced by each_with_index to the given
# block and returns true as soon as the block answers with a truthy value;
# returns false when no pair satisfies the block.
def has_row?
    each_with_index do |row, index|
        matched = yield(row, index)
        return true if matched
    end

    false
end
|
216
|
+
|
198
217
|
def select_columns(cols=nil)
|
199
218
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
200
219
|
|
@@ -215,6 +234,40 @@ module Rust
|
|
215
234
|
@data.delete(column)
|
216
235
|
end
|
217
236
|
|
237
|
+
# Removes the entry at position i from every column stored in @data,
# modifying the columns in place.
def delete_row(i)
    @data.each_value { |column| column.delete_at(i) }
end
|
242
|
+
|
243
|
+
# Non-destructive counterpart of uniq_by!: clones the receiver, removes the
# duplicated rows from the clone and returns the clone.
# NOTE(review): whether the clone's columns are independent of the receiver's
# depends on how this class implements copying, which is not visible here - confirm.
def uniq_by(by)
    copy = clone
    copy.uniq_by!(by)
    copy
end
|
248
|
+
|
249
|
+
# Removes, in place, every row whose values in the `by` columns repeat those
# of an earlier row; the first occurrence of each key is kept.
#
# by: a column name or an Array of column names forming the uniqueness key.
#     (A single name is accepted as a convenience; previously it raised
#     because a String does not respond to `each`.)
# Returns self.
def uniq_by!(by)
    key_columns = Array(by)
    seen = {}
    duplicates = []

    each_with_index do |row, i|
        key = key_columns.map { |colname| row[colname] }
        if seen.key?(key)
            duplicates << i
        else
            seen[key] = true
        end
    end

    # Delete bottom-up so the indices still to be removed are not shifted.
    duplicates.reverse_each { |i| delete_row(i) }

    self
end
|
270
|
+
|
218
271
|
def column_names
|
219
272
|
return @labels.map { |k| k.to_s }
|
220
273
|
end
|
@@ -243,7 +296,7 @@ module Rust
|
|
243
296
|
row.each do |key, value|
|
244
297
|
@data[key.to_s] << value
|
245
298
|
end
|
246
|
-
|
299
|
+
|
247
300
|
return true
|
248
301
|
else
|
249
302
|
raise TypeError, "Expected an Array or a Hash"
|
@@ -275,6 +328,14 @@ module Rust
|
|
275
328
|
return self
|
276
329
|
end
|
277
330
|
|
331
|
+
# Yields each element produced by fast_each_with_index to the given block,
# dropping the index. Returns self.
def fast_each
    fast_each_with_index { |element, _index| yield element }

    self
end
|
338
|
+
|
278
339
|
def each_with_index
|
279
340
|
for i in 0...self.rows
|
280
341
|
element = {}
|
@@ -288,6 +349,19 @@ module Rust
|
|
288
349
|
return self
|
289
350
|
end
|
290
351
|
|
352
|
+
# For every index i in 0...rows, yields the row as a plain Array (values in
# @labels order) together with i. Returns self.
def fast_each_with_index
    rows.times do |i|
        yield(@labels.map { |label| @data[label][i] }, i)
    end

    self
end
|
364
|
+
|
291
365
|
def load_in_r_as(variable_name)
|
292
366
|
command = []
|
293
367
|
|
@@ -397,6 +471,77 @@ module Rust
|
|
397
471
|
return result
|
398
472
|
end
|
399
473
|
|
474
|
+
# Groups the rows by the value of column `by` and collapses each group into
# one row.
#
# by:          name (String) of the grouping column.
# aggregators: per-column procs, keyed by column name; each receives the
#              group's values for that column and returns the aggregate.
# block:       default aggregator for every column without a dedicated proc.
#
# Returns a new Rust::DataFrame with the same columns, one row per group, in
# the order produced by sort_by(by). An empty receiver yields an empty result
# (previously a nil partial was pushed and the method crashed).
# Raises TypeError for a non-String `by` or non-Proc aggregators, and a
# RuntimeError when no block is given.
def aggregate(by, **aggregators)
    raise TypeError, "Expected a string" unless by.is_a?(String)
    raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
    raise "Expected a block for default aggregator" unless block_given?

    aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h
    names = column_names
    sorted = sort_by(by)

    # Consecutive rows sharing the same key end up in the same partial frame.
    # A new partial is opened for the very first row too, even if its key is nil.
    partials = []
    sorted.column(by).each_with_index do |value, index|
        if partials.empty? || partials.last.column(by)[0] != value
            partials << Rust::DataFrame.new(names)
        end
        partials.last << sorted.fast_row(index)
    end

    result = Rust::DataFrame.new(names)
    partials.each do |partial|
        aggregated_row = { by => partial.column(by)[0] }
        (names - [by]).each do |column|
            values = partial.column(column)
            aggregated_row[column] = aggregators[column] ? aggregators[column].call(values) : yield(values)
        end

        result << aggregated_row
    end

    result
end
|
513
|
+
|
514
|
+
# Non-destructive counterpart of sort_by!: clones the receiver, sorts the
# clone by the given column and returns the clone.
# NOTE(review): whether the clone's columns are independent of the receiver's
# depends on how this class implements copying, which is not visible here - confirm.
def sort_by(column)
    copy = clone
    copy.sort_by!(column)
    copy
end
|
519
|
+
|
520
|
+
# Reorders every column in place so that the rows are sorted by the values of
# column `by` (ascending, using the values' natural ordering). Rows with equal
# keys keep their original relative order.
#
# The row permutation is computed once with a stable key (value, original
# index) instead of repeatedly scanning a sorted copy and overwriting matched
# slots with a sentinel, which was quadratic in the number of rows.
#
# Returns the sorted key column, as before.
# Raises a RuntimeError when the column does not exist.
def sort_by!(by)
    keys = @data[by]
    raise "This DataFrame does not contain a column named #{by}" unless keys

    # order[k] is the original index of the row that must land at position k.
    order = keys.each_index.sort_by { |i| [keys[i], i] }

    (column_names - [by]).each do |column_name|
        values = column(column_name)
        @data[column_name] = order.map { |i| values[i] }
    end

    keys.replace(order.map { |i| keys[i] })
end
|
544
|
+
|
400
545
|
def bind_rows!(dataframe)
|
401
546
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
402
547
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -480,7 +625,7 @@ module Rust
|
|
480
625
|
end
|
481
626
|
end
|
482
627
|
|
483
|
-
class Sequence
|
628
|
+
class Sequence < RustDatatype
|
484
629
|
attr_reader :min
|
485
630
|
attr_reader :max
|
486
631
|
|
@@ -511,6 +656,103 @@ module Rust
|
|
511
656
|
def to_R
|
512
657
|
"seq(from=#@min, to=#@max, by=#@step)"
|
513
658
|
end
|
659
|
+
|
660
|
+
# Evaluates "<variable_name> <- <to_R>" through Rust._eval, i.e. asks R to
# assign this object's R representation to the given variable name.
def load_in_r_as(variable_name)
    assignment = "#{variable_name} <- #{to_R}"
    Rust._eval(assignment)
end
|
663
|
+
end
|
664
|
+
|
665
|
+
# Array whose elements respond to clone and bind_rows! (data frames).
class DataFrameArray < Array
    # Returns a clone of the first element with every following element
    # appended through bind_rows!, or nil when the array is empty.
    def bind_all
        return nil if empty?

        result = first.clone
        drop(1).each { |dataframe| result.bind_rows!(dataframe) }
        result
    end
end
|
678
|
+
|
679
|
+
# Hash whose values respond to clone and bind_rows! (data frames).
class DataFrameHash < Hash
    # Returns a clone of the first value with every following value appended
    # through bind_rows! (in insertion order), or nil when the hash is empty.
    def bind_all
        frames = values
        return nil if frames.empty?

        result = frames.first.clone
        frames.drop(1).each { |dataframe| result.bind_rows!(dataframe) }
        result
    end
end
|
692
|
+
|
693
|
+
# Array with element-wise arithmetic. The binary operators accept either a
# Numeric (applied to every element) or an Array of the same size (applied
# pairwise). Every operator returns a new MathArray and leaves the receiver
# untouched.
class MathArray < Array
    # Element-wise subtraction.
    def -(other)
        elementwise(other) { |a, b| a - b }
    end

    # Element-wise multiplication.
    def *(other)
        elementwise(other) { |a, b| a * b }
    end

    # Element-wise addition.
    def +(other)
        elementwise(other) { |a, b| a + b }
    end

    # Element-wise division (uses the elements' own `/`, so Integer operands
    # divide as integers).
    def /(other)
        elementwise(other) { |a, b| a / b }
    end

    # Raises every element to the power `other`, which must be Numeric.
    def **(other)
        raise ArgumentError, "Expected numeric" unless other.is_a?(Numeric)

        result = clone
        each_index { |i| result[i] = self[i] ** other }
        result
    end

    private

    # Shared implementation of the binary operators: validates `other`,
    # expands a scalar to the receiver's size and yields each pair of operands.
    # Raises ArgumentError for an unsupported operand or a size mismatch.
    def elementwise(other)
        raise ArgumentError, "Expected array or numeric" unless other.is_a?(::Array) || other.is_a?(Numeric)
        raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && size != other.size

        operands = other.is_a?(Numeric) ? ::Array.new(size, other) : other
        result = clone
        each_index { |i| result[i] = yield(self[i], operands[i]) }
        result
    end
end
|
515
757
|
end
|
516
758
|
|
@@ -554,6 +796,14 @@ class Array
|
|
554
796
|
def to_R
|
555
797
|
return "c(#{self.map { |e| e.to_R }.join(",")})"
|
556
798
|
end
|
799
|
+
|
800
|
+
# Returns a Hash mapping every distinct element to the number of times it
# occurs in the receiver.
def distribution
    each_with_object({}) do |value, counts|
        counts[value] = counts[value].to_i + 1
    end
end
|
557
807
|
end
|
558
808
|
|
559
809
|
class String
|
@@ -569,19 +819,21 @@ class Range
|
|
569
819
|
end
|
570
820
|
|
571
821
|
module Rust::RBindings
|
572
|
-
def read_csv(filename, **options)
|
573
|
-
Rust::CSV.read(filename, **options)
|
574
|
-
end
|
575
|
-
|
576
|
-
def write_csv(filename, dataframe, **options)
|
577
|
-
Rust::CSV.write(filename, dataframe, **options)
|
578
|
-
end
|
579
|
-
|
580
822
|
def data_frame(*args)
|
581
823
|
Rust::DataFrame.new(*args)
|
582
824
|
end
|
583
825
|
end
|
584
826
|
|
827
|
+
# Helpers for building sample data.
module Rust::TestCases
    # Builds a Rust::DataFrame with the given columns and `size` rows (100 by
    # default). The block is called with (row index, column) for every cell
    # and returns the value stored in that cell.
    def self.sample_dataframe(columns, size=100)
        dataframe = Rust::DataFrame.new(columns)

        size.times do |i|
            row = columns.map { |c| yield i, c }
            dataframe << row
        end

        dataframe
    end
end
|
836
|
+
|
585
837
|
def bind_r!
|
586
838
|
include Rust::RBindings
|
587
839
|
end
|
data/lib/rust-csv.rb
CHANGED
@@ -3,7 +3,7 @@ require_relative 'rust-core'
|
|
3
3
|
module Rust
|
4
4
|
class CSV
|
5
5
|
def self.read_all(pattern, **options)
|
6
|
-
result =
|
6
|
+
result = DataFrameHash.new
|
7
7
|
Dir.glob(pattern).each do |filename|
|
8
8
|
result[filename] = CSV.read(filename, **options)
|
9
9
|
end
|
@@ -49,10 +49,9 @@ module Rust
|
|
49
49
|
raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
|
50
50
|
|
51
51
|
write_headers = options[:headers] != false
|
52
|
-
options[:headers] = dataframe.column_names
|
52
|
+
options[:headers] = dataframe.column_names unless options[:headers]
|
53
53
|
|
54
54
|
hash = {}
|
55
|
-
labels = nil
|
56
55
|
::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
|
57
56
|
dataframe.each do |row|
|
58
57
|
csv << row
|
@@ -93,3 +92,13 @@ module Rust
|
|
93
92
|
end
|
94
93
|
end
|
95
94
|
end
|
95
|
+
|
96
|
+
# R-flavoured shortcuts for the CSV helpers.
module Rust::RBindings
    # Forwards to Rust::CSV.read with the same filename and options.
    def read_csv(filename, **options)
        return Rust::CSV.read(filename, **options)
    end

    # Forwards to Rust::CSV.write with the same filename, data frame and options.
    def write_csv(filename, dataframe, **options)
        return Rust::CSV.write(filename, dataframe, **options)
    end
end
|
data/lib/rust-descriptive.rb
CHANGED
@@ -50,18 +50,59 @@ module Rust::Descriptive
|
|
50
50
|
# Computes the requested quantiles of `data` by linear interpolation between
# the two closest order statistics.
#
# data:        Array of Numerics (not modified; a sorted copy is used).
# percentiles: Array of Numerics in [0, 1]; defaults to the five-number summary.
#
# Returns a Hash mapping each percentile to its quantile. When the position
# falls exactly on an element (or both neighbours are equal) the element is
# returned as is, without converting it to Float. For empty data every
# quantile is nil.
# Raises TypeError for non-numeric input and RuntimeError for percentiles
# outside [0, 1].
def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
    raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
    raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

    sorted = data.sort
    last_index = [sorted.size - 1, 0].max

    percentiles.map { |percentile|
        # Fractional, zero-based position of the quantile in the sorted data.
        position = last_index * percentile
        low = position.floor
        high = position.ceil

        value = sorted[low]
        if position > low && sorted[high] != value
            weight = position - low
            value = (1 - weight) * value + weight * sorted[high]
        end

        [percentile, value]
    }.to_h
end
|
81
|
+
|
82
|
+
# Outliers of `data` judged against its own distribution: forwards to
# outliers_according_to with `data` used both as the sample and as the
# reference distribution.
def outliers(data, k=1.5, **opts)
    return outliers_according_to(data, data, k, **opts)
end
|
85
|
+
|
86
|
+
# Returns the elements of `data` that lie outside the fences
# [q1 - k*iqr, q3 + k*iqr], where q1, q3 and iqr = q3 - q1 are the quartiles
# of `data_distribution`.
#
# data:              values to be classified.
# data_distribution: values from which the quartiles are computed.
# k:                 fence multiplier (1.5 by default).
# opts[:side]:       restricts the result to one tail:
#                      :positive / :pos / :p / :+  -> values above the upper fence
#                      :negative / :neg / :n / :-  -> values below the lower fence
#                    Any other value (or none) returns both tails, the
#                    negative outliers first.
#
# The short aliases used to be crossed (:neg/:n returned the positive tail and
# :pos/:p the negative one); they now match their long names.
def outliers_according_to(data, data_distribution, k=1.5, **opts)
    quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
    q1 = quantiles[0.25]
    q3 = quantiles[0.75]
    iqr = q3 - q1

    upper_fence = q3 + iqr * k
    lower_fence = q1 - iqr * k
    positive_outliers = data.select { |d| d > upper_fence }
    negative_outliers = data.select { |d| d < lower_fence }

    return negative_outliers + positive_outliers unless opts[:side]

    case opts[:side].to_sym
    when :positive, :pos, :p, :+
        positive_outliers
    when :negative, :neg, :n, :-
        negative_outliers
    else
        negative_outliers + positive_outliers
    end
end
|
66
107
|
end
|
67
108
|
end
|
data/lib/rust-effsize.rb
CHANGED
@@ -24,6 +24,10 @@ module Rust::EffectSize::CliffDelta
|
|
24
24
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
25
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
26
|
|
27
|
+
if d1.size <= 1 || d2.size <= 1
|
28
|
+
return Rust::EffectSize::Result.new
|
29
|
+
end
|
30
|
+
|
27
31
|
Rust.exclusive do
|
28
32
|
Rust['effsize.a'] = d1
|
29
33
|
Rust['effsize.b'] = d2
|
@@ -32,10 +36,10 @@ module Rust::EffectSize::CliffDelta
|
|
32
36
|
|
33
37
|
result = Rust::EffectSize::Result.new
|
34
38
|
result.name = "Cliff's delta"
|
35
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
36
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
37
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
38
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
39
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
40
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
41
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
42
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
39
43
|
|
40
44
|
return result
|
41
45
|
end
|
@@ -49,6 +53,10 @@ module Rust::EffectSize::CohenD
|
|
49
53
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
50
54
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
51
55
|
|
56
|
+
if d1.size <= 1 || d2.size <= 1
|
57
|
+
return Rust::EffectSize::Result.new
|
58
|
+
end
|
59
|
+
|
52
60
|
Rust.exclusive do
|
53
61
|
Rust['effsize.a'] = d1
|
54
62
|
Rust['effsize.b'] = d2
|
@@ -57,10 +65,10 @@ module Rust::EffectSize::CohenD
|
|
57
65
|
|
58
66
|
result = Rust::EffectSize::Result.new
|
59
67
|
result.name = "Cohen's d"
|
60
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
61
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
62
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
63
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
68
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
69
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
70
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
71
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
64
72
|
|
65
73
|
return result
|
66
74
|
end
|
data/lib/rust-plots.rb
CHANGED
@@ -21,6 +21,14 @@ module Rust::Plots
|
|
21
21
|
return self
|
22
22
|
end
|
23
23
|
|
24
|
+
# Returns `size` colours for the plot: the single colour 'black' when at most
# one is needed, otherwise whatever R's hcl.colors(n=size) yields through
# Rust._pull.
def palette(size)
    return ['black'] if size <= 1

    Rust._pull("hcl.colors(n=#{size})")
end
|
31
|
+
|
24
32
|
def x_range(range)
|
25
33
|
@options['xlim'] = range
|
26
34
|
|
@@ -127,10 +135,18 @@ module Rust::Plots
|
|
127
135
|
end
|
128
136
|
|
129
137
|
class ScatterPlot < BasePlot
|
130
|
-
def initialize(x, y)
|
138
|
+
# Creates a scatter plot, optionally with a first series.
#
# x, y:    coordinates of the first series; the series is registered only
#          when both are given.
# options: per-series options, forwarded to #series.
#
# The options are forwarded as keywords: #series declares `**options`, so
# passing the hash as a third positional argument raises ArgumentError on
# Ruby 3.
def initialize(x = nil, y = nil, **options)
    super()
    @series = []
    series(x, y, **options) if x && y
end
|
145
|
+
|
146
|
+
# Registers one more series, stored as [x, y, options] at the end of @series.
# Returns self.
def series(x, y, **options)
    @series.push([x, y, options])

    self
end
|
135
151
|
|
136
152
|
def thickness(t)
|
@@ -159,13 +175,66 @@ module Rust::Plots
|
|
159
175
|
|
160
176
|
protected
|
161
177
|
# Draws every registered series: the first through R's plot(), the following
# ones through lines(). Each series is coloured with its own :color option
# or, failing that, with the palette entry at its position.
# When no 'xlim' option is set on the plot, the x and y ranges spanning all
# the series are computed and applied to every call.
# NOTE(review): the computed ranges are merged over the per-series options,
# so they win over a series' own :xlim/:ylim - confirm this is intended.
# Returns self.
def _show()
    colors = self.palette(@series.size)

    base_options = {}
    unless @options['xlim']
        x_values = @series.map { |v| v[0] }.flatten
        y_values = @series.map { |v| v[1] }.flatten

        base_options[:xlim] = [x_values.min, x_values.max]
        base_options[:ylim] = [y_values.min, y_values.max]
    end

    @series.each_with_index do |(x, y, series_options), index|
        merged = series_options.merge(base_options)
        Rust["plotter.x"] = x
        Rust["plotter.y"] = y

        # The first series opens the plot, the others are added to it.
        function = Rust::Function.new(index.zero? ? "plot" : "lines")

        augmented_options = {}
        augmented_options['col'] = merged[:color] || colors[index]
        augmented_options['xlim'] = merged[:xlim] if merged[:xlim]
        augmented_options['ylim'] = merged[:ylim] if merged[:ylim]

        function.options = self._augmented_options(augmented_options)
        function.arguments << Rust::Variable.new("plotter.x")
        function.arguments << Rust::Variable.new("plotter.y")

        function.call
    end

    self
end
|
220
|
+
end
|
221
|
+
|
222
|
+
class BarPlot < BasePlot
|
223
|
+
def initialize(bars)
|
224
|
+
super()
|
225
|
+
@bars = bars
|
226
|
+
end
|
227
|
+
|
228
|
+
protected
|
229
|
+
def _show()
|
230
|
+
Rust["plotter.bars"] = @bars.values
|
231
|
+
Rust["plotter.labels"] = @bars.keys
|
232
|
+
|
233
|
+
Rust._eval("names(plotter.bars) <- plotter.labels")
|
164
234
|
|
165
|
-
function = Rust::Function.new("
|
235
|
+
function = Rust::Function.new("barplot")
|
166
236
|
function.options = self._augmented_options
|
167
|
-
function.arguments << Rust::Variable.new("plotter.
|
168
|
-
function.arguments << Rust::Variable.new("plotter.y")
|
237
|
+
function.arguments << Rust::Variable.new("plotter.bars")
|
169
238
|
|
170
239
|
function.call
|
171
240
|
|
@@ -0,0 +1,248 @@
|
|
1
|
+
require_relative 'rust-core'
|
2
|
+
|
3
|
+
class Numeric
    # Absolute difference between the receiver and another Numeric.
    # Raises TypeError when `other` is not a Numeric.
    def distance(other)
        unless other.is_a?(Numeric)
            raise TypeError, "no implicit conversion of #{other.class} into Numeric"
        end

        difference = self - other
        difference.abs
    end
end
|
10
|
+
|
11
|
+
class Array
    # Sum of the absolute differences between the elements at the same
    # position, after converting each element with to_i. Positions missing
    # from the shorter array count as 0 (nil.to_i).
    # Raises TypeError when `other` is not an Array.
    def distance(other)
        raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array

        longest, shortest = self.size > other.size ? [self, other] : [other, self]

        longest.each_index.sum { |i| (longest[i].to_i - shortest[i].to_i).abs }
    end
end
|
25
|
+
|
26
|
+
class String
    # Distance between two strings, defined as Array#distance applied to
    # their byte sequences.
    # Raises TypeError when `other` is not a String.
    def distance(other)
        raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String

        own_bytes = bytes
        own_bytes.distance(other.bytes)
    end
end
|
33
|
+
|
34
|
+
module Rust
|
35
|
+
# A subset of the outcomes of a random variable, stored as a Hash that maps
# each outcome to its probability.
class RandomVariableSlice
    # Raises TypeError unless `values` is a Hash.
    def initialize(values)
        raise TypeError, "Expected Hash" unless values.is_a?(Hash)

        @values = values
    end

    # With an argument, the probability stored for outcome `v`; without one
    # (or with a falsy one), the sum of all the probabilities in the slice.
    def probability(v=nil)
        return @values[v] if v

        @values.values.sum
    end

    # Outcome with the highest probability.
    def ml
        best = @values.max_by { |_outcome, prob| prob }
        best[0]
    end

    # Sum of outcome * probability over the slice.
    def expected
        @values.map { |outcome, prob| outcome * prob }.sum
    end

    # The comparison operators below return the sub-slice made of the
    # outcomes that satisfy the comparison with `n`.
    def >(n)
        so_that { |outcome| outcome > n }
    end

    def >=(n)
        so_that { |outcome| outcome >= n }
    end

    def <(n)
        so_that { |outcome| outcome < n }
    end

    def <=(n)
        so_that { |outcome| outcome <= n }
    end

    def ==(n)
        so_that { |outcome| outcome == n }
    end

    # Sub-slice of the outcomes for which the block returns a truthy value.
    def so_that
        RandomVariableSlice.new(@values.select { |outcome, _prob| yield(outcome) })
    end

    # Sub-slice of the outcomes lying between a and b (bounds included).
    def between(a, b)
        RandomVariableSlice.new(@values.select { |outcome, _prob| outcome.between?(a, b) })
    end
end
|
86
|
+
|
87
|
+
# Discrete random variable: a Hash mapping every outcome to its probability.
# The probabilities must each lie in [0, 1] and add up to 1 within EPSILON.
class RandomVariable < RandomVariableSlice
    # Tolerance on the total probability; also the threshold at or below
    # which approx! folds an outcome into its nearest neighbour.
    EPSILON = 1e-7

    attr_reader :values

    # values: outcome => probability Hash (stored as is, not copied).
    # exact:  when true, approx! leaves negligible outcomes in place.
    # Raises a RuntimeError when a probability is outside [0, 1] or when the
    # total differs from 1 by more than EPSILON.
    def initialize(values = {0 => 1.0}, exact = false)
        @values = values
        @exact = exact

        raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
        raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

        approx!
    end

    # Probability of outcome `v` as a Float (0.0 for an unknown outcome).
    def probability(v)
        @values[v].to_f
    end

    # Distribution of the sum of the two variables, treated as independent.
    def +(other)
        combine(other) { |mine, theirs| mine + theirs }
    end

    # With an Integer, the sum of that many copies of the variable (see rep);
    # with a RandomVariable, the distribution of the product (see mul).
    def *(times)
        if times.is_a? Integer
            rep(times)
        elsif times.is_a? RandomVariable
            mul(times)
        else
            raise "The argument must be an Integer or a RandomVariable"
        end
    end

    # Distribution of the product of the two variables, treated as independent.
    def mul(other)
        combine(other) { |mine, theirs| mine * theirs }
    end

    # Sum of `times` copies of the variable.
    # NOTE(review): for times < 1 the variable itself is returned unchanged -
    # confirm whether callers rely on that.
    def rep(times)
        rv = self
        (times-1).times { rv += self }

        rv
    end

    # Disables the folding performed by approx! from now on.
    def exact!
        @exact = true
    end

    # Unless the variable is exact, removes every outcome whose probability is
    # at most EPSILON and adds that probability to the remaining outcome that
    # is nearest according to `distance`.
    def approx!
        return if @exact

        negligible = @values.select { |_outcome, prob| prob <= EPSILON }.keys

        negligible.each do |outcome|
            prob = @values.delete outcome
            nearest = @values.keys.min_by { |k| k.distance outcome }
            @values[nearest] += prob
        end
    end

    # Draws one outcome at random according to the probabilities.
    def extract
        threshold = rand

        cumulative = 0
        @values.each do |key, prob|
            cumulative += prob

            return key if cumulative >= threshold
        end

        # The total may be up to EPSILON short of 1, so the loop can finish
        # without reaching the threshold; fall back to the last outcome rather
        # than leaking the Hash returned by `each`.
        @values.keys.last
    end

    # Builds a variable from a partial distribution by assigning the missing
    # probability mass to `key` (mutates the given hash).
    def self.complete(hash, key=0)
        hash[key] = 1 - hash.values.sum
        return RandomVariable.new(hash)
    end

    private

    # Distribution of f(X, Y) for the receiver X and `other` Y: every pair of
    # outcomes contributes the product of its probabilities to the outcome
    # returned by the block.
    def combine(other)
        new_hash = {}

        @values.each do |my_key, my_value|
            other.values.each do |other_key, other_value|
                key = yield(my_key, other_key)

                new_hash[key] = new_hash[key].to_f + (my_value * other_value)
            end
        end

        RandomVariable.new(new_hash, @exact)
    end
end
|
188
|
+
|
189
|
+
# Random variable in which every listed outcome has the same probability.
class UniformRandomVariable < RandomVariable
    # values: collection of outcomes; each gets probability 1.0 / values.size.
    # exact:  forwarded to RandomVariable#initialize.
    def initialize(values, exact = false)
        uniform = values.map { |outcome| [outcome, 1.0 / values.size] }.to_h
        super(uniform, exact)
    end
end
|
194
|
+
|
195
|
+
# Mathematical-notation shortcuts: P(slice) and E(variable).
module Probabilities
    # Probability of a slice of a random variable, e.g. P(x > 3).
    # Raises a RuntimeError for a whole RandomVariable (it has no single
    # probability) and for any argument that is not a RandomVariableSlice.
    def P(v)
        if v.is_a? RandomVariableSlice
            raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
            return v.probability
        else
            # The message used to say "expected value", copied from E.
            raise "Cannot compute the probability of a #{v.class}"
        end
    end

    # Expected value of a random variable or slice.
    # Raises a RuntimeError for any argument that is not a RandomVariableSlice.
    def E(v)
        if v.is_a? RandomVariableSlice
            return v.expected
        else
            raise "Cannot compute the expected value of a #{v.class}"
        end
    end
end
|
213
|
+
|
214
|
+
# Ready-made random variables.
class RandomVariable
    # Relative frequency of each lowercase letter "a".."z".
    ENGLISH_ALPHABET = RandomVariable.new({
        "a" => 0.08167, "b" => 0.01492, "c" => 0.02782, "d" => 0.04253,
        "e" => 0.12703, "f" => 0.02228, "g" => 0.02015, "h" => 0.06094,
        "i" => 0.06966, "j" => 0.00153, "k" => 0.00772, "l" => 0.04025,
        "m" => 0.02406, "n" => 0.06749, "o" => 0.07507, "p" => 0.01929,
        "q" => 0.00095, "r" => 0.05987, "s" => 0.06327, "t" => 0.09056,
        "u" => 0.02758, "v" => 0.00978, "w" => 0.02360, "x" => 0.00150,
        "y" => 0.01974, "z" => 0.00074
    })

    # Six-sided die with equally likely faces 1..6.
    DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])

    # Coin with equally likely outcomes "h" and "t".
    COIN = UniformRandomVariable.new(["h", "t"])
end
|
248
|
+
end
|
data/lib/rust-tests.rb
CHANGED
@@ -7,6 +7,7 @@ module Rust::StatisticalTests
|
|
7
7
|
attr_accessor :pvalue
|
8
8
|
attr_accessor :exact
|
9
9
|
attr_accessor :alpha
|
10
|
+
attr_accessor :hypothesis
|
10
11
|
|
11
12
|
def initialize
|
12
13
|
@statistics = {}
|
@@ -20,6 +21,16 @@ module Rust::StatisticalTests
|
|
20
21
|
@statistics[name.to_sym] = value
|
21
22
|
end
|
22
23
|
|
24
|
+
# P-value of this result adjusted for the other results attached to the same
# hypothesis, as computed by the hypothesis' adjusted_pvalue_for with the
# given method name. Returns 1 when no hypothesis is attached.
def adjusted_pvalue(method='bonferroni')
    if @hypothesis
        @hypothesis.adjusted_pvalue_for(self, method)
    else
        1
    end
end
|
28
|
+
|
29
|
+
# Attaches this result to a hypothesis and registers the result with it
# through the hypothesis' `add`.
# Assigning nil just clears the reference (previously it crashed calling
# `add` on nil).
def hypothesis=(value)
    @hypothesis = value
    @hypothesis.add(self) if @hypothesis
end
|
33
|
+
|
23
34
|
def significant
|
24
35
|
pvalue < alpha
|
25
36
|
end
|
@@ -31,32 +42,150 @@ module Rust::StatisticalTests
|
|
31
42
|
(!exact ? " P-value is not exact." : "")
|
32
43
|
end
|
33
44
|
end
|
45
|
+
|
46
|
+
# A named group of test results whose p-values are adjusted together.
class Hypothesis
    # Resolves the argument to a Hypothesis:
    #   nil        -> a new, untitled hypothesis;
    #   String     -> a live instance with that title if ObjectSpace still
    #                 holds one, otherwise a new hypothesis with that title;
    #   Hypothesis -> the instance itself.
    # Raises TypeError for anything else.
    def self.find(title_or_instance)
        case title_or_instance
        when nil
            Hypothesis.new(nil)
        when String
            ObjectSpace.each_object(Hypothesis) do |instance|
                return instance if instance.title == title_or_instance
            end

            Hypothesis.new(title_or_instance)
        when Hypothesis
            title_or_instance
        else
            raise TypeError, "Expected nil, String or Hypothesis"
        end
    end

    attr_reader :results
    attr_reader :title

    def initialize(title)
        @title = title
        @results = []
    end

    # Registers a result. The very same object is registered only once:
    # a duplicate would inflate the number of comparisons and therefore every
    # adjusted p-value. Returns the list of registered results.
    def add(result)
        @results << result unless @results.any? { |known| known.equal?(result) }
        @results
    end

    # Adjusted p-value of `instance`, obtained by adjusting the p-values of all
    # the registered results with the given method.
    # Raises ArgumentError when `instance` was never added to this hypothesis.
    def adjusted_pvalue_for(instance, method)
        index = @results.index(instance)
        raise ArgumentError, "The given result does not belong to this hypothesis" if index.nil?

        p_values = @results.map { |r| r.pvalue }
        adjusted_pvalues = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)

        # The adjustment may come back as a bare number rather than a list.
        adjusted_pvalues.is_a?(Numeric) ? adjusted_pvalues : adjusted_pvalues[index]
    end
end
|
88
|
+
end
|
89
|
+
|
90
|
+
# P-value adjustment for multiple comparisons, delegated to R's p.adjust.
module Rust::StatisticalTests::PValueAdjustment
    # Returns the adjustment class for a method name (case-insensitive; String
    # or Symbol). Accepted names: "bonferroni"/"b", "holm"/"h", "hochberg",
    # "hommel", "benjaminihochberg"/"bh", "benjaminiyekutieli"/"by".
    # Raises ArgumentError for an unknown name (it used to return nil, which
    # surfaced later as a NoMethodError on `adjust`).
    # Note that this shadows Module#method on this module.
    def self.method(name)
        case name.to_s.downcase
        when "bonferroni", "b"
            Bonferroni
        when "holm", "h"
            Holm
        when "hochberg"
            Hochberg
        when "hommel"
            Hommel
        when "benjaminihochberg", "bh"
            BenjaminiHochberg
        when "benjaminiyekutieli", "by"
            BenjaminiYekutieli
        else
            raise ArgumentError, "Unknown p-value adjustment method: #{name}"
        end
    end

    # Shared implementation: subclasses only declare R_METHOD, the value given
    # to the `method` argument of p.adjust.
    class Base
        # Loads the p-values into the R variable adjustment.p and returns what
        # p.adjust(adjustment.p, method=R_METHOD) evaluates to.
        def self.adjust(*p_values)
            r_method = self::R_METHOD
            Rust.exclusive do
                Rust['adjustment.p'] = p_values
                return Rust._pull("p.adjust(adjustment.p, method=\"#{r_method}\")")
            end
        end
    end

    class Bonferroni < Base
        R_METHOD = "bonferroni"
    end

    class Holm < Base
        R_METHOD = "holm"
    end

    class Hochberg < Base
        R_METHOD = "hochberg"
    end

    class Hommel < Base
        R_METHOD = "hommel"
    end

    class BenjaminiHochberg < Base
        R_METHOD = "BH"
    end

    class BenjaminiYekutieli < Base
        R_METHOD = "BY"
    end
end
|
35
163
|
|
36
164
|
module Rust::StatisticalTests::Wilcoxon
|
37
165
|
class << self
|
38
|
-
|
166
|
+
# Runs a two-sided paired Wilcoxon signed-rank test on two samples through R.
#
# d1, d2::  Arrays of numerics with the same size.
# alpha::   significance threshold stored in the result (default 0.05).
# options:: +:hypothesis+ may be nil, a String title or a Hypothesis; it is
#           resolved with Rust::StatisticalTests::Hypothesis.find.
#
# Returns a Rust::StatisticalTests::Result with name, p-value, the test
# statistic (under the key +:w+), exactness flag, alpha and hypothesis.
#
# Raises TypeError if either sample is not an Array of numerics, and
# RuntimeError if the two samples have different sizes.
def paired(d1, d2, alpha = 0.05, **options)
    [d1, d2].each do |sample|
        raise TypeError, "Expecting Array of numerics" if !sample.is_a?(Array) || !sample.all? { |e| e.is_a?(Numeric) }
    end
    raise "The two distributions have different size" if d1.size != d2.size

    Rust.exclusive do
        Rust["wilcox.a"] = d1
        Rust["wilcox.b"] = d2

        _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=T)", true)

        # R cannot compute an exact p-value for the paired test when the
        # differences contain zeroes OR ties, and it emits a distinct
        # "cannot compute exact p-value with ..." warning for each cause.
        # Any such warning means the p-value is an approximation. The match
        # is by substring so that it works whether the warnings come back as
        # a list of lines or as a single string.
        inexact = Array(warnings).any? { |w| w.to_s.include?("cannot compute exact p-value") }

        result = Rust::StatisticalTests::Result.new
        result.name = "Wilcoxon Signed-Rank test"
        result.pvalue = Rust._pull("wilcox.result$p.value")
        result[:w] = Rust._pull("wilcox.result$statistic")
        result.exact = !inexact
        result.alpha = alpha
        result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])

        return result
    end
end
|
58
187
|
|
59
|
-
def unpaired(d1, d2, alpha = 0.05)
|
188
|
+
def unpaired(d1, d2, alpha = 0.05, **options)
|
60
189
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
61
190
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
62
191
|
|
@@ -66,11 +195,12 @@ module Rust::StatisticalTests::Wilcoxon
|
|
66
195
|
|
67
196
|
_, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=F)", true)
|
68
197
|
result = Rust::StatisticalTests::Result.new
|
69
|
-
result.name
|
70
|
-
result.pvalue
|
71
|
-
result[:w]
|
72
|
-
result.exact
|
73
|
-
result.alpha
|
198
|
+
result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
|
199
|
+
result.pvalue = Rust._pull("wilcox.result$p.value")
|
200
|
+
result[:w] = Rust._pull("wilcox.result$statistic")
|
201
|
+
result.exact = !warnings.include?("cannot compute exact p-value with ties")
|
202
|
+
result.alpha = alpha
|
203
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
74
204
|
|
75
205
|
return result
|
76
206
|
end
|
@@ -80,7 +210,7 @@ end
|
|
80
210
|
|
81
211
|
module Rust::StatisticalTests::T
|
82
212
|
class << self
|
83
|
-
def paired(d1, d2, alpha = 0.05)
|
213
|
+
def paired(d1, d2, alpha = 0.05, **options)
|
84
214
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
85
215
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
86
216
|
raise "The two distributions have different size" if d1.size != d2.size
|
@@ -91,17 +221,18 @@ module Rust::StatisticalTests::T
|
|
91
221
|
|
92
222
|
warnings = Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=T)")
|
93
223
|
result = Rust::StatisticalTests::Result.new
|
94
|
-
result.name
|
95
|
-
result.pvalue
|
96
|
-
result[:t]
|
97
|
-
result.exact
|
98
|
-
result.alpha
|
224
|
+
result.name = "Paired t-test"
|
225
|
+
result.pvalue = Rust._pull("t.result$p.value")
|
226
|
+
result[:t] = Rust._pull("t.result$statistic")
|
227
|
+
result.exact = true
|
228
|
+
result.alpha = alpha
|
229
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
99
230
|
|
100
231
|
return result
|
101
232
|
end
|
102
233
|
end
|
103
234
|
|
104
|
-
def unpaired(d1, d2, alpha = 0.05)
|
235
|
+
def unpaired(d1, d2, alpha = 0.05, **options)
|
105
236
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
106
237
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
107
238
|
|
@@ -111,11 +242,12 @@ module Rust::StatisticalTests::T
|
|
111
242
|
|
112
243
|
Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=F)")
|
113
244
|
result = Rust::StatisticalTests::Result.new
|
114
|
-
result.name
|
115
|
-
result.pvalue
|
116
|
-
result[:t]
|
117
|
-
result.exact
|
118
|
-
result.alpha
|
245
|
+
result.name = "Welch Two Sample t-test"
|
246
|
+
result.pvalue = Rust._pull("t.result$p.value")
|
247
|
+
result[:t] = Rust._pull("t.result$statistic")
|
248
|
+
result.exact = true
|
249
|
+
result.alpha = alpha
|
250
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
119
251
|
|
120
252
|
return result
|
121
253
|
end
|
@@ -125,18 +257,19 @@ end
|
|
125
257
|
|
126
258
|
module Rust::StatisticalTests::Shapiro
|
127
259
|
class << self
|
128
|
-
def compute(vector, alpha = 0.05)
|
260
|
+
def compute(vector, alpha = 0.05, **options)
|
129
261
|
raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
|
130
262
|
Rust.exclusive do
|
131
263
|
Rust['shapiro.v'] = vector
|
132
264
|
|
133
265
|
Rust._eval("shapiro.result = shapiro.test(shapiro.v)")
|
134
266
|
result = Rust::StatisticalTests::Result.new
|
135
|
-
result.name
|
136
|
-
result.pvalue
|
137
|
-
result[:W]
|
138
|
-
result.exact
|
139
|
-
result.alpha
|
267
|
+
result.name = "Shapiro-Wilk normality test"
|
268
|
+
result.pvalue = Rust._pull("shapiro.result$p.value")
|
269
|
+
result[:W] = Rust._pull("shapiro.result$statistic")
|
270
|
+
result.exact = true
|
271
|
+
result.alpha = alpha
|
272
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
140
273
|
|
141
274
|
return result
|
142
275
|
end
|
data/lib/rust.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.4'
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rinruby
|
@@ -63,6 +63,7 @@ files:
|
|
63
63
|
- lib/rust-descriptive.rb
|
64
64
|
- lib/rust-effsize.rb
|
65
65
|
- lib/rust-plots.rb
|
66
|
+
- lib/rust-probabilities.rb
|
66
67
|
- lib/rust-tests.rb
|
67
68
|
- lib/rust.rb
|
68
69
|
homepage: https://github.com/intersimone999/ruby-rust
|
@@ -84,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
85
|
- !ruby/object:Gem::Version
|
85
86
|
version: '0'
|
86
87
|
requirements: []
|
87
|
-
rubygems_version: 3.
|
88
|
+
rubygems_version: 3.2.7
|
88
89
|
signing_key:
|
89
90
|
specification_version: 4
|
90
91
|
summary: Ruby advanced statistical library
|