rust 0.4 → 0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rust-basics.rb +1 -1
- data/lib/rust-calls.rb +11 -0
- data/lib/rust-core.rb +263 -11
- data/lib/rust-csv.rb +12 -3
- data/lib/rust-descriptive.rb +51 -10
- data/lib/rust-effsize.rb +16 -8
- data/lib/rust-plots.rb +77 -8
- data/lib/rust-probabilities.rb +248 -0
- data/lib/rust-tests.rb +164 -31
- data/lib/rust.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35c41ec98f5b286ef597096152249f19825a27bc33030e85ea196df8b778d3b1
|
4
|
+
data.tar.gz: 48b2e61f707ebcd05fa5f593016c8c0fb4b9d1e58751dd5deae83715583bd978
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0cc1289721d52cd376e0a242c2be419ea511f03a0fdd42c4c7a1973cde24e8dfc467e9d355693f85632cccb98af6986777aa020b715db5f06aa88a63d154ea0
|
7
|
+
data.tar.gz: 4c91c808773dcd04913a594cfcaa7956318908e6d590ebec1d9a04beef18986adc70a3c5e480c39bdc9f753b5edf920a1939208b8b530ccfc67124ac153a4804
|
data/lib/rust-basics.rb
CHANGED
data/lib/rust-calls.rb
CHANGED
@@ -34,6 +34,17 @@ module Rust
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
# Two-sided R formula rendered as "<dependent> ~ <independent>".
class SimpleFormula
  # Both operands are stored as given and interpolated verbatim by #to_R.
  def initialize(dependent, independent)
    @dependent, @independent = dependent, independent
  end

  # Returns the formula as R source text.
  def to_R
    "#{@dependent} ~ #{@independent}"
  end
end
|
47
|
+
|
37
48
|
class Variable
|
38
49
|
def initialize(name)
|
39
50
|
@name = name
|
data/lib/rust-core.rb
CHANGED
@@ -130,7 +130,10 @@ module Rust
|
|
130
130
|
@labels.each { |label| @data[label] = [] }
|
131
131
|
elsif labels_or_data.is_a? Hash
|
132
132
|
@labels = labels_or_data.keys.map { |l| l.to_s }
|
133
|
-
|
133
|
+
|
134
|
+
labels_or_data.each do |key, value|
|
135
|
+
@data[key.to_s] = value.clone
|
136
|
+
end
|
134
137
|
end
|
135
138
|
end
|
136
139
|
|
@@ -142,6 +145,14 @@ module Rust
|
|
142
145
|
end
|
143
146
|
end
|
144
147
|
|
148
|
+
# Returns row +i+ as a plain Array whose values follow the order of @labels,
# or nil when +i+ lies outside 0...rows.
def fast_row(i)
  return nil if i < 0 || i >= self.rows

  @labels.map { |label| @data[label][i] }
end
|
155
|
+
|
145
156
|
def shuffle(*args)
|
146
157
|
result = DataFrame.new(@labels)
|
147
158
|
|
@@ -174,6 +185,7 @@ module Rust
|
|
174
185
|
def column(name)
|
175
186
|
return @data[name]
|
176
187
|
end
|
188
|
+
alias :| :column
|
177
189
|
|
178
190
|
def rename_column!(old_name, new_name)
|
179
191
|
raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
|
@@ -195,6 +207,13 @@ module Rust
|
|
195
207
|
return result
|
196
208
|
end
|
197
209
|
|
210
|
+
# Yields every row with its index and returns true as soon as the block
# answers truthy; returns false when no row satisfies the block.
def has_row?
  self.each_with_index { |row, i| return true if yield(row, i) }
  false
end
|
216
|
+
|
198
217
|
def select_columns(cols=nil)
|
199
218
|
raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?
|
200
219
|
|
@@ -215,6 +234,40 @@ module Rust
|
|
215
234
|
@data.delete(column)
|
216
235
|
end
|
217
236
|
|
237
|
+
# Removes the value at position +i+ from every column stored in @data.
def delete_row(i)
  @data.each_value { |column| column.delete_at(i) }
end
|
242
|
+
|
243
|
+
# Non-destructive counterpart of #uniq_by!: clones the receiver, removes the
# duplicated rows from the clone and returns the clone.
# NOTE(review): relies on #clone producing an independent copy of the
# columns; confirm against the class' copy semantics.
def uniq_by(by)
  self.clone.tap { |copy| copy.uniq_by!(by) }
end
|
248
|
+
|
249
|
+
# Removes, in place, every row whose values in the +by+ columns repeat those
# of an earlier row; the first occurrence is kept. Returns self.
def uniq_by!(by)
  first_seen = {}
  doomed = []

  self.each_with_index do |row, i|
    key = by.map { |colname| row[colname] }
    if first_seen[key]
      # Earlier deletions shift the remaining rows up, so compensate now.
      doomed << (i - doomed.size)
    else
      first_seen[key] = i
    end
  end

  doomed.each { |i| self.delete_row(i) }

  self
end
|
270
|
+
|
218
271
|
def column_names
|
219
272
|
return @labels.map { |k| k.to_s }
|
220
273
|
end
|
@@ -243,7 +296,7 @@ module Rust
|
|
243
296
|
row.each do |key, value|
|
244
297
|
@data[key.to_s] << value
|
245
298
|
end
|
246
|
-
|
299
|
+
|
247
300
|
return true
|
248
301
|
else
|
249
302
|
raise TypeError, "Expected an Array or a Hash"
|
@@ -275,6 +328,14 @@ module Rust
|
|
275
328
|
return self
|
276
329
|
end
|
277
330
|
|
331
|
+
# Yields each row in the Array form produced by #fast_each_with_index,
# dropping the index. Returns self.
def fast_each
  self.fast_each_with_index { |element, _index| yield element }
  self
end
|
338
|
+
|
278
339
|
def each_with_index
|
279
340
|
for i in 0...self.rows
|
280
341
|
element = {}
|
@@ -288,6 +349,19 @@ module Rust
|
|
288
349
|
return self
|
289
350
|
end
|
290
351
|
|
352
|
+
# Yields every row as a plain Array (values ordered like @labels) together
# with its zero-based index. Returns self.
def fast_each_with_index
  (0...self.rows).each do |i|
    element = @labels.map { |label| @data[label][i] }
    yield element, i
  end

  self
end
|
364
|
+
|
291
365
|
def load_in_r_as(variable_name)
|
292
366
|
command = []
|
293
367
|
|
@@ -397,6 +471,77 @@ module Rust
|
|
397
471
|
return result
|
398
472
|
end
|
399
473
|
|
474
|
+
# Groups the rows by the value of column +by+ and collapses every group into
# a single row.
#
# by          - String name of the grouping column.
# aggregators - per-column Procs (keyword name = column name); each receives
#               the group's values for that column.
# block       - default aggregator for columns without a dedicated Proc.
#
# Returns a new Rust::DataFrame with one row per distinct +by+ value, in the
# order produced by #sort_by. An empty data frame yields an empty result.
# Raises TypeError on a non-String +by+ or non-Proc aggregators, and a
# RuntimeError when no block is given.
def aggregate(by, **aggregators)
  raise TypeError, "Expected a string" unless by.is_a?(String)
  raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
  raise "Expected a block for default aggregator" unless block_given?

  aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h

  sorted = self.sort_by(by)

  current_value = nil
  partials = []
  partial = nil
  sorted.column(by).each_with_index do |value, index|
    # `partial.nil?` opens the first group even when its key is nil, and
    # registering the group on creation means no nil is ever pushed for an
    # empty data frame.
    if partial.nil? || current_value != value
      current_value = value
      partial = Rust::DataFrame.new(self.column_names)
      partials << partial
    end
    partial << sorted.fast_row(index)
  end

  result = Rust::DataFrame.new(self.column_names)
  partials.each do |group|
    aggregated_row = {}
    aggregated_row[by] = group.column(by)[0]
    (self.column_names - [by]).each do |column|
      if aggregators[column]
        aggregated_row[column] = aggregators[column].call(group.column(column))
      else
        aggregated_row[column] = yield group.column(column)
      end
    end

    result << aggregated_row
  end

  result
end
|
513
|
+
|
514
|
+
# Non-destructive counterpart of #sort_by!: clones the receiver, sorts the
# clone by +column+ and returns the clone.
# NOTE(review): relies on #clone producing an independent copy of the
# columns; confirm against the class' copy semantics.
def sort_by(column)
  self.clone.tap { |copy| copy.sort_by!(column) }
end
|
519
|
+
|
520
|
+
# Reorders all rows, in place, so that column +by+ is in ascending order.
# Rows with equal keys keep their original relative order.
#
# by - String name of the column to sort on.
#
# Returns the sorted +by+ column (the same Array object stored in @data).
# Raises RuntimeError when the column does not exist and ArgumentError when
# the values cannot be compared with <=>.
def sort_by!(by)
  keys = @data[by]
  raise "This DataFrame does not contain a column named #{by}" if keys.nil?

  # One O(n log n) permutation replaces the per-element `index` scan; the
  # index tie-break keeps equal keys in their original order.
  order = keys.each_index.sort_by { |i| [keys[i], i] }

  (self.column_names - [by]).each do |column_name|
    column = self.column(column_name)
    @data[column_name] = order.map { |i| column[i] }
  end

  # The key column is rewritten in place, as the original `sort!` did.
  keys.replace(order.map { |i| keys[i] })
end
|
544
|
+
|
400
545
|
def bind_rows!(dataframe)
|
401
546
|
raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
|
402
547
|
raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}" unless (self.column_names & dataframe.column_names).size == self.columns
|
@@ -480,7 +625,7 @@ module Rust
|
|
480
625
|
end
|
481
626
|
end
|
482
627
|
|
483
|
-
class Sequence
|
628
|
+
class Sequence < RustDatatype
|
484
629
|
attr_reader :min
|
485
630
|
attr_reader :max
|
486
631
|
|
@@ -511,6 +656,103 @@ module Rust
|
|
511
656
|
def to_R
|
512
657
|
"seq(from=#@min, to=#@max, by=#@step)"
|
513
658
|
end
|
659
|
+
|
660
|
+
# Assigns the R expression returned by #to_R to +variable_name+ by
# evaluating "<variable_name> <- <expression>" through Rust._eval.
def load_in_r_as(variable_name)
  assignment = "#{variable_name} <- #{self.to_R}"
  Rust._eval(assignment)
end
|
663
|
+
end
|
664
|
+
|
665
|
+
# Array of data frames that can be concatenated row-wise.
class DataFrameArray < Array
  # Clones the first element and appends every following element to the
  # clone with #bind_rows!. Returns the clone, or nil when the array is empty.
  def bind_all
    return nil if self.size == 0

    merged = self.first.clone
    self.drop(1).each { |frame| merged.bind_rows!(frame) }
    merged
  end
end
|
678
|
+
|
679
|
+
# Hash whose values are data frames that can be concatenated row-wise.
class DataFrameHash < Hash
  # Clones the first value and appends every following value (in insertion
  # order) to the clone with #bind_rows!. Returns the clone, or nil when the
  # hash is empty.
  def bind_all
    frames = self.values
    return nil if frames.size == 0

    merged = frames.first.clone
    frames.drop(1).each { |frame| merged.bind_rows!(frame) }
    merged
  end
end
|
692
|
+
|
693
|
+
# Array with element-wise arithmetic. The right-hand operand of -, *, + and /
# is either a Numeric (applied to every element) or an Array of the same
# size; ** accepts only a Numeric exponent. Every operator returns a clone of
# the receiver holding the results.
class MathArray < Array
  def -(other)
    elementwise(other) { |mine, theirs| mine - theirs }
  end

  def *(other)
    elementwise(other) { |mine, theirs| mine * theirs }
  end

  def +(other)
    elementwise(other) { |mine, theirs| mine + theirs }
  end

  def /(other)
    elementwise(other) { |mine, theirs| mine / theirs }
  end

  def **(other)
    raise ArgumentError, "Expected numeric" if !other.is_a?(Numeric)

    result = self.clone
    self.each_index { |i| result[i] = result[i] ** other }
    result
  end

  private

  # Validates +other+, broadcasts a Numeric to the receiver's size and
  # combines the operands position by position through the given block.
  def elementwise(other)
    raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
    raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size

    result = self.clone
    operands = other.is_a?(Numeric) ? [other] * self.size : other
    self.each_index { |i| result[i] = yield(result[i], operands[i]) }
    result
  end
end
|
515
757
|
end
|
516
758
|
|
@@ -554,6 +796,14 @@ class Array
|
|
554
796
|
def to_R
|
555
797
|
return "c(#{self.map { |e| e.to_R }.join(",")})"
|
556
798
|
end
|
799
|
+
|
800
|
+
# Counts how many times each distinct element occurs.
# Returns a Hash mapping element => occurrences, in first-seen order.
def distribution
  self.each_with_object({}) do |value, counts|
    counts[value] = counts[value].to_i + 1
  end
end
|
557
807
|
end
|
558
808
|
|
559
809
|
class String
|
@@ -569,19 +819,21 @@ class Range
|
|
569
819
|
end
|
570
820
|
|
571
821
|
module Rust::RBindings
|
572
|
-
def read_csv(filename, **options)
|
573
|
-
Rust::CSV.read(filename, **options)
|
574
|
-
end
|
575
|
-
|
576
|
-
def write_csv(filename, dataframe, **options)
|
577
|
-
Rust::CSV.write(filename, dataframe, **options)
|
578
|
-
end
|
579
|
-
|
580
822
|
def data_frame(*args)
|
581
823
|
Rust::DataFrame.new(*args)
|
582
824
|
end
|
583
825
|
end
|
584
826
|
|
827
|
+
# Helpers for building synthetic data frames.
module Rust::TestCases
  # Builds a Rust::DataFrame with the given +columns+ and +size+ rows. The
  # block receives (row_index, column) and returns the value of that cell.
  def self.sample_dataframe(columns, size=100)
    frame = Rust::DataFrame.new(columns)
    (0...size).each do |i|
      row = columns.map { |c| yield i, c }
      frame << row
    end
    frame
  end
end
|
836
|
+
|
585
837
|
def bind_r!
|
586
838
|
include Rust::RBindings
|
587
839
|
end
|
data/lib/rust-csv.rb
CHANGED
@@ -3,7 +3,7 @@ require_relative 'rust-core'
|
|
3
3
|
module Rust
|
4
4
|
class CSV
|
5
5
|
def self.read_all(pattern, **options)
|
6
|
-
result =
|
6
|
+
result = DataFrameHash.new
|
7
7
|
Dir.glob(pattern).each do |filename|
|
8
8
|
result[filename] = CSV.read(filename, **options)
|
9
9
|
end
|
@@ -49,10 +49,9 @@ module Rust
|
|
49
49
|
raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)
|
50
50
|
|
51
51
|
write_headers = options[:headers] != false
|
52
|
-
options[:headers] = dataframe.column_names
|
52
|
+
options[:headers] = dataframe.column_names unless options[:headers]
|
53
53
|
|
54
54
|
hash = {}
|
55
|
-
labels = nil
|
56
55
|
::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
|
57
56
|
dataframe.each do |row|
|
58
57
|
csv << row
|
@@ -93,3 +92,13 @@ module Rust
|
|
93
92
|
end
|
94
93
|
end
|
95
94
|
end
|
95
|
+
|
96
|
+
# R-flavoured shortcuts for the CSV reader and writer.
module Rust::RBindings
  # Delegates to Rust::CSV.read, forwarding every keyword option.
  def read_csv(filename, **csv_options)
    Rust::CSV.read(filename, **csv_options)
  end

  # Delegates to Rust::CSV.write, forwarding every keyword option.
  def write_csv(filename, dataframe, **csv_options)
    Rust::CSV.write(filename, dataframe, **csv_options)
  end
end
|
data/lib/rust-descriptive.rb
CHANGED
@@ -50,18 +50,59 @@ module Rust::Descriptive
|
|
50
50
|
# Computes the requested quantiles of +data+ by linear interpolation between
# the two order statistics surrounding position (n - 1) * p.
#
# data        - Array of Numeric.
# percentiles - Array of Numeric, each within [0, 1].
#
# Returns a Hash mapping each percentile to its quantile.
# Raises TypeError on non-numeric input and RuntimeError on percentiles
# outside [0, 1].
def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
  raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
  raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

  n = data.size
  percentiles = percentiles.map { |x| x > 1.0 ? 1.0 : (x < 0.0 ? 0.0 : x) }
  ordered = data.sort

  values = percentiles.map do |p|
    # Arithmetic kept exactly as "1 + ... - 1" so rounding is unchanged.
    position = 1 + [n - 1, 0].max * p - 1
    lower = position.floor
    upper = position.ceil

    below = ordered[lower]
    above = ordered[upper]

    if position > lower && above != below
      weight = position - lower
      (1 - weight) * below + weight * above
    else
      below
    end
  end

  percentiles.zip(values).to_h
end
|
81
|
+
|
82
|
+
# Returns the outliers of +data+ judged against its own distribution; see
# #outliers_according_to for the meaning of +k+ and the options.
def outliers(data, k=1.5, **opts)
  reference = data
  outliers_according_to(data, reference, k, **opts)
end
|
85
|
+
|
86
|
+
# Selects the elements of +data+ that lie outside the fences
# [Q1 - k * IQR, Q3 + k * IQR], where the quartiles are computed on
# +data_distribution+ through Rust::Descriptive.quantile.
#
# data              - Array of values to test.
# data_distribution - Array the quartiles are computed from.
# k                 - fence multiplier (default 1.5).
# opts[:side]       - optional; :positive / :pos / :p / :+ keeps only the
#                     values above the upper fence, :negative / :neg / :n / :-
#                     only those below the lower fence. Any other value keeps
#                     both sides.
#
# Returns an Array with the negative outliers followed by the positive ones
# (or just the requested side).
def outliers_according_to(data, data_distribution, k=1.5, **opts)
  quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
  q1 = quantiles[0.25]
  q3 = quantiles[0.75]
  iqr = q3 - q1

  positive_outliers = data.select { |d| d > q3 + iqr * k }
  negative_outliers = data.select { |d| d < q1 - iqr * k }

  outliers = negative_outliers + positive_outliers
  if opts[:side]
    # The short aliases were previously attached to the opposite side.
    case opts[:side].to_sym
    when :positive, :pos, :p, :+
      outliers = positive_outliers
    when :negative, :neg, :n, :-
      outliers = negative_outliers
    end
  end

  outliers
end
|
66
107
|
end
|
67
108
|
end
|
data/lib/rust-effsize.rb
CHANGED
@@ -24,6 +24,10 @@ module Rust::EffectSize::CliffDelta
|
|
24
24
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
25
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
26
|
|
27
|
+
if d1.size <= 1 || d2.size <= 1
|
28
|
+
return Rust::EffectSize::Result.new
|
29
|
+
end
|
30
|
+
|
27
31
|
Rust.exclusive do
|
28
32
|
Rust['effsize.a'] = d1
|
29
33
|
Rust['effsize.b'] = d2
|
@@ -32,10 +36,10 @@ module Rust::EffectSize::CliffDelta
|
|
32
36
|
|
33
37
|
result = Rust::EffectSize::Result.new
|
34
38
|
result.name = "Cliff's delta"
|
35
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
36
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
37
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
38
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
39
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
40
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
41
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
42
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
39
43
|
|
40
44
|
return result
|
41
45
|
end
|
@@ -49,6 +53,10 @@ module Rust::EffectSize::CohenD
|
|
49
53
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
50
54
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
51
55
|
|
56
|
+
if d1.size <= 1 || d2.size <= 1
|
57
|
+
return Rust::EffectSize::Result.new
|
58
|
+
end
|
59
|
+
|
52
60
|
Rust.exclusive do
|
53
61
|
Rust['effsize.a'] = d1
|
54
62
|
Rust['effsize.b'] = d2
|
@@ -57,10 +65,10 @@ module Rust::EffectSize::CohenD
|
|
57
65
|
|
58
66
|
result = Rust::EffectSize::Result.new
|
59
67
|
result.name = "Cohen's d"
|
60
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
61
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
62
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
63
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
68
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
69
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
70
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
71
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
64
72
|
|
65
73
|
return result
|
66
74
|
end
|
data/lib/rust-plots.rb
CHANGED
@@ -21,6 +21,14 @@ module Rust::Plots
|
|
21
21
|
return self
|
22
22
|
end
|
23
23
|
|
24
|
+
# Returns +size+ colours for the plotted series: plain black when at most one
# colour is needed, otherwise whatever R's hcl.colors(n=size) yields through
# Rust._pull.
def palette(size)
  return ['black'] if size <= 1

  Rust._pull("hcl.colors(n=#{size})")
end
|
31
|
+
|
24
32
|
def x_range(range)
|
25
33
|
@options['xlim'] = range
|
26
34
|
|
@@ -127,10 +135,18 @@ module Rust::Plots
|
|
127
135
|
end
|
128
136
|
|
129
137
|
class ScatterPlot < BasePlot
|
130
|
-
def initialize(x, y)
|
138
|
+
# Creates a scatter plot, optionally seeded with a first series.
#
# x, y    - coordinates of the first series; the series is registered only
#           when both are given.
# options - per-series options forwarded to #series.
def initialize(x = nil, y = nil, **options)
  super()
  @series = []
  # Options must be re-splatted: #series declares **options, and Ruby 3
  # rejects a positional Hash there with an ArgumentError.
  self.series(x, y, **options) if x && y
end
|
145
|
+
|
146
|
+
# Registers one more (x, y) series together with its per-series options.
# Returns self so that calls can be chained.
def series(x, y, **options)
  @series.push([x, y, options])
  self
end
|
135
151
|
|
136
152
|
def thickness(t)
|
@@ -159,13 +175,66 @@ module Rust::Plots
|
|
159
175
|
|
160
176
|
protected
|
161
177
|
# Draws every registered series: the first one through R's plot(), the
# following ones through lines(). Unless the plot already has an 'xlim'
# option, the x and y limits are derived from the values of all series.
# Each series takes its :color option, or the palette colour at its position.
# Returns self.
def _show()
  colors = self.palette(@series.size)

  shared_limits = {}
  unless @options['xlim']
    all_x = @series.map { |entry| entry[0] }.flatten
    all_y = @series.map { |entry| entry[1] }.flatten

    shared_limits[:xlim] = [all_x.min, all_x.max]
    shared_limits[:ylim] = [all_y.min, all_y.max]
  end

  @series.each_with_index do |(x, y, series_options), position|
    effective = series_options.merge(shared_limits)
    Rust["plotter.x"] = x
    Rust["plotter.y"] = y

    function = Rust::Function.new(position == 0 ? "plot" : "lines")

    extra = {}
    extra['col'] = effective[:color] || colors[position]
    extra['xlim'] = effective[:xlim] if effective[:xlim]
    extra['ylim'] = effective[:ylim] if effective[:ylim]

    function.options = self._augmented_options(extra)
    function.arguments << Rust::Variable.new("plotter.x")
    function.arguments << Rust::Variable.new("plotter.y")

    function.call
  end

  self
end
|
220
|
+
end
|
221
|
+
|
222
|
+
class BarPlot < BasePlot
|
223
|
+
def initialize(bars)
|
224
|
+
super()
|
225
|
+
@bars = bars
|
226
|
+
end
|
227
|
+
|
228
|
+
protected
|
229
|
+
def _show()
|
230
|
+
Rust["plotter.bars"] = @bars.values
|
231
|
+
Rust["plotter.labels"] = @bars.keys
|
232
|
+
|
233
|
+
Rust._eval("names(plotter.bars) <- plotter.labels")
|
164
234
|
|
165
|
-
function = Rust::Function.new("
|
235
|
+
function = Rust::Function.new("barplot")
|
166
236
|
function.options = self._augmented_options
|
167
|
-
function.arguments << Rust::Variable.new("plotter.
|
168
|
-
function.arguments << Rust::Variable.new("plotter.y")
|
237
|
+
function.arguments << Rust::Variable.new("plotter.bars")
|
169
238
|
|
170
239
|
function.call
|
171
240
|
|
@@ -0,0 +1,248 @@
|
|
1
|
+
require_relative 'rust-core'
|
2
|
+
|
3
|
+
class Numeric
  # Absolute difference between the receiver and +other+.
  # Raises TypeError when +other+ is not a Numeric.
  def distance(other)
    unless other.is_a? Numeric
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    (self - other).abs
  end
end
|
10
|
+
|
11
|
+
class Array
  # Sum of the position-wise distances between the two arrays. Elements are
  # converted with #to_i first, so positions missing from the shorter array
  # count as 0.
  # Raises TypeError when +other+ is not an Array.
  def distance(other)
    unless other.is_a? Array
      raise TypeError, "no implicit conversion of #{other.class} into Array"
    end

    longest, shortest = self.size > other.size ? [self, other] : [other, self]

    (0...longest.size).reduce(0) do |total, i|
      total + longest[i].to_i.distance(shortest[i].to_i)
    end
  end
end
|
25
|
+
|
26
|
+
class String
  # Distance between the byte sequences of the two strings, as computed by
  # Array#distance.
  # Raises TypeError when +other+ is not a String.
  def distance(other)
    unless other.is_a? String
      raise TypeError, "no implicit conversion of #{other.class} into String"
    end

    mine = self.bytes
    theirs = other.bytes
    mine.distance(theirs)
  end
end
|
33
|
+
|
34
|
+
module Rust
|
35
|
+
# A subset of the outcomes of a discrete random variable, stored as a Hash
# mapping outcome => probability. The comparison operators do not return
# booleans: each one returns the sub-slice of outcomes that satisfy it.
class RandomVariableSlice
  # values - Hash mapping outcome => probability.
  # Raises TypeError when +values+ is not a Hash.
  def initialize(values)
    raise TypeError, "Expected Hash" unless values.is_a?(Hash)

    @values = values
  end

  # With no (or a falsy) argument returns the total probability of the
  # slice; otherwise the stored probability of outcome +v+ (nil when +v+ is
  # not part of the slice).
  def probability(v=nil)
    unless v
      return @values.values.sum
    else
      return @values[v]
    end
  end

  # Most likely outcome, or nil when the slice is empty (an empty slice
  # used to raise NoMethodError on nil).
  def ml
    @values.max_by { |_outcome, prob| prob }&.first
  end

  # Sum of outcome * probability over the slice.
  def expected
    @values.map { |k, v| k*v }.sum
  end

  def >(n)
    self.so_that { |k| k > n}
  end

  def >=(n)
    self.so_that { |k| k >= n}
  end

  def <(n)
    self.so_that { |k| k < n}
  end

  def <=(n)
    self.so_that { |k| k <= n}
  end

  # NOTE: returns a slice, not a boolean, like the other comparison operators.
  def ==(n)
    self.so_that { |k| k == n}
  end

  # Sub-slice of the outcomes for which the block answers truthy.
  def so_that
    RandomVariableSlice.new(@values.select { |k, v| yield(k) })
  end

  # Sub-slice of the outcomes within [a, b] (bounds included).
  def between(a, b)
    RandomVariableSlice.new(@values.select { |k, v| k.between? a, b })
  end
end
|
86
|
+
|
87
|
+
# Discrete random variable: a Hash mapping outcome => probability whose
# probabilities lie in [0, 1] and add up to 1 within EPSILON.
class RandomVariable < RandomVariableSlice
  # Tolerance on the total probability, and threshold under which an outcome
  # is folded into its nearest neighbour by #approx!.
  EPSILON = 1e-7

  attr_reader :values

  # values - Hash mapping outcome => probability (default: constant 0).
  # exact  - when true, negligible outcomes are never merged by #approx!.
  #
  # Raises RuntimeError when a probability is outside [0, 1] or when the
  # probabilities do not add up to 1 within EPSILON.
  # NOTE: +values+ is stored as given and may be modified by #approx!.
  def initialize(values = {0 => 1.0}, exact = false)
    @values = values
    @exact = exact

    raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
    raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

    approx!
  end

  # Probability of outcome +v+ as a Float (0.0 for unknown outcomes).
  def probability(v)
    return @values[v].to_f
  end

  # Distribution of the sum of the two variables, assumed independent.
  def +(other)
    new_hash = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        sum_key = my_key + other_key

        new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
      end
    end

    return RandomVariable.new(new_hash, @exact)
  end

  # Integer argument: sum of that many independent copies (see #rep).
  # RandomVariable argument: distribution of the product (see #mul).
  def *(times)
    if times.is_a? Integer
      return rep(times)
    elsif times.is_a? RandomVariable
      return mul(times)
    else
      raise "The argument must be an Integer or a RandomVariable"
    end
  end

  # Distribution of the product of the two variables, assumed independent.
  def mul(other)
    new_hash = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        mul_key = my_key * other_key

        new_hash[mul_key] = new_hash[mul_key].to_f + (my_value * other_value)
      end
    end

    return RandomVariable.new(new_hash, @exact)
  end

  # Adds the variable to itself +times+ - 1 times; for +times+ <= 1 the
  # receiver itself is returned.
  def rep(times)
    rv = self
    (times-1).times do
      rv += self
    end

    return rv
  end

  # Disables the merging performed by #approx! from now on.
  def exact!
    @exact = true
  end

  # Unless the variable is exact, removes every outcome whose probability is
  # at most EPSILON and adds that probability to the remaining outcome that
  # is nearest according to #distance.
  def approx!
    return if @exact

    to_delete = []
    @values.each do |v, probability|
      to_delete.push v if probability <= EPSILON
    end

    to_delete.each do |v|
      probability = @values.delete v
      nearest = @values.keys.min_by { |k| k.distance v }
      @values[nearest] += probability
    end
  end

  # Draws one outcome at random according to the stored probabilities.
  def extract
    v = rand

    cumulative = 0
    @values.each do |key, prob|
      cumulative += prob

      return key if cumulative >= v
    end

    # The probabilities only add up to 1 within EPSILON, so the loop can end
    # without a match; Hash#each would then leak the whole Hash as the
    # result. Fall back to the last outcome instead.
    @values.keys.last
  end

  # Assigns to +key+ whatever probability is missing for +hash+ to add up to
  # 1 and wraps the result in a RandomVariable.
  # NOTE: +hash+ is modified in place.
  def self.complete(hash, key=0)
    hash[key] = 1 - hash.values.sum
    return RandomVariable.new(hash)
  end
end
|
188
|
+
|
189
|
+
# Random variable whose listed outcomes are all equally likely.
class UniformRandomVariable < RandomVariable
  # values - collection of outcomes; each one receives 1 / values.size.
  # exact  - forwarded unchanged to RandomVariable#initialize.
  def initialize(values, exact = false)
    share = 1.0 / values.size
    table = values.map { |outcome| [outcome, share] }.to_h
    super(table, exact)
  end
end
|
194
|
+
|
195
|
+
# Mix-in offering the mathematical notations P(...) and E(...).
module Probabilities
  # Probability of a slice of a random variable, e.g. P(dice > 3).
  # Raises RuntimeError for a whole RandomVariable and for any argument that
  # is not a RandomVariableSlice.
  def P(v)
    # The message used to say "expected value", copied from #E.
    raise "Cannot compute the probability of a #{v.class}" unless v.is_a? RandomVariableSlice
    raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable

    v.probability
  end

  # Expected value of a random variable or of one of its slices.
  # Raises RuntimeError for any argument that is not a RandomVariableSlice.
  def E(v)
    raise "Cannot compute the expected value of a #{v.class}" unless v.is_a? RandomVariableSlice

    v.expected
  end
end
|
213
|
+
|
214
|
+
# Ready-made random variables.
class RandomVariable
  # Probability of each lowercase letter; keys are one-letter Strings.
  letter_frequencies = {
    "a" => 0.08167, "b" => 0.01492, "c" => 0.02782, "d" => 0.04253,
    "e" => 0.12703, "f" => 0.02228, "g" => 0.02015, "h" => 0.06094,
    "i" => 0.06966, "j" => 0.00153, "k" => 0.00772, "l" => 0.04025,
    "m" => 0.02406, "n" => 0.06749, "o" => 0.07507, "p" => 0.01929,
    "q" => 0.00095, "r" => 0.05987, "s" => 0.06327, "t" => 0.09056,
    "u" => 0.02758, "v" => 0.00978, "w" => 0.02360, "x" => 0.00150,
    "y" => 0.01974, "z" => 0.00074
  }
  ENGLISH_ALPHABET = RandomVariable.new(letter_frequencies)

  # Six-sided die with equally likely faces 1..6.
  DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])

  # Coin with equally likely outcomes "h" and "t".
  COIN = UniformRandomVariable.new(["h", "t"])
end
|
248
|
+
end
|
data/lib/rust-tests.rb
CHANGED
@@ -7,6 +7,7 @@ module Rust::StatisticalTests
|
|
7
7
|
attr_accessor :pvalue
|
8
8
|
attr_accessor :exact
|
9
9
|
attr_accessor :alpha
|
10
|
+
attr_accessor :hypothesis
|
10
11
|
|
11
12
|
def initialize
|
12
13
|
@statistics = {}
|
@@ -20,6 +21,16 @@ module Rust::StatisticalTests
|
|
20
21
|
@statistics[name.to_sym] = value
|
21
22
|
end
|
22
23
|
|
24
|
+
# P-value corrected for the other tests attached to the same hypothesis,
# using the named adjustment +method+. Returns 1 when the result has no
# hypothesis.
def adjusted_pvalue(method='bonferroni')
  if @hypothesis
    @hypothesis.adjusted_pvalue_for(self, method)
  else
    1
  end
end
|
28
|
+
|
29
|
+
# Attaches this result to +value+ and registers the result with it through
# #add. Assigning nil simply detaches the result (it used to raise
# NoMethodError on nil).
def hypothesis=(value)
  @hypothesis = value
  value.add(self) unless value.nil?
end
|
33
|
+
|
23
34
|
def significant
|
24
35
|
pvalue < alpha
|
25
36
|
end
|
@@ -31,32 +42,150 @@ module Rust::StatisticalTests
|
|
31
42
|
(!exact ? " P-value is not exact." : "")
|
32
43
|
end
|
33
44
|
end
|
45
|
+
|
46
|
+
# A named group of test results whose p-values are adjusted together.
class Hypothesis
  # Resolves +title_or_instance+ to a Hypothesis:
  # - nil          -> a fresh, untitled hypothesis;
  # - a String     -> the live Hypothesis with that title if ObjectSpace
  #                   still holds one, otherwise a new one;
  # - a Hypothesis -> the object itself.
  # Raises TypeError for anything else.
  def self.find(title_or_instance)
    return Hypothesis.new(nil) if title_or_instance.nil?

    if title_or_instance.is_a?(String)
      ObjectSpace.each_object(Hypothesis) do |instance|
        return instance if instance.title == title_or_instance
      end

      return Hypothesis.new(title_or_instance)
    elsif title_or_instance.is_a?(Hypothesis)
      return title_or_instance
    end

    raise TypeError, "Expected nil, String or Hypothesis"
  end

  attr_reader :results
  attr_reader :title

  def initialize(title)
    @title = title
    @results = []
  end

  # Registers one more test result with this hypothesis.
  def add(result)
    @results << result
  end

  # Adjusted p-value of +instance+ given every result of this hypothesis.
  #
  # instance - a result previously registered with #add.
  # method   - name understood by PValueAdjustment.method.
  #
  # Raises ArgumentError when +instance+ was never added (this used to fail
  # with an opaque nil-to-Integer TypeError).
  def adjusted_pvalue_for(instance, method)
    index = @results.index(instance)
    raise ArgumentError, "The given result does not belong to this hypothesis" if index.nil?

    p_values = @results.map { |r| r.pvalue }
    adjusted_pvalues = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)

    # A bare Numeric can come back instead of a list; return it unchanged.
    if adjusted_pvalues.is_a?(Numeric)
      return adjusted_pvalues
    else
      return adjusted_pvalues[index]
    end
  end
end
|
88
|
+
end
|
89
|
+
|
90
|
+
# Multiple-comparison corrections, each delegating to R's p.adjust.
module Rust::StatisticalTests::PValueAdjustment
  # Resolves an adjustment name (String or Symbol, case-insensitive) to the
  # class implementing it.
  # Raises ArgumentError for unknown names (nil used to be returned, which
  # surfaced later as a NoMethodError on nil).
  def self.method(name)
    case name.to_s.downcase
    when "bonferroni", "b"
      Bonferroni
    when "holm", "h"
      Holm
    when "hochberg"
      Hochberg
    when "hommel"
      Hommel
    when "benjaminihochberg", "bh"
      BenjaminiHochberg
    when "benjaminiyekutieli", "by"
      BenjaminiYekutieli
    else
      raise ArgumentError, "Unknown p-value adjustment method: #{name}"
    end
  end

  # Shared implementation of every adjust method: pushes the p-values to R
  # as `adjustment.p` and pulls back p.adjust(adjustment.p, method=...).
  def self.adjust_with(r_method, p_values)
    Rust.exclusive do
      Rust['adjustment.p'] = p_values
      return Rust._pull("p.adjust(adjustment.p, method=\"#{r_method}\")")
    end
  end

  class Bonferroni
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("bonferroni", p_values)
    end
  end

  class Holm
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("holm", p_values)
    end
  end

  class Hochberg
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("hochberg", p_values)
    end
  end

  class Hommel
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("hommel", p_values)
    end
  end

  class BenjaminiHochberg
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("BH", p_values)
    end
  end

  class BenjaminiYekutieli
    def self.adjust(*p_values)
      Rust::StatisticalTests::PValueAdjustment.adjust_with("BY", p_values)
    end
  end
end
|
35
163
|
|
36
164
|
module Rust::StatisticalTests::Wilcoxon
|
37
165
|
class << self
|
38
|
-
|
166
|
+
# Runs a two-sided, paired Wilcoxon test (R's wilcox.test with paired=T) on
# two samples.
#
# d1, d2  - Arrays of Numeric with the same size.
# alpha   - stored on the result unchanged (default 0.05).
# options - :hypothesis => nil, String or Hypothesis, resolved through
#           Rust::StatisticalTests::Hypothesis.find.
#
# Returns a Rust::StatisticalTests::Result carrying the name, the p-value,
# the statistic under :w, the exactness flag, alpha and the hypothesis.
#
# Raises TypeError when a sample is not an Array of Numeric, and a
# RuntimeError when the two samples differ in size.
def paired(d1, d2, alpha = 0.05, **options)
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
  raise "The two distributions have different size" if d1.size != d2.size

  Rust.exclusive do
    Rust["wilcox.a"] = d1
    Rust["wilcox.b"] = d2

    _, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=T)", true)

    # For paired samples R gives up on the exact p-value both when some
    # differences are zero and when the differences contain ties, emitting a
    # separate warning for each case; either one makes the p-value inexact.
    inexact_warnings = [
      "cannot compute exact p-value with zeroes",
      "cannot compute exact p-value with ties"
    ]

    result = Rust::StatisticalTests::Result.new
    result.name = "Wilcoxon Signed-Rank test"
    result.pvalue = Rust._pull("wilcox.result$p.value")
    result[:w] = Rust._pull("wilcox.result$statistic")
    result.exact = inexact_warnings.none? { |message| warnings.include?(message) }
    result.alpha = alpha
    result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])

    return result
  end
end
|
58
187
|
|
59
|
-
def unpaired(d1, d2, alpha = 0.05)
|
188
|
+
def unpaired(d1, d2, alpha = 0.05, **options)
|
60
189
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
61
190
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
62
191
|
|
@@ -66,11 +195,12 @@ module Rust::StatisticalTests::Wilcoxon
|
|
66
195
|
|
67
196
|
_, warnings = Rust._eval("wilcox.result = wilcox.test(wilcox.a, wilcox.b, alternative='two.sided', paired=F)", true)
|
68
197
|
result = Rust::StatisticalTests::Result.new
|
69
|
-
result.name
|
70
|
-
result.pvalue
|
71
|
-
result[:w]
|
72
|
-
result.exact
|
73
|
-
result.alpha
|
198
|
+
result.name = "Wilcoxon Ranked-Sum test (a.k.a. Mann–Whitney U test)"
|
199
|
+
result.pvalue = Rust._pull("wilcox.result$p.value")
|
200
|
+
result[:w] = Rust._pull("wilcox.result$statistic")
|
201
|
+
result.exact = !warnings.include?("cannot compute exact p-value with ties")
|
202
|
+
result.alpha = alpha
|
203
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
74
204
|
|
75
205
|
return result
|
76
206
|
end
|
@@ -80,7 +210,7 @@ end
|
|
80
210
|
|
81
211
|
module Rust::StatisticalTests::T
|
82
212
|
class << self
|
83
|
-
def paired(d1, d2, alpha = 0.05)
|
213
|
+
def paired(d1, d2, alpha = 0.05, **options)
|
84
214
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
85
215
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
86
216
|
raise "The two distributions have different size" if d1.size != d2.size
|
@@ -91,17 +221,18 @@ module Rust::StatisticalTests::T
|
|
91
221
|
|
92
222
|
warnings = Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=T)")
|
93
223
|
result = Rust::StatisticalTests::Result.new
|
94
|
-
result.name
|
95
|
-
result.pvalue
|
96
|
-
result[:t]
|
97
|
-
result.exact
|
98
|
-
result.alpha
|
224
|
+
result.name = "Paired t-test"
|
225
|
+
result.pvalue = Rust._pull("t.result$p.value")
|
226
|
+
result[:t] = Rust._pull("t.result$statistic")
|
227
|
+
result.exact = true
|
228
|
+
result.alpha = alpha
|
229
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
99
230
|
|
100
231
|
return result
|
101
232
|
end
|
102
233
|
end
|
103
234
|
|
104
|
-
def unpaired(d1, d2, alpha = 0.05)
|
235
|
+
def unpaired(d1, d2, alpha = 0.05, **options)
|
105
236
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
106
237
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
107
238
|
|
@@ -111,11 +242,12 @@ module Rust::StatisticalTests::T
|
|
111
242
|
|
112
243
|
Rust._eval("t.result = t.test(t.a, t.b, alternative='two.sided', paired=F)")
|
113
244
|
result = Rust::StatisticalTests::Result.new
|
114
|
-
result.name
|
115
|
-
result.pvalue
|
116
|
-
result[:t]
|
117
|
-
result.exact
|
118
|
-
result.alpha
|
245
|
+
result.name = "Welch Two Sample t-test"
|
246
|
+
result.pvalue = Rust._pull("t.result$p.value")
|
247
|
+
result[:t] = Rust._pull("t.result$statistic")
|
248
|
+
result.exact = true
|
249
|
+
result.alpha = alpha
|
250
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
119
251
|
|
120
252
|
return result
|
121
253
|
end
|
@@ -125,18 +257,19 @@ end
|
|
125
257
|
|
126
258
|
module Rust::StatisticalTests::Shapiro
|
127
259
|
class << self
|
128
|
-
def compute(vector, alpha = 0.05)
|
260
|
+
def compute(vector, alpha = 0.05, **options)
|
129
261
|
raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
|
130
262
|
Rust.exclusive do
|
131
263
|
Rust['shapiro.v'] = vector
|
132
264
|
|
133
265
|
Rust._eval("shapiro.result = shapiro.test(shapiro.v)")
|
134
266
|
result = Rust::StatisticalTests::Result.new
|
135
|
-
result.name
|
136
|
-
result.pvalue
|
137
|
-
result[:W]
|
138
|
-
result.exact
|
139
|
-
result.alpha
|
267
|
+
result.name = "Shapiro-Wilk normality test"
|
268
|
+
result.pvalue = Rust._pull("shapiro.result$p.value")
|
269
|
+
result[:W] = Rust._pull("shapiro.result$statistic")
|
270
|
+
result.exact = true
|
271
|
+
result.alpha = alpha
|
272
|
+
result.hypothesis = Rust::StatisticalTests::Hypothesis.find(options[:hypothesis])
|
140
273
|
|
141
274
|
return result
|
142
275
|
end
|
data/lib/rust.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.4'
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rinruby
|
@@ -63,6 +63,7 @@ files:
|
|
63
63
|
- lib/rust-descriptive.rb
|
64
64
|
- lib/rust-effsize.rb
|
65
65
|
- lib/rust-plots.rb
|
66
|
+
- lib/rust-probabilities.rb
|
66
67
|
- lib/rust-tests.rb
|
67
68
|
- lib/rust.rb
|
68
69
|
homepage: https://github.com/intersimone999/ruby-rust
|
@@ -84,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
85
|
- !ruby/object:Gem::Version
|
85
86
|
version: '0'
|
86
87
|
requirements: []
|
87
|
-
rubygems_version: 3.
|
88
|
+
rubygems_version: 3.2.7
|
88
89
|
signing_key:
|
89
90
|
specification_version: 4
|
90
91
|
summary: Ruby advanced statistical library
|