rust 0.3 → 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +14 -4
- data/lib/rust/core/rust.rb +157 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +183 -245
- data/lib/rust/core/types/datatype.rb +161 -0
- data/lib/rust/core/types/factor.rb +131 -0
- data/lib/rust/core/types/language.rb +166 -0
- data/lib/rust/core/types/list.rb +81 -0
- data/lib/rust/core/types/matrix.rb +132 -0
- data/lib/rust/core/types/s4class.rb +59 -0
- data/lib/rust/core/types/utils.rb +109 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +60 -0
- data/lib/rust/models/regression.rb +205 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +111 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +64 -129
- data/lib/rust/plots/distribution-plots.rb +62 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +11 -5
- data/lib/rust/stats/descriptive.rb +128 -0
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +23 -21
- data/lib/rust/stats/probabilities.rb +248 -0
- data/lib/rust/stats/tests.rb +292 -0
- data/lib/rust.rb +4 -8
- metadata +31 -12
- data/lib/rust-calls.rb +0 -69
- data/lib/rust-descriptive.rb +0 -59
- data/lib/rust-tests.rb +0 -165
@@ -0,0 +1,62 @@
|
|
1
|
+
require_relative 'core'
|
2
|
+
|
3
|
+
module Rust::Plots
  # Base class for plots that are built from one or more data series.
  # Series are accumulated with #series and consumed by the subclass's _show.
  class DistributionPlot < BasePlot
    def initialize
      super()
      @series = []
    end

    # Registers a data series together with its per-series options.
    #
    # @param data the series values (stored as given)
    # @param options per-series options; the subclasses read :color and :name
    # @return [DistributionPlot] self, so calls can be chained
    def series(data, **options)
      @series.push([data, options])

      self
    end
  end

  # Draws one density curve per registered series.
  class DensityPlot < DistributionPlot
    protected

    # The first series is drawn with "plot" and every following one with
    # "lines". Each series is assigned to "plotter.series" right before its
    # call, so the same variable name is reused for all of them.
    #
    # @return [DensityPlot] self
    def _show
      @series.each_with_index do |(values, series_options), position|
        Rust["plotter.series"] = values

        drawing_command = position.zero? ? "plot" : "lines"

        function = Rust::Function.new(drawing_command)
        function.options = self._augmented_options({"col" => series_options[:color]})
        function.arguments << Rust::Variable.new("density(plotter.series)")
        function.call
      end

      self
    end
  end

  # Draws all the registered series in a single "boxplot" call.
  class BoxPlot < DistributionPlot
    protected

    # Every series is assigned to its own variable ("plotter.series0",
    # "plotter.series1", ...) and passed as an argument. A series is labelled
    # with its :name option or, when that is missing, its 1-based position.
    #
    # @return [BoxPlot] self
    def _show
      boxplot = Rust::Function.new("boxplot")

      labels = @series.each_with_index.map do |(values, series_options), position|
        variable_name = "plotter.series#{position}"
        Rust[variable_name] = values
        boxplot.arguments << Rust::Variable.new(variable_name)

        series_options[:name] || (position + 1).to_s
      end

      boxplot.options = self._augmented_options({'names' => labels})

      boxplot.call

      self
    end
  end
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
module Rust::
|
3
|
+
module Rust::Correlation
|
4
4
|
class Pearson
|
5
5
|
def self.test(d1, d2)
|
6
6
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
@@ -10,13 +10,14 @@ module Rust:: Correlation
|
|
10
10
|
Rust['correlation.a'] = d1
|
11
11
|
Rust['correlation.b'] = d2
|
12
12
|
|
13
|
-
Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
|
13
|
+
_, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')", true)
|
14
14
|
|
15
15
|
result = Result.new
|
16
16
|
result.name = "Pearson's product-moment correlation"
|
17
17
|
result.statistics['t'] = Rust._pull('correlation.result$statistic')
|
18
18
|
result.pvalue = Rust._pull('correlation.result$p.value')
|
19
19
|
result.correlation = Rust._pull('correlation.result$estimate')
|
20
|
+
result.exact = !warnings.include?("Cannot compute exact p-value with ties")
|
20
21
|
|
21
22
|
return result
|
22
23
|
end
|
@@ -36,13 +37,14 @@ module Rust:: Correlation
|
|
36
37
|
Rust['correlation.a'] = d1
|
37
38
|
Rust['correlation.b'] = d2
|
38
39
|
|
39
|
-
Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")
|
40
|
+
_, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')", true)
|
40
41
|
|
41
42
|
result = Result.new
|
42
43
|
result.name = "Spearman's rank correlation rho"
|
43
44
|
result.statistics['S'] = Rust._pull('correlation.result$statistic')
|
44
45
|
result.pvalue = Rust._pull('correlation.result$p.value')
|
45
46
|
result.correlation = Rust._pull('correlation.result$estimate')
|
47
|
+
result.exact = !warnings.include?("Cannot compute exact p-value with ties")
|
46
48
|
|
47
49
|
return result
|
48
50
|
end
|
@@ -62,13 +64,14 @@ module Rust:: Correlation
|
|
62
64
|
Rust['correlation.a'] = d1
|
63
65
|
Rust['correlation.b'] = d2
|
64
66
|
|
65
|
-
Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='
|
67
|
+
_, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='k')", true)
|
66
68
|
|
67
69
|
result = Result.new
|
68
70
|
result.name = "Kendall's rank correlation tau"
|
69
71
|
result.statistics['T'] = Rust._pull('correlation.result$statistic')
|
70
72
|
result.pvalue = Rust._pull('correlation.result$p.value')
|
71
73
|
result.correlation = Rust._pull('correlation.result$estimate')
|
74
|
+
result.exact = !warnings.include?("Cannot compute exact p-value with ties")
|
72
75
|
|
73
76
|
return result
|
74
77
|
end
|
@@ -84,11 +87,13 @@ module Rust:: Correlation
|
|
84
87
|
attr_accessor :statistics
|
85
88
|
attr_accessor :pvalue
|
86
89
|
attr_accessor :correlation
|
90
|
+
attr_accessor :exact
|
87
91
|
|
88
92
|
alias :estimate :correlation
|
89
93
|
|
90
94
|
def initialize
|
91
95
|
@statistics = {}
|
96
|
+
@exact = true
|
92
97
|
end
|
93
98
|
|
94
99
|
def [](name)
|
@@ -101,6 +106,7 @@ module Rust:: Correlation
|
|
101
106
|
|
102
107
|
def to_s
|
103
108
|
return "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " +
|
109
|
+
(!@exact ? "P-value is not exact. " : "") +
|
104
110
|
"#{ statistics.map { |k, v| k.to_s + " -> " + v.to_s }.join(", ") }."
|
105
111
|
end
|
106
112
|
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require_relative '../core'
|
2
|
+
|
3
|
+
module Rust
  # Descriptive statistics computed in plain Ruby on Arrays of Numeric values.
  # Every function is a module-level method, e.g. Rust::Descriptive.mean([1, 2]).
  module Descriptive
    class << self
      # Arithmetic mean of the values.
      #
      # @param data [Array<Numeric>]
      # @return [Float] the mean; NaN for an empty array (0.0 / 0)
      # @raise [TypeError] if data is not an Array of Numeric
      def mean(data)
        ensure_numeric_array!(data)

        data.sum.to_f / data.size
      end

      # Sample standard deviation: the square root of #variance.
      #
      # @param data [Array<Numeric>]
      # @return [Float] NaN when data has fewer than two elements
      # @raise [TypeError] if data is not an Array of Numeric
      def standard_deviation(data)
        ensure_numeric_array!(data)

        Math.sqrt(variance(data))
      end
      alias :sd :standard_deviation
      alias :stddev :standard_deviation

      # Sample variance (sum of squared deviations divided by n - 1).
      #
      # @param data [Array<Numeric>]
      # @return [Float] NaN when data has fewer than two elements
      # @raise [TypeError] if data is not an Array of Numeric
      def variance(data)
        ensure_numeric_array!(data)
        return Float::NAN if data.size < 2

        center = mean(data)
        data.map { |v| (v - center) ** 2 }.sum.to_f / (data.size - 1)
      end
      alias :var :variance

      # Median of the values.
      #
      # @param data [Array<Numeric>]
      # @return [Numeric] the middle element for odd sizes, the average of the
      #   two middle elements (as Float) for even sizes, NaN for an empty array
      # @raise [TypeError] if data is not an Array of Numeric
      def median(data)
        ensure_numeric_array!(data)
        return Float::NAN if data.empty?

        sorted = data.sort
        middle = sorted.size / 2
        return sorted[middle] if sorted.size.odd?

        (sorted[middle - 1] + sorted[middle]) / 2.0
      end

      # Sum of the values.
      #
      # @param data [Array<Numeric>]
      # @return [Numeric]
      # @raise [TypeError] if data is not an Array of Numeric
      def sum(data)
        ensure_numeric_array!(data)

        data.sum
      end

      # Quantiles of the values, linearly interpolated between the two
      # neighbouring sorted elements when the position is fractional.
      # NOTE(review): this looks like R's default quantile definition
      # (type 7) - confirm before relying on exact agreement with R.
      #
      # @param data [Array<Numeric>]
      # @param percentiles [Array<Numeric>] probabilities, each within [0, 1]
      # @return [Hash] percentile => quantile, in the order given; the values
      #   are nil when data is empty
      # @raise [TypeError] if data or percentiles is not an Array of Numeric
      # @raise [RuntimeError] if any percentile is outside [0, 1]
      def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
        ensure_numeric_array!(data)
        ensure_numeric_array!(percentiles)
        raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

        sorted = data.sort
        last_index = [sorted.size - 1, 0].max

        values = percentiles.map do |percentile|
          # Fractional 0-based position of the quantile in the sorted data.
          # The expression is kept in this exact form so that rounding is
          # unchanged.
          position = 1 + last_index * percentile - 1
          lower = sorted[position.floor]
          upper = sorted[position.ceil]

          if position > position.floor && upper != lower
            weight = position - position.floor
            (1 - weight) * lower + weight * upper
          else
            lower
          end
        end

        percentiles.zip(values).to_h
      end

      # Outliers of data with respect to its own distribution.
      # Same as outliers_according_to(data, data, k, **opts).
      #
      # @param data [Array<Numeric>]
      # @param k [Numeric] multiplier of the interquartile range
      # @return [Array<Numeric>]
      def outliers(data, k=1.5, **opts)
        outliers_according_to(data, data, k, **opts)
      end

      # Elements of data lying more than k interquartile ranges below the
      # first quartile or above the third quartile of data_distribution.
      #
      # @param data [Array<Numeric>] values to screen
      # @param data_distribution [Array<Numeric>] values the quartiles are
      #   computed from
      # @param k [Numeric] multiplier of the interquartile range
      # @param opts [Hash] :side restricts the result to one tail:
      #   :positive / :pos / :p / :+ keeps only the high outliers,
      #   :negative / :neg / :n / :- keeps only the low ones; any other
      #   value (or no :side) keeps both
      # @return [Array<Numeric>] low outliers followed by high outliers
      def outliers_according_to(data, data_distribution, k=1.5, **opts)
        quartiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
        q1 = quartiles[0.25]
        q3 = quartiles[0.75]
        iqr = q3 - q1

        positive_outliers = data.select { |d| d > q3 + iqr * k }
        negative_outliers = data.select { |d| d < q1 - iqr * k }

        return negative_outliers + positive_outliers unless opts[:side]

        # The short aliases used to be attached to the opposite tail
        # (:neg/:n returned the positive outliers and :pos/:p the negative).
        case opts[:side].to_sym
        when :positive, :pos, :p, :+
          positive_outliers
        when :negative, :neg, :n, :-
          negative_outliers
        else
          negative_outliers + positive_outliers
        end
      end

      private

      # Shared argument check for all the public functions.
      def ensure_numeric_array!(values)
        return if values.is_a?(Array) && values.all? { |e| e.is_a?(Numeric) }

        raise TypeError, "Expecting Array of numerics"
      end
    end
  end
end
|
107
|
+
|
108
|
+
# Short function names that forward to the matching Rust::Descriptive
# functions. NOTE(review): presumably meant to be mixed in so that these can
# be called unqualified - confirm against where the module is included.
module Rust::RBindings
  # Forwards to Rust::Descriptive.mean.
  def mean(series)
    return Rust::Descriptive.mean(series)
  end

  # Forwards to Rust::Descriptive.median.
  def median(series)
    return Rust::Descriptive.median(series)
  end

  # Forwards to Rust::Descriptive.variance.
  def var(series)
    return Rust::Descriptive.variance(series)
  end

  # Forwards to Rust::Descriptive.standard_deviation.
  def sd(series)
    return Rust::Descriptive.standard_deviation(series)
  end

  # Forwards to Rust::Descriptive.quantile, with the same default percentiles.
  def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    return Rust::Descriptive.quantile(series, percentiles)
  end
end
|
@@ -1,8 +1,6 @@
|
|
1
|
-
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
Rust.
|
4
|
-
Rust._eval("library(effsize)")
|
5
|
-
end
|
3
|
+
Rust.prerequisite('effsize')
|
6
4
|
|
7
5
|
module Rust::EffectSize
|
8
6
|
class Result
|
@@ -16,14 +14,16 @@ module Rust::EffectSize
|
|
16
14
|
return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
|
17
15
|
end
|
18
16
|
end
|
19
|
-
end
|
20
17
|
|
21
|
-
|
22
|
-
|
23
|
-
def compute(d1, d2)
|
18
|
+
class CliffDelta
|
19
|
+
def self.compute(d1, d2)
|
24
20
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
21
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
22
|
|
23
|
+
if d1.size <= 1 || d2.size <= 1
|
24
|
+
return Rust::EffectSize::Result.new
|
25
|
+
end
|
26
|
+
|
27
27
|
Rust.exclusive do
|
28
28
|
Rust['effsize.a'] = d1
|
29
29
|
Rust['effsize.b'] = d2
|
@@ -32,23 +32,25 @@ module Rust::EffectSize::CliffDelta
|
|
32
32
|
|
33
33
|
result = Rust::EffectSize::Result.new
|
34
34
|
result.name = "Cliff's delta"
|
35
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
36
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
37
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
38
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
35
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
36
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
37
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
38
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
39
39
|
|
40
40
|
return result
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
class << self
|
48
|
-
def compute(d1, d2)
|
44
|
+
|
45
|
+
class CohenD
|
46
|
+
def self.compute(d1, d2)
|
49
47
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
50
48
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
51
49
|
|
50
|
+
if d1.size <= 1 || d2.size <= 1
|
51
|
+
return Rust::EffectSize::Result.new
|
52
|
+
end
|
53
|
+
|
52
54
|
Rust.exclusive do
|
53
55
|
Rust['effsize.a'] = d1
|
54
56
|
Rust['effsize.b'] = d2
|
@@ -57,10 +59,10 @@ module Rust::EffectSize::CohenD
|
|
57
59
|
|
58
60
|
result = Rust::EffectSize::Result.new
|
59
61
|
result.name = "Cohen's d"
|
60
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
61
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
62
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
63
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
62
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
63
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
64
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
65
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
64
66
|
|
65
67
|
return result
|
66
68
|
end
|
@@ -0,0 +1,248 @@
|
|
1
|
+
require_relative '../core'
|
2
|
+
|
3
|
+
class Numeric
  # Absolute difference between this number and another one.
  #
  # @param other [Numeric]
  # @return [Numeric] (self - other).abs
  # @raise [TypeError] if other is not a Numeric
  def distance(other)
    unless other.is_a?(Numeric)
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    difference = self - other
    difference.abs
  end
end
|
10
|
+
|
11
|
+
class Array
  # Element-wise distance between two arrays: the sum of the distances
  # between the elements at the same index. Every element is converted with
  # to_i first, so a position missing from the shorter array (nil) counts
  # as 0 and fractional parts are dropped.
  #
  # @param other [Array]
  # @return [Integer] 0 when both arrays are empty
  # @raise [TypeError] if other is not an Array
  def distance(other)
    raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array

    longest, shortest = size > other.size ? [self, other] : [other, self]

    longest.each_index.sum do |i|
      longest[i].to_i.distance(shortest[i].to_i)
    end
  end
end
|
25
|
+
|
26
|
+
class String
  # Distance between two strings, computed as the Array#distance of their
  # byte sequences.
  #
  # @param other [String]
  # @return [Integer]
  # @raise [TypeError] if other is not a String
  def distance(other)
    unless other.is_a?(String)
      raise TypeError, "no implicit conversion of #{other.class} into String"
    end

    own_bytes = self.bytes
    own_bytes.distance(other.bytes)
  end
end
|
33
|
+
|
34
|
+
module Rust
  # A set of outcomes of a discrete random variable, each mapped to its
  # probability. Unlike RandomVariable, the probabilities of a slice are not
  # required to add up to 1.
  class RandomVariableSlice
    # @param values [Hash] outcome => probability
    # @raise [TypeError] if values is not a Hash
    def initialize(values)
      raise TypeError, "Expected Hash" unless values.is_a?(Hash)

      @values = values
    end

    # Probability of one outcome, or of the whole slice.
    #
    # @param v an outcome; when nil (or false) the total is returned instead
    # @return [Numeric, nil] the sum of all the probabilities when v is
    #   omitted; otherwise the probability stored for v (nil if v is absent)
    def probability(v=nil)
      return @values.values.sum unless v

      @values[v]
    end

    # Most likely outcome.
    #
    # @return the outcome with the highest probability, or nil when the slice
    #   is empty (this used to raise NoMethodError)
    def ml
      @values.max_by { |_outcome, probability| probability }&.first
    end

    # Sum of outcome * probability over the slice.
    #
    # @return [Numeric]
    def expected
      @values.map { |outcome, probability| outcome * probability }.sum
    end

    # The comparison operators do not return booleans: each one returns the
    # sub-slice of the outcomes satisfying the comparison, so that
    # expressions such as P(x > 3) can be written.

    # @return [RandomVariableSlice] outcomes greater than n
    def >(n)
      so_that { |outcome| outcome > n }
    end

    # @return [RandomVariableSlice] outcomes greater than or equal to n
    def >=(n)
      so_that { |outcome| outcome >= n }
    end

    # @return [RandomVariableSlice] outcomes lower than n
    def <(n)
      so_that { |outcome| outcome < n }
    end

    # @return [RandomVariableSlice] outcomes lower than or equal to n
    def <=(n)
      so_that { |outcome| outcome <= n }
    end

    # @return [RandomVariableSlice] outcomes equal to n
    def ==(n)
      so_that { |outcome| outcome == n }
    end

    # Sub-slice of the outcomes for which the block returns a truthy value.
    #
    # @yieldparam outcome an outcome of this slice
    # @return [RandomVariableSlice]
    def so_that
      RandomVariableSlice.new(@values.select { |outcome, _probability| yield(outcome) })
    end

    # Sub-slice of the outcomes within [a, b], bounds included.
    #
    # @return [RandomVariableSlice]
    def between(a, b)
      RandomVariableSlice.new(@values.select { |outcome, _probability| outcome.between? a, b })
    end
  end

  # A discrete random variable: outcomes mapped to probabilities that must
  # add up to 1 (within EPSILON).
  class RandomVariable < RandomVariableSlice
    # Tolerance on the total probability; also the threshold at or below
    # which an outcome is dropped by #approx!.
    EPSILON = 1e-7

    attr_reader :values

    # @param values [Hash] outcome => probability. The Hash is stored as
    #   given and, unless exact is true, may be modified in place by #approx!
    # @param exact [Boolean] when true, low-probability outcomes are kept
    # @raise [TypeError] if values is not a Hash (same check as the parent
    #   class; previously a non-Hash failed later with NoMethodError)
    # @raise [RuntimeError] if a probability is outside [0, 1] or if the
    #   total is not 1 within EPSILON
    def initialize(values = {0 => 1.0}, exact = false)
      super(values)
      @exact = exact

      raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
      raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

      approx!
    end

    # Probability of a single outcome.
    #
    # @return [Float] 0.0 when v is not an outcome of this variable
    def probability(v)
      @values[v].to_f
    end

    # Distribution of the sum of this variable and other, treated as
    # independent (the probabilities of each pair are multiplied).
    #
    # @param other [RandomVariable]
    # @return [RandomVariable]
    def +(other)
      combine(other) { |mine, theirs| mine + theirs }
    end

    # Repeated sum when times is an Integer (see #rep), product of the two
    # variables when times is a RandomVariable (see #mul).
    #
    # @param times [Integer, RandomVariable]
    # @return [RandomVariable]
    # @raise [RuntimeError] for any other argument
    def *(times)
      if times.is_a? Integer
        rep(times)
      elsif times.is_a? RandomVariable
        mul(times)
      else
        raise "The argument must be an Integer or a RandomVariable"
      end
    end

    # Distribution of the product of this variable and other, treated as
    # independent.
    #
    # @param other [RandomVariable]
    # @return [RandomVariable]
    def mul(other)
      combine(other) { |mine, theirs| mine * theirs }
    end

    # Sum of `times` copies of this variable.
    # NOTE(review): for times <= 1 no sum is performed and self is returned,
    # so `x * 0` is x itself - confirm whether that is intended.
    #
    # @param times [Integer]
    # @return [RandomVariable]
    def rep(times)
      rv = self
      (times-1).times { rv += self }

      rv
    end

    # Marks the variable as exact, which disables #approx!.
    def exact!
      @exact = true
    end

    # Unless the variable is exact, removes every outcome whose probability
    # is at or below EPSILON and adds that probability to the nearest
    # remaining outcome, as measured by the outcomes' #distance method.
    def approx!
      return if @exact

      negligible = @values.select { |_outcome, probability| probability <= EPSILON }.keys

      negligible.each do |outcome|
        probability = @values.delete outcome
        nearest = @values.keys.min_by { |k| k.distance outcome }
        @values[nearest] += probability
      end
    end

    # Draws a random outcome according to the probabilities.
    #
    # @return one of the outcomes
    def extract
      threshold = rand

      cumulative = 0
      @values.each do |outcome, probability|
        cumulative += probability

        return outcome if cumulative >= threshold
      end

      # The probabilities are only required to add up to 1 within EPSILON, so
      # the draw can exceed their total; without this fallback the method
      # returned the whole Hash (the value of #each) in that case.
      @values.keys.last
    end

    # Builds a random variable from an incomplete Hash by assigning the
    # missing probability (1 minus the current total) to key. The given Hash
    # is modified.
    #
    # @param hash [Hash] outcome => probability
    # @param key the outcome that receives the remaining probability
    # @return [RandomVariable]
    def self.complete(hash, key=0)
      hash[key] = 1 - hash.values.sum
      RandomVariable.new(hash)
    end

    private

    # Joint distribution of the two variables, mapped through the block:
    # for every pair of outcomes, the block yields the resulting outcome and
    # the product of the two probabilities is accumulated on it.
    def combine(other)
      combined = {}

      @values.each do |my_key, my_value|
        other.values.each do |other_key, other_value|
          key = yield(my_key, other_key)

          combined[key] = combined[key].to_f + (my_value * other_value)
        end
      end

      RandomVariable.new(combined, @exact)
    end
  end

  # Random variable whose outcomes all have the same probability.
  class UniformRandomVariable < RandomVariable
    # @param values [Array] the outcomes
    # @param exact [Boolean] see RandomVariable#initialize
    def initialize(values, exact = false)
      share = 1.0 / values.size
      super(values.map { |outcome| [outcome, share] }.to_h, exact)
    end
  end

  # Shorthand functions for writing P(x > 3) and E(x).
  module Probabilities
    # Probability of a slice of a random variable.
    #
    # @param v [RandomVariableSlice] a slice, e.g. the result of `x > 3`
    # @return [Numeric] the total probability of the slice
    # @raise [RuntimeError] if v is a whole RandomVariable or not a slice
    def P(v)
      # This message used to say "expected value", copied from E.
      raise "Cannot compute the probability of a #{v.class}" unless v.is_a? RandomVariableSlice
      raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable

      v.probability
    end

    # Expected value of a random variable or of a slice.
    #
    # @param v [RandomVariableSlice]
    # @return [Numeric]
    # @raise [RuntimeError] if v is not a slice or a random variable
    def E(v)
      raise "Cannot compute the expected value of a #{v.class}" unless v.is_a? RandomVariableSlice

      v.expected
    end
  end

  # Ready-made variables. Defined in a second class body because they need
  # UniformRandomVariable, which is declared after RandomVariable.
  class RandomVariable
    # Frequencies of the lowercase letters "a" to "z".
    ENGLISH_ALPHABET = RandomVariable.new({
      "a" => 0.08167,
      "b" => 0.01492,
      "c" => 0.02782,
      "d" => 0.04253,
      "e" => 0.12703,
      "f" => 0.02228,
      "g" => 0.02015,
      "h" => 0.06094,
      "i" => 0.06966,
      "j" => 0.00153,
      "k" => 0.00772,
      "l" => 0.04025,
      "m" => 0.02406,
      "n" => 0.06749,
      "o" => 0.07507,
      "p" => 0.01929,
      "q" => 0.00095,
      "r" => 0.05987,
      "s" => 0.06327,
      "t" => 0.09056,
      "u" => 0.02758,
      "v" => 0.00978,
      "w" => 0.02360,
      "x" => 0.00150,
      "y" => 0.01974,
      "z" => 0.00074
    })

    # Six equally likely outcomes, 1 to 6.
    DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])

    # Two equally likely outcomes, "h" and "t".
    COIN = UniformRandomVariable.new(["h", "t"])
  end
end
|