rust 0.3 → 0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +14 -4
- data/lib/rust/core/rust.rb +157 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +183 -245
- data/lib/rust/core/types/datatype.rb +161 -0
- data/lib/rust/core/types/factor.rb +131 -0
- data/lib/rust/core/types/language.rb +166 -0
- data/lib/rust/core/types/list.rb +81 -0
- data/lib/rust/core/types/matrix.rb +132 -0
- data/lib/rust/core/types/s4class.rb +59 -0
- data/lib/rust/core/types/utils.rb +109 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +60 -0
- data/lib/rust/models/regression.rb +205 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +111 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +64 -129
- data/lib/rust/plots/distribution-plots.rb +62 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +11 -5
- data/lib/rust/stats/descriptive.rb +128 -0
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +23 -21
- data/lib/rust/stats/probabilities.rb +248 -0
- data/lib/rust/stats/tests.rb +292 -0
- data/lib/rust.rb +4 -8
- metadata +31 -12
- data/lib/rust-calls.rb +0 -69
- data/lib/rust-descriptive.rb +0 -59
- data/lib/rust-tests.rb +0 -165
@@ -0,0 +1,62 @@
|
|
1
|
+
require_relative 'core'
|
2
|
+
|
3
|
+
module Rust::Plots
|
4
|
+
# Plot that accumulates any number of data series, each with its own options.
# Subclasses read the collected pairs from @series when they draw.
class DistributionPlot < BasePlot
  # Runs the parent initializer with no arguments and starts with no series.
  def initialize
    super()
    @series = []
  end

  # Records +data+ together with its keyword +options+ as one series.
  # Returns the plot itself so calls can be chained.
  def series(data, **options)
    @series.push([data, options])

    self
  end
end
|
16
|
+
|
17
|
+
# Draws density(...) of every registered series on the same device.
class DensityPlot < DistributionPlot
  protected

  # Emits one R call per series: "plot" for the first one, "lines" for the
  # following ones. Each series is bound to the R variable "plotter.series"
  # right before its call. Returns the plot itself.
  def _show
    @series.each_with_index do |(data, options), position|
      Rust["plotter.series"] = data

      command = position.zero? ? "plot" : "lines"

      function = Rust::Function.new(command)
      function.options = _augmented_options({"col" => options[:color]})
      function.arguments << Rust::Variable.new("density(plotter.series)")
      function.call
    end

    self
  end
end
|
40
|
+
|
41
|
+
# Draws a single boxplot(...) call containing every registered series.
class BoxPlot < DistributionPlot
  protected

  # Binds each series to its own R variable ("plotter.series0",
  # "plotter.series1", ...), passes them all as arguments to boxplot and
  # labels them with the :name option, or the 1-based position when no
  # name was given. Returns the plot itself.
  def _show
    function = Rust::Function.new("boxplot")

    labels = @series.each_with_index.map do |(series, options), i|
      varname = "plotter.series#{i}"
      Rust[varname] = series
      function.arguments << Rust::Variable.new(varname)

      options[:name] || (i+1).to_s
    end

    function.options = _augmented_options({'names' => labels})
    function.call

    self
  end
end
|
62
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
module Rust::
|
3
|
+
module Rust::Correlation
|
4
4
|
class Pearson
|
5
5
|
def self.test(d1, d2)
|
6
6
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
@@ -10,13 +10,14 @@ module Rust:: Correlation
|
|
10
10
|
Rust['correlation.a'] = d1
|
11
11
|
Rust['correlation.b'] = d2
|
12
12
|
|
13
|
-
Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
|
13
|
+
_, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')", true)
|
14
14
|
|
15
15
|
result = Result.new
|
16
16
|
result.name = "Pearson's product-moment correlation"
|
17
17
|
result.statistics['t'] = Rust._pull('correlation.result$statistic')
|
18
18
|
result.pvalue = Rust._pull('correlation.result$p.value')
|
19
19
|
result.correlation = Rust._pull('correlation.result$estimate')
|
20
|
+
result.exact = !warnings.include?("Cannot compute exact p-value with ties")
|
20
21
|
|
21
22
|
return result
|
22
23
|
end
|
@@ -36,13 +37,14 @@ module Rust:: Correlation
|
|
36
37
|
Rust['correlation.a'] = d1
|
37
38
|
Rust['correlation.b'] = d2
|
38
39
|
|
39
|
-
Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")
|
40
|
+
_, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')", true)
|
40
41
|
|
41
42
|
result = Result.new
|
42
43
|
result.name = "Spearman's rank correlation rho"
|
43
44
|
result.statistics['S'] = Rust._pull('correlation.result$statistic')
|
44
45
|
result.pvalue = Rust._pull('correlation.result$p.value')
|
45
46
|
result.correlation = Rust._pull('correlation.result$estimate')
|
47
|
+
result.exact = !warnings.include?("Cannot compute exact p-value with ties")
|
46
48
|
|
47
49
|
return result
|
48
50
|
end
|
@@ -62,13 +64,14 @@ module Rust:: Correlation
|
|
62
64
|
Rust['correlation.a'] = d1
|
63
65
|
Rust['correlation.b'] = d2
|
64
66
|
|
65
|
-
Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='
|
67
|
+
_, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='k')", true)
|
66
68
|
|
67
69
|
result = Result.new
|
68
70
|
result.name = "Kendall's rank correlation tau"
|
69
71
|
result.statistics['T'] = Rust._pull('correlation.result$statistic')
|
70
72
|
result.pvalue = Rust._pull('correlation.result$p.value')
|
71
73
|
result.correlation = Rust._pull('correlation.result$estimate')
|
74
|
+
result.exact = !warnings.include?("Cannot compute exact p-value with ties")
|
72
75
|
|
73
76
|
return result
|
74
77
|
end
|
@@ -84,11 +87,13 @@ module Rust:: Correlation
|
|
84
87
|
attr_accessor :statistics
|
85
88
|
attr_accessor :pvalue
|
86
89
|
attr_accessor :correlation
|
90
|
+
attr_accessor :exact
|
87
91
|
|
88
92
|
alias :estimate :correlation
|
89
93
|
|
90
94
|
# Starts with a p-value flagged as exact and an empty statistics table.
def initialize
  @exact = true
  @statistics = {}
end
|
93
98
|
|
94
99
|
def [](name)
|
@@ -101,6 +106,7 @@ module Rust:: Correlation
|
|
101
106
|
|
102
107
|
# One-line summary: name, correlation, p-value, an optional note when the
# p-value is flagged as not exact, then every statistic as "key -> value".
def to_s
  exactness_note = @exact ? "" : "P-value is not exact. "
  listed_statistics = statistics.map { |k, v| k.to_s + " -> " + v.to_s }.join(", ")

  "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " + exactness_note + "#{listed_statistics}."
end
|
106
112
|
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require_relative '../core'
|
2
|
+
|
3
|
+
module Rust::Descriptive
|
4
|
+
class << self
  # Arithmetic mean of +data+ as a Float.
  # An empty array yields NaN (0.0 / 0).
  # Raises TypeError unless +data+ is an Array of Numerics.
  def mean(data)
    _check_numerics(data)

    data.sum.to_f / data.size
  end

  # Square root of #variance.
  # Raises TypeError unless +data+ is an Array of Numerics.
  def standard_deviation(data)
    _check_numerics(data)

    Math.sqrt(variance(data))
  end
  alias :sd :standard_deviation
  alias :stddev :standard_deviation

  # Sample variance of +data+ (sum of squared deviations over n - 1).
  # Returns NaN when fewer than two values are given.
  # Raises TypeError unless +data+ is an Array of Numerics.
  def variance(data)
    _check_numerics(data)
    return Float::NAN if data.size < 2

    center = mean(data)
    data.map { |v| (v - center) ** 2 }.sum.to_f / (data.size - 1)
  end
  alias :var :variance

  # Median of +data+: the middle element for odd sizes, the average of the
  # two middle elements (as a Float) for even sizes, NaN for an empty array.
  # Raises TypeError unless +data+ is an Array of Numerics.
  def median(data)
    _check_numerics(data)
    return Float::NAN if data.empty?

    sorted = data.sort
    middle = sorted.size / 2
    return sorted[middle] if sorted.size.odd?

    (sorted[middle - 1] + sorted[middle]) / 2.0
  end

  # Sum of +data+.
  # Raises TypeError unless +data+ is an Array of Numerics.
  def sum(data)
    _check_numerics(data)

    data.sum
  end

  # Quantiles of +data+ at the given +percentiles+ (each within [0, 1]).
  #
  # Values falling between two sorted elements are linearly interpolated.
  # Returns a Hash mapping every requested percentile to its quantile; for
  # empty +data+ every quantile is NaN, in line with #median and #variance.
  #
  # Raises TypeError unless both arguments are Arrays of Numerics, and a
  # RuntimeError when a percentile lies outside [0, 1].
  def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    _check_numerics(data)
    _check_numerics(percentiles)
    raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

    return percentiles.map { |p| [p, Float::NAN] }.to_h if data.empty?

    sorted = data.sort
    last_index = sorted.size - 1

    percentiles.map do |p|
      # The "1 + ... - 1" form is kept on purpose: it reproduces the rounding
      # of the original index computation, so results stay bit-identical.
      position = 1 + last_index * p - 1
      low = position.floor
      high = position.ceil

      value = sorted[low]
      weight = position - low
      if weight > 0 && sorted[high] != sorted[low]
        value = (1 - weight) * sorted[low] + weight * sorted[high]
      end

      [p, value]
    end.to_h
  end

  # Outliers of +data+ with respect to its own distribution.
  # See #outliers_according_to for +k+ and the :side option.
  def outliers(data, k=1.5, **opts)
    outliers_according_to(data, data, k, **opts)
  end

  # Elements of +data+ lying outside the fences computed on
  # +data_distribution+: below q1 - k * iqr or above q3 + k * iqr, where q1
  # and q3 are the 0.25 and 0.75 quantiles and iqr = q3 - q1.
  #
  # opts[:side] restricts the result:
  #   :positive, :pos, :p, :+  -> only values above the upper fence
  #   :negative, :neg, :n, :-  -> only values below the lower fence
  # Any other value (or no :side) returns the lower outliers followed by the
  # upper ones.
  def outliers_according_to(data, data_distribution, k=1.5, **opts)
    quartiles = quantile(data_distribution, [0.25, 0.75])
    q1 = quartiles[0.25]
    q3 = quartiles[0.75]
    iqr = q3 - q1

    positive_outliers = data.select { |d| d > q3 + iqr * k }
    negative_outliers = data.select { |d| d < q1 - iqr * k }

    return negative_outliers + positive_outliers unless opts[:side]

    case opts[:side].to_sym
    when :positive, :pos, :p, :+
      positive_outliers
    when :negative, :neg, :n, :-
      negative_outliers
    else
      negative_outliers + positive_outliers
    end
  end

  private

  # Shared argument guard: raises TypeError unless +values+ is an Array whose
  # elements are all Numeric.
  def _check_numerics(values)
    raise TypeError, "Expecting Array of numerics" if !values.is_a?(Array) || !values.all? { |e| e.is_a?(Numeric) }
  end
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# R-like free functions: every method simply forwards its arguments to the
# method of Rust::Descriptive named in its body.
module Rust::RBindings
  # Forwards to Rust::Descriptive.mean.
  def mean(series)
    return Rust::Descriptive.mean(series)
  end

  # Forwards to Rust::Descriptive.median.
  def median(series)
    return Rust::Descriptive.median(series)
  end

  # Forwards to Rust::Descriptive.variance.
  def var(series)
    return Rust::Descriptive.variance(series)
  end

  # Forwards to Rust::Descriptive.standard_deviation.
  def sd(series)
    return Rust::Descriptive.standard_deviation(series)
  end

  # Forwards to Rust::Descriptive.quantile with the same default percentiles.
  def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    return Rust::Descriptive.quantile(series, percentiles)
  end
end
|
@@ -1,8 +1,6 @@
|
|
1
|
-
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
Rust.
|
4
|
-
Rust._eval("library(effsize)")
|
5
|
-
end
|
3
|
+
Rust.prerequisite('effsize')
|
6
4
|
|
7
5
|
module Rust::EffectSize
|
8
6
|
class Result
|
@@ -16,14 +14,16 @@ module Rust::EffectSize
|
|
16
14
|
return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
|
17
15
|
end
|
18
16
|
end
|
19
|
-
end
|
20
17
|
|
21
|
-
|
22
|
-
|
23
|
-
def compute(d1, d2)
|
18
|
+
class CliffDelta
|
19
|
+
def self.compute(d1, d2)
|
24
20
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
21
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
22
|
|
23
|
+
if d1.size <= 1 || d2.size <= 1
|
24
|
+
return Rust::EffectSize::Result.new
|
25
|
+
end
|
26
|
+
|
27
27
|
Rust.exclusive do
|
28
28
|
Rust['effsize.a'] = d1
|
29
29
|
Rust['effsize.b'] = d2
|
@@ -32,23 +32,25 @@ module Rust::EffectSize::CliffDelta
|
|
32
32
|
|
33
33
|
result = Rust::EffectSize::Result.new
|
34
34
|
result.name = "Cliff's delta"
|
35
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
36
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
37
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
38
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
35
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
36
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
37
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
38
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
39
39
|
|
40
40
|
return result
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
class << self
|
48
|
-
def compute(d1, d2)
|
44
|
+
|
45
|
+
class CohenD
|
46
|
+
def self.compute(d1, d2)
|
49
47
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
50
48
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
51
49
|
|
50
|
+
if d1.size <= 1 || d2.size <= 1
|
51
|
+
return Rust::EffectSize::Result.new
|
52
|
+
end
|
53
|
+
|
52
54
|
Rust.exclusive do
|
53
55
|
Rust['effsize.a'] = d1
|
54
56
|
Rust['effsize.b'] = d2
|
@@ -57,10 +59,10 @@ module Rust::EffectSize::CohenD
|
|
57
59
|
|
58
60
|
result = Rust::EffectSize::Result.new
|
59
61
|
result.name = "Cohen's d"
|
60
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
61
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
62
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
63
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
62
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
63
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
64
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
65
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
64
66
|
|
65
67
|
return result
|
66
68
|
end
|
@@ -0,0 +1,248 @@
|
|
1
|
+
require_relative '../core'
|
2
|
+
|
3
|
+
class Numeric
  # Absolute difference between this number and +other+.
  # Raises TypeError when +other+ is not Numeric.
  def distance(other)
    unless other.is_a?(Numeric)
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    difference = self - other
    difference.abs
  end
end
|
10
|
+
|
11
|
+
class Array
  # Sum of the element-wise absolute differences between this array and
  # +other+. The shorter array is padded with zeros (a missing element is
  # nil, and nil.to_i is 0).
  #
  # Numeric elements are compared as they are, so fractional parts count
  # ([0.2].distance([0.9]) is about 0.7 rather than 0). Any other element is
  # converted with to_i first.
  #
  # Raises TypeError when +other+ is not an Array.
  def distance(other)
    raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array

    longest, shortest = self.size > other.size ? [self, other] : [other, self]

    # Only non-numeric elements (nil padding, strings, ...) go through to_i;
    # calling to_i on a Float would silently drop its fractional part.
    as_number = ->(element) { element.is_a?(Numeric) ? element : element.to_i }

    longest.each_with_index.sum do |element, i|
      (as_number.call(element) - as_number.call(shortest[i])).abs
    end
  end
end
|
25
|
+
|
26
|
+
class String
  # Distance between the byte sequences of this string and +other+,
  # delegated to Array#distance.
  # Raises TypeError when +other+ is not a String.
  def distance(other)
    unless other.is_a?(String)
      raise TypeError, "no implicit conversion of #{other.class} into String"
    end

    own_bytes = self.bytes
    other_bytes = other.bytes
    own_bytes.distance(other_bytes)
  end
end
|
33
|
+
|
34
|
+
module Rust
|
35
|
+
# A subset of the outcomes of a random variable, stored as a Hash that maps
# each outcome to its probability.
class RandomVariableSlice
  # Raises TypeError unless +values+ is a Hash.
  def initialize(values)
    raise TypeError, "Expected Hash" unless values.is_a?(Hash)

    @values = values
  end

  # With a truthy +v+: the stored probability of that outcome (nil when the
  # outcome is not in the slice). Otherwise: the sum of all probabilities.
  def probability(v=nil)
    v ? @values[v] : @values.values.sum
  end

  # Outcome with the highest probability.
  def ml
    @values.max_by { |_outcome, weight| weight }.first
  end

  # Sum of outcome * probability over the slice.
  def expected
    @values.map { |outcome, weight| outcome * weight }.sum
  end

  # Comparison operators do not return booleans: each one returns the slice
  # of the outcomes k for which "k <operator> n" holds.
  [:>, :>=, :<, :<=, :==].each do |operator|
    define_method(operator) do |n|
      so_that { |k| k.public_send(operator, n) }
    end
  end

  # New slice with the outcomes for which the given block returns a truthy value.
  def so_that
    kept = @values.select { |outcome, _weight| yield(outcome) }
    RandomVariableSlice.new(kept)
  end

  # New slice with the outcomes k such that k.between?(a, b).
  def between(a, b)
    kept = @values.select { |outcome, _weight| outcome.between?(a, b) }
    RandomVariableSlice.new(kept)
  end
end
|
86
|
+
|
87
|
+
# Discrete random variable: a Hash mapping every outcome to its probability,
# whose probabilities must add up to 1 (within EPSILON).
class RandomVariable < RandomVariableSlice
  # Tolerance on the total probability, and the threshold at or below which
  # #approx! considers an outcome negligible.
  EPSILON = 1e-7

  attr_reader :values

  # values - Hash of outcome => probability (defaults to the constant 0).
  # exact  - when true, #approx! leaves negligible outcomes in place.
  #
  # Raises TypeError (from the parent initializer) unless +values+ is a Hash,
  # and RuntimeError when a probability lies outside [0, 1] or the total is
  # not within EPSILON of 1.
  def initialize(values = {0 => 1.0}, exact = false)
    super(values)
    @exact = exact

    raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
    raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

    approx!
  end

  # Probability of the outcome +v+ as a Float (0.0 for unknown outcomes).
  def probability(v)
    return @values[v].to_f
  end

  # Distribution obtained by adding the outcomes of this variable and +other+,
  # with the probability of every pair multiplied.
  def +(other)
    combine(other) { |mine, theirs| mine + theirs }
  end

  # With an Integer: this variable added to itself that many times (see #rep).
  # With a RandomVariable: the product distribution (see #mul).
  # Raises RuntimeError for any other argument.
  def *(times)
    case times
    when Integer
      rep(times)
    when RandomVariable
      mul(times)
    else
      raise "The argument must be an Integer or a RandomVariable"
    end
  end

  # Distribution obtained by multiplying the outcomes of this variable and
  # +other+, with the probability of every pair multiplied.
  def mul(other)
    combine(other) { |mine, theirs| mine * theirs }
  end

  # Adds this variable to itself until +times+ terms have been summed.
  # For +times+ <= 1 the variable itself is returned.
  def rep(times)
    rv = self
    (times-1).times do
      rv += self
    end

    return rv
  end

  # Marks the variable as exact, so #approx! becomes a no-op.
  def exact!
    @exact = true
  end

  # Unless the variable is exact, removes every outcome whose probability is
  # at most EPSILON and adds that probability to the remaining outcome that
  # is nearest according to #distance.
  def approx!
    return if @exact

    negligible = @values.select { |_outcome, probability| probability <= EPSILON }.keys

    negligible.each do |v|
      probability = @values.delete v
      nearest = @values.keys.min_by { |k| k.distance v }
      @values[nearest] += probability
    end
  end

  # Draws one outcome at random, according to the stored probabilities.
  def extract
    threshold = rand

    cumulative = 0
    @values.each do |key, prob|
      cumulative += prob

      return key if cumulative >= threshold
    end

    # The total may fall slightly short of 1 (it is only checked within
    # EPSILON), so a draw very close to 1 can exceed it. Fall back to the
    # last outcome instead of leaking the Hash returned by #each.
    @values.keys.last
  end

  # Assigns to +key+ the probability still missing to reach 1 (this mutates
  # +hash+) and wraps the result in a RandomVariable.
  def self.complete(hash, key=0)
    hash[key] = 1 - hash.values.sum
    return RandomVariable.new(hash)
  end

  private

  # Builds the distribution of "yield(my_outcome, other_outcome)" over every
  # pair of outcomes; probabilities of pairs that map to the same outcome
  # are accumulated.
  def combine(other)
    merged = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        key = yield(my_key, other_key)

        merged[key] = merged[key].to_f + (my_value * other_value)
      end
    end

    RandomVariable.new(merged, @exact)
  end
end
|
188
|
+
|
189
|
+
# Random variable in which every listed outcome gets the same probability,
# 1.0 / values.size.
class UniformRandomVariable < RandomVariable
  # values - collection of outcomes.
  # exact  - passed unchanged to the parent initializer.
  def initialize(values, exact = false)
    distribution = values.map { |outcome| [outcome, 1.0 / values.size] }.to_h

    super(distribution, exact)
  end
end
|
194
|
+
|
195
|
+
# Mixin providing the mathematical notation P(...) and E(...).
module Probabilities
  # Probability of a slice, e.g. P(x > 3).
  #
  # Raises RuntimeError when +v+ is not a RandomVariableSlice, or when it is
  # a whole RandomVariable rather than a slice of it.
  def P(v)
    raise "Cannot compute the probability of a #{v.class}" unless v.is_a? RandomVariableSlice
    raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable

    v.probability
  end

  # Expected value of a slice or of a whole random variable.
  #
  # Raises RuntimeError when +v+ is not a RandomVariableSlice.
  def E(v)
    raise "Cannot compute the expected value of a #{v.class}" unless v.is_a? RandomVariableSlice

    v.expected
  end
end
|
213
|
+
|
214
|
+
# Ready-made random variables.
class RandomVariable
  # Probability assigned to each lowercase letter "a".."z".
  english_letter_frequencies = {
    "a" => 0.08167, "b" => 0.01492, "c" => 0.02782, "d" => 0.04253,
    "e" => 0.12703, "f" => 0.02228, "g" => 0.02015, "h" => 0.06094,
    "i" => 0.06966, "j" => 0.00153, "k" => 0.00772, "l" => 0.04025,
    "m" => 0.02406, "n" => 0.06749, "o" => 0.07507, "p" => 0.01929,
    "q" => 0.00095, "r" => 0.05987, "s" => 0.06327, "t" => 0.09056,
    "u" => 0.02758, "v" => 0.00978, "w" => 0.02360, "x" => 0.00150,
    "y" => 0.01974, "z" => 0.00074
  }
  ENGLISH_ALPHABET = RandomVariable.new(english_letter_frequencies)

  # Six equally likely outcomes, 1 to 6.
  DICE = UniformRandomVariable.new((1..6).to_a)

  # Two equally likely outcomes, "h" and "t".
  COIN = UniformRandomVariable.new(["h", "t"])
end
|
248
|
+
end
|