rust 0.4 → 0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +35 -4
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +324 -244
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +98 -107
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +46 -3
- data/lib/rust/stats/descriptive.rb +157 -0
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +44 -21
- data/lib/rust/stats/probabilities.rb +356 -0
- data/lib/rust/stats/tests.rb +384 -0
- data/lib/rust.rb +4 -8
- metadata +31 -12
- data/lib/rust-calls.rb +0 -69
- data/lib/rust-descriptive.rb +0 -67
- data/lib/rust-tests.rb +0 -165
@@ -0,0 +1,157 @@
|
|
1
|
+
require_relative '../core'
|
2
|
+
|
3
|
+
##
|
4
|
+
# Module containing utilities for descriptive statistics.
|
5
|
+
|
6
|
+
module Rust::Descriptive
|
7
|
+
class << self
|
8
|
+
|
9
|
+
##
|
10
|
+
# Computes the arithmetic mean of the given +data+.
|
11
|
+
|
12
|
+
def mean(data)
  # Returns the arithmetic mean of +data+ as a Float (NaN for an empty array,
  # since 0.0 / 0 is NaN).
  # Raises TypeError unless +data+ is an Array holding only Numeric values.
  well_formed = data.is_a?(Array) && data.all? { |item| item.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless well_formed

  total = data.sum
  total.to_f / data.size
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# Computes the standard deviation of the given +data+.
|
20
|
+
|
21
|
+
def standard_deviation(data)
  # Returns the square root of variance(data).
  # Raises TypeError unless +data+ is an Array holding only Numeric values.
  well_formed = data.is_a?(Array) && data.all? { |item| item.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless well_formed

  spread = variance(data)
  Math.sqrt(spread)
end
alias :sd :standard_deviation
alias :stddev :standard_deviation
|
28
|
+
|
29
|
+
##
|
30
|
+
# Computes the variance of the given +data+.
|
31
|
+
|
32
|
+
def variance(data)
  # Sum of squared deviations from mean(data), divided by (size - 1).
  # Returns NaN when fewer than two values are given.
  # Raises TypeError unless +data+ is an Array holding only Numeric values.
  well_formed = data.is_a?(Array) && data.all? { |item| item.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless well_formed

  count = data.size
  return Float::NAN unless count >= 2

  center = mean(data)
  squared_deviations = data.map { |value| (value - center) ** 2 }
  squared_deviations.sum.to_f / (count - 1)
end
alias :var :variance
|
40
|
+
|
41
|
+
##
|
42
|
+
# Computes the median of the given +data+.
|
43
|
+
|
44
|
+
def median(data)
  # Returns the middle value of the sorted +data+ (the element itself for an
  # odd count, the Float average of the two central elements for an even
  # count) and NaN for an empty array.
  # Raises TypeError unless +data+ is an Array holding only Numeric values.
  well_formed = data.is_a?(Array) && data.all? { |item| item.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless well_formed

  count = data.size
  return Float::NAN if count.zero?

  ordered = data.sort
  middle = count / 2
  return ordered[middle] if count.odd?

  (ordered[middle - 1] + ordered[middle]) / 2.0
end
|
57
|
+
|
58
|
+
##
|
59
|
+
# Sums the given +data+.
|
60
|
+
|
61
|
+
def sum(data)
  # Returns Array#sum of +data+ (0 for an empty array).
  # Raises TypeError unless +data+ is an Array holding only Numeric values.
  unless data.is_a?(Array) && data.all? { |item| item.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end

  data.sum
end
|
66
|
+
|
67
|
+
##
|
68
|
+
# Returns the quantiles of the given +data+, given the +percentiles+ (optional).
|
69
|
+
|
70
|
+
def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
  # Returns a Hash mapping each requested percentile (a fraction in [0, 1])
  # to the corresponding quantile of +data+. A quantile that falls between
  # two sorted values is linearly interpolated between them.
  # Raises TypeError when either argument is not an Array of Numerics, and a
  # RuntimeError when a percentile lies outside [0, 1].
  unless data.is_a?(Array) && data.all? { |item| item.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end
  unless percentiles.is_a?(Array) && percentiles.all? { |item| item.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end
  unless percentiles.all? { |fraction| fraction.between?(0, 1) }
    raise "Percentiles outside the range: #{percentiles}"
  end

  ordered = data.sort
  span = [ordered.size - 1, 0].max

  estimates = percentiles.map do |fraction|
    # Fractional position in the sorted data; the expression is kept in this
    # exact form so that floating-point rounding is unchanged.
    position = 1 + span * fraction - 1
    lower = position.floor
    upper = position.ceil

    below = ordered[lower]
    above = ordered[upper]

    if position > lower && above != below
      weight = position - lower
      (1 - weight) * below + weight * above
    else
      below
    end
  end

  percentiles.zip(estimates).to_h
end
|
101
|
+
|
102
|
+
##
|
103
|
+
# Returns the outliers in +data+ using Tukey's fences, with a given +k+.
|
104
|
+
|
105
|
+
def outliers(data, k=1.5, **opts)
  # Delegates to outliers_according_to, using +data+ itself as the reference
  # distribution. +k+ and +opts+ are forwarded unchanged.
  reference = data
  outliers_according_to(data, reference, k, **opts)
end
|
108
|
+
|
109
|
+
##
|
110
|
+
# Returns the outliers in +data+ using Tukey's fences, with a given +k+, with respect to different data
|
111
|
+
# distribution (+data_distribution+).
|
112
|
+
|
113
|
+
def outliers_according_to(data, data_distribution, k=1.5, **opts)
|
114
|
+
quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
|
115
|
+
q1 = quantiles[0.25]
|
116
|
+
q3 = quantiles[0.75]
|
117
|
+
iqr = q3 - q1
|
118
|
+
|
119
|
+
positive_outliers = data.select { |d| d > q3 + iqr * k }
|
120
|
+
negative_outliers = data.select { |d| d < q1 - iqr * k }
|
121
|
+
|
122
|
+
outliers = negative_outliers + positive_outliers
|
123
|
+
if opts[:side]
|
124
|
+
case opts[:side].to_sym
|
125
|
+
when :positive, :neg, :n, :+
|
126
|
+
outliers = positive_outliers
|
127
|
+
when :negative, :pos, :p, :-
|
128
|
+
outliers = negative_outliers
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
return outliers
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
module Rust::RBindings
  # Shortcut for Rust::Descriptive.mean.
  def mean(series)
    Rust::Descriptive.mean(series)
  end

  # Shortcut for Rust::Descriptive.median.
  def median(series)
    Rust::Descriptive.median(series)
  end

  # Shortcut for Rust::Descriptive.variance.
  def var(series)
    Rust::Descriptive.variance(series)
  end

  # Shortcut for Rust::Descriptive.standard_deviation.
  def sd(series)
    Rust::Descriptive.standard_deviation(series)
  end

  # Shortcut for Rust::Descriptive.quantile; +percentiles+ defaults to the
  # minimum, the three quartiles and the maximum.
  def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    Rust::Descriptive.quantile(series, percentiles)
  end
end
|
@@ -1,10 +1,15 @@
|
|
1
|
-
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
Rust.
|
4
|
-
|
5
|
-
|
3
|
+
Rust.prerequisite('effsize')
|
4
|
+
|
5
|
+
##
|
6
|
+
# Module containing utilities for computing effect size statistics.
|
6
7
|
|
7
8
|
module Rust::EffectSize
|
9
|
+
|
10
|
+
##
|
11
|
+
# Effect size results.
|
12
|
+
|
8
13
|
class Result
|
9
14
|
attr_accessor :name
|
10
15
|
attr_accessor :estimate
|
@@ -16,14 +21,23 @@ module Rust::EffectSize
|
|
16
21
|
return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
|
17
22
|
end
|
18
23
|
end
|
19
|
-
end
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
25
|
+
##
|
26
|
+
# Cliff delta effect size statistics.
|
27
|
+
|
28
|
+
class CliffDelta
|
29
|
+
|
30
|
+
##
|
31
|
+
# Computes and returns the effect size for +d1+ and +d2+.
|
32
|
+
|
33
|
+
def self.compute(d1, d2)
|
24
34
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
35
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
36
|
|
37
|
+
if d1.size <= 1 || d2.size <= 1
|
38
|
+
return Rust::EffectSize::Result.new
|
39
|
+
end
|
40
|
+
|
27
41
|
Rust.exclusive do
|
28
42
|
Rust['effsize.a'] = d1
|
29
43
|
Rust['effsize.b'] = d2
|
@@ -32,23 +46,32 @@ module Rust::EffectSize::CliffDelta
|
|
32
46
|
|
33
47
|
result = Rust::EffectSize::Result.new
|
34
48
|
result.name = "Cliff's delta"
|
35
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
36
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
37
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
38
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
49
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
50
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
51
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
52
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
39
53
|
|
40
54
|
return result
|
41
55
|
end
|
42
56
|
end
|
43
57
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
58
|
+
|
59
|
+
##
|
60
|
+
# Cohen D effect size statistics.
|
61
|
+
|
62
|
+
class CohenD
|
63
|
+
|
64
|
+
##
|
65
|
+
# Computes and returns the effect size for +d1+ and +d2+.
|
66
|
+
|
67
|
+
def self.compute(d1, d2)
|
49
68
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
50
69
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
51
70
|
|
71
|
+
if d1.size <= 1 || d2.size <= 1
|
72
|
+
return Rust::EffectSize::Result.new
|
73
|
+
end
|
74
|
+
|
52
75
|
Rust.exclusive do
|
53
76
|
Rust['effsize.a'] = d1
|
54
77
|
Rust['effsize.b'] = d2
|
@@ -57,10 +80,10 @@ module Rust::EffectSize::CohenD
|
|
57
80
|
|
58
81
|
result = Rust::EffectSize::Result.new
|
59
82
|
result.name = "Cohen's d"
|
60
|
-
result.estimate = Rust._pull("effsize.result$estimate")
|
61
|
-
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
|
62
|
-
result.confidence_level = Rust._pull("effsize.result$conf.level")
|
63
|
-
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
|
83
|
+
result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
|
84
|
+
result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
|
85
|
+
result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
|
86
|
+
result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
|
64
87
|
|
65
88
|
return result
|
66
89
|
end
|
@@ -0,0 +1,356 @@
|
|
1
|
+
require_relative '../core'
|
2
|
+
|
3
|
+
class Numeric

  ##
  # Returns the absolute difference between this number and +other+.
  # Raises TypeError when +other+ is not a Numeric.

  def _rust_prob_distance(other)
    unless other.is_a?(Numeric)
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    difference = self - other
    difference.abs
  end
end
|
14
|
+
|
15
|
+
class Array

  ##
  # Returns the sum of the element-wise distances between this array and
  # +other+. Elements are converted with +to_i+ before being compared, and
  # positions missing from the shorter array count as 0 (nil.to_i).
  # Raises TypeError when +other+ is not an Array.

  def _rust_prob_distance(other)
    unless other.is_a?(Array)
      raise TypeError, "no implicit conversion of #{other.class} into Array"
    end

    longer, shorter = size > other.size ? [self, other] : [other, self]

    longer.each_with_index.reduce(0) do |total, (element, position)|
      total + element.to_i._rust_prob_distance(shorter[position].to_i)
    end
  end
end
|
33
|
+
|
34
|
+
class String

  ##
  # Returns the distance between the byte sequences of this string and
  # +other+, as computed by Array#_rust_prob_distance.
  # Raises TypeError when +other+ is not a String.

  def _rust_prob_distance(other)
    unless other.is_a?(String)
      raise TypeError, "no implicit conversion of #{other.class} into String"
    end

    own_bytes = bytes
    other_bytes = other.bytes
    own_bytes._rust_prob_distance(other_bytes)
  end
end
|
45
|
+
|
46
|
+
module Rust
|
47
|
+
|
48
|
+
##
|
49
|
+
# Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
|
50
|
+
|
51
|
+
class RandomVariableSlice

  ##
  # Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
  # Raises TypeError when +values+ is not a Hash.

  def initialize(values)
    raise TypeError, "Expected Hash" unless values.is_a?(Hash)

    @values = values
  end

  ##
  # Gets the probability of a value +v+ (nil when +v+ is not part of the slice). If +v+ is not specified, returns
  # the cumulative probability of the whole slice.

  def probability(v=nil)
    return @values.values.sum unless v

    @values[v]
  end

  ##
  # Returns the value with the maximum probability, or nil when the slice is empty.

  def ml
    likeliest = @values.max_by { |_value, probability| probability }

    # max_by yields nil for an empty hash; an empty slice is a normal outcome of the filters below.
    likeliest && likeliest[0]
  end

  ##
  # Returns the expected value for this slice, i.e. the sum of each value multiplied by its probability.

  def expected
    @values.map { |value, probability| value * probability }.sum
  end

  ##
  # Returns a slice with the values that are greater than +n+.

  def >(n)
    so_that { |k| k > n }
  end

  ##
  # Returns a slice with the values that are greater than or equal to +n+.

  def >=(n)
    so_that { |k| k >= n }
  end

  ##
  # Returns a slice with the values that are lower than +n+.

  def <(n)
    so_that { |k| k < n }
  end

  ##
  # Returns a slice with the values that are lower than or equal to +n+.

  def <=(n)
    so_that { |k| k <= n }
  end

  ##
  # Returns a slice with the value +n+. Note that this returns a slice, not a boolean.

  def ==(n)
    so_that { |k| k == n }
  end

  ##
  # Returns a slice with the values between +a+ and +b+ (both included).

  def between(a, b)
    # Comparable#between? is the predicate; there is no +between+ method on the values.
    so_that { |k| k.between?(a, b) }
  end

  ##
  # Returns a slice with the values for which the given block returns true.

  def so_that
    RandomVariableSlice.new(@values.select { |k, _probability| yield(k) })
  end
end
|
137
|
+
|
138
|
+
##
|
139
|
+
# Represents a random variable. The cumulative probability of the values must equal 1.
|
140
|
+
|
141
|
+
class RandomVariable < RandomVariableSlice
  EPSILON = 1e-7

  attr_reader :values

  ##
  # Creates a new random variable. +values+ is a hash of values associated with their probabilities.
  # +exact+ indicates whether this variable, when combined with others, should force to keep all the values, even
  # the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
  # removed for efficiency reasons.
  #
  # Raises TypeError when +values+ is not a Hash, and a RuntimeError when a probability is outside [0, 1] or the
  # probabilities do not add up to 1 (within EPSILON).

  def initialize(values = {0 => 1.0}, exact = false)
    raise TypeError, "Expected Hash" unless values.is_a?(Hash)

    # Work on a copy: approx! deletes and updates entries, which must not leak into the caller's hash.
    @values = values.dup
    @exact = exact

    raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }

    total = @values.values.sum
    raise "The cumulative probability must be exactly 1 (#{total} instead)" unless total.between? 1-EPSILON, 1+EPSILON

    approx!
  end

  ##
  # Returns the probability of value +v+ (0.0 when +v+ is not a value of this variable).

  def probability(v)
    return @values[v].to_f
  end

  ##
  # Returns a new random variable which represents the sum of this and the +other+ random variable.

  def +(other)
    combine(other) { |mine, theirs| mine + theirs }
  end

  ##
  # Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
  # Raises a RuntimeError for any other argument type.

  def *(arg)
    if arg.is_a? Integer
      return rep(arg)
    elsif arg.is_a? RandomVariable
      return mul(arg)
    else
      raise "The argument must be an Integer or a RandomVariable"
    end
  end

  ##
  # Returns a new random variable which represents the product of this and the +other+ random variable.

  def mul(other)
    combine(other) { |mine, theirs| mine * theirs }
  end

  ##
  # Returns a new random variable which represents the sum of this random variable with itself +times+ times.
  # For +times+ lower than 2 no sum is performed and this variable itself is returned.

  def rep(times)
    (times - 1).times.reduce(self) { |accumulated, _| accumulated + self }
  end

  ##
  # Makes sure that the operations yield all the values, even the most unlikely ones.

  def exact!
    @exact = true
  end

  ##
  # If this variable is not exact, the values with probability lower than or equal to EPSILON are removed and
  # their probability is moved to the nearest remaining value (according to +_rust_prob_distance+).

  def approx!
    return if @exact

    negligible = @values.select { |_value, probability| probability <= EPSILON }.keys

    negligible.each do |value|
      probability = @values.delete(value)
      nearest = @values.keys.min_by { |candidate| candidate._rust_prob_distance(value) }
      @values[nearest] += probability
    end
  end

  ##
  # Returns a random value, according to the data distribution.

  def extract
    threshold = rand

    cumulative = 0
    ordered = @values.sort_by { |value, _probability| value }
    ordered.each do |value, probability|
      cumulative += probability

      return value if cumulative >= threshold
    end

    # The probabilities only add up to 1 within EPSILON, so the running total can end slightly below the
    # threshold; in that case the draw belongs to the last value rather than falling through.
    ordered.last && ordered.last[0]
  end

  ##
  # Creates a random variable by partially specifying the values through +hash+. The remaining probability is
  # attributed to +key+ (0, by default). +hash+ itself is left untouched.

  def self.complete(hash, key=0)
    return RandomVariable.new(hash.merge(key => 1 - hash.values.sum))
  end

  private

  ##
  # Builds the variable obtained by pairing every value of this variable with every value of +other+: the block
  # gives the resulting value, and the probabilities of the pair are multiplied and accumulated per result.

  def combine(other)
    new_hash = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        key = yield(my_key, other_key)

        new_hash[key] = new_hash[key].to_f + (my_value * other_value)
      end
    end

    RandomVariable.new(new_hash, @exact)
  end
end
|
276
|
+
|
277
|
+
##
|
278
|
+
# Represents a uniform random variable.
|
279
|
+
|
280
|
+
class UniformRandomVariable < RandomVariable

  ##
  # Creates a random variable in which each of the given +values+ has the same probability (1 / values.size).
  # +exact+ is passed on to RandomVariable.

  def initialize(values, exact = false)
    share = 1.0 / values.size
    distribution = values.each_with_object({}) { |value, table| table[value] = share }

    super(distribution, exact)
  end
end
|
289
|
+
|
290
|
+
##
|
291
|
+
# Module that contains utilities for handling random variables.
|
292
|
+
|
293
|
+
module Probabilities

  ##
  # Computes the cumulative probability of the slice +v+.
  # Raises a RuntimeError when +v+ is a whole RandomVariable or is not a RandomVariableSlice at all.

  def P(v)
    # This message used to be copied from E and wrongly mentioned the expected value.
    raise "Cannot compute the probability of a #{v.class}" unless v.is_a? RandomVariableSlice
    raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable

    return v.probability
  end

  ##
  # Computes the expected value of the random variable (or slice) +v+.
  # Raises a RuntimeError when +v+ is not a RandomVariableSlice.

  def E(v)
    raise "Cannot compute the expected value of a #{v.class}" unless v.is_a? RandomVariableSlice

    return v.expected
  end
end
|
318
|
+
|
319
|
+
##
|
320
|
+
# Module containing examples of commonly-used random variables.
|
321
|
+
|
322
|
+
module RandomVariableExamples
  # Probability of each lowercase letter; the entries add up to 1.
  # NOTE(review): presumably relative letter frequencies in English text - confirm the source of the figures.
  letter_probabilities = {
    "a" => 0.08167, "b" => 0.01492, "c" => 0.02782, "d" => 0.04253,
    "e" => 0.12703, "f" => 0.02228, "g" => 0.02015, "h" => 0.06094,
    "i" => 0.06966, "j" => 0.00153, "k" => 0.00772, "l" => 0.04025,
    "m" => 0.02406, "n" => 0.06749, "o" => 0.07507, "p" => 0.01929,
    "q" => 0.00095, "r" => 0.05987, "s" => 0.06327, "t" => 0.09056,
    "u" => 0.02758, "v" => 0.00978, "w" => 0.02360, "x" => 0.00150,
    "y" => 0.01974, "z" => 0.00074
  }
  ENGLISH_ALPHABET = RandomVariable.new(letter_probabilities)

  # Six equally likely outcomes, 1 to 6.
  DICE = UniformRandomVariable.new((1..6).to_a)

  # Two equally likely outcomes, "h" and "t".
  COIN = UniformRandomVariable.new(%w[h t])
end
|
356
|
+
end
|