random_value_sampler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +79 -0
- data/lib/random_value_sampler.rb +456 -0
- data/test/random_value_sampler_test.rb +892 -0
- metadata +57 -0
data/README
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
rpmf
|
|
2
|
+
========
|
|
3
|
+
|
|
4
|
+
Class to allow sampling from very, very simple probability mass functions
|
|
5
|
+
(uniform and arbitrary non-uniform). Values can be any object;
|
|
6
|
+
for uniform distributions, a Range can be used to specify a range of
|
|
7
|
+
discrete values.
|
|
8
|
+
|
|
9
|
+
To specify a uniform distribution, only the values need to be specified, and
|
|
10
|
+
can be:
|
|
11
|
+
- an Array of values (it is assumed the values are distinct, but you may
|
|
12
|
+
insert duplicates if you know what you're doing and realize you're probably
|
|
13
|
+
no longer dealing with a truly uniform distribution anymore (but this could
|
|
14
|
+
be used to "cheat" to generate distributions that are 'nearly' uniform where
|
|
15
|
+
probability mass is quantized (e.g. a 1/3, 2/3 distribution). This may
|
|
16
|
+
prove to be a more efficient implementation in such cases as the non-uniform
|
|
17
|
+
pmf is more computationally demanding).
|
|
18
|
+
- a ruby Range object; Rpmf honors the inclusion/exclusion of last/end
|
|
19
|
+
of the Range (as defined by exclude_end? method). the Range must be of
|
|
20
|
+
numeric type unless you REALLY know what you're doing (e.g. the Xs class
|
|
21
|
+
example in the Range rdoc won't work).
|
|
22
|
+
- a single numeric type specifying an upper bound (zero is assumed as
|
|
23
|
+
lower bound--both zero and upper bound are included in distribution)
|
|
24
|
+
|
|
25
|
+
To specify a non-uniform distribution, the values and probability mass
|
|
26
|
+
must be specified. It is not necessary for the probability mass to
|
|
27
|
+
represent a true probability distribution (needn't sum to 1), as the class
|
|
28
|
+
will normalize accordingly. The pmf may be specified as a Hash or an Array:
|
|
29
|
+
- Hash, where the hash keys are the possible values the random variable
|
|
30
|
+
can take on; the hash values are the 'frequency counts' or non-normalized
|
|
31
|
+
probability mass
|
|
32
|
+
- Array, each element of which is a two-element array. each two element
|
|
33
|
+
array's first element is the value; the last element is the frequency
|
|
34
|
+
count for that value
|
|
35
|
+
|
|
36
|
+
Examples
|
|
37
|
+
=========
|
|
38
|
+
|
|
39
|
+
require 'rpmf'
|
|
40
|
+
|
|
41
|
+
uniform
|
|
42
|
+
-------
|
|
43
|
+
|
|
44
|
+
# generate a uniform pmf over [1,5]
|
|
45
|
+
a = Rpmf.new_uniform([1,2,3,4,5])
|
|
46
|
+
|
|
47
|
+
# generate a uniform pmf over some words
|
|
48
|
+
a = Rpmf.new_uniform(["one", "two", "buckle", "my", "shoe"])
|
|
49
|
+
|
|
50
|
+
# generate a 'quantized' pmf by using duplicate entries
|
|
51
|
+
a = Rpmf.new_uniform([1, 2, 2, 3, 3, 3])
|
|
52
|
+
a = Rpmf.new_uniform(["the", "the", "a", "the", "and", "zyzzyva"])
|
|
53
|
+
|
|
54
|
+
# generate a uniform pmf over [1,5] using a Range
|
|
55
|
+
a = Rpmf.new_uniform(1..5)
|
|
56
|
+
a = Rpmf.new_uniform(1...6)
|
|
57
|
+
|
|
58
|
+
# generate a uniform pmf over [0,5] by specifying upper limit
|
|
59
|
+
a = Rpmf.new_uniform(5)
|
|
60
|
+
|
|
61
|
+
non-uniform
|
|
62
|
+
-----------
|
|
63
|
+
|
|
64
|
+
# generate a non-uniform pmf using the Hash form:
|
|
65
|
+
|
|
66
|
+
# values are 5 and 10, with probability 0.4 and 0.6, respectively
|
|
67
|
+
a = Rpmf.new_non_uniform( { 5 => 20, 10 => 30 } )
|
|
68
|
+
|
|
69
|
+
# values are "probable", "possible" and "not likely" with probability
|
|
70
|
+
# 0.75, 0.20 and 0.05, respectively.
|
|
71
|
+
a = Rpmf.new_non_uniform( { "probable" => 75,
|
|
72
|
+
"possible" => 20,
|
|
73
|
+
"not likely" => 5 } )
|
|
74
|
+
|
|
75
|
+
# generate a non-uniform pmf using the Array form (same examples as above)
|
|
76
|
+
a = Rpmf.new_non_uniform( [ [5,20], [10,30] ] )
|
|
77
|
+
a = Rpmf.new_non_uniform( [ ["probable",75],
|
|
78
|
+
["possible",20],
|
|
79
|
+
["not likely",5] ] )
|
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
# simple class for generating and sampling from a probability distribution,
|
|
2
|
+
# including implementation of sampling from uniform and arbitrary distributions
|
|
3
|
+
# on discrete random variables, by passing in an object that represents
|
|
4
|
+
# the probability mass function (PMF) for a distribution.
|
|
5
|
+
#
|
|
6
|
+
# the PMF can be computed from non-distributions (e.g. frequency counts)
|
|
7
|
+
# provided in the form of a hash or array of tuples (that is, an array of
|
|
8
|
+
# [arrays of length 2]).
|
|
9
|
+
#
|
|
10
|
+
# the values of the random variable can be anything, but the frequencies/
|
|
11
|
+
# probabilities must be numeric (or convertible to numeric via .to_f())
|
|
12
|
+
#
|
|
13
|
+
# note that if a value is repeated multiple times in the frequency count/
|
|
14
|
+
# distribution passed in, then the frequency mass is simply summed for
|
|
15
|
+
# each occurrence of the value. this will allow you to pass in a large array
|
|
16
|
+
# of each occurrence of values in the data set. For example, you could pass
|
|
17
|
+
# in an array of tuples where each value is a word in a document and every
|
|
18
|
+
# value is set to 1 so that you don't actually have to do the word
|
|
19
|
+
# counting yourself.
|
|
20
|
+
#
|
|
21
|
+
# (if you would like to ensure uniqueness, provide Set as the values variable
|
|
22
|
+
# to the new_uniform() factory method)
|
|
23
|
+
#
|
|
24
|
+
# PMFs can also be created for uniform distributions by simply specifying the
|
|
25
|
+
# values the random variable may take on.
|
|
26
|
+
#
|
|
27
|
+
# you can also create a RandomValueSampler directly, by passing in an object
|
|
28
|
+
# that represents the distribution/probability function you'd like to sample
|
|
29
|
+
# from. (this allows for continuous random variables as well). the object
|
|
30
|
+
# simply needs to respond_to? the following methods:
|
|
31
|
+
# - sample_from_distribution -> single value sampled from distribution
|
|
32
|
+
#     (the value stays in the distribution; permanent removal is only done by
#     the optional sample_from_distribution_and_remove described in the NOTE below)
|
|
33
|
+
# - all_values -> Array of all values
|
|
34
|
+
# - num_values -> integer giving the number of possible values
|
|
35
|
+
# - probability_of(val) -> probability (numeric type)
|
|
36
|
+
# --> since this library was created for discrete random variables, this
|
|
37
|
+
# method was included. just create a dummy implementation (maybe return
|
|
38
|
+
# 0, to be 'correct') if your distribution is a continuous variable?
|
|
39
|
+
# NOTE: if the object also responds to sample_from_distribution_and_remove(),
|
|
40
|
+
# the sample_unique() method will likely run faster.
|
|
41
|
+
#-------------------------------------------------------------------------------
|
|
42
|
+
# Set is used by sample_unique and by UniformPmf; it is only autoloaded on
# newer rubies, so load it explicitly.
require 'set'

class RandomValueSampler

  # Wraps a probability-function object. The object must respond to:
  # - sample_from_distribution -> a single value sampled from the distribution
  # - all_values               -> Array of all values
  # - num_values               -> number of possible values
  # - probability_of(val)      -> probability of val (numeric)
  #
  # If it also responds to sample_from_distribution_and_remove, sample_unique
  # uses that method instead of sample_from_distribution.
  #
  # For discrete uniform / arbitrary PMFs prefer the new_uniform() and
  # new_non_uniform() factory methods.
  #
  # Raises RuntimeError if any of the four required methods is missing.
  #-----------------------------------------------------------------------------
  def initialize(pmf)
    required = [:sample_from_distribution, :all_values, :num_values, :probability_of]

    unless required.all? { |name| pmf.respond_to?(name) }
      raise "Received non-pmf-like object of type '#{pmf.class.name}'"
    end

    @pmf = pmf
  end

  # Builds a sampler over a uniform distribution. values may be:
  # - a Set of values
  # - an Array of values (duplicates are kept, so a repeated value is
  #   proportionally more likely)
  # - a Range with numeric endpoints (exclude_end? is honored)
  # - a scalar, meaning the range 0..scalar (both ends included)
  #-----------------------------------------------------------------------------
  def self.new_uniform(values)
    new(UniformPmf.new(values))
  end

  # Builds a sampler over a non-uniform distribution. values_and_counts may be:
  # - a Hash: key == value of the random variable, value == its frequency
  #   count / probability mass
  # - an Array of two-element arrays: first == value, last == frequency count
  #
  # The counts need not sum to 1; probabilities are computed relative to the
  # total mass.
  #-----------------------------------------------------------------------------
  def self.new_non_uniform(values_and_counts)
    new(NonUniformPmf.new(values_and_counts))
  end

  # Draws n samples WITH replacement (duplicates are allowed). n defaults to 1.
  #
  # Returns the bare sample when exactly one sample was drawn, otherwise an
  # Array of samples.
  #
  # Raises RuntimeError if n is not greater than 0.
  #-----------------------------------------------------------------------------
  def sample(n = 1)
    raise "n must be greater than 0 to sample" if n <= 0

    samples = (1..n).map { pmf.sample_from_distribution }

    samples.length == 1 ? samples.first : samples
  end

  # Draws n distinct samples (sampling WITHOUT replacement). n defaults to 1.
  #
  # NOTE: when the underlying pmf responds to
  # sample_from_distribution_and_remove (both built-in pmfs do), every value
  # returned here is permanently removed from the pmf, so later calls on this
  # sampler see a smaller distribution.
  #
  # Returns the bare sample when exactly one sample was drawn, otherwise an
  # Array of samples.
  #
  # Raises RuntimeError if n is not greater than 0, if n exceeds
  # pmf.num_values, or (built-in pmfs) if the pmf runs out of values before n
  # distinct samples were found -- possible when a uniform Array holds
  # duplicates, since num_values counts every copy.
  #-----------------------------------------------------------------------------
  def sample_unique(n = 1)
    raise "n must be greater than 0 to sample_unique" if n <= 0

    if n > pmf.num_values
      raise("Invalid request to pull #{n} unique samples from a distribution " +
            "with only #{pmf.num_values} distinct values")
    end

    removable = pmf.respond_to?(:sample_from_distribution_and_remove)

    # a Set guards against pmfs that can hand back the same value twice
    samples = Set.new
    while samples.length < n
      samples << if removable
                   pmf.sample_from_distribution_and_remove
                 else
                   pmf.sample_from_distribution
                 end
    end

    samples.length == 1 ? samples.first : samples.to_a
  end

  # some pass-through methods...

  # Returns the probability of val, as reported by the underlying pmf.
  #-----------------------------------------------------------------------------
  def probability_of(val)
    pmf.probability_of(val)
  end

  # Returns an Array of all values of the random variable, as reported by the
  # underlying pmf. For a large Range-based pmf this materializes a large Array.
  #-----------------------------------------------------------------------------
  def all_values
    pmf.all_values
  end

  # Returns the number of values, as reported by the underlying pmf.
  #-----------------------------------------------------------------------------
  def num_values
    pmf.num_values
  end

  # Uniform pmf: every entry is equally likely, so sampling is a single
  # random index (or offset into a Range).
  #-----------------------------------------------------------------------------
  class UniformPmf

    attr_reader :num_values, :values

    # vals may be a Set, an Array, a Range with numeric endpoints (the
    # endpoints must support the minus operator), or a scalar (converted with
    # to_i and treated as the range 0..scalar).
    #
    # Arrays are copied so that later removals never touch the caller's Array.
    # Duplicates in an Array are kept and counted in num_values.
    #
    # Raises RuntimeError for a negative scalar or an empty Set/Array/Range.
    #---------------------------------------------------------------------------
    def initialize(vals)
      unless vals.is_a?(Set) || vals.is_a?(Array) || vals.is_a?(Range)
        upper = vals.to_i
        if upper < 0
          raise "Scalar input must be at least 0 to create distribution"
        end
        vals = 0..upper
      end

      if vals.is_a?(Range)
        @num_values = vals.last - vals.first + (vals.exclude_end? ? 0 : 1)
        @values = vals

        if @num_values <= 0
          raise "Cannot create distribution from empty range: #{vals.inspect}"
        end
      else
        if vals.empty?
          kind = vals.is_a?(Set) ? "set" : "array"
          raise "Cannot create uniform distribution from empty #{kind}"
        end

        # private copy: sample_from_distribution_and_remove deletes from it
        @values = vals.to_a.dup
        @num_values = @values.length
      end
    end

    # Returns one value drawn uniformly from the distribution.
    #
    # Raises RuntimeError once every value has been removed.
    #---------------------------------------------------------------------------
    def sample_from_distribution
      if @num_values <= 0
        raise "Cannot sample from a distribution with no values left"
      end

      index = (rand() * @num_values).floor

      @values.is_a?(Array) ? @values[index] : @values.first + index
    end

    # Draws one value and removes it (every copy of it) from the distribution.
    # A Range-backed distribution is converted to an Array on first use.
    #---------------------------------------------------------------------------
    def sample_from_distribution_and_remove
      sample = sample_from_distribution

      @values = @values.to_a if @values.is_a?(Range)

      @values.delete(sample)
      # delete drops all duplicates of the sample, so recount instead of
      # decrementing by one
      @num_values = @values.length

      sample
    end

    # Returns an Array of all values (a copy; mutating it does not affect the
    # distribution).
    #---------------------------------------------------------------------------
    def all_values
      values.to_a.dup
    end

    # Returns the probability of value: 0.0 when value cannot be produced,
    # otherwise (number of entries equal to value) / num_values.
    #---------------------------------------------------------------------------
    def probability_of(value)
      return 0.0 if @num_values <= 0

      if @values.is_a?(Array)
        @values.count(value) / @num_values.to_f
      else
        range_member?(value) ? 1.0 / @num_values.to_f : 0.0
      end
    end

    private

    # True when value is first + k for some integer k in [0, num_values), i.e.
    # a value sample_from_distribution can produce from a Range.
    #---------------------------------------------------------------------------
    def range_member?(value)
      return false unless value.is_a?(Numeric) && value.real?

      offset = value - @values.first
      offset >= 0 && offset < @num_values && offset == offset.floor
    end

  end # end UniformPmf inner class

  # Non-uniform pmf: each value carries its own (non-negative) mass; sampling
  # walks the cumulative mass until it passes a random threshold.
  #-----------------------------------------------------------------------------
  class NonUniformPmf

    # frequency_counts is a Hash (value => frequency) or an Array of
    # two-element arrays ([value, frequency]). Frequencies are converted with
    # to_f and need not sum to 1. A value listed more than once has its
    # frequencies summed.
    #
    # Raises RuntimeError if frequency_counts is nil, empty, or neither Hash
    # nor Array; if any frequency is negative; or if the total mass is not
    # greater than 0.
    #---------------------------------------------------------------------------
    def initialize(frequency_counts)
      @total_mass = 0.0

      usable = (frequency_counts.is_a?(Hash) || frequency_counts.is_a?(Array)) &&
               !frequency_counts.empty?
      unless usable
        raise "no (or empty) frequency counts or distribution was specified"
      end

      if frequency_counts.is_a?(Hash)
        populate_distribution_from_hash(frequency_counts)
      else
        populate_distribution_from_array(frequency_counts)
      end

      if @total_mass <= 0.0
        raise("Received invalid frequency counts where total mass sums to " +
              "#{@total_mass}")
      end
    end

    # Returns the probability of val (0.0 for a value that is not part of the
    # distribution, or once all mass has been removed).
    #---------------------------------------------------------------------------
    def probability_of(val)
      return 0.0 if @total_mass <= 0.0

      distribution_lookup.fetch(val, 0.0) / @total_mass
    end

    # Returns one value drawn according to the masses.
    #
    # Raises RuntimeError once every value has been removed.
    #---------------------------------------------------------------------------
    def sample_from_distribution
      if distribution.empty?
        raise "Cannot sample from a distribution with no values left"
      end

      sampled_mass = rand() * @total_mass
      summed_mass = 0.0

      chosen = distribution.find do |_val, mass|
        summed_mass += mass
        summed_mass > sampled_mass
      end

      # if rounding kept the running sum from passing the threshold, fall
      # back to the last entry
      (chosen || distribution.last).first
    end

    # Draws one value and removes it from the distribution.
    #---------------------------------------------------------------------------
    def sample_from_distribution_and_remove
      sample = sample_from_distribution

      distribution_lookup.delete(sample)
      # eql? matches the equality the lookup hash uses for its keys
      distribution.delete_if { |pair| pair.first.eql?(sample) }

      # re-sum rather than subtract, so rounding error cannot accumulate
      @total_mass = distribution.inject(0.0) { |sum, pair| sum + pair.last }

      sample
    end

    # Returns the number of values with non-zero mass.
    #---------------------------------------------------------------------------
    def num_values
      distribution.length
    end

    # Returns an Array of the values in the lookup hash. This includes values
    # whose mass is zero, so it can be longer than num_values.
    #---------------------------------------------------------------------------
    def all_values
      distribution_lookup.keys
    end

    protected

    attr_reader :distribution, :distribution_lookup

    # Populates the distribution from frequency counts in Hash form.
    #---------------------------------------------------------------------------
    def populate_distribution_from_hash(frequency_counts)
      @distribution_lookup = {}

      frequency_counts.each_pair { |val, freq| accumulate_mass(val, freq) }

      populate_distribution_array
    end

    # Populates the distribution from frequency counts in Array form
    # (each element is [value, frequency]).
    #---------------------------------------------------------------------------
    def populate_distribution_from_array(frequency_counts)
      @distribution_lookup = {}

      frequency_counts.each do |val_freq|
        accumulate_mass(val_freq.first, val_freq.last)
      end

      populate_distribution_array
    end

    # Adds freq (converted with to_f) to the mass recorded for val and to the
    # total mass. Raises RuntimeError for a negative frequency.
    #---------------------------------------------------------------------------
    def accumulate_mass(val, freq)
      freq = freq.to_f
      raise "Invalid negative frequency (#{freq}) for value #{val}" if freq < 0

      @total_mass += freq
      @distribution_lookup[val] = @distribution_lookup.fetch(val, 0.0) + freq
    end

    # Builds the Array of [value, mass] pairs used for sampling, skipping
    # values whose mass is zero.
    #---------------------------------------------------------------------------
    def populate_distribution_array
      @distribution = []

      @distribution_lookup.each_pair do |val, freq|
        @distribution << [val, freq] unless freq == 0
      end
    end

  end # end NonUniformPmf inner class

  protected

  # the underlying pmf object
  attr_reader :pmf

end # end random value sampler class
|