random_value_sampler 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +79 -0
- data/lib/random_value_sampler.rb +456 -0
- data/test/random_value_sampler_test.rb +892 -0
- metadata +57 -0
data/README
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
rpmf
|
2
|
+
========
|
3
|
+
|
4
|
+
Class to allow sampling from very, very simple probability mass functions
|
5
|
+
(uniform and arbitrary non-uniform). Values can be any object;
|
6
|
+
for uniform distributions, a Range can be used to specify a range of
|
7
|
+
discrete values.
|
8
|
+
|
9
|
+
To specify a uniform distribution, only the values need to be specified, and
|
10
|
+
can be:
|
11
|
+
- an Array of values (it is assumed the values are distinct, but you may
|
12
|
+
insert duplicates if you know what you're doing and realize you're probably
|
13
|
+
no longer dealing with a truly uniform distribution anymore (but this could
|
14
|
+
be used to "cheat" to generate distributions that are 'nearly' uniform where
|
15
|
+
probability mass is quantized (e.g. a 1/3, 2/3 distribution). This may
|
16
|
+
prove to be a more efficient implementation in such cases as the non-uniform
|
17
|
+
pmf is more computationally demanding).
|
18
|
+
- a ruby Range object; RandomValueSampler honors the inclusion/exclusion of last/end
|
19
|
+
of the Range (as defined by exclude_end? method). the Range must be of
|
20
|
+
numeric type unless you REALLY know what you're doing (e.g. the Xs class
|
21
|
+
example in the Range rdoc won't work).
|
22
|
+
- a single numeric type specifying an upper bound (zero is assumed as
|
23
|
+
lower bound--both zero and upper bound are included in distribution)
|
24
|
+
|
25
|
+
To specify a non-uniform distribution, the values and probability mass
|
26
|
+
must be specified. It is not necessary for the probability mass to
|
27
|
+
represent a true probability distribution (needn't sum to 1), as the class
|
28
|
+
will normalize accordingly. The pmf may be specified as a Hash or an Array:
|
29
|
+
- Hash, where the hash keys are the possible values the random variable
|
30
|
+
can take on; the hash values are the 'frequency counts' or non-normalized
|
31
|
+
probability mass
|
32
|
+
- Array, each element of which is a two-element array. each two element
|
33
|
+
array's first element is the value; the last element is the frequency
|
34
|
+
count for that value
|
35
|
+
|
36
|
+
Examples
|
37
|
+
=========
|
38
|
+
|
39
|
+
require 'random_value_sampler'
|
40
|
+
|
41
|
+
uniform
|
42
|
+
-------
|
43
|
+
|
44
|
+
# generate a uniform pmf over [1,5]
|
45
|
+
a = RandomValueSampler.new_uniform([1,2,3,4,5])
|
46
|
+
|
47
|
+
# generate a uniform pmf over some words
|
48
|
+
a = RandomValueSampler.new_uniform(["one", "two", "buckle", "my", "shoe"])
|
49
|
+
|
50
|
+
# generate a 'quantized' pmf by using duplicate entries
|
51
|
+
a = RandomValueSampler.new_uniform([1, 2, 2, 3, 3, 3])
|
52
|
+
a = RandomValueSampler.new_uniform(["the", "the", "a", "the", "and", "zyzzyva"])
|
53
|
+
|
54
|
+
# generate a uniform pmf over [1,5] using a Range
|
55
|
+
a = RandomValueSampler.new_uniform(1..5)
|
56
|
+
a = RandomValueSampler.new_uniform(1...6)
|
57
|
+
|
58
|
+
# generate a uniform pmf over [0,5] by specifying upper limit
|
59
|
+
a = RandomValueSampler.new_uniform(5)
|
60
|
+
|
61
|
+
non-uniform
|
62
|
+
-----------
|
63
|
+
|
64
|
+
# generate a non-uniform pmf using the Hash form:
|
65
|
+
|
66
|
+
# values are 5 and 10, with probability 0.4 and 0.6, respectively
|
67
|
+
a = RandomValueSampler.new_non_uniform( { 5 => 20, 10 => 30 } )
|
68
|
+
|
69
|
+
# values are "probable", "possible" and "not likely" with probability
|
70
|
+
# 0.75, 0.20 and 0.05, respectively.
|
71
|
+
a = RandomValueSampler.new_non_uniform( { "probable" => 75,
|
72
|
+
"possible" => 20,
|
73
|
+
"not likely" => 5 } )
|
74
|
+
|
75
|
+
# generate a non-uniform pmf using the Array form (same examples as above)
|
76
|
+
a = RandomValueSampler.new_non_uniform( [ [5,20], [10,30] ] )
|
77
|
+
a = RandomValueSampler.new_non_uniform( [ ["probable",75],
|
78
|
+
["possible", 20],
|
79
|
+
["not likely", 5] ] )
|
@@ -0,0 +1,456 @@
|
|
1
|
+
require 'set'

# simple class for generating and sampling from a probability distribution,
# including implementation of sampling from uniform and arbitrary distributions
# on discrete random variables, by passing in an object that represents
# the probability mass function (PMF) for a distribution.
#
# the PMF can be computed from non-distributions (e.g. frequency counts)
# provided in the form of a hash or array of tuples (that is, an array of
# [arrays of length 2]).
#
# the values of the random variable can be anything, but the frequencies/
# probabilities must be numeric (or convertible to numeric via .to_f())
#
# note that if a value is repeated multiple times in the frequency count/
# distribution passed in, then the frequency mass is simply summed for
# each occurrence of the value. this will allow you to pass in a large array
# of each occurrence of values in the data set. For example, you could pass
# in an array of tuples where each value is a word in a document and every
# frequency is set to 1 so that you don't actually have to do the word
# counting yourself.
#
# (if you would like to ensure uniqueness, provide a Set as the values variable
# to the new_uniform() factory method)
#
# PMFs can also be created for uniform distributions by simply specifying the
# values the random variable may take on.
#
# you can also create a RandomValueSampler directly, by passing in an object
# that represents the distribution/probability function you'd like to sample
# from. (this allows for continuous random variables as well). the object
# simply needs to respond_to? the following methods:
# - sample_from_distribution -> single value sampled from distribution
# - all_values -> Array of all values
# - num_values -> integer giving the number of possible values
# - probability_of(val) -> probability (numeric type)
#   --> since this library was created for discrete random variables, this
#       method was included. just create a dummy implementation (maybe return
#       0, to be 'correct') if your distribution is a continuous variable?
#
# NOTE: if the object also responds to sample_from_distribution_and_remove()
# (-> single value sampled from distribution, which is then permanently
# removed from the distribution), the sample_unique() method will likely run
# faster. sample_unique() calls that method on a dup of the object, so such
# objects should implement initialize_copy if they hold mutable collections
# that must not be shared between copies.
#-------------------------------------------------------------------------------
class RandomValueSampler

  # the methods every pmf-like object handed to new() must respond to
  REQUIRED_PMF_METHODS = [:sample_from_distribution,
                          :all_values,
                          :num_values,
                          :probability_of].freeze

  # instantiate RandomValueSampler given a probability_function object. the
  # object must respond to:
  # - sample_from_distribution -> single value sampled from distribution
  # - all_values -> Array of all values
  # - num_values -> integer giving the number of possible values
  # - probability_of(val) -> probability (numeric type)
  #
  # if you're creating a discrete random variable with uniform or arbitrary
  # PMF, recommend using the new_uniform() or new_non_uniform() methods instead
  #
  # use this if you have a continuous random variable or want to create your
  # own standard PMF (e.g. geometric, bernoulli, binomial...)
  #
  # raises RuntimeError if the object is missing any of the required methods
  #-----------------------------------------------------------------------------
  def initialize(pmf)
    unless REQUIRED_PMF_METHODS.all? { |name| pmf.respond_to?(name) }
      raise "Received non-pmf-like object of type '#{pmf.class.name}'"
    end

    @pmf = pmf
  end

  # create a sampler for a uniform distribution given an array of values, a
  # range of values, or a scalar defining a range
  #
  # cases:
  # - Set of values: each member will receive equal probability
  # - Array of values: the array can contain a sequence of any objects and
  #   each element will be assigned equal probability
  # - Range object (e.g. 3..18): distribution will be uniform over the
  #   entire range specified (the last value is included unless the range
  #   was built with ..., i.e. exclude_end? is honored)
  # - scalar: the distribution will be uniform over [0, value] (0 and
  #   value will be included in the distribution)
  #
  # note that if a value is repeated multiple times in the array passed in,
  # it receives proportionally more probability mass. this could be done to
  # 'optimize' a distribution that is very nearly uniform....also see comments
  # on this class.
  #-----------------------------------------------------------------------------
  def self.new_uniform(values)
    new(UniformPmf.new(values))
  end


  # create a sampler for a non-uniform distribution given either a hash or an
  # array of tuples specifying the probability mass (or frequency count) for
  # each value. if the frequency counts don't represent a proper distribution,
  # they will be normalized to form a distribution, but the original values
  # will be left untouched.
  #
  # if you happen to have a uniform distribution (and know it), it is
  # recommended that you use new_uniform() as it will be much more efficient
  #
  # cases:
  # - Hash: keys == the random variable values; values == the frequency count/
  #   probability mass assigned to that value
  # - Array: each element in the array is a two-element array.
  #   first == the random variable value; last == the frequency count/
  #   probability mass assigned to that value
  #-----------------------------------------------------------------------------
  def self.new_non_uniform(values_and_counts)
    new(NonUniformPmf.new(values_and_counts))
  end


  # returns n (pseudo-) independent samples from the pmf defined by this
  # object. n is optional, default is 1. a single sample is returned as a bare
  # value; more than one is returned as an array.
  # duplicates ARE allowed; if you want all samples to be unique, then call
  # sample_unique.
  #
  # this performs "sampling with replacement"
  #
  # raises RuntimeError if n is not greater than 0
  #-----------------------------------------------------------------------------
  def sample(n = 1)
    raise "n must be greater than 0 to sample" if n <= 0

    samples = Array.new(n) { pmf.sample_from_distribution }

    samples.length == 1 ? samples.first : samples
  end


  # returns n (pseudo-) independent samples from the pmf defined by this
  # object, with the condition that each value can only be represented once
  # in the result (no duplicates). n is optional, default is 1. a single sample
  # is returned as a bare value; more than one is returned as an array.
  #
  # probably only makes sense to call this method if you're sampling a
  # discrete (vs. continuous) random variable; for a continuous variable the
  # probability of getting the same value twice is in theory zero, and in
  # practice should be exceedingly low (unless you're testing the precision
  # of the data type you're using).
  #
  # this performs "sampling without replacement" within a single call; the
  # sampler itself is not depleted, so later calls see the full distribution.
  #
  # raises RuntimeError if n is not greater than 0, or if the distribution
  # cannot supply n distinct values
  #-----------------------------------------------------------------------------
  def sample_unique(n = 1)
    raise "n must be greater than 0 to sample_unique" if n <= 0

    # take care of edge cases: where they ask for more samples than there are
    # entries in the distribution (error)
    if n > pmf.num_values
      raise("Invalid request to pull #{n} unique samples from a distribution " +
            "with only #{pmf.num_values} distinct values")
    end

    removable = pmf.respond_to?(:sample_from_distribution_and_remove)

    # removal is destructive, so draw from a copy to keep this sampler intact
    source = removable ? pmf.dup : pmf

    # use a set in case the calling code added multiple copies of the same
    # object into distribution
    samples = Set.new
    while samples.length < n
      if removable
        # num_values can overstate the number of distinct values (e.g. an
        # array with duplicates); stop instead of looping forever
        if source.num_values <= 0
          raise("Invalid request to pull #{n} unique samples from a " +
                "distribution with only #{samples.length} distinct values")
        end

        samples << source.sample_from_distribution_and_remove
      else
        # NOTE: rejection sampling; relies on the pmf really having at least
        # n distinct values
        samples << source.sample_from_distribution
      end
    end

    samples.length == 1 ? samples.first : samples.to_a
  end


  # some pass-through methods...

  # returns probability of a given value
  #-----------------------------------------------------------------------------
  def probability_of(val)
    pmf.probability_of(val)
  end

  # returns array of all possible values for the rv. be careful calling this
  # on pmfs with lots of values...a very large array will be created...which
  # wouldn't happen if you just use the sampling methods....
  #-----------------------------------------------------------------------------
  def all_values
    pmf.all_values
  end

  # returns the number of possible values for the rv
  #-----------------------------------------------------------------------------
  def num_values
    pmf.num_values
  end


  # streamlines the case of uniform distributions where we can be a little
  # more efficient
  #-----------------------------------------------------------------------------
  class UniformPmf

    attr_reader :num_values, :values

    # create a uniform pmf given an array of values, a range of values, or a
    # scalar defining a range
    #
    # cases:
    # - Set of values
    # - Array of values: the array can contain a sequence of any objects and
    #   each element will be assigned equal probability. NOTE: does NOT ensure
    #   that duplicates are removed, so if values are entered more than once in
    #   the array, the distribution will likely no longer be uniform. the
    #   array is copied, so the caller's array is never modified.
    # - Range object (e.g. 3..18): distribution will be uniform over the
    #   entire range specified...note that the range must support the minus
    #   operator (so most appropriate for ranges defined with numeric
    #   endpoints--e.g. the Xs example in the Range class wouldn't work)
    # - scalar: the distribution will be uniform over [0, value] (0 and
    #   value will be included in the distribution)
    #
    # raises RuntimeError for an empty set/array/range or a negative scalar
    #---------------------------------------------------------------------------
    def initialize(vals)
      unless vals.is_a?(Set) || vals.is_a?(Array) || vals.is_a?(Range)
        upper = vals.to_i
        if upper < 0
          raise "Scalar input must be at least 0 to create distribution"
        end
        vals = 0..upper
      end

      case vals
      when Set
        if vals.empty?
          raise "Cannot create uniform distribution from empty set"
        end

        @values = vals.to_a
        @num_values = @values.length
      when Array
        if vals.empty?
          raise "Cannot create uniform distribution from empty array"
        end

        # copy so that removing sampled values never touches the caller's array
        @values = vals.dup
        @num_values = @values.length
      else
        @values = vals
        @num_values = vals.last - vals.first + (vals.exclude_end? ? 0 : 1)

        if @num_values <= 0
          raise "Cannot create distribution from empty range: #{vals.inspect}"
        end
      end
    end

    # give each copy its own values array so that removing values from a
    # copy leaves the original untouched
    #---------------------------------------------------------------------------
    def initialize_copy(source)
      super
      @values = @values.dup if @values.is_a?(Array)
    end

    # sample from the distribution, returning the sampled value
    #---------------------------------------------------------------------------
    def sample_from_distribution
      index = (rand * @num_values).floor

      # a Range is never expanded just to sample: offset from its first value
      @values.is_a?(Array) ? @values[index] : @values.first + index
    end


    # sample from the distribution, and then remove that value from the
    # distribution forever. note that this may make a distribution defined
    # by a range perform worse (the range is expanded into an array).
    #---------------------------------------------------------------------------
    def sample_from_distribution_and_remove
      sample = sample_from_distribution

      @values = @values.to_a if @values.is_a?(Range)

      # delete drops every duplicate of the sample, so recount rather than
      # assuming exactly one element was removed
      @values.delete(sample)
      @num_values = @values.length

      sample
    end


    # returns all possible values for the pmf
    #---------------------------------------------------------------------------
    def all_values
      values.to_a
    end


    # returns the probability of the given value (including zero if the value
    # is not a possible value for the random variable). a value that appears
    # several times in an array-backed distribution gets proportionally more
    # mass.
    #---------------------------------------------------------------------------
    def probability_of(value)
      return 0.0 if @num_values <= 0

      occurrences_of(value) / @num_values.to_f
    end

    protected

    # number of equally-likely slots in the distribution holding the value
    #---------------------------------------------------------------------------
    def occurrences_of(value)
      return @values.count(value) if @values.is_a?(Array)
      return 0 unless value.is_a?(Numeric) && value.real?

      # range-backed: only first, first + 1, ... can ever be sampled
      offset = value - @values.first
      offset >= 0 && offset < @num_values && offset == offset.floor ? 1 : 0
    end

  end # end UniformPmf inner class


  # class to handle the non-uniform pmf case, where each value carries its
  # own (arbitrary, non-negative) probability mass
  #-----------------------------------------------------------------------------
  class NonUniformPmf
    # initialize the non-uniform distribution from frequency counts. will
    # normalize the frequency counts to a distribution (yes, even if a
    # distribution is passed in as argument--yes, could be optimized to allow
    # caller to specify that it is a distribution, but that could create 'bugs'
    # in this code, and it's probably not _THAT_ expensive compared to sampling
    # from the distribution).
    #
    # arguments:
    # - frequency_counts: hash, or array of two-element arrays of random
    #   variable values and the associated frequency for each
    #
    # raises RuntimeError if the argument is not a non-empty Hash or Array,
    # if any frequency is negative, or if the total mass is not positive
    #---------------------------------------------------------------------------
    def initialize(frequency_counts)
      @total_mass = 0.0

      hash_form = frequency_counts.is_a?(Hash)

      unless (hash_form || frequency_counts.is_a?(Array)) &&
             !frequency_counts.empty?

        raise "no (or empty) frequency counts or distribution was specified"
      end

      if hash_form
        populate_distribution_from_hash frequency_counts
      else
        populate_distribution_from_array frequency_counts
      end

      if @total_mass <= 0.0
        raise("Received invalid frequency counts where total mass sums to " +
              "#{@total_mass}")
      end
    end

    # give each copy its own collections so that removing values from a copy
    # leaves the original untouched
    #---------------------------------------------------------------------------
    def initialize_copy(source)
      super
      @distribution = @distribution.dup
      @distribution_lookup = @distribution_lookup.dup
    end


    # returns probability of given value (zero if the value is not part of
    # the distribution)
    #---------------------------------------------------------------------------
    def probability_of(val)
      return 0.0 if @total_mass <= 0

      distribution_lookup.fetch(val, 0.0) / @total_mass
    end


    # take one sample from the distribution (nil if nothing is left in it)
    #---------------------------------------------------------------------------
    def sample_from_distribution
      sampled_mass = rand * @total_mass
      summed_mass = 0
      chosen = nil

      # walk the cumulative mass; if rounding keeps the running sum from ever
      # exceeding the sampled mass, the last value is returned
      distribution.each do |val, mass|
        chosen = val
        summed_mass += mass

        break if summed_mass > sampled_mass
      end

      chosen
    end


    # take one sample from the distribution and remove from distribution forever
    # (returns nil if nothing is left to sample)
    #---------------------------------------------------------------------------
    def sample_from_distribution_and_remove
      return nil if distribution.empty?

      sample = sample_from_distribution

      distribution_lookup.delete(sample)
      # eql? mirrors the hash-key equality used by the lookup (1 vs 1.0 differ)
      distribution.delete_if { |val, _mass| val.eql?(sample) }

      # re-sum instead of subtracting so float error cannot accumulate
      @total_mass = distribution.inject(0.0) { |sum, (_val, mass)| sum + mass }

      sample
    end


    # return the number of possible values (those with non-zero mass)
    #---------------------------------------------------------------------------
    def num_values
      distribution.length
    end


    # returns an array of all random variable values
    # NOTE(review): this includes values that were given zero mass, which
    # num_values does not count
    #---------------------------------------------------------------------------
    def all_values
      # the rv values are the keys in the lookup hash
      distribution_lookup.keys
    end

    protected

    attr_reader :distribution, :distribution_lookup

    # populates the distribution from the frequency counts in Hash form
    #---------------------------------------------------------------------------
    def populate_distribution_from_hash(frequency_counts)
      @distribution_lookup = {}

      frequency_counts.each_pair { |val, freq| add_mass(val, freq) }

      populate_distribution_array
    end


    # populates the distribution from the frequency counts in Array form
    #---------------------------------------------------------------------------
    def populate_distribution_from_array(frequency_counts)
      @distribution_lookup = {}

      frequency_counts.each do |val_freq|
        add_mass(val_freq.first, val_freq.last)
      end

      populate_distribution_array
    end

    # adds one value's frequency to the lookup and to the running total;
    # repeated values have their frequencies summed
    #---------------------------------------------------------------------------
    def add_mass(val, freq)
      freq = freq.to_f
      raise "Invalid negative frequency (#{freq}) for value #{val}" if freq < 0

      @total_mass += freq
      @distribution_lookup[val] = @distribution_lookup.fetch(val, 0.0) + freq
    end

    # populates the distribution array, skipping values with zero mass
    #---------------------------------------------------------------------------
    def populate_distribution_array
      @distribution = @distribution_lookup.reject { |_val, freq| freq == 0 }.to_a
    end

  end # end NonUniformPmf inner class

  protected

  # the underlying pmf object
  attr_reader :pmf

end # end random value sampler class
|