random_value_sampler 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,890 @@
1
+ require 'set'
2
+ require 'test/unit'
3
+
4
+ require 'random_value_sampler'
5
+
6
+ #
7
+ # rough outline of this file:
8
+ # * test cases: these just call helper methods to run tests on all of the
9
+ # data cases created below in the setup() method
10
+ # - error inputs
11
+ # - verifying distribution validity
12
+ # * helper methods: mostly verify_xxx() methods that are called by the
13
+ # test cases to compute test results (this is the code
14
+ # most important to review)
15
+ # * setup() method: the method called before each test case is run...to
16
+ # generate data for testing
17
+ #-------------------------------------------------------------------------------
18
+ #
19
+ # rough outline of tests:
20
+ #
21
+ # error inputs (invalid distribution specifications, invalid sample requests)
22
+ # for each valid input case, run the following tests:
23
+ # for EACH valid input case:
24
+ # confirm # values
25
+ # confirm the array of values returned meet specification
26
+ # confirm probability_of
27
+ # uniform:
28
+ # each value in set/array/range has the same value
29
+ # (and they sum to 1 or within v. small tolerance)
30
+ # non-uniform:
31
+ # each value matches that in the original specification
32
+ # values (just) outside values have probability zero
33
+ #-----------------------------------------------------------------------------
34
+ class RandomValueSamplerTest < Test::Unit::TestCase
35
+
36
+ ###############
37
+ # ERROR INPUTS
38
+ ###############
39
+
40
def test_uniform_error_inputs
  # Guard assertion: if someone adds a new @uniform_xxx error fixture to
  # @uniform_error_inputs, this count check fails and reminds them to add a
  # matching case below. (The same pattern is used throughout this file.)
  # Expected value goes first so a failure message reads correctly.
  assert_equal(4, @uniform_error_inputs.length)

  [
    @uniform_set_error_empty,
    @uniform_array_error_empty,
    @uniform_range_error_empty,
    @uniform_single_error_negative
  ].each do |bad_input|
    assert_raises(RuntimeError) { RandomValueSampler.new_uniform bad_input }
  end
end
54
+
55
def test_non_uniform_error_inputs
  # Count guard: fails when the list of non-uniform error fixtures changes
  # size, as a reminder to extend the cases below. Expected value goes first.
  assert_equal(6, @nonuniform_error_inputs.length)

  [
    @nonuniform_hash_error_empty,
    @nonuniform_hash_error_negative,
    @nonuniform_hash_error_all_zeros,
    @nonuniform_arrayoftuples_error_empty,
    @nonuniform_arrayoftuples_error_negative,
    @nonuniform_arrayoftuples_error_all_zeros
  ].each do |bad_input|
    assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform bad_input }
  end
end
65
+
66
def test_uniform_exception_on_too_many_sample_unique
  # Each entry pairs a fixture with a lambda that yields the number of unique
  # samples to request. The lambda is evaluated inside the assert_raises
  # block, after the sampler has been constructed.
  overdraw_cases = [
    [@uniform_set_single_string,      lambda { |_v| 2 }],                 # singleton set
    [@uniform_array_single_numeric,   lambda { |v| v.length + 1 }],       # singleton array
    [@uniform_range_single_exclusive, lambda { |v| v.to_a.length + 1 }],  # singleton Range
    [@uniform_single_zero,            lambda { |_v| 2 }],                 # singleton value
    [@uniform_set_10_string,          lambda { |v| v.length + 1 }],       # size N set
    [@uniform_array_10_numeric,       lambda { |v| v.length + 1 }],       # size N array
    [@uniform_range_10_inclusive,     lambda { |v| v.to_a.length + 1 }],  # size N Range inclusive
    [@uniform_range_10_exclusive,     lambda { |v| v.to_a.length + 1 }],  # size N Range exclusive
    [@uniform_single_nonzero,         lambda { |v| v + 2 }]               # scalar defining Range size N
  ]

  overdraw_cases.each do |fixture, too_many|
    assert_raises(RuntimeError) do
      sampler = RandomValueSampler.new_uniform fixture
      sampler.sample_unique(too_many.call(fixture))
    end
  end
end
121
+
122
def test_non_uniform_exception_on_too_many_sample_unique
  # Each entry pairs a fixture with a lambda yielding the number of unique
  # samples to request. The request size is always derived from the same
  # fixture the sampler was built from, so the two can never drift apart.
  overdraw_cases = [
    [@nonuniform_hash_single_string,          lambda { |_w| 2 }],
    [@nonuniform_hash_10_sum_to_1,            lambda { |w| w.length + 1 }],
    [@nonuniform_arrayoftuples_single_string, lambda { |_w| 2 }],
    [@nonuniform_arrayoftuples_10_sum_to_1,   lambda { |w| w.length + 1 }]
  ]

  overdraw_cases.each do |weights, too_many|
    assert_raises(RuntimeError) do
      rsampler = RandomValueSampler.new_non_uniform weights
      rsampler.sample_unique(too_many.call(weights))
    end
  end
end
140
+
141
def test_negative_num_samples
  # A negative sample count must be rejected by both sampling entry points.
  [:sample, :sample_unique].each do |draw|
    assert_raises(RuntimeError) do
      RandomValueSampler.new_uniform([1,2,3,4]).public_send(draw, -1)
    end
  end
end
145
+
146
+ ###################################################
147
+ # VERIFYING VALIDITY, CONSISTENCY OF DISTRIBUTIONS
148
+ ###################################################
149
+
150
def test_uniform_probability_of
  # Runs verify_probability_of on every uniform fixture. Each count check is
  # a reminder guard: it fails when a fixture list changes size so that the
  # matching case gets added here. Expected value goes first.
  check = lambda do |values|
    verify_probability_of(RandomValueSampler.new_uniform(values), values)
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  assert_equal(2, @uniform_singles.length)
  [@uniform_single_zero,
   @uniform_single_nonzero].each(&check)
end
184
+
185
def test_non_uniform_probability_of
  # Runs verify_probability_of on every non-uniform fixture. The count checks
  # are reminder guards for newly added fixtures; expected value goes first.
  check = lambda do |weights|
    verify_probability_of(RandomValueSampler.new_non_uniform(weights), weights)
  end

  assert_equal(4, @nonuniform_hashes.length)
  [@nonuniform_hash_single_string,
   @nonuniform_hash_10_sum_to_1,
   @nonuniform_hash_10_sum_gt_1,
   @nonuniform_hash_10_sum_lt_1].each(&check)

  assert_equal(4, @nonuniform_arrayoftuples.length)
  [@nonuniform_arrayoftuples_single_string,
   @nonuniform_arrayoftuples_10_sum_to_1,
   @nonuniform_arrayoftuples_10_sum_gt_1,
   @nonuniform_arrayoftuples_10_sum_lt_1].each(&check)
end
206
+
207
def test_uniform_valid_distributions
  # Runs verify_distribution on a sampler built from every uniform fixture.
  # The count checks are reminder guards for newly added fixtures; expected
  # value goes first.
  check = lambda do |values|
    verify_distribution(RandomValueSampler.new_uniform(values))
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  assert_equal(2, @uniform_singles.length)
  [@uniform_single_zero,
   @uniform_single_nonzero].each(&check)
end
232
+
233
def test_non_uniform_valid_distributions
  # Runs verify_distribution on a sampler built from every non-uniform
  # fixture. The count checks are reminder guards; expected value goes first.
  check = lambda do |weights|
    verify_distribution(RandomValueSampler.new_non_uniform(weights))
  end

  assert_equal(4, @nonuniform_hashes.length)
  [@nonuniform_hash_single_string,
   @nonuniform_hash_10_sum_to_1,
   @nonuniform_hash_10_sum_gt_1,
   @nonuniform_hash_10_sum_lt_1].each(&check)

  assert_equal(4, @nonuniform_arrayoftuples.length)
  [@nonuniform_arrayoftuples_single_string,
   @nonuniform_arrayoftuples_10_sum_to_1,
   @nonuniform_arrayoftuples_10_sum_gt_1,
   @nonuniform_arrayoftuples_10_sum_lt_1].each(&check)
end
246
+
247
def test_uniform_values_match
  # Runs verify_values on every uniform fixture. The count checks are
  # reminder guards for newly added fixtures; expected value goes first.
  check = lambda do |values|
    verify_values(RandomValueSampler.new_uniform(values), values)
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  assert_equal(2, @uniform_singles.length)
  [@uniform_single_zero,
   @uniform_single_nonzero].each(&check)
end
281
+
282
def test_non_uniform_values_match
  # Runs verify_values on every non-uniform fixture. The count checks are
  # reminder guards for newly added fixtures; expected value goes first.
  check = lambda do |weights|
    verify_values(RandomValueSampler.new_non_uniform(weights), weights)
  end

  assert_equal(4, @nonuniform_hashes.length)
  [@nonuniform_hash_single_string,
   @nonuniform_hash_10_sum_to_1,
   @nonuniform_hash_10_sum_gt_1,
   @nonuniform_hash_10_sum_lt_1].each(&check)

  assert_equal(4, @nonuniform_arrayoftuples.length)
  [@nonuniform_arrayoftuples_single_string,
   @nonuniform_arrayoftuples_10_sum_to_1,
   @nonuniform_arrayoftuples_10_sum_gt_1,
   @nonuniform_arrayoftuples_10_sum_lt_1].each(&check)
end
303
+
304
def test_uniform_num_values
  # Runs verify_num_values on a sampler built from every uniform fixture.
  # The count checks are reminder guards; expected value goes first.
  check = lambda do |values|
    verify_num_values(RandomValueSampler.new_uniform(values))
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  assert_equal(2, @uniform_singles.length)
  [@uniform_single_zero,
   @uniform_single_nonzero].each(&check)
end
329
+
330
def test_non_uniform_num_values
  # Runs verify_num_values on a sampler built from every non-uniform fixture.
  # The count checks are reminder guards; expected value goes first.
  check = lambda do |weights|
    verify_num_values(RandomValueSampler.new_non_uniform(weights))
  end

  assert_equal(4, @nonuniform_hashes.length)
  [@nonuniform_hash_single_string,
   @nonuniform_hash_10_sum_to_1,
   @nonuniform_hash_10_sum_gt_1,
   @nonuniform_hash_10_sum_lt_1].each(&check)

  assert_equal(4, @nonuniform_arrayoftuples.length)
  [@nonuniform_arrayoftuples_single_string,
   @nonuniform_arrayoftuples_10_sum_to_1,
   @nonuniform_arrayoftuples_10_sum_gt_1,
   @nonuniform_arrayoftuples_10_sum_lt_1].each(&check)
end
343
+
344
+ # sample a bunch of times and make sure that all of the values that come back
345
+ # are in the set of valid raw values
346
+ #-----------------------------------------------------------------------------
347
def test_uniform_sample_values_are_valid
  # Runs verify_sample_values_are_valid on every uniform fixture. The count
  # checks are reminder guards for newly added fixtures; expected value goes
  # first.
  check = lambda do |values|
    verify_sample_values_are_valid(RandomValueSampler.new_uniform(values), values)
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  assert_equal(2, @uniform_singles.length)
  [@uniform_single_zero,
   @uniform_single_nonzero].each(&check)
end
381
+
382
def test_non_uniform_sample_values_are_valid
  # Runs verify_sample_values_are_valid on every non-uniform fixture. The
  # count checks are reminder guards; expected value goes first.
  check = lambda do |weights|
    verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(weights), weights)
  end

  assert_equal(4, @nonuniform_hashes.length)
  [@nonuniform_hash_single_string,
   @nonuniform_hash_10_sum_to_1,
   @nonuniform_hash_10_sum_gt_1,
   @nonuniform_hash_10_sum_lt_1].each(&check)

  assert_equal(4, @nonuniform_arrayoftuples.length)
  [@nonuniform_arrayoftuples_single_string,
   @nonuniform_arrayoftuples_10_sum_to_1,
   @nonuniform_arrayoftuples_10_sum_gt_1,
   @nonuniform_arrayoftuples_10_sum_lt_1].each(&check)
end
403
+
404
def test_uniform_sample_unique_values_are_valid
  # Runs verify_sample_unique_values_are_valid on every uniform fixture.
  #
  # This method must not share a name with test_uniform_sample_values_are_valid:
  # Ruby keeps only the last definition of a method, so a shared name would
  # silently drop one of the two tests.
  #
  # The count checks are reminder guards for newly added fixtures; expected
  # value goes first.
  check = lambda do |values|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(values), values)
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  assert_equal(2, @uniform_singles.length)
  [@uniform_single_zero,
   @uniform_single_nonzero].each(&check)
end
438
+
439
def test_non_uniform_sample_unique_values_are_valid
  # Runs verify_sample_unique_values_are_valid on every non-uniform fixture.
  #
  # This method must not share a name with
  # test_non_uniform_sample_values_are_valid: Ruby keeps only the last
  # definition of a method, so a shared name would silently drop one of the
  # two tests.
  #
  # The count checks are reminder guards; expected value goes first.
  check = lambda do |weights|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(weights), weights)
  end

  assert_equal(4, @nonuniform_hashes.length)
  [@nonuniform_hash_single_string,
   @nonuniform_hash_10_sum_to_1,
   @nonuniform_hash_10_sum_gt_1,
   @nonuniform_hash_10_sum_lt_1].each(&check)

  assert_equal(4, @nonuniform_arrayoftuples.length)
  [@nonuniform_arrayoftuples_single_string,
   @nonuniform_arrayoftuples_10_sum_to_1,
   @nonuniform_arrayoftuples_10_sum_gt_1,
   @nonuniform_arrayoftuples_10_sum_lt_1].each(&check)
end
460
+
461
+ ####################
462
+ # SAMPLING ACCURACY
463
+ ####################
464
+
465
def test_uniform_sampling_accuracy
  # Runs verify_distribution_accuracy on the uniform fixtures. The count
  # checks are reminder guards for newly added fixtures; expected value goes
  # first.
  check = lambda do |values|
    verify_distribution_accuracy(RandomValueSampler.new_uniform(values), values)
  end

  assert_equal(3, @uniform_sets.length)
  [@uniform_set_single_string,
   @uniform_set_10_string,
   @uniform_set_10_numeric].each(&check)

  assert_equal(3, @uniform_arrays.length)
  [@uniform_array_single_numeric,
   @uniform_array_10_string,
   @uniform_array_10_numeric].each(&check)

  assert_equal(4, @uniform_ranges.length)
  [@uniform_range_single_exclusive,
   @uniform_range_single_inclusive,
   @uniform_range_10_exclusive,
   @uniform_range_10_inclusive].each(&check)

  check.call(@uniform_single_zero)
  # avoiding low probability of single_nonzero: a literal 9 is used in its
  # place, which is also why there is no count guard on the singles here
  check.call(9)
end
499
+
500
+ # avoid super low probabilities cause they can easily cause "errors" when
501
+ # assessing distribution accuracy
502
def test_non_uniform_sampling_accuracy
  # Fixtures are handed to the sampler and to the checker as the same object.
  check_fixture = lambda do |weights|
    verify_distribution_accuracy(RandomValueSampler.new_non_uniform(weights),
                                 weights)
  end

  # Literal distributions are built twice, so the sampler and the checker
  # each receive their own separate object.
  check_literal = lambda do |build|
    verify_distribution_accuracy(RandomValueSampler.new_non_uniform(build.call),
                                 build.call)
  end

  check_fixture.call(@nonuniform_hash_single_string)
  check_literal.call(lambda { { "one" => 1, "two" => 2, "three" => 3 } })

  check_fixture.call(@nonuniform_arrayoftuples_single_string)
  check_literal.call(lambda { [["heavy", 90], ["light", 10]] })
end
513
+
514
+ #################
515
+ # HELPER METHODS
516
+ #################
517
+
518
+ # verifies that probability_of returns the expected probability for every
518
+ # value in the pmf values set (values outside the set are not checked here)
520
+ #-----------------------------------------------------------------------------
521
def verify_probability_of(rsampler, values)
  # Compare the sampler's reported probability for each raw value against
  # the probability derived from the raw input, within a small tolerance.
  extract_hash_of_vals_and_probs(values).each_pair do |value, expected_prob|
    reported_prob = rsampler.probability_of(value)
    assert_in_delta(expected_prob, reported_prob, 2e-4)
  end
end
528
+
529
+ # verify that a distribution is represented (sum of probability mass is
530
+ # (very, very, very, very close to) 1
531
+ #-----------------------------------------------------------------------------
532
def verify_distribution(rsampler)
  # Accumulate the probability of every value the sampler lists; the total
  # must be 1 within a small tolerance.
  mass_sum = rsampler.all_values.inject(0) do |running, value|
    running + rsampler.probability_of(value)
  end

  assert_in_delta(1.0, mass_sum, 2e-4)
end
540
+
541
+ # verifies the list of values returned by rsampler are in the values passed
542
+ # in as raw values
543
+ #-----------------------------------------------------------------------------
544
def verify_values(rsampler, values)
  # Order and container type are irrelevant: both sides are compared as Sets.
  # The raw-value set is built first, then the sampler's.
  assert_equal(Set.new(extract_array_of_values(values)),
               Set.new(rsampler.all_values))
end
550
+
551
+ # verifies the number of values indicated by rsampler. kinda dumb, just checks
552
+ # that it matches the length of the array returned by values (might catch
553
+ # some errors when using Ranges, for example)
554
+ #-----------------------------------------------------------------------------
555
def verify_num_values(rsampler)
  # The sampler's reported count must agree with the length of its own
  # value list.
  listed_count = rsampler.all_values.length
  assert_equal(listed_count, rsampler.num_values)
end
558
+
559
+ # verify after many iterations that all values returned by sampling are
560
+ # valid values for the rsampler. covers single and multiple samples.
561
+ #-----------------------------------------------------------------------------
562
def verify_sample_values_are_valid(rsampler, values)
  # Values with zero probability mass must never be drawn. `reject` builds a
  # new hash, so the caller's fixture is never modified even if the helper
  # hands back the very hash it was given.
  drawable = extract_hash_of_vals_and_probs(values).reject { |_val, prob| prob == 0 }
  valid_value_set = Set.new(drawable.keys)

  # single draws
  1000.times do
    sample = rsampler.sample
    assert(valid_value_set.include?(sample),
           "<#{sample}> is not a valid sample in raw values: <#{values.inspect}>")
  end

  # batches of 10 draws
  1000.times do
    rsampler.sample(10).each do |s|
      assert(valid_value_set.include?(s),
             "<#{s}> is not a valid multi-sample in raw values: <#{values.inspect}>")
    end
  end
end
581
+
582
+ # verify after many iterations that all values returned by sampling unique are
583
+ # valid values for the rsampler. covers single and multiple samples.
584
+ #-----------------------------------------------------------------------------
585
def verify_sample_unique_values_are_valid(rsampler, values)
  # Values with zero probability mass must never be drawn. `reject` builds a
  # new hash, so the caller's fixture is never modified even if the helper
  # hands back the very hash it was given.
  drawable = extract_hash_of_vals_and_probs(values).reject { |_val, prob| prob == 0 }
  valid_value_set = Set.new(drawable.keys)

  # never request more unique samples than there are drawable values
  num_multi_samples = [valid_value_set.length, 5].min

  # Every iteration works on a deep copy (Marshal round trip) of the sampler
  # -- presumably because sample_unique changes the sampler's state; TODO
  # confirm against RandomValueSampler.
  1000.times do
    test_rsampler = Marshal.load(Marshal.dump(rsampler))

    sample = test_rsampler.sample_unique
    assert(valid_value_set.include?(sample),
           "<#{sample}> is not a valid sample in raw values: <#{values.inspect}>")
  end

  1000.times do
    test_rsampler = Marshal.load(Marshal.dump(rsampler))

    if num_multi_samples > 1
      test_rsampler.sample_unique(num_multi_samples).each do |s|
        assert(valid_value_set.include?(s),
               "<#{s}> is not a valid multi-sample in raw values: <#{values.inspect}>")
      end
    else
      # a request for a single sample is treated as returning a bare value
      sample = test_rsampler.sample_unique(num_multi_samples)
      assert(valid_value_set.include?(sample),
             "<#{sample}> is not a valid multi-sample in raw values: <#{values.inspect}>")
    end
  end
end
616
+
617
+ # helper to convert whatever original data type we had into an array
618
+ #-----------------------------------------------------------------------------
619
def extract_array_of_values(values)
  # Returns the bare values of a raw distribution spec as an Array. The
  # argument itself is never modified; a flat Array is returned as-is.
  case values
  when Set, Range
    values.to_a
  when Array
    if values.first.is_a?(Array)
      # array of [value, mass] tuples: keep only the value of each tuple
      values.map { |tuple| tuple.first }
    else
      values
    end
  when Hash
    values.keys
  else
    # scalar N stands for the inclusive range 0..N
    (0..values).to_a
  end
end
636
+
637
+ # generate a hash of values => probabilities from raw data
638
+ #-----------------------------------------------------------------------------
639
def extract_hash_of_vals_and_probs(values)
  # Builds a new Hash of value => probability from a raw distribution spec.
  #
  # values - a Hash of value => mass, an Array of [value, mass] tuples, a
  #          Range / Set / flat Array of equally likely values, or a scalar
  #          N standing for the inclusive Range 0..N.
  #
  # Returns a freshly built Hash whose probabilities are the masses divided
  # by their total. The argument is never modified, so callers may keep
  # using their original fixture afterwards.

  # convert the single scalar case to a Range
  unless [Hash, Array, Range, Set].any? { |klass| values.is_a?(klass) }
    values = 0..values
  end

  raw_weights =
    if values.is_a?(Hash)
      values
    elsif values.is_a?(Array) && values.first.is_a?(Array)
      # Hash[] on the list of pairs keeps each value intact even when the
      # value is itself an Array (flattening the list would tear it apart).
      Hash[values]
    else
      # Range, Set or flat Array: every member gets the same weight
      members = values.to_a
      uniform_weight = 1.0 / members.length.to_f
      uniform = {}
      members.each { |member| uniform[member] = uniform_weight }
      uniform
    end

  total_mass = 0
  raw_weights.each_value { |weight| total_mass += weight }
  total_mass = total_mass.to_f

  # normalize into a separate hash so raw_weights (possibly the caller's own
  # Hash) is left untouched
  normalized = {}
  raw_weights.each_pair { |val, weight| normalized[val] = weight / total_mass }
  normalized
end
671
+
672
+ # sample a bunch from the distribution and compare the result to
673
+ # the original distribution. try sampling many times and making sure
674
+ # that the resulting frequencies are accurate within 10% (relative error).
675
+ # this is VERY approximate, and is really only able to catch
676
+ # egregious errors...and is a little susceptible to noise on small
677
+ # probabilities.
678
+ #
679
+ # NOTE: this only works if there are no duplicate values in the
680
+ # distribution, as this method uses a hash to store counts of samples.
681
+ #-----------------------------------------------------------------------------
682
def verify_distribution_accuracy(rsampler, values)
  # Draws a large batch of samples from rsampler and checks that the observed
  # frequency of every value is within 10% (relative error) of the
  # probability derived from the raw specification in values.
  #
  # rsampler - object responding to sample(n) with an enumerable of n samples
  # values   - the raw specification the sampler was built from
  #
  # Fails an assertion when a sampled value is not part of the distribution,
  # when a zero-probability value is sampled, or when a frequency is too far
  # from its expected probability.
  #
  # NOTE(review): with 50000 samples a value of probability 2**-9 is expected
  # only ~98 times, so sampling noise alone is close to the 10% tolerance;
  # spurious failures on very small probabilities are plausible.
  vals_and_probs = extract_hash_of_vals_and_probs(values)

  # sample a bunch and count frequency of each value; the default of 0 means
  # a value outside the distribution gets counted instead of blowing up on nil
  num_samples = 50000
  val_counts = Hash.new(0)
  rsampler.sample(num_samples).each { |v| val_counts[v] += 1 }

  unexpected = val_counts.keys.reject { |v| vals_and_probs.key?(v) }
  assert(unexpected.empty?,
         "sampled values <#{unexpected.inspect}> are not part of the " +
         "distribution: <#{vals_and_probs.keys.inspect}>")

  vals_and_probs.each_pair do |val, true_prob|
    observed = val_counts[val].to_f / num_samples.to_f

    if true_prob.zero?
      # a relative error is undefined here (division by zero); such a value
      # simply must never be drawn
      assert(observed.zero?,
             "<#{val}> has probability zero but was sampled with " +
             "frequency <#{observed}>")
      next
    end

    assert_in_delta((true_prob - observed) / true_prob,
                    0.0,
                    0.1,
                    "observed sample frequency (<#{observed}>) of " +
                    "<#{val}> doesn't appear to match true distribution " +
                    "(prob of <#{true_prob}>). It's possible that this was " +
                    "noise, so try again before assuming something's wrong")
  end
end
708
+
709
+ # cases to test:
710
+ # -------------
711
+ #
712
+ # uniform
713
+ # valid inputs
714
+ # Set
715
+ # array
716
+ # Range inclusive
717
+ # Range exclusive
718
+ # scalar
719
+ # edge cases
720
+ # single value
721
+ # set
722
+ # array
723
+ # 1..1
724
+ # 1...2
725
+ # 0 (converted to 0..0)
726
+ # invalid inputs
727
+ # empty set
728
+ # empty array
729
+ # nil
730
+ # string
731
+ # NOTE: though it should work fine in the class itself, to avoid having to handle
732
+ # lots of cases in the test code, we're not using arrays as the possible values
733
+ # in the distribution (makes it hard to distinguish between the array of tuples
734
+ # (non-uniform) and the array of values (uniform) cases.
735
+ #
736
+ # non-uniform
737
+ # valid inputs
738
+ # hash
739
+ # array of arrays
740
+ # edge case
741
+ # 1 entry
742
+ # for EACH case above:
743
+ #
744
+ # invalid inputs
745
+ # empty hash
746
+ # empty array
747
+ # array of scalars
748
+ # negative frequency count
749
+ # hash
750
+ # array
751
+ # non-empty but all counts == 0
752
+ # hash
753
+ # array
754
+
755
+ # create a set of test data to play with for each test
756
+ #
757
+ # naming conventions:
758
+ # <pmftype>_<datatype>_<case>
759
+ #
760
+ # where:
761
+ # pmftype is "uniform" or "nonuniform"
762
+ # datatype is "set", "array", "range", "scalar", "arrayoftuples", or "hash"
763
+ #
764
+ # where: case is one of the following:
765
+ # error_<condition>
766
+ # single_<type>
767
+ # 10_<type>
768
+ #
769
+ # where:
770
+ # condition is a description of the error case (e.g. "empty", "allzero"...)
771
+ # type is "numeric", "string" or "mixed"
772
+ #-----------------------------------------------------------------------------
773
def setup
  # Builds the fixtures shared by every test case as instance variables,
  # named <pmftype>_<datatype>_<case>, plus arrays grouping them by category
  # so that tests can iterate over every case of a kind.

  ##########
  # UNIFORM
  ##########
  array_of_ten_string = ['a','b','c','d','e','f','g','h','i','j']

  # valid inputs

  # Set.new needs an enumerable; a bare String is not Enumerable on Ruby 1.9+
  # and would raise ArgumentError, so wrap the single value in an array
  @uniform_set_single_string = Set.new(["one"])
  @uniform_set_10_string = Set.new(array_of_ten_string)
  @uniform_set_10_numeric = Set.new(3..12)

  @uniform_array_single_numeric = [22]
  @uniform_array_10_string = array_of_ten_string
  @uniform_array_10_numeric = (101...111).to_a

  @uniform_range_single_exclusive = 1...2
  @uniform_range_single_inclusive = 2..2
  @uniform_range_10_exclusive = 1...11
  @uniform_range_10_inclusive = -2..7

  @uniform_single_zero = 0
  @uniform_single_nonzero = 22

  @uniform_sets = [
    @uniform_set_single_string,
    @uniform_set_10_string,
    @uniform_set_10_numeric
  ]
  @uniform_arrays = [
    @uniform_array_single_numeric,
    @uniform_array_10_string,
    @uniform_array_10_numeric
  ]
  @uniform_ranges = [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ]
  @uniform_singles = [
    @uniform_single_zero,
    @uniform_single_nonzero
  ]

  # error inputs

  @uniform_set_error_empty = Set.new
  @uniform_array_error_empty = []
  @uniform_range_error_empty = 0..-1
  @uniform_single_error_negative = -1

  @uniform_error_inputs = [
    @uniform_set_error_empty,
    @uniform_array_error_empty,
    @uniform_range_error_empty,
    @uniform_single_error_negative
  ]

  ##############
  # NON-UNIFORM
  ##############

  # ten entries whose masses add up to exactly 1: 2**-9 .. 2**-1 sum to
  # 1 - 2**-9, and "the end" supplies the missing 2**-9
  hash_10_sum_to_1 = {}
  (-9..-1).each { |exp| hash_10_sum_to_1[exp] = 2**exp }
  hash_10_sum_to_1["the end"] = 2**-9

  # scaled variants: same keys, masses summing to 10 and to 1/10. each one is
  # built into its own hash so the two can't clobber one another
  hash_10_sum_gt_1 = {}
  hash_10_sum_lt_1 = {}
  hash_10_sum_to_1.each_pair do |k, v|
    hash_10_sum_gt_1[k] = v * 10
    hash_10_sum_lt_1[k] = v / 10
  end

  @nonuniform_hash_single_string = { "one_and_only" => 13 }
  @nonuniform_hash_10_sum_to_1 = hash_10_sum_to_1
  @nonuniform_hash_10_sum_gt_1 = hash_10_sum_gt_1
  @nonuniform_hash_10_sum_lt_1 = hash_10_sum_lt_1

  @nonuniform_arrayoftuples_single_string = { "one_and_only" => 13 }.to_a
  @nonuniform_arrayoftuples_10_sum_to_1 = hash_10_sum_to_1.to_a
  @nonuniform_arrayoftuples_10_sum_gt_1 = hash_10_sum_gt_1.to_a
  @nonuniform_arrayoftuples_10_sum_lt_1 = hash_10_sum_lt_1.to_a

  @nonuniform_hashes = [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ]

  @nonuniform_arrayoftuples = [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ]

  # error inputs

  @nonuniform_hash_error_empty = {}
  @nonuniform_hash_error_negative = { "negative" => -1 }
  @nonuniform_hash_error_all_zeros = { :one => 0, :two => 0, :three => 0 }

  @nonuniform_arrayoftuples_error_empty = {}.to_a
  @nonuniform_arrayoftuples_error_negative = { "negative" => -1 }.to_a
  @nonuniform_arrayoftuples_error_all_zeros = { :one => 0, :two => 0, :three => 0 }.to_a

  @nonuniform_error_inputs = [
    @nonuniform_hash_error_empty,
    @nonuniform_hash_error_negative,
    @nonuniform_hash_error_all_zeros,
    @nonuniform_arrayoftuples_error_empty,
    @nonuniform_arrayoftuples_error_negative,
    @nonuniform_arrayoftuples_error_all_zeros
  ]
end
889
+
890
+ end