random_value_sampler 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,890 @@
1
+ require 'set'
2
+ require 'test/unit'
3
+
4
+ require 'random_value_sampler'
5
+
6
+ #
7
+ # rough outline of this file:
8
+ # * test cases: these just call helper methods to run tests on all of the
9
+ # data cases created below in the setup() method
10
+ # - error inputs
11
+ # - verifying distribution validity
12
+ # * helper methods: mostly verify_xxx() methods that are called by the
13
+ # test cases to compute test results (this is the code
14
+ # most important to review)
15
+ # * setup() method: the method called before each test case is run...to
16
+ # generate data for testing
17
+ #-------------------------------------------------------------------------------
18
+ #
19
+ # rough outline of tests:
20
+ #
21
+ # error inputs (invalid distribution specifications, invalid sample requests)
22
+ # for each valid input case, run the following tests:
23
+ # for EACH valid input case:
24
+ # confirm # values
25
+ # confirm the array of values returned meet specification
26
+ # confirm probability_of
27
+ # uniform:
28
+ # each value in set/array/range has the same probability
29
+ # (and they sum to 1 or within v. small tolerance)
30
+ # non-uniform:
31
+ # each value matches that in the original specification
32
+ # values (just) outside values have probability zero
33
+ #-----------------------------------------------------------------------------
34
+ class RandomValueSamplerTest < Test::Unit::TestCase
35
+
36
+ ###############
37
+ # ERROR INPUTS
38
+ ###############
39
+
40
# Every invalid uniform specification must be rejected with a RuntimeError.
def test_uniform_error_inputs
  # Tripwire: if a new error case is added to @uniform_error_inputs (built in
  # setup()), this count stops matching and reminds the author to list the
  # new case below as well. The same pattern recurs throughout this file.
  assert_equal(@uniform_error_inputs.length, 4)

  [
    @uniform_set_error_empty,
    @uniform_array_error_empty,
    @uniform_range_error_empty,
    @uniform_single_error_negative
  ].each do |bad_input|
    assert_raises(RuntimeError) { RandomValueSampler.new_uniform(bad_input) }
  end
end
54
+
55
# Every invalid non-uniform specification must be rejected with a
# RuntimeError.
def test_non_uniform_error_inputs
  # Tripwire on the number of error cases; extend the list below when this
  # count changes.
  assert_equal(@nonuniform_error_inputs.length, 6)

  [
    @nonuniform_hash_error_empty,
    @nonuniform_hash_error_negative,
    @nonuniform_hash_error_all_zeros,
    @nonuniform_arrayoftuples_error_empty,
    @nonuniform_arrayoftuples_error_negative,
    @nonuniform_arrayoftuples_error_all_zeros
  ].each do |bad_input|
    assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform(bad_input) }
  end
end
65
+
66
# Asking a uniform sampler for more unique samples than it has values must
# raise a RuntimeError.
def test_uniform_exception_on_too_many_sample_unique
  # Each strategy turns a raw specification into a request that is too large.
  # They are lambdas so the count is still computed inside assert_raises,
  # after the sampler has been built.
  always_two      = lambda { |_raw| 2 }
  past_length     = lambda { |collection| collection.length + 1 }
  past_range_size = lambda { |range| range.to_a.length + 1 }
  past_scalar     = lambda { |scalar| scalar + 2 }

  [
    [@uniform_set_single_string,      always_two],      # singleton set
    [@uniform_array_single_numeric,   past_length],     # singleton array
    [@uniform_range_single_exclusive, past_range_size], # singleton Range
    [@uniform_single_zero,            always_two],      # singleton value
    [@uniform_set_10_string,          past_length],     # size N set
    [@uniform_array_10_numeric,       past_length],     # size N array
    [@uniform_range_10_inclusive,     past_range_size], # size N Range inclusive
    [@uniform_range_10_exclusive,     past_range_size], # size N Range exclusive
    [@uniform_single_nonzero,         past_scalar]      # scalar defining Range size N
  ].each do |raw, too_many|
    assert_raises(RuntimeError) do
      rsampler = RandomValueSampler.new_uniform(raw)
      rsampler.sample_unique(too_many.call(raw))
    end
  end
end
121
+
122
# Asking a non-uniform sampler for more unique samples than it has values
# must raise a RuntimeError.
#
# Each request size is derived from the same specification the sampler was
# built from, so the request is always exactly one more than that
# specification's entry count.
def test_non_uniform_exception_on_too_many_sample_unique
  # single-entry specifications: two unique samples is one too many
  [@nonuniform_hash_single_string, @nonuniform_arrayoftuples_single_string].each do |raw|
    assert_raises(RuntimeError) do
      rsampler = RandomValueSampler.new_non_uniform(raw)
      rsampler.sample_unique 2
    end
  end

  # multi-entry specifications: request one more than the number of entries
  [@nonuniform_hash_10_sum_to_1, @nonuniform_arrayoftuples_10_sum_to_1].each do |raw|
    assert_raises(RuntimeError) do
      rsampler = RandomValueSampler.new_non_uniform(raw)
      rsampler.sample_unique(raw.length + 1)
    end
  end
end
140
+
141
# A negative sample count must raise a RuntimeError from both sample and
# sample_unique.
def test_negative_num_samples
  assert_raises(RuntimeError) do
    rsampler = RandomValueSampler.new_uniform([1,2,3,4])
    rsampler.sample(-1)
  end

  assert_raises(RuntimeError) do
    rsampler = RandomValueSampler.new_uniform([1,2,3,4])
    rsampler.sample_unique(-1)
  end
end
145
+
146
+ ###################################################
147
+ # VERIFYING VALIDITY, CONSISTENCY OF DISTRIBUTIONS
148
+ ###################################################
149
+
150
# Runs verify_probability_of on every valid uniform data case. The
# assert_equal before each group is a tripwire on the number of cases in
# that group.
def test_uniform_probability_of
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_probability_of(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_probability_of(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_probability_of(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_singles.length, 2)
  [@uniform_single_zero, @uniform_single_nonzero].each do |raw|
    verify_probability_of(RandomValueSampler.new_uniform(raw), raw)
  end
end
184
+
185
# Runs verify_probability_of on every valid non-uniform data case (hash
# specifications first, then array-of-tuples specifications).
def test_non_uniform_probability_of
  assert_equal(@nonuniform_hashes.length, 4)
  [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ].each do |raw|
    verify_probability_of(RandomValueSampler.new_non_uniform(raw), raw)
  end

  assert_equal(@nonuniform_arrayoftuples.length, 4)
  [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ].each do |raw|
    verify_probability_of(RandomValueSampler.new_non_uniform(raw), raw)
  end
end
206
+
207
# Runs verify_distribution on a sampler built from every valid uniform data
# case.
def test_uniform_valid_distributions
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_distribution(RandomValueSampler.new_uniform(raw))
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_distribution(RandomValueSampler.new_uniform(raw))
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_distribution(RandomValueSampler.new_uniform(raw))
  end

  assert_equal(@uniform_singles.length, 2)
  [@uniform_single_zero, @uniform_single_nonzero].each do |raw|
    verify_distribution(RandomValueSampler.new_uniform(raw))
  end
end
232
+
233
# Runs verify_distribution on a sampler built from every valid non-uniform
# data case.
def test_non_uniform_valid_distributions
  assert_equal(@nonuniform_hashes.length, 4)
  [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ].each do |raw|
    verify_distribution(RandomValueSampler.new_non_uniform(raw))
  end

  assert_equal(@nonuniform_arrayoftuples.length, 4)
  [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ].each do |raw|
    verify_distribution(RandomValueSampler.new_non_uniform(raw))
  end
end
246
+
247
# Runs verify_values on every valid uniform data case, comparing the
# sampler's reported values with the raw specification.
def test_uniform_values_match
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_values(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_values(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_values(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_singles.length, 2)
  [@uniform_single_zero, @uniform_single_nonzero].each do |raw|
    verify_values(RandomValueSampler.new_uniform(raw), raw)
  end
end
281
+
282
# Runs verify_values on every valid non-uniform data case.
def test_non_uniform_values_match
  assert_equal(@nonuniform_hashes.length, 4)
  [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ].each do |raw|
    verify_values(RandomValueSampler.new_non_uniform(raw), raw)
  end

  assert_equal(@nonuniform_arrayoftuples.length, 4)
  [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ].each do |raw|
    verify_values(RandomValueSampler.new_non_uniform(raw), raw)
  end
end
303
+
304
# Runs verify_num_values on a sampler built from every valid uniform data
# case.
def test_uniform_num_values
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_num_values(RandomValueSampler.new_uniform(raw))
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_num_values(RandomValueSampler.new_uniform(raw))
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_num_values(RandomValueSampler.new_uniform(raw))
  end

  assert_equal(@uniform_singles.length, 2)
  [@uniform_single_zero, @uniform_single_nonzero].each do |raw|
    verify_num_values(RandomValueSampler.new_uniform(raw))
  end
end
329
+
330
# Runs verify_num_values on a sampler built from every valid non-uniform
# data case.
def test_non_uniform_num_values
  assert_equal(@nonuniform_hashes.length, 4)
  [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ].each do |raw|
    verify_num_values(RandomValueSampler.new_non_uniform(raw))
  end

  assert_equal(@nonuniform_arrayoftuples.length, 4)
  [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ].each do |raw|
    verify_num_values(RandomValueSampler.new_non_uniform(raw))
  end
end
343
+
344
+ # sample a bunch of times and make sure that all of the values that come back
345
+ # are in the set of valid raw values
346
+ #-----------------------------------------------------------------------------
347
def test_uniform_sample_values_are_valid
  # each group is preceded by a tripwire on the number of cases it holds
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_sample_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_sample_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_sample_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_singles.length, 2)
  [@uniform_single_zero, @uniform_single_nonzero].each do |raw|
    verify_sample_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end
end
381
+
382
# Runs verify_sample_values_are_valid on every valid non-uniform data case.
def test_non_uniform_sample_values_are_valid
  assert_equal(@nonuniform_hashes.length, 4)
  [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ].each do |raw|
    verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(raw), raw)
  end

  assert_equal(@nonuniform_arrayoftuples.length, 4)
  [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ].each do |raw|
    verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(raw), raw)
  end
end
403
+
404
# Runs verify_sample_unique_values_are_valid on every valid uniform data
# case.
#
# This method carries its own name (note the "_unique_") because Ruby keeps
# only the last definition of a method name within a class: a test that
# shares its name with an earlier test silently replaces it, and the earlier
# test never runs.
def test_uniform_sample_unique_values_are_valid
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_singles.length, 2)
  [@uniform_single_zero, @uniform_single_nonzero].each do |raw|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(raw), raw)
  end
end
438
+
439
# Runs verify_sample_unique_values_are_valid on every valid non-uniform data
# case.
#
# This method carries its own name (note the "_unique_") because Ruby keeps
# only the last definition of a method name within a class: a test that
# shares its name with an earlier test silently replaces it, and the earlier
# test never runs.
def test_non_uniform_sample_unique_values_are_valid
  assert_equal(@nonuniform_hashes.length, 4)
  [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ].each do |raw|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(raw), raw)
  end

  assert_equal(@nonuniform_arrayoftuples.length, 4)
  [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ].each do |raw|
    verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(raw), raw)
  end
end
460
+
461
+ ####################
462
+ # SAMPLING ACCURACY
463
+ ####################
464
+
465
# Runs verify_distribution_accuracy on the valid uniform data cases.
def test_uniform_sampling_accuracy
  assert_equal(@uniform_sets.length, 3)
  [@uniform_set_single_string, @uniform_set_10_string, @uniform_set_10_numeric].each do |raw|
    verify_distribution_accuracy(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_arrays.length, 3)
  [@uniform_array_single_numeric, @uniform_array_10_string, @uniform_array_10_numeric].each do |raw|
    verify_distribution_accuracy(RandomValueSampler.new_uniform(raw), raw)
  end

  assert_equal(@uniform_ranges.length, 4)
  [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ].each do |raw|
    verify_distribution_accuracy(RandomValueSampler.new_uniform(raw), raw)
  end

  zero_sampler = RandomValueSampler.new_uniform(@uniform_single_zero)
  verify_distribution_accuracy(zero_sampler, @uniform_single_zero)

  # avoiding low probability of single_nonzero...
  nine_sampler = RandomValueSampler.new_uniform(9)
  verify_distribution_accuracy(nine_sampler, 9)
end
499
+
500
+ # avoid super low probabilities cause they can easily cause "errors" when
501
+ # assessing distribution accuracy
502
def test_non_uniform_sampling_accuracy
  single_hash_sampler = RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string)
  verify_distribution_accuracy(single_hash_sampler, @nonuniform_hash_single_string)

  # the sampler and the expectation each get their own literal, so neither
  # side can be affected by the other's handling of the object
  three_way_sampler = RandomValueSampler.new_non_uniform({ "one" => 1, "two" => 2, "three" => 3 })
  verify_distribution_accuracy(three_way_sampler, { "one" => 1, "two" => 2, "three" => 3 })

  single_tuple_sampler = RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string)
  verify_distribution_accuracy(single_tuple_sampler, @nonuniform_arrayoftuples_single_string)

  lopsided_sampler = RandomValueSampler.new_non_uniform([["heavy", 90], ["light", 10]])
  verify_distribution_accuracy(lopsided_sampler, [["heavy", 90], ["light", 10]])
end
513
+
514
+ #################
515
+ # HELPER METHODS
516
+ #################
517
+
518
+ # verifies that probability_of returns correct results for values in and out
519
+ # of pmf values set (should return 0 if outside set)
520
+ #-----------------------------------------------------------------------------
521
def verify_probability_of(rsampler, values)
  # expected probabilities are derived from the raw specification; only the
  # values that appear in that specification are checked here
  extract_hash_of_vals_and_probs(values).each do |val, expected_prob|
    reported_prob = rsampler.probability_of(val)
    assert_in_delta(expected_prob, reported_prob, 2e-4)
  end
end
528
+
529
+ # verify that a distribution is represented (sum of probability mass is
530
+ # (very, very, very, very close to) 1
531
+ #-----------------------------------------------------------------------------
532
def verify_distribution(rsampler)
  # accumulate the probability reported for each value the sampler lists
  total_mass = rsampler.all_values.inject(0) do |mass, val|
    mass + rsampler.probability_of(val)
  end

  assert_in_delta(1.0, total_mass, 2e-4)
end
540
+
541
+ # verifies the list of values returned by rsampler are in the values passed
542
+ # in as raw values
543
+ #-----------------------------------------------------------------------------
544
def verify_values(rsampler, values)
  # compare as sets: ordering is not considered
  assert_equal(Set.new(extract_array_of_values(values)),
               Set.new(rsampler.all_values))
end
550
+
551
+ # verifies the number of values indicated by rsampler. kinda dumb, just checks
552
+ # that it matches the length of the array returned by values (might catch
553
+ # some errors when using Ranges, for example)
554
+ #-----------------------------------------------------------------------------
555
def verify_num_values(rsampler)
  listed_count = rsampler.all_values.length
  assert_equal(listed_count, rsampler.num_values)
end
558
+
559
+ # verify after many iterations that all values returned by sampling are
560
+ # valid values for the rsampler. covers single and multiple samples.
561
+ #-----------------------------------------------------------------------------
562
def verify_sample_values_are_valid(rsampler, values)
  weights = extract_hash_of_vals_and_probs(values)
  # a value with zero probability is not an acceptable draw
  weights.delete_if { |_val, prob| prob == 0 }
  allowed = Set.new(weights.keys)

  # one value per call
  1000.times do
    drawn = rsampler.sample
    assert(allowed.include?(drawn),
           "<#{drawn}> is not a valid sample in raw values: <#{values}>")
  end

  # ten values per call
  1000.times do
    rsampler.sample(10).each do |drawn|
      assert(allowed.include?(drawn),
             "<#{drawn}> is not a valid multi-sample in raw values: <#{values}>")
    end
  end
end
581
+
582
+ # verify after many iterations that all values returned by sampling unique are
583
+ # valid values for the rsampler. covers single and multiple samples.
584
+ #-----------------------------------------------------------------------------
585
def verify_sample_unique_values_are_valid(rsampler, values)
  weights = extract_hash_of_vals_and_probs(values)
  # a value with zero probability is not an acceptable draw
  weights.delete_if { |_val, prob| prob == 0 }
  allowed = Set.new(weights.keys)

  # request at most five unique values, fewer when fewer are available
  batch_size = [allowed.length, 5].min

  # one unique value per call; every iteration works on a Marshal round-trip
  # copy of the sampler rather than on the sampler passed in
  1000.times do
    fresh_sampler = Marshal.load(Marshal.dump(rsampler))

    drawn = fresh_sampler.sample_unique
    assert(allowed.include?(drawn),
           "<#{drawn}> is not a valid sample in raw values: <#{values.inspect}>")
  end

  # several unique values per call
  1000.times do
    fresh_sampler = Marshal.load(Marshal.dump(rsampler))

    if batch_size <= 1
      # the result is checked directly rather than iterated in this case
      drawn = fresh_sampler.sample_unique(batch_size)
      assert(allowed.include?(drawn),
             "<#{drawn}> is not a valid multi-sample in raw values: <#{values.inspect}>")
    else
      fresh_sampler.sample_unique(batch_size).each do |drawn|
        assert(allowed.include?(drawn),
               "<#{drawn}> is not a valid multi-sample in raw values: <#{values.inspect}>")
      end
    end
  end
end
616
+
617
+ # helper to convert whatever original data type we had into an array
618
+ #-----------------------------------------------------------------------------
619
def extract_array_of_values(values)
  case values
  when Set, Range
    values.to_a
  when Array
    # an array of [value, weight] tuples yields just the values (as a new
    # array); a plain array is handed back untouched
    values.first.is_a?(Array) ? values.map { |tuple| tuple.first } : values
  when Hash
    values.keys
  else
    # anything else is treated as a scalar N standing for 0..N
    (0..values).to_a
  end
end
636
+
637
+ # generate a hash of values => probabilities from raw data
638
+ #-----------------------------------------------------------------------------
639
# Accepted shapes for +values+:
#   * Hash of value => weight
#   * Array of [value, weight] tuples
#   * Range, Set or plain Array of values (every value weighted equally)
#   * any other object N, treated as the Range 0..N
#
# Returns a new Hash of value => probability, with the weights rescaled so
# that they sum to 1. The object passed in is never modified, so callers may
# freely mutate the result (e.g. with delete_if).
def extract_hash_of_vals_and_probs(values)
  # convert the single scalar case to a Range
  unless [Hash, Array, Range, Set].any? { |shape| values.is_a?(shape) }
    values = 0..values
  end

  weights =
    if values.is_a?(Hash)
      # copy so that the caller's hash is left untouched
      values.dup
    elsif values.is_a?(Array) && values.first.is_a?(Array)
      # pair up each tuple as-is; flattening would break apart a value that
      # is itself an array
      Hash[values]
    else
      members = values.to_a
      equal_share = 1.0 / members.length.to_f
      members.each_with_object({}) { |member, acc| acc[member] = equal_share }
    end

  total_mass = weights.values.inject(0) { |mass, weight| mass + weight }.to_f

  normalized = {}
  weights.each_pair { |val, weight| normalized[val] = weight / total_mass }
  normalized
end
671
+
672
# Samples a bunch from the distribution and compares the observed
# frequencies to the original distribution: each value's observed frequency
# must be within 10% (relative error) of its true probability. Values whose
# true probability is zero must never be drawn, and the sampler must not
# return anything outside the distribution.
#
# This is VERY approximate, and is really only able to catch egregious
# errors...and is a little susceptible to noise on small probabilities.
#
# NOTE: this only works if there are no duplicate values in the
# distribution, as this method uses a hash to store counts of samples.
#
# rsampler - object responding to sample(n) with an enumerable of n samples
# values   - the raw specification the sampler was built from
#-----------------------------------------------------------------------------
def verify_distribution_accuracy(rsampler, values)
  vals_and_probs = extract_hash_of_vals_and_probs(values)

  # sample a bunch and count frequency of each value; the default of 0 keeps
  # an unexpected sample from blowing up with nil + 1 before we can report it
  num_samples = 50000
  val_counts = Hash.new(0)
  rsampler.sample(num_samples).each { |v| val_counts[v] += 1 }

  unexpected = val_counts.keys - vals_and_probs.keys
  assert(unexpected.empty?,
         "sampler returned values that are not in the distribution: " +
         "<#{unexpected.inspect}>")

  vals_and_probs.each_pair do |val, true_prob|
    if true_prob == 0
      # relative error is undefined (0/0) here; just require zero hits
      assert_equal(0, val_counts[val],
                   "<#{val}> has probability zero but was sampled " +
                   "<#{val_counts[val]}> times")
      next
    end

    # convert count to an observed probability
    observed_prob = val_counts[val].to_f / num_samples.to_f

    assert_in_delta( (true_prob - observed_prob) / true_prob,
                     0.0,
                     0.1,
                     "observed sample frequency (<#{observed_prob}>) of " +
                     "<#{val}> doesn't appear to match true distribution " +
                     "(prob of <#{true_prob}>). It's possible that this was " +
                     "noise, so try again before assuming something's wrong")
  end
end
708
+
709
+ # cases to test:
710
+ # -------------
711
+ #
712
+ # uniform
713
+ # valid inputs
714
+ # Set
715
+ # array
716
+ # Range inclusive
717
+ # Range exclusive
718
+ # scalar
719
+ # edge cases
720
+ # single value
721
+ # set
722
+ # array
723
+ # 1..1
724
+ # 1...2
725
+ # 0 (converted to 0..0)
726
+ # invalid inputs
727
+ # empty set
728
+ # empty array
729
+ # nil
730
+ # string
731
+ # NOTE: though it should work fine in the class itself, to avoid having to handle
732
+ # lots of cases in the test code, we're not using arrays as the possible values
733
+ # in the distribution (makes it hard to distinguish between the array of tuples
734
+ # (non-uniform) and the array of values (uniform) cases).
735
+ #
736
+ # non-uniform
737
+ # valid inputs
738
+ # hash
739
+ # array of arrays
740
+ # edge case
741
+ # 1 entry
742
+ # for EACH case above:
743
+ #
744
+ # invalid inputs
745
+ # empty hash
746
+ # empty array
747
+ # array of scalars
748
+ # negative frequency count
749
+ # hash
750
+ # array
751
+ # non-empty but all counts == 0
752
+ # hash
753
+ # array
754
+
755
+ # create a set of test data to play with for each test
756
+ #
757
+ # naming conventions:
758
+ # <pmftype>_<datatype>_<case>
759
+ #
760
+ # where:
761
+ # pmftype is "uniform" or "nonuniform"
762
+ # datatype is "set", "array", "range", "scalar", "arrayoftuples", or "hash"
763
+ #
764
+ # where: case is one of the following:
765
+ # error_<condition>
766
+ # single_<type>
767
+ # 10_<type>
768
+ #
769
+ # where:
770
+ # condition is a description of the error case (e.g. "empty", "allzero"...)
771
+ # type is "numeric", "string" or "mixed"
772
+ #-----------------------------------------------------------------------------
773
def setup
  # Runs before every test case and (re)builds all fixtures as instance
  # variables, grouped into arrays that the test cases iterate over.

  ##########
  # UNIFORM
  ##########
  array_of_ten_string = ['a','b','c','d','e','f','g','h','i','j']

  # valid inputs

  # the string is wrapped in an array because Set.new needs an enumerable;
  # String is not Enumerable from Ruby 1.9 on, so Set.new("one") raises there
  @uniform_set_single_string = Set.new(["one"])
  @uniform_set_10_string = Set.new(array_of_ten_string)
  @uniform_set_10_numeric = Set.new(3..12)

  @uniform_array_single_numeric = [22]
  @uniform_array_10_string = array_of_ten_string
  @uniform_array_10_numeric = (101...111).to_a

  @uniform_range_single_exclusive = 1...2
  @uniform_range_single_inclusive = 2..2
  @uniform_range_10_exclusive = 1...11
  @uniform_range_10_inclusive = -2..7

  @uniform_single_zero = 0
  @uniform_single_nonzero = 22

  @uniform_sets = [
    @uniform_set_single_string,
    @uniform_set_10_string,
    @uniform_set_10_numeric
  ]
  @uniform_arrays = [
    @uniform_array_single_numeric,
    @uniform_array_10_string,
    @uniform_array_10_numeric
  ]
  @uniform_ranges = [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ]
  @uniform_singles = [
    @uniform_single_zero,
    @uniform_single_nonzero
  ]

  # error inputs

  @uniform_set_error_empty = Set.new
  @uniform_array_error_empty = []
  @uniform_range_error_empty = 0..-1
  @uniform_single_error_negative = -1

  @uniform_error_inputs = [
    @uniform_set_error_empty,
    @uniform_array_error_empty,
    @uniform_range_error_empty,
    @uniform_single_error_negative
  ]

  ##############
  # NON-UNIFORM
  ##############

  # masses 2**-9 .. 2**-1 plus one extra 2**-9 add up to exactly 1
  hash_10_sum_to_1 = {}
  (-9..-1).each { |exp| hash_10_sum_to_1.merge! exp => 2**exp }
  hash_10_sum_to_1.merge! "the end" => 2**-9

  # same keys, every mass scaled up (total of 10)
  hash_10_sum_gt_1 = hash_10_sum_to_1.clone
  hash_10_sum_gt_1.each_pair { |k,v| hash_10_sum_gt_1[k] = v*10 }

  # same keys, every mass scaled down (total of 1/10); the scaled masses
  # must be written into the lt_1 hash itself, not into the gt_1 hash
  hash_10_sum_lt_1 = hash_10_sum_to_1.clone
  hash_10_sum_lt_1.each_pair { |k,v| hash_10_sum_lt_1[k] = v/10 }

  @nonuniform_hash_single_string = { "one_and_only" => 13 }
  @nonuniform_hash_10_sum_to_1 = hash_10_sum_to_1
  @nonuniform_hash_10_sum_gt_1 = hash_10_sum_gt_1
  @nonuniform_hash_10_sum_lt_1 = hash_10_sum_lt_1

  @nonuniform_arrayoftuples_single_string = { "one_and_only" => 13 }.to_a
  @nonuniform_arrayoftuples_10_sum_to_1 = hash_10_sum_to_1.to_a
  @nonuniform_arrayoftuples_10_sum_gt_1 = hash_10_sum_gt_1.to_a
  @nonuniform_arrayoftuples_10_sum_lt_1 = hash_10_sum_lt_1.to_a

  @nonuniform_hashes = [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ]

  @nonuniform_arrayoftuples = [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ]

  # error inputs

  @nonuniform_hash_error_empty = {}
  @nonuniform_hash_error_negative = { "negative" => -1 }
  @nonuniform_hash_error_all_zeros = { :one => 0, :two => 0, :three => 0 }

  @nonuniform_arrayoftuples_error_empty = {}.to_a
  @nonuniform_arrayoftuples_error_negative = { "negative" => -1 }.to_a
  @nonuniform_arrayoftuples_error_all_zeros = { :one => 0, :two => 0, :three => 0 }.to_a

  @nonuniform_error_inputs = [
    @nonuniform_hash_error_empty,
    @nonuniform_hash_error_negative,
    @nonuniform_hash_error_all_zeros,
    @nonuniform_arrayoftuples_error_empty,
    @nonuniform_arrayoftuples_error_negative,
    @nonuniform_arrayoftuples_error_all_zeros
  ]
end
889
+
890
+ end