random_value_sampler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,892 @@
1
+ require 'set'
2
+ require 'test/unit'
3
+
4
+ require 'random_value_sampler'
5
+
6
+ #
7
+ # rough outline of this file:
8
+ # * test cases: these just call helper methods to run tests on all of the
9
+ # data cases created below in the setup() method
10
+ # - error inputs
11
+ # - verifying distribution validity
12
+ # * helper methods: mostly verify_xxx() methods that are called by the
13
+ # test cases to compute test results (this is the code
14
+ # most important to review)
15
+ # * setup() method: the method called before each test case is run...to
16
+ # generate data for testing
17
+ #-------------------------------------------------------------------------------
18
+ #
19
+ # rough outline of tests:
20
+ #
21
+ # error inputs (invalid distribution specifications, invalid sample requests)
22
+ # for each valid input case, run the following tests:
23
+ # for EACH valid input case:
24
+ # confirm # values
25
+ # confirm the array of values returned meet specification
26
+ # confirm probability_of
27
+ # uniform:
28
+ # each value in set/array/range has the same probability
29
+ # (and they sum to 1 or within v. small tolerance)
30
+ # non-uniform:
31
+ # each value matches that in the original specification
32
+ # values (just) outside values have probability zero
33
+ #-----------------------------------------------------------------------------
34
+ class RandomValueSamplerTest < Test::Unit::TestCase
35
+
36
+ ###############
37
+ # ERROR INPUTS
38
+ ###############
39
+
40
+ def test_uniform_error_inputs
41
+ # this line just makes sure that we're running the test on each data
42
+ # case we create in the setup() method. the idea is that if someone adds
43
+ # a new @uniform_xxxxxx case, then they'd add it to the
44
+ # @uniform_error_inputs array, and this assertion would fail...reminding
45
+ # them to add an assert_raises call here for the new data case. (this
46
+ # pattern is repeated throughout the test cases in this file)
47
+ assert_equal(@uniform_error_inputs.length, 4)
48
+
49
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform @uniform_set_error_empty }
50
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform @uniform_array_error_empty }
51
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform @uniform_range_error_empty }
52
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform @uniform_single_error_negative }
53
+ end
54
+
55
+ def test_non_uniform_error_inputs
56
+ assert_equal(@nonuniform_error_inputs.length, 6)
57
+
58
+ assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform @nonuniform_hash_error_empty }
59
+ assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform @nonuniform_hash_error_negative }
60
+ assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform @nonuniform_hash_error_all_zeros }
61
+ assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform @nonuniform_arrayoftuples_error_empty }
62
+ assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform @nonuniform_arrayoftuples_error_negative }
63
+ assert_raises(RuntimeError) { RandomValueSampler.new_non_uniform @nonuniform_arrayoftuples_error_all_zeros }
64
+ end
65
+
66
+ def test_uniform_exception_on_too_many_sample_unique
67
+ # singleton set
68
+ assert_raises(RuntimeError) do
69
+ rsampler = RandomValueSampler.new_uniform @uniform_set_single_string
70
+ rsampler.sample_unique 2
71
+ end
72
+
73
+ # singleton array
74
+ assert_raises(RuntimeError) do
75
+ rsampler = RandomValueSampler.new_uniform @uniform_array_single_numeric
76
+ rsampler.sample_unique(@uniform_array_single_numeric.length + 1)
77
+ end
78
+
79
+ # singleton Range
80
+ assert_raises(RuntimeError) do
81
+ rsampler = RandomValueSampler.new_uniform @uniform_range_single_exclusive
82
+ rsampler.sample_unique(@uniform_range_single_exclusive.to_a.length + 1)
83
+ end
84
+
85
+ # singleton value
86
+ assert_raises(RuntimeError) do
87
+ rsampler = RandomValueSampler.new_uniform @uniform_single_zero
88
+ rsampler.sample_unique 2
89
+ end
90
+
91
+ # size N set
92
+ assert_raises(RuntimeError) do
93
+ rsampler = RandomValueSampler.new_uniform @uniform_set_10_string
94
+ rsampler.sample_unique(@uniform_set_10_string.length + 1)
95
+ end
96
+
97
+ # size N array
98
+ assert_raises(RuntimeError) do
99
+ rsampler = RandomValueSampler.new_uniform @uniform_array_10_numeric
100
+ rsampler.sample_unique(@uniform_array_10_numeric.length + 1)
101
+ end
102
+
103
+ # size N Range inclusive
104
+ assert_raises(RuntimeError) do
105
+ rsampler = RandomValueSampler.new_uniform @uniform_range_10_inclusive
106
+ rsampler.sample_unique(@uniform_range_10_inclusive.to_a.length + 1)
107
+ end
108
+
109
+ # size N Range exclusive
110
+ assert_raises(RuntimeError) do
111
+ rsampler = RandomValueSampler.new_uniform @uniform_range_10_exclusive
112
+ rsampler.sample_unique(@uniform_range_10_exclusive.to_a.length + 1)
113
+ end
114
+
115
+ # scalar defining Range size N
116
+ assert_raises(RuntimeError) do
117
+ rsampler = RandomValueSampler.new_uniform @uniform_single_nonzero
118
+ rsampler.sample_unique(@uniform_single_nonzero + 2)
119
+ end
120
+ end
121
+
122
+ def test_non_uniform_exception_on_too_many_sample_unique
123
+ assert_raises(RuntimeError) do
124
+ rsampler = RandomValueSampler.new_non_uniform @nonuniform_hash_single_string
125
+ rsampler.sample_unique 2
126
+ end
127
+ assert_raises(RuntimeError) do
128
+ rsampler = RandomValueSampler.new_non_uniform @nonuniform_hash_10_sum_to_1
129
+ rsampler.sample_unique(@nonuniform_hash_10_sum_to_1.length + 1)
130
+ end
131
+ assert_raises(RuntimeError) do
132
+ rsampler = RandomValueSampler.new_non_uniform @nonuniform_arrayoftuples_single_string
133
+ rsampler.sample_unique 2
134
+ end
135
+ assert_raises(RuntimeError) do
136
+ rsampler = RandomValueSampler.new_non_uniform @nonuniform_arrayoftuples_10_sum_to_1
137
+ rsampler.sample_unique(@nonuniform_arrayoftuples_10_sum_gt_1.length + 1)
138
+ end
139
+ end
140
+
141
+ def test_zero_or_negative_num_samples
142
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform([1,2,3,4]).sample(-1) }
143
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform([1,2,3,4]).sample_unique(-1) }
144
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform([1,2,3,4]).sample(0) }
145
+ assert_raises(RuntimeError) { RandomValueSampler.new_uniform([1,2,3,4]).sample_unique(0) }
146
+ end
147
+
148
+ ###################################################
149
+ # VERIFYING VALIDITY, CONSISTENCY OF DISTRIBUTIONS
150
+ ###################################################
151
+
152
+ def test_uniform_probability_of
153
+ assert_equal(@uniform_sets.length, 3)
154
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_set_single_string),
155
+ @uniform_set_single_string)
156
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_set_10_string),
157
+ @uniform_set_10_string)
158
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_set_10_numeric),
159
+ @uniform_set_10_numeric)
160
+
161
+ assert_equal(@uniform_arrays.length, 3)
162
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_array_single_numeric),
163
+ @uniform_array_single_numeric)
164
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_array_10_string),
165
+ @uniform_array_10_string)
166
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_array_10_numeric),
167
+ @uniform_array_10_numeric)
168
+
169
+ assert_equal(@uniform_ranges.length, 4)
170
+
171
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_range_single_exclusive),
172
+ @uniform_range_single_exclusive)
173
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_range_single_inclusive),
174
+ @uniform_range_single_inclusive)
175
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_range_10_exclusive),
176
+ @uniform_range_10_exclusive)
177
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_range_10_inclusive),
178
+ @uniform_range_10_inclusive)
179
+
180
+ assert_equal(@uniform_singles.length, 2)
181
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_single_zero),
182
+ @uniform_single_zero)
183
+ verify_probability_of(RandomValueSampler.new_uniform(@uniform_single_nonzero),
184
+ @uniform_single_nonzero)
185
+ end
186
+
187
+ def test_non_uniform_probability_of
188
+ assert_equal(@nonuniform_hashes.length, 4)
189
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string),
190
+ @nonuniform_hash_single_string)
191
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_to_1),
192
+ @nonuniform_hash_10_sum_to_1)
193
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_gt_1),
194
+ @nonuniform_hash_10_sum_gt_1)
195
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_lt_1),
196
+ @nonuniform_hash_10_sum_lt_1)
197
+
198
+ assert_equal(@nonuniform_arrayoftuples.length, 4)
199
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string),
200
+ @nonuniform_arrayoftuples_single_string)
201
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_to_1),
202
+ @nonuniform_arrayoftuples_10_sum_to_1)
203
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_gt_1),
204
+ @nonuniform_arrayoftuples_10_sum_gt_1)
205
+ verify_probability_of(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_lt_1),
206
+ @nonuniform_arrayoftuples_10_sum_lt_1)
207
+ end
208
+
209
+ def test_uniform_valid_distributions
210
+ assert_equal(@uniform_sets.length, 3)
211
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_set_single_string))
212
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_set_10_string))
213
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_set_10_numeric))
214
+
215
+
216
+ assert_equal(@uniform_arrays.length, 3)
217
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_array_single_numeric))
218
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_array_10_string))
219
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_array_10_numeric))
220
+
221
+
222
+ assert_equal(@uniform_ranges.length, 4)
223
+
224
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_range_single_exclusive))
225
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_range_single_inclusive))
226
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_range_10_exclusive))
227
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_range_10_inclusive))
228
+
229
+
230
+ assert_equal(@uniform_singles.length, 2)
231
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_single_zero))
232
+ verify_distribution(RandomValueSampler.new_uniform(@uniform_single_nonzero))
233
+ end
234
+
235
+ def test_non_uniform_valid_distributions
236
+ assert_equal(@nonuniform_hashes.length, 4)
237
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string))
238
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_to_1))
239
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_gt_1))
240
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_lt_1))
241
+
242
+ assert_equal(@nonuniform_arrayoftuples.length, 4)
243
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string))
244
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_to_1))
245
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_gt_1))
246
+ verify_distribution(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_lt_1))
247
+ end
248
+
249
+ def test_uniform_values_match
250
+ assert_equal(@uniform_sets.length, 3)
251
+ verify_values(RandomValueSampler.new_uniform(@uniform_set_single_string),
252
+ @uniform_set_single_string)
253
+ verify_values(RandomValueSampler.new_uniform(@uniform_set_10_string),
254
+ @uniform_set_10_string)
255
+ verify_values(RandomValueSampler.new_uniform(@uniform_set_10_numeric),
256
+ @uniform_set_10_numeric)
257
+
258
+ assert_equal(@uniform_arrays.length, 3)
259
+ verify_values(RandomValueSampler.new_uniform(@uniform_array_single_numeric),
260
+ @uniform_array_single_numeric)
261
+ verify_values(RandomValueSampler.new_uniform(@uniform_array_10_string),
262
+ @uniform_array_10_string)
263
+ verify_values(RandomValueSampler.new_uniform(@uniform_array_10_numeric),
264
+ @uniform_array_10_numeric)
265
+
266
+ assert_equal(@uniform_ranges.length, 4)
267
+
268
+ verify_values(RandomValueSampler.new_uniform(@uniform_range_single_exclusive),
269
+ @uniform_range_single_exclusive)
270
+ verify_values(RandomValueSampler.new_uniform(@uniform_range_single_inclusive),
271
+ @uniform_range_single_inclusive)
272
+ verify_values(RandomValueSampler.new_uniform(@uniform_range_10_exclusive),
273
+ @uniform_range_10_exclusive)
274
+ verify_values(RandomValueSampler.new_uniform(@uniform_range_10_inclusive),
275
+ @uniform_range_10_inclusive)
276
+
277
+ assert_equal(@uniform_singles.length, 2)
278
+ verify_values(RandomValueSampler.new_uniform(@uniform_single_zero),
279
+ @uniform_single_zero)
280
+ verify_values(RandomValueSampler.new_uniform(@uniform_single_nonzero),
281
+ @uniform_single_nonzero)
282
+ end
283
+
284
+ def test_non_uniform_values_match
285
+ assert_equal(@nonuniform_hashes.length, 4)
286
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string),
287
+ @nonuniform_hash_single_string)
288
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_to_1),
289
+ @nonuniform_hash_10_sum_to_1)
290
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_gt_1),
291
+ @nonuniform_hash_10_sum_gt_1)
292
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_lt_1),
293
+ @nonuniform_hash_10_sum_lt_1)
294
+
295
+ assert_equal(@nonuniform_arrayoftuples.length, 4)
296
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string),
297
+ @nonuniform_arrayoftuples_single_string)
298
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_to_1),
299
+ @nonuniform_arrayoftuples_10_sum_to_1)
300
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_gt_1),
301
+ @nonuniform_arrayoftuples_10_sum_gt_1)
302
+ verify_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_lt_1),
303
+ @nonuniform_arrayoftuples_10_sum_lt_1)
304
+ end
305
+
306
+ def test_uniform_num_values
307
+ assert_equal(@uniform_sets.length, 3)
308
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_set_single_string))
309
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_set_10_string))
310
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_set_10_numeric))
311
+
312
+
313
+ assert_equal(@uniform_arrays.length, 3)
314
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_array_single_numeric))
315
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_array_10_string))
316
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_array_10_numeric))
317
+
318
+
319
+ assert_equal(@uniform_ranges.length, 4)
320
+
321
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_range_single_exclusive))
322
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_range_single_inclusive))
323
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_range_10_exclusive))
324
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_range_10_inclusive))
325
+
326
+
327
+ assert_equal(@uniform_singles.length, 2)
328
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_single_zero))
329
+ verify_num_values(RandomValueSampler.new_uniform(@uniform_single_nonzero))
330
+ end
331
+
332
+ def test_non_uniform_num_values
333
+ assert_equal(@nonuniform_hashes.length, 4)
334
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string))
335
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_to_1))
336
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_gt_1))
337
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_lt_1))
338
+
339
+ assert_equal(@nonuniform_arrayoftuples.length, 4)
340
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string))
341
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_to_1))
342
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_gt_1))
343
+ verify_num_values(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_lt_1))
344
+ end
345
+
346
+ # sample a bunch of times and make sure that all of the values that come back
347
+ # are in the set of valid raw values
348
+ #-----------------------------------------------------------------------------
349
+ def test_uniform_sample_values_are_valid
350
+ assert_equal(@uniform_sets.length, 3)
351
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_set_single_string),
352
+ @uniform_set_single_string)
353
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_set_10_string),
354
+ @uniform_set_10_string)
355
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_set_10_numeric),
356
+ @uniform_set_10_numeric)
357
+
358
+ assert_equal(@uniform_arrays.length, 3)
359
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_array_single_numeric),
360
+ @uniform_array_single_numeric)
361
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_array_10_string),
362
+ @uniform_array_10_string)
363
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_array_10_numeric),
364
+ @uniform_array_10_numeric)
365
+
366
+ assert_equal(@uniform_ranges.length, 4)
367
+
368
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_single_exclusive),
369
+ @uniform_range_single_exclusive)
370
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_single_inclusive),
371
+ @uniform_range_single_inclusive)
372
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_10_exclusive),
373
+ @uniform_range_10_exclusive)
374
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_10_inclusive),
375
+ @uniform_range_10_inclusive)
376
+
377
+ assert_equal(@uniform_singles.length, 2)
378
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_single_zero),
379
+ @uniform_single_zero)
380
+ verify_sample_values_are_valid(RandomValueSampler.new_uniform(@uniform_single_nonzero),
381
+ @uniform_single_nonzero)
382
+ end
383
+
384
+ def test_non_uniform_sample_values_are_valid
385
+ assert_equal(@nonuniform_hashes.length, 4)
386
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string),
387
+ @nonuniform_hash_single_string)
388
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_to_1),
389
+ @nonuniform_hash_10_sum_to_1)
390
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_gt_1),
391
+ @nonuniform_hash_10_sum_gt_1)
392
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_lt_1),
393
+ @nonuniform_hash_10_sum_lt_1)
394
+
395
+ assert_equal(@nonuniform_arrayoftuples.length, 4)
396
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string),
397
+ @nonuniform_arrayoftuples_single_string)
398
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_to_1),
399
+ @nonuniform_arrayoftuples_10_sum_to_1)
400
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_gt_1),
401
+ @nonuniform_arrayoftuples_10_sum_gt_1)
402
+ verify_sample_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_lt_1),
403
+ @nonuniform_arrayoftuples_10_sum_lt_1)
404
+ end
405
+
406
+ def test_uniform_sample_values_are_valid
407
+ assert_equal(@uniform_sets.length, 3)
408
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_set_single_string),
409
+ @uniform_set_single_string)
410
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_set_10_string),
411
+ @uniform_set_10_string)
412
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_set_10_numeric),
413
+ @uniform_set_10_numeric)
414
+
415
+ assert_equal(@uniform_arrays.length, 3)
416
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_array_single_numeric),
417
+ @uniform_array_single_numeric)
418
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_array_10_string),
419
+ @uniform_array_10_string)
420
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_array_10_numeric),
421
+ @uniform_array_10_numeric)
422
+
423
+ assert_equal(@uniform_ranges.length, 4)
424
+
425
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_single_exclusive),
426
+ @uniform_range_single_exclusive)
427
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_single_inclusive),
428
+ @uniform_range_single_inclusive)
429
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_10_exclusive),
430
+ @uniform_range_10_exclusive)
431
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_range_10_inclusive),
432
+ @uniform_range_10_inclusive)
433
+
434
+ assert_equal(@uniform_singles.length, 2)
435
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_single_zero),
436
+ @uniform_single_zero)
437
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_uniform(@uniform_single_nonzero),
438
+ @uniform_single_nonzero)
439
+ end
440
+
441
+ def test_non_uniform_sample_values_are_valid
442
+ assert_equal(@nonuniform_hashes.length, 4)
443
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string),
444
+ @nonuniform_hash_single_string)
445
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_to_1),
446
+ @nonuniform_hash_10_sum_to_1)
447
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_gt_1),
448
+ @nonuniform_hash_10_sum_gt_1)
449
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_hash_10_sum_lt_1),
450
+ @nonuniform_hash_10_sum_lt_1)
451
+
452
+ assert_equal(@nonuniform_arrayoftuples.length, 4)
453
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string),
454
+ @nonuniform_arrayoftuples_single_string)
455
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_to_1),
456
+ @nonuniform_arrayoftuples_10_sum_to_1)
457
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_gt_1),
458
+ @nonuniform_arrayoftuples_10_sum_gt_1)
459
+ verify_sample_unique_values_are_valid(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_10_sum_lt_1),
460
+ @nonuniform_arrayoftuples_10_sum_lt_1)
461
+ end
462
+
463
+ ####################
464
+ # SAMPLING ACCURACY
465
+ ####################
466
+
467
+ def test_uniform_sampling_accuracy
468
+ assert_equal(@uniform_sets.length, 3)
469
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_set_single_string),
470
+ @uniform_set_single_string)
471
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_set_10_string),
472
+ @uniform_set_10_string)
473
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_set_10_numeric),
474
+ @uniform_set_10_numeric)
475
+
476
+ assert_equal(@uniform_arrays.length, 3)
477
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_array_single_numeric),
478
+ @uniform_array_single_numeric)
479
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_array_10_string),
480
+ @uniform_array_10_string)
481
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_array_10_numeric),
482
+ @uniform_array_10_numeric)
483
+
484
+ assert_equal(@uniform_ranges.length, 4)
485
+
486
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_range_single_exclusive),
487
+ @uniform_range_single_exclusive)
488
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_range_single_inclusive),
489
+ @uniform_range_single_inclusive)
490
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_range_10_exclusive),
491
+ @uniform_range_10_exclusive)
492
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_range_10_inclusive),
493
+ @uniform_range_10_inclusive)
494
+
495
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(@uniform_single_zero),
496
+ @uniform_single_zero)
497
+ # avoiding low probability of single_nonzero...
498
+ verify_distribution_accuracy(RandomValueSampler.new_uniform(9), 9)
499
+
500
+ end
501
+
502
+ # avoid super low probabilities cause they can easily cause "errors" when
503
+ # assessing distribution accuracy
504
+ def test_non_uniform_sampling_accuracy
505
+ verify_distribution_accuracy(RandomValueSampler.new_non_uniform(@nonuniform_hash_single_string),
506
+ @nonuniform_hash_single_string)
507
+ verify_distribution_accuracy(RandomValueSampler.new_non_uniform( { "one" => 1, "two" => 2, "three" => 3 } ),
508
+ { "one" => 1, "two" => 2, "three" => 3 } )
509
+
510
+ verify_distribution_accuracy(RandomValueSampler.new_non_uniform(@nonuniform_arrayoftuples_single_string),
511
+ @nonuniform_arrayoftuples_single_string)
512
+ verify_distribution_accuracy(RandomValueSampler.new_non_uniform( [["heavy", 90], ["light", 10]]),
513
+ [["heavy", 90], ["light", 10]])
514
+ end
515
+
516
+ #################
517
+ # HELPER METHODS
518
+ #################
519
+
520
+ # verifies that probability_of returns correct results for values in and out
521
+ # of pmf values set (should return 0 if outside set)
522
+ #-----------------------------------------------------------------------------
523
+ def verify_probability_of(rsampler, values)
524
+ vals_and_probs = extract_hash_of_vals_and_probs(values)
525
+
526
+ vals_and_probs.each_pair do |val, prob|
527
+ assert_in_delta(prob, rsampler.probability_of(val), 2e-4)
528
+ end
529
+ end
530
+
531
+ # verify that a distribution is represented (sum of probability mass is
532
+ # (very, very, very, very close to) 1
533
+ #-----------------------------------------------------------------------------
534
+ def verify_distribution(rsampler)
535
+ total_mass = 0
536
+ rsampler.all_values.each do |val|
537
+ total_mass += rsampler.probability_of(val)
538
+ end
539
+
540
+ assert_in_delta(1.0, total_mass, 2e-4)
541
+ end
542
+
543
+ # verifies the list of values returned by rsampler are in the values passed
544
+ # in as raw values
545
+ #-----------------------------------------------------------------------------
546
+ def verify_values(rsampler, values)
547
+ raw_val_set = Set.new(extract_array_of_values(values))
548
+ rsampler_val_set = Set.new(rsampler.all_values)
549
+
550
+ assert_equal(raw_val_set, rsampler_val_set)
551
+ end
552
+
553
+ # verifies the number of values indicated by rsampler. kinda dumb, just checks
554
+ # that it matches the length of the array returned by values (might catch
555
+ # some errors when using Ranges, for example)
556
+ #-----------------------------------------------------------------------------
557
+ def verify_num_values(rsampler)
558
+ assert_equal(rsampler.all_values.length, rsampler.num_values)
559
+ end
560
+
561
+ # verify after many iterations that all values returned by sampling are
562
+ # valid values for the rsampler. covers single and multiple samples.
563
+ #-----------------------------------------------------------------------------
564
+ def verify_sample_values_are_valid(rsampler, values)
565
+ vals_and_probs = extract_hash_of_vals_and_probs(values)
566
+ vals_and_probs.delete_if { |val, prob| prob == 0 }
567
+
568
+ valid_value_set = Set.new(vals_and_probs.keys)
569
+
570
+ (1..1000).each do
571
+ sample = rsampler.sample
572
+ assert(valid_value_set.include?(sample),
573
+ "<#{sample}> is not a valid sample in raw values: <#{values}>")
574
+ end
575
+
576
+ (1..1000).each do
577
+ rsampler.sample(10).each do |s|
578
+ assert(valid_value_set.include?(s),
579
+ "<#{s}> is not a valid multi-sample in raw values: <#{values}>")
580
+ end
581
+ end
582
+ end
583
+
584
+ # verify after many iterations that all values returned by sampling unique are
585
+ # valid values for the rsampler. covers single and multiple samples.
586
+ #-----------------------------------------------------------------------------
587
+ def verify_sample_unique_values_are_valid(rsampler, values)
588
+ vals_and_probs = extract_hash_of_vals_and_probs(values)
589
+ vals_and_probs.delete_if { |val, prob| prob == 0 }
590
+
591
+ valid_value_set = Set.new(vals_and_probs.keys)
592
+
593
+ num_multi_samples = [valid_value_set.length, 5].min
594
+
595
+ (1..1000).each do
596
+ test_rsampler = Marshal.load(Marshal.dump(rsampler))
597
+
598
+ sample = test_rsampler.sample_unique
599
+ assert(valid_value_set.include?(sample),
600
+ "<#{sample}> is not a valid sample in raw values: <#{values.inspect}>")
601
+ end
602
+
603
+ (1..1000).each do
604
+ test_rsampler = Marshal.load(Marshal.dump(rsampler))
605
+
606
+ if num_multi_samples > 1
607
+ test_rsampler.sample_unique(num_multi_samples).each do |s|
608
+ assert(valid_value_set.include?(s),
609
+ "<#{s}> is not a valid multi-sample in raw values: <#{values.inspect}>")
610
+ end
611
+ else
612
+ sample = test_rsampler.sample_unique(num_multi_samples)
613
+ assert(valid_value_set.include?(sample),
614
+ "<#{sample}> is not a valid multi-sample in raw values: <#{values.inspect}>")
615
+ end
616
+ end
617
+ end
618
+
619
+ # helper to convert whatever original data type we had into an array
620
+ #-----------------------------------------------------------------------------
621
+ def extract_array_of_values(values)
622
+ if values.is_a?(Set) || values.is_a?(Range)
623
+ values = values.to_a
624
+ elsif values.is_a?(Array)
625
+ if values.first.is_a?(Array)
626
+ # don't overwrite object, overwrite reference so that original object remains
627
+ # intact if needed
628
+ values = values.map { |val_and_pm| val_and_pm.first }
629
+ end # otherwise, don't need to do anything; already an array
630
+ elsif values.is_a?(Hash)
631
+ values = values.keys
632
+ else
633
+ values = (0..values).to_a
634
+ end
635
+
636
+ values
637
+ end
638
+
639
+ # generate a hash of values => probabilities from raw data
640
+ #-----------------------------------------------------------------------------
641
# values - Hash of value => mass, Array of [value, mass] tuples, Range, Set,
#          Array of values, or an Integer n standing for the Range 0..n.
#          Masses do not have to sum to 1.
#
# Returns a NEW Hash of value => probability whose probabilities are the
# masses divided by their total. The object passed in is never modified, so
# the same fixture can safely be reused after this call.
def extract_hash_of_vals_and_probs(values)
  # convert the single scalar case to a Range
  is_collection = [Hash, Array, Range, Set].any? { |klass| values.is_a?(klass) }
  values = 0..values unless is_collection

  masses = {}
  if values.is_a?(Hash)
    # copy rather than alias: normalizing below must not touch the caller's Hash
    masses.merge!(values)
  elsif values.is_a?(Array) && values.first.is_a?(Array)
    # walk the tuples one by one instead of flattening, so a value that is
    # itself an Array is kept intact as a key
    values.each { |val, mass| masses[val] = mass }
  else
    # uniform case (Range, Set, plain Array): every entry gets the same mass
    uniform_mass = 1.0 / values.to_a.length.to_f
    values.each { |val| masses[val] = uniform_mass }
  end

  # duplicates collapse into a single key above, so renormalize over what is
  # actually stored in the hash
  total_mass = masses.values.inject(0) { |sum, mass| sum + mass }.to_f

  vals_and_probs = {}
  masses.each_pair { |val, mass| vals_and_probs[val] = mass / total_mass }
  vals_and_probs
end
673
+
674
+ # sample a bunch from the distribution and compare the result to
675
+ # the original distribution. try sampling many times and making sure
676
+ # that the resulting frequencies are accurate within 10% (relative error).
677
+ # this is VERY approximate, and is really only able to catch
678
+ # egregious errors...and is a little susceptible to noise on small
679
+ # probabilities.
680
+ #
681
+ # NOTE: this only works if there are no duplicate values in the
682
+ # distribution, as this method uses a hash to store counts of samples.
683
+ #-----------------------------------------------------------------------------
684
+ def verify_distribution_accuracy(rsampler, values)
685
+ vals_and_probs = extract_hash_of_vals_and_probs(values)
686
+
687
+ val_counts = {}
688
+ vals_and_probs.keys.each { |val, prob| val_counts.merge! val => 0 }
689
+
690
+ # sample a bunch and count frequency of each value
691
+ num_samples = 50000
692
+ rsampler.sample(num_samples).each { |v| val_counts[v] = val_counts[v] + 1 }
693
+
694
+ # convert counts to probabilities
695
+ val_counts.each_pair do |val, count|
696
+ val_counts.merge! val => (count.to_f / num_samples.to_f)
697
+ end
698
+
699
+ vals_and_probs.each_pair do |val, true_prob|
700
+ assert_in_delta( (true_prob - val_counts[val]) / true_prob,
701
+ 0.0,
702
+ 0.1,
703
+ "observed sample frequency (<#{val_counts[val]}>) of " +
704
+ "<#{val}> doesn't appear to match true distribution " +
705
+ "(prob of <#{true_prob}>. It's possible that this was " +
706
+ "noise, so try again before assuming something's wrong")
707
+ end
708
+
709
+ end
710
+
711
+ # cases to test:
712
+ # -------------
713
+ #
714
+ # uniform
715
+ # valid inputs
716
+ # Set
717
+ # array
718
+ # Range inclusive
719
+ # Range exclusive
720
+ # scalar
721
+ # edge cases
722
+ # single value
723
+ # set
724
+ # array
725
+ # 1..1
726
+ # 1...2
727
+ # 0 (converted to 0..0)
728
+ # invalid inputs
729
+ # empty set
730
+ # empty array
731
+ # nil
732
+ # string
733
+ # NOTE: though it should work fine in the class itself, to avoid having to handle
734
+ # lots of cases in the test code, we're not using arrays as the possible values
735
+ # in the distribution (makes it hard to distinguish between the array of tuples
736
+ # (non-uniform) and the array of values (uniform) cases).
737
+ #
738
+ # non-uniform
739
+ # valid inputs
740
+ # hash
741
+ # array of arrays
742
+ # edge case
743
+ # 1 entry
744
+ # for EACH case above:
745
+ #
746
+ # invalid inputs
747
+ # empty hash
748
+ # empty array
749
+ # array of scalars
750
+ # negative frequency count
751
+ # hash
752
+ # array
753
+ # non-empty but all counts == 0
754
+ # hash
755
+ # array
756
+
757
+ # create a set of test data to play with for each test
758
+ #
759
+ # naming conventions:
760
+ # <pmftype>_<datatype>_<case>
761
+ #
762
+ # where:
763
+ # pmftype is "uniform" or "nonuniform"
764
+ # datatype is "set", "array", "range", "single" (a bare scalar), "arrayoftuples", or "hash"
765
+ #
766
+ # where: case is one of the following:
767
+ # error_<condition>
768
+ # single_<type>
769
+ # 10_<type>
770
+ #
771
+ # where:
772
+ # condition is a description of the error case (e.g. "empty", "allzero"...)
773
+ # type is "numeric", "string" or "mixed"
774
+ #-----------------------------------------------------------------------------
775
# Builds the fixture data for the tests: valid and invalid distribution
# specifications for both the uniform and the non-uniform case, stored in
# instance variables. The grouped lists (@uniform_sets, @uniform_arrays,
# @uniform_ranges, @uniform_singles, @uniform_error_inputs,
# @nonuniform_hashes, @nonuniform_arrayoftuples, @nonuniform_error_inputs)
# collect the individual fixtures of each kind.
def setup
  ##########
  # UNIFORM
  ##########
  array_of_ten_string = ['a','b','c','d','e','f','g','h','i','j']

  # valid inputs

  # Set.new expects an Enumerable; a bare String stopped being one in
  # Ruby 1.9, so wrap the single value in an Array
  @uniform_set_single_string = Set.new(["one"])
  @uniform_set_10_string = Set.new(array_of_ten_string)
  @uniform_set_10_numeric = Set.new(3..12)

  @uniform_array_single_numeric = [22]
  @uniform_array_10_string = array_of_ten_string
  @uniform_array_10_numeric = (101...111).to_a

  @uniform_range_single_exclusive = 1...2
  @uniform_range_single_inclusive = 2..2
  @uniform_range_10_exclusive = 1...11
  @uniform_range_10_inclusive = -2..7

  @uniform_single_zero = 0
  @uniform_single_nonzero = 22

  @uniform_sets = [
    @uniform_set_single_string,
    @uniform_set_10_string,
    @uniform_set_10_numeric
  ]
  @uniform_arrays = [
    @uniform_array_single_numeric,
    @uniform_array_10_string,
    @uniform_array_10_numeric
  ]
  @uniform_ranges = [
    @uniform_range_single_exclusive,
    @uniform_range_single_inclusive,
    @uniform_range_10_exclusive,
    @uniform_range_10_inclusive
  ]
  @uniform_singles = [
    @uniform_single_zero,
    @uniform_single_nonzero
  ]

  # error inputs

  @uniform_set_error_empty = Set.new
  @uniform_array_error_empty = []
  @uniform_range_error_empty = 0..-1
  @uniform_single_error_negative = -1

  @uniform_error_inputs = [
    @uniform_set_error_empty,
    @uniform_array_error_empty,
    @uniform_range_error_empty,
    @uniform_single_error_negative
  ]

  ##############
  # NON-UNIFORM
  ##############

  # ten entries whose masses are powers of two adding up to exactly 1
  hash_10_sum_to_1 = {}
  (-9..-1).each { |exp| hash_10_sum_to_1[exp] = 2**exp }
  hash_10_sum_to_1["the end"] = 2**-9

  # same keys, masses scaled up by 10 (total mass above 1)
  hash_10_sum_gt_1 = {}
  hash_10_sum_to_1.each_pair { |k, v| hash_10_sum_gt_1[k] = v * 10 }

  # same keys, masses scaled down by 10 (total mass below 1); the result is
  # written to the lt_1 hash so the gt_1 fixture keeps its scaled-up masses
  hash_10_sum_lt_1 = {}
  hash_10_sum_to_1.each_pair { |k, v| hash_10_sum_lt_1[k] = v / 10 }

  @nonuniform_hash_single_string = { "one_and_only" => 13 }
  @nonuniform_hash_10_sum_to_1 = hash_10_sum_to_1
  @nonuniform_hash_10_sum_gt_1 = hash_10_sum_gt_1
  @nonuniform_hash_10_sum_lt_1 = hash_10_sum_lt_1

  @nonuniform_arrayoftuples_single_string = { "one_and_only" => 13 }.to_a
  @nonuniform_arrayoftuples_10_sum_to_1 = hash_10_sum_to_1.to_a
  @nonuniform_arrayoftuples_10_sum_gt_1 = hash_10_sum_gt_1.to_a
  @nonuniform_arrayoftuples_10_sum_lt_1 = hash_10_sum_lt_1.to_a

  @nonuniform_hashes = [
    @nonuniform_hash_single_string,
    @nonuniform_hash_10_sum_to_1,
    @nonuniform_hash_10_sum_gt_1,
    @nonuniform_hash_10_sum_lt_1
  ]

  @nonuniform_arrayoftuples = [
    @nonuniform_arrayoftuples_single_string,
    @nonuniform_arrayoftuples_10_sum_to_1,
    @nonuniform_arrayoftuples_10_sum_gt_1,
    @nonuniform_arrayoftuples_10_sum_lt_1
  ]

  # error inputs

  @nonuniform_hash_error_empty = {}
  @nonuniform_hash_error_negative = { "negative" => -1 }
  @nonuniform_hash_error_all_zeros = { :one => 0, :two => 0, :three => 0 }

  @nonuniform_arrayoftuples_error_empty = {}.to_a
  @nonuniform_arrayoftuples_error_negative = { "negative" => -1 }.to_a
  @nonuniform_arrayoftuples_error_all_zeros = { :one => 0, :two => 0, :three => 0 }.to_a

  @nonuniform_error_inputs = [
    @nonuniform_hash_error_empty,
    @nonuniform_hash_error_negative,
    @nonuniform_hash_error_all_zeros,
    @nonuniform_arrayoftuples_error_empty,
    @nonuniform_arrayoftuples_error_negative,
    @nonuniform_arrayoftuples_error_all_zeros
  ]
end
891
+
892
+ end