aliastable 3.0.2 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/aliastable.gemspec +3 -3
- data/lib/{alias.rb → aliastable.rb} +24 -33
- data/test/test_alias.rb +14 -14
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64997be896934a0e70ba0c4776e26f08a68fae50
|
4
|
+
data.tar.gz: 7e7d8a38a21f8355be9c0e83559fb3e5689ea556
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e49595d9a6da320b2e22db5672f5fc4546a4919edc3915dbd6e9b1cec69beff947b16006f76a55727c147946dd08cec4f2aeaad4be31df6e9a606de27190ed6d
|
7
|
+
data.tar.gz: f696a2b2f0b5de5eb7b9932f4a121f0b1e94e988284eb0123e79c45bef8e59dee1c3b089ab5e24ddd448ab17e22acfe367cbb52e8b3130e7745307c3bd4ae638
|
data/aliastable.gemspec
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
# -*- ruby -*-
|
2
|
-
_VERSION = "3.0.2"
|
2
|
+
_VERSION = "3.0.3"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "aliastable"
|
6
6
|
s.version = _VERSION
|
7
|
-
s.date = "2015-
|
7
|
+
s.date = "2015-09-10"
|
8
8
|
s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
|
9
9
|
s.email = "pjs@alum.mit.edu"
|
10
10
|
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
|
11
11
|
s.author = "Paul J Sanchez"
|
12
12
|
s.files = %w[
|
13
13
|
aliastable.gemspec
|
14
|
-
lib/alias.rb
|
14
|
+
lib/aliastable.rb
|
15
15
|
Rakefile
|
16
16
|
test/infile.bad.1
|
17
17
|
test/infile.bad.2
|
data/lib/{alias.rb → aliastable.rb}
@@ -12,55 +12,47 @@ class AliasTable
|
|
12
12
|
# Construct an alias table from a set of values and their associated
|
13
13
|
# probabilities. Values and their probabilities must be synchronized,
|
14
14
|
# i.e., they must be arrays of the same length. Values can be
|
15
|
-
# anything, but the probabilities must be positive numbers
|
16
|
-
# sum to one.
|
15
|
+
# anything, but the probabilities must be positive Rational numbers
|
16
|
+
# that sum to one.
|
17
17
|
#
|
18
18
|
# *Arguments*::
|
19
|
-
# - +x_set+ -> the set of values to generate
|
19
|
+
# - +x_set+ -> the set of values from which to generate.
|
20
20
|
# - +p_value+ -> the synchronized set of probabilities associated
|
21
21
|
# with the value set. These values should be Rationals to avoid
|
22
22
|
# rounding errors.
|
23
23
|
# *Raises*::
|
24
24
|
# - RuntimeError if +x_set+ and +p_value+s are different lengths.
|
25
|
-
# - RuntimeError if any +p_value+
|
26
|
-
# - RuntimeError if +p_value+ don't sum to one.
|
25
|
+
# - RuntimeError if any +p_value+ is negative.
|
26
|
+
# - RuntimeError if +p_value+s don't sum to one. Rationals will avoid this.
|
27
27
|
#
|
28
|
-
def initialize(
|
29
|
-
if
|
30
|
-
|
28
|
+
def initialize(x_set, p_value)
|
29
|
+
if x_set.length != p_value.length
|
30
|
+
fail 'Args to AliasTable must be vectors of the same length.'
|
31
31
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
unless p_val.reduce(:+) == Rational(1)
|
38
|
-
raise "p_values must sum to 1.0"
|
39
|
-
end
|
40
|
-
@x = x_values.clone.freeze
|
32
|
+
fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
|
33
|
+
@p_primary = p_value.map(&:to_r)
|
34
|
+
fail 'p_values must sum to 1' unless @p_primary.reduce(:+) == Rational(1)
|
35
|
+
@x = x_set.clone.freeze
|
41
36
|
@alias = Array.new(@x.length)
|
42
|
-
|
43
|
-
|
44
|
-
deficit_set = []
|
45
|
-
surplus_set = []
|
46
|
-
|
47
|
-
unless p_val[i] == equiprob
|
48
|
-
(p_val[i] < equiprob ? deficit_set : surplus_set) << i
|
49
|
-
end
|
50
|
-
end
|
51
|
-
until deficit_set.empty? do
|
37
|
+
parity = Rational(1, @x.length)
|
38
|
+
group = @p_primary.each_index.group_by { |i| @p_primary[i] <=> parity }
|
39
|
+
deficit_set = group[-1]
|
40
|
+
surplus_set = group[1]
|
41
|
+
until deficit_set.empty?
|
52
42
|
deficit = deficit_set.pop
|
53
43
|
surplus = surplus_set.pop
|
54
|
-
@p_primary[
|
44
|
+
@p_primary[surplus] -= parity - @p_primary[deficit]
|
45
|
+
@p_primary[deficit] /= parity
|
55
46
|
@alias[deficit] = @x[surplus]
|
56
|
-
|
57
|
-
|
58
|
-
|
47
|
+
if @p_primary[surplus] == parity
|
48
|
+
@p_primary[surplus] = Rational(1)
|
49
|
+
else
|
50
|
+
(@p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
|
59
51
|
end
|
60
52
|
end
|
61
53
|
end
|
62
54
|
|
63
|
-
#
|
55
|
+
# Return a random outcome from this object's distribution.
|
64
56
|
# The generate method is O(1) time, but is not an inversion
|
65
57
|
# since two uniforms are used for each value that gets generated.
|
66
58
|
#
|
@@ -68,5 +60,4 @@ class AliasTable
|
|
68
60
|
column = rand(@x.length)
|
69
61
|
rand <= @p_primary[column] ? @x[column] : @alias[column]
|
70
62
|
end
|
71
|
-
|
72
63
|
end
|
data/test/test_alias.rb
CHANGED
@@ -1,42 +1,42 @@
|
|
1
1
|
#!/usr/bin/env ruby -w
|
2
2
|
|
3
|
-
|
3
|
+
require_relative '../lib/aliastable.rb'
|
4
4
|
|
5
5
|
nvars = 1_000_000
|
6
6
|
begin
|
7
|
-
at = AliasTable.new(
|
8
|
-
nvars.times {print at.generate, "\n"}
|
9
|
-
rescue
|
10
|
-
|
7
|
+
at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
|
8
|
+
nvars.times { print at.generate, "\n" }
|
9
|
+
rescue RuntimeError => e
|
10
|
+
p e
|
11
11
|
puts
|
12
12
|
end
|
13
|
-
Dir[
|
13
|
+
Dir['test/infile.*'].each do |f_name|
|
14
14
|
x = []
|
15
15
|
probs = []
|
16
|
-
f = File.open(f_name,
|
16
|
+
f = File.open(f_name, 'r')
|
17
17
|
counts = {}
|
18
18
|
expected_counts = {}
|
19
|
-
while line = f.gets
|
19
|
+
while line = f.gets
|
20
20
|
inputs = line.strip.split(/[\s,;:]+/)
|
21
21
|
x << inputs[0]
|
22
22
|
counts[inputs[0]] = 0
|
23
23
|
probs << inputs[1].to_r
|
24
24
|
n_hat = probs[-1] * nvars
|
25
|
-
half_width = 2.5 * Math
|
25
|
+
half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
|
26
26
|
expected_counts[inputs[0]] = [n_hat, half_width]
|
27
27
|
end
|
28
28
|
f.close
|
29
29
|
begin
|
30
30
|
at = AliasTable.new(x, probs)
|
31
|
-
nvars.times {counts[at.generate] += 1}
|
32
|
-
puts
|
31
|
+
nvars.times { counts[at.generate] += 1 }
|
32
|
+
puts 'All values should be in range almost always:'
|
33
33
|
counts.each_key do |k|
|
34
34
|
printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
|
35
|
-
|
35
|
+
k, expected_counts[k][1], expected_counts[k][0] - counts[k]
|
36
36
|
end
|
37
37
|
puts
|
38
|
-
rescue
|
39
|
-
|
38
|
+
rescue RuntimeError => e
|
39
|
+
p e
|
40
40
|
puts
|
41
41
|
end
|
42
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aliastable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.2
|
4
|
+
version: 3.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul J Sanchez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: If a categorical distribution has k distinct values, traditional approaches
|
14
14
|
will require O(k) work to pick an outcome with the correct probabilities. This
|
@@ -21,7 +21,7 @@ extra_rdoc_files: []
|
|
21
21
|
files:
|
22
22
|
- Rakefile
|
23
23
|
- aliastable.gemspec
|
24
|
-
- lib/alias.rb
|
24
|
+
- lib/aliastable.rb
|
25
25
|
- test/infile.bad.1
|
26
26
|
- test/infile.bad.2
|
27
27
|
- test/infile.bad.3
|
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
49
|
version: '0'
|
50
50
|
requirements: []
|
51
51
|
rubyforge_project:
|
52
|
-
rubygems_version: 2.4.5
|
52
|
+
rubygems_version: 2.4.5.1
|
53
53
|
signing_key:
|
54
54
|
specification_version: 4
|
55
55
|
summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
|