aliastable 3.0.2 → 3.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/aliastable.gemspec +3 -3
- data/lib/{alias.rb → aliastable.rb} +24 -33
- data/test/test_alias.rb +14 -14
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64997be896934a0e70ba0c4776e26f08a68fae50
|
4
|
+
data.tar.gz: 7e7d8a38a21f8355be9c0e83559fb3e5689ea556
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e49595d9a6da320b2e22db5672f5fc4546a4919edc3915dbd6e9b1cec69beff947b16006f76a55727c147946dd08cec4f2aeaad4be31df6e9a606de27190ed6d
|
7
|
+
data.tar.gz: f696a2b2f0b5de5eb7b9932f4a121f0b1e94e988284eb0123e79c45bef8e59dee1c3b089ab5e24ddd448ab17e22acfe367cbb52e8b3130e7745307c3bd4ae638
|
data/aliastable.gemspec
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
# -*- ruby -*-
|
2
|
-
_VERSION = "3.0.2"
|
2
|
+
_VERSION = "3.0.3"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "aliastable"
|
6
6
|
s.version = _VERSION
|
7
|
-
s.date = "2015-
|
7
|
+
s.date = "2015-09-10"
|
8
8
|
s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
|
9
9
|
s.email = "pjs@alum.mit.edu"
|
10
10
|
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
|
11
11
|
s.author = "Paul J Sanchez"
|
12
12
|
s.files = %w[
|
13
13
|
aliastable.gemspec
|
14
|
-
lib/alias.rb
|
14
|
+
lib/aliastable.rb
|
15
15
|
Rakefile
|
16
16
|
test/infile.bad.1
|
17
17
|
test/infile.bad.2
|
@@ -12,55 +12,47 @@ class AliasTable
|
|
12
12
|
# Construct an alias table from a set of values and their associated
|
13
13
|
# probabilities. Values and their probabilities must be synchronized,
|
14
14
|
# i.e., they must be arrays of the same length. Values can be
|
15
|
-
# anything, but the probabilities must be positive numbers
|
16
|
-
# sum to one.
|
15
|
+
# anything, but the probabilities must be positive Rational numbers
|
16
|
+
# that sum to one.
|
17
17
|
#
|
18
18
|
# *Arguments*::
|
19
|
-
# - +x_set+ -> the set of values to generate
|
19
|
+
# - +x_set+ -> the set of values from which to generate.
|
20
20
|
# - +p_value+ -> the synchronized set of probabilities associated
|
21
21
|
# with the value set. These values should be Rationals to avoid
|
22
22
|
# rounding errors.
|
23
23
|
# *Raises*::
|
24
24
|
# - RuntimeError if +x_set+ and +p_value+s are different lengths.
|
25
|
-
# - RuntimeError if any +p_value+
|
26
|
-
# - RuntimeError if +p_value+ don't sum to one.
|
25
|
+
# - RuntimeError if any +p_value+ is not positive (zero or negative).
|
26
|
+
# - RuntimeError if +p_value+s don't sum to one. Rationals will avoid this.
|
27
27
|
#
|
28
|
-
def initialize(
|
29
|
-
if
|
30
|
-
|
28
|
+
def initialize(x_set, p_value)
|
29
|
+
if x_set.length != p_value.length
|
30
|
+
fail 'Args to AliasTable must be vectors of the same length.'
|
31
31
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
unless p_val.reduce(:+) == Rational(1)
|
38
|
-
raise "p_values must sum to 1.0"
|
39
|
-
end
|
40
|
-
@x = x_values.clone.freeze
|
32
|
+
fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
|
33
|
+
@p_primary = p_value.map(&:to_r)
|
34
|
+
fail 'p_values must sum to 1' unless @p_primary.reduce(:+) == Rational(1)
|
35
|
+
@x = x_set.clone.freeze
|
41
36
|
@alias = Array.new(@x.length)
|
42
|
-
|
43
|
-
|
44
|
-
deficit_set = []
|
45
|
-
surplus_set = []
|
46
|
-
|
47
|
-
unless p_val[i] == equiprob
|
48
|
-
(p_val[i] < equiprob ? deficit_set : surplus_set) << i
|
49
|
-
end
|
50
|
-
end
|
51
|
-
until deficit_set.empty? do
|
37
|
+
parity = Rational(1, @x.length)
|
38
|
+
group = @p_primary.each_index.group_by { |i| @p_primary[i] <=> parity }
|
39
|
+
deficit_set = group[-1]
|
40
|
+
surplus_set = group[1]
|
41
|
+
until deficit_set.empty?
|
52
42
|
deficit = deficit_set.pop
|
53
43
|
surplus = surplus_set.pop
|
54
|
-
@p_primary[
|
44
|
+
@p_primary[surplus] -= parity - @p_primary[deficit]
|
45
|
+
@p_primary[deficit] /= parity
|
55
46
|
@alias[deficit] = @x[surplus]
|
56
|
-
|
57
|
-
|
58
|
-
|
47
|
+
if @p_primary[surplus] == parity
|
48
|
+
@p_primary[surplus] = Rational(1)
|
49
|
+
else
|
50
|
+
(@p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
|
59
51
|
end
|
60
52
|
end
|
61
53
|
end
|
62
54
|
|
63
|
-
#
|
55
|
+
# Return a random outcome from this object's distribution.
|
64
56
|
# The generate method is O(1) time, but is not an inversion
|
65
57
|
# since two uniforms are used for each value that gets generated.
|
66
58
|
#
|
@@ -68,5 +60,4 @@ class AliasTable
|
|
68
60
|
column = rand(@x.length)
|
69
61
|
rand <= @p_primary[column] ? @x[column] : @alias[column]
|
70
62
|
end
|
71
|
-
|
72
63
|
end
|
data/test/test_alias.rb
CHANGED
@@ -1,42 +1,42 @@
|
|
1
1
|
#!/usr/bin/env ruby -w
|
2
2
|
|
3
|
-
|
3
|
+
require_relative '../lib/aliastable.rb'
|
4
4
|
|
5
5
|
nvars = 1_000_000
|
6
6
|
begin
|
7
|
-
at = AliasTable.new(
|
8
|
-
nvars.times {print at.generate, "\n"}
|
9
|
-
rescue
|
10
|
-
|
7
|
+
at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
|
8
|
+
nvars.times { print at.generate, "\n" }
|
9
|
+
rescue RuntimeError => e
|
10
|
+
p e
|
11
11
|
puts
|
12
12
|
end
|
13
|
-
Dir[
|
13
|
+
Dir['test/infile.*'].each do |f_name|
|
14
14
|
x = []
|
15
15
|
probs = []
|
16
|
-
f = File.open(f_name,
|
16
|
+
f = File.open(f_name, 'r')
|
17
17
|
counts = {}
|
18
18
|
expected_counts = {}
|
19
|
-
while line = f.gets
|
19
|
+
while line = f.gets
|
20
20
|
inputs = line.strip.split(/[\s,;:]+/)
|
21
21
|
x << inputs[0]
|
22
22
|
counts[inputs[0]] = 0
|
23
23
|
probs << inputs[1].to_r
|
24
24
|
n_hat = probs[-1] * nvars
|
25
|
-
half_width = 2.5 * Math
|
25
|
+
half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
|
26
26
|
expected_counts[inputs[0]] = [n_hat, half_width]
|
27
27
|
end
|
28
28
|
f.close
|
29
29
|
begin
|
30
30
|
at = AliasTable.new(x, probs)
|
31
|
-
nvars.times {counts[at.generate] += 1}
|
32
|
-
puts
|
31
|
+
nvars.times { counts[at.generate] += 1 }
|
32
|
+
puts 'All values should be in range almost always:'
|
33
33
|
counts.each_key do |k|
|
34
34
|
printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
|
35
|
-
|
35
|
+
k, expected_counts[k][1], expected_counts[k][0] - counts[k]
|
36
36
|
end
|
37
37
|
puts
|
38
|
-
rescue
|
39
|
-
|
38
|
+
rescue RuntimeError => e
|
39
|
+
p e
|
40
40
|
puts
|
41
41
|
end
|
42
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aliastable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.2
|
4
|
+
version: 3.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul J Sanchez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: If a categorical distribution has k distinct values, traditional approaches
|
14
14
|
will require O(k) work to pick an outcome with the correct probabilities. This
|
@@ -21,7 +21,7 @@ extra_rdoc_files: []
|
|
21
21
|
files:
|
22
22
|
- Rakefile
|
23
23
|
- aliastable.gemspec
|
24
|
-
- lib/alias.rb
|
24
|
+
- lib/aliastable.rb
|
25
25
|
- test/infile.bad.1
|
26
26
|
- test/infile.bad.2
|
27
27
|
- test/infile.bad.3
|
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
49
|
version: '0'
|
50
50
|
requirements: []
|
51
51
|
rubyforge_project:
|
52
|
-
rubygems_version: 2.4.5
|
52
|
+
rubygems_version: 2.4.5.1
|
53
53
|
signing_key:
|
54
54
|
specification_version: 4
|
55
55
|
summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
|