aliastable 3.0.3 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/aliastable.gemspec +7 -13
- data/lib/aliastable.rb +41 -24
- metadata +13 -20
- data/Rakefile +0 -8
- data/test/infile.bad.1 +0 -5
- data/test/infile.bad.2 +0 -4
- data/test/infile.bad.3 +0 -3
- data/test/infile.good.1 +0 -4
- data/test/infile.good.2 +0 -4
- data/test/infile.good.3 +0 -22
- data/test/test_alias.rb +0 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 88cae2e262098e88436d41789056946a36ec55270d4e52c33242f5018b687595
|
4
|
+
data.tar.gz: e6b9c4befb4950137807729b171178b0210c794693a1045dbd94ea10606aa593
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 294e9171f62c9602f1eb3fe9886daeb1fb899431d906e26a33d95c0af4398bad147a62b7e2eee8f7d52a736dd4aa57a67bbf2e5b767ed847817eae59409b7bcf
|
7
|
+
data.tar.gz: 545e97af416b1d7ad20f81444811343eafb0d0ec74a30ce2b5b754cfb4336d8aefb337b907fa4b1b7453da796b5d4b48da941b9840bfeb878c3bff4bc7f614e8
|
data/aliastable.gemspec
CHANGED
@@ -1,26 +1,20 @@
|
|
1
1
|
# -*- ruby -*-
|
2
|
-
_VERSION = "3.0.3"
|
2
|
+
_VERSION = "4.0.0"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "aliastable"
|
6
6
|
s.version = _VERSION
|
7
|
-
s.date = "
|
7
|
+
s.date = "2023-01-21"
|
8
8
|
s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
|
9
9
|
s.email = "pjs@alum.mit.edu"
|
10
|
-
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
|
10
|
+
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities. Table generation requires O(k) time, but subsequent generation is done in O(1) time."
|
11
11
|
s.author = "Paul J Sanchez"
|
12
|
+
s.homepage = "https://bitbucket.org/paul_j_sanchez/aliastable"
|
12
13
|
s.files = %w[
|
13
14
|
aliastable.gemspec
|
14
15
|
lib/aliastable.rb
|
15
|
-
Rakefile
|
16
|
-
test/infile.bad.1
|
17
|
-
test/infile.bad.2
|
18
|
-
test/infile.bad.3
|
19
|
-
test/infile.good.1
|
20
|
-
test/infile.good.2
|
21
|
-
test/infile.good.3
|
22
|
-
test/test_alias.rb
|
23
16
|
]
|
24
|
-
s.required_ruby_version = '>=
|
25
|
-
s.license = '
|
17
|
+
s.required_ruby_version = '>= 2.6.0'
|
18
|
+
s.license = 'MIT'
|
19
|
+
s.metadata["homepage_uri"] = s.homepage
|
26
20
|
end
|
data/lib/aliastable.rb
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
# probabilities.
|
10
10
|
#
|
11
11
|
class AliasTable
|
12
|
+
include Enumerable
|
12
13
|
# Construct an alias table from a set of values and their associated
|
13
14
|
# probabilities. Values and their probabilities must be synchronized,
|
14
15
|
# i.e., they must be arrays of the same length. Values can be
|
@@ -26,38 +27,54 @@ class AliasTable
|
|
26
27
|
# - RuntimeError if +p_value+s don't sum to one. Rationals will avoid this.
|
27
28
|
#
|
28
29
|
def initialize(x_set, p_value)
|
29
|
-
if x_set.
|
30
|
-
fail 'Args to AliasTable must be vectors of the same length.'
|
31
|
-
end
|
30
|
+
fail 'x_set & p_value must have same length.' if x_set.size != p_value.size
|
32
31
|
fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
|
33
|
-
|
34
|
-
fail 'p_values must sum to 1' unless
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
32
|
+
p_primary = p_value.map(&:rationalize)
|
33
|
+
fail 'p_values must sum to 1' unless p_primary.reduce(:+) == Rational(1)
|
34
|
+
x = x_set.clone.freeze
|
35
|
+
len = x.length
|
36
|
+
col_alias = Array.new(len)
|
37
|
+
parity = Rational(1, len)
|
38
|
+
group = p_primary.each_index.group_by { |i| p_primary[i] <=> parity }
|
39
39
|
deficit_set = group[-1]
|
40
40
|
surplus_set = group[1]
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
41
|
+
if deficit_set.nil?
|
42
|
+
@enum = Enumerator.new { |y| loop { y << x[rand(len)] } }.lazy
|
43
|
+
else
|
44
|
+
until deficit_set.empty?
|
45
|
+
deficit = deficit_set.pop
|
46
|
+
surplus = surplus_set.pop
|
47
|
+
p_primary[surplus] -= parity - p_primary[deficit]
|
48
|
+
p_primary[deficit] /= parity
|
49
|
+
col_alias[deficit] = x[surplus]
|
50
|
+
if p_primary[surplus] == parity
|
51
|
+
p_primary[surplus] = Rational(1)
|
52
|
+
else
|
53
|
+
(p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
|
54
|
+
end
|
51
55
|
end
|
56
|
+
@enum = Enumerator.new do |y|
|
57
|
+
loop do
|
58
|
+
column = rand(len)
|
59
|
+
y << ((rand <= p_primary[column]) ? x[column] : col_alias[column])
|
60
|
+
end
|
61
|
+
end.lazy
|
52
62
|
end
|
53
63
|
end
|
54
64
|
|
55
|
-
|
56
|
-
|
65
|
+
def each(&block)
|
66
|
+
@enum.each(&block)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Return a random outcome from this object's distribution. The
|
70
|
+
# next (aka generate) method is O(1) time, but is not an inversion
|
57
71
|
# since two uniforms are used for each value that gets generated.
|
72
|
+
# The exception is that when all probabilities are equal, it is
|
73
|
+
# a true inversion.
|
58
74
|
#
|
59
|
-
def generate
|
60
|
-
|
61
|
-
rand <= @p_primary[column] ? @x[column] : @alias[column]
|
75
|
+
def next
|
76
|
+
@enum.next
|
62
77
|
end
|
78
|
+
|
79
|
+
alias generate next # for backwards compatibility
|
63
80
|
end
|
metadata
CHANGED
@@ -1,39 +1,33 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aliastable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.3
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul J Sanchez
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: If a categorical distribution has k distinct values, traditional approaches
|
14
14
|
will require O(k) work to pick an outcome with the correct probabilities. This
|
15
15
|
algorithm uses conditional probability to construct a table which will yield outcomes
|
16
|
-
with the correct probabilities, but in O(1) time.
|
16
|
+
with the correct probabilities. Table generation requires O(k) time, but subsequent
|
17
|
+
generation is done in O(1) time.
|
17
18
|
email: pjs@alum.mit.edu
|
18
19
|
executables: []
|
19
20
|
extensions: []
|
20
21
|
extra_rdoc_files: []
|
21
22
|
files:
|
22
|
-
- Rakefile
|
23
23
|
- aliastable.gemspec
|
24
24
|
- lib/aliastable.rb
|
25
|
-
|
26
|
-
- test/infile.bad.2
|
27
|
-
- test/infile.bad.3
|
28
|
-
- test/infile.good.1
|
29
|
-
- test/infile.good.2
|
30
|
-
- test/infile.good.3
|
31
|
-
- test/test_alias.rb
|
32
|
-
homepage:
|
25
|
+
homepage: https://bitbucket.org/paul_j_sanchez/aliastable
|
33
26
|
licenses:
|
34
|
-
-
|
35
|
-
metadata:
|
36
|
-
|
27
|
+
- MIT
|
28
|
+
metadata:
|
29
|
+
homepage_uri: https://bitbucket.org/paul_j_sanchez/aliastable
|
30
|
+
post_install_message:
|
37
31
|
rdoc_options: []
|
38
32
|
require_paths:
|
39
33
|
- lib
|
@@ -41,16 +35,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
41
35
|
requirements:
|
42
36
|
- - ">="
|
43
37
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
38
|
+
version: 2.6.0
|
45
39
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
40
|
requirements:
|
47
41
|
- - ">="
|
48
42
|
- !ruby/object:Gem::Version
|
49
43
|
version: '0'
|
50
44
|
requirements: []
|
51
|
-
|
52
|
-
|
53
|
-
signing_key:
|
45
|
+
rubygems_version: 3.4.3
|
46
|
+
signing_key:
|
54
47
|
specification_version: 4
|
55
48
|
summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
|
56
49
|
test_files: []
|
data/Rakefile
DELETED
data/test/infile.bad.1
DELETED
data/test/infile.bad.2
DELETED
data/test/infile.bad.3
DELETED
data/test/infile.good.1
DELETED
data/test/infile.good.2
DELETED
data/test/infile.good.3
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
1,1/253
|
2
|
-
2,2/253
|
3
|
-
3,3/253
|
4
|
-
4,4/253
|
5
|
-
5,5/253
|
6
|
-
6,6/253
|
7
|
-
7,7/253
|
8
|
-
8,8/253
|
9
|
-
9,9/253
|
10
|
-
10,10/253
|
11
|
-
11,1/23
|
12
|
-
12,12/253
|
13
|
-
13,13/253
|
14
|
-
14,14/253
|
15
|
-
15,15/253
|
16
|
-
16,16/253
|
17
|
-
17,17/253
|
18
|
-
18,18/253
|
19
|
-
19,19/253
|
20
|
-
20,20/253
|
21
|
-
21,21/253
|
22
|
-
22,2/23
|
data/test/test_alias.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby -w
|
2
|
-
|
3
|
-
require_relative '../lib/aliastable.rb'
|
4
|
-
|
5
|
-
nvars = 1_000_000
|
6
|
-
begin
|
7
|
-
at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
|
8
|
-
nvars.times { print at.generate, "\n" }
|
9
|
-
rescue RuntimeError => e
|
10
|
-
p e
|
11
|
-
puts
|
12
|
-
end
|
13
|
-
Dir['test/infile.*'].each do |f_name|
|
14
|
-
x = []
|
15
|
-
probs = []
|
16
|
-
f = File.open(f_name, 'r')
|
17
|
-
counts = {}
|
18
|
-
expected_counts = {}
|
19
|
-
while line = f.gets
|
20
|
-
inputs = line.strip.split(/[\s,;:]+/)
|
21
|
-
x << inputs[0]
|
22
|
-
counts[inputs[0]] = 0
|
23
|
-
probs << inputs[1].to_r
|
24
|
-
n_hat = probs[-1] * nvars
|
25
|
-
half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
|
26
|
-
expected_counts[inputs[0]] = [n_hat, half_width]
|
27
|
-
end
|
28
|
-
f.close
|
29
|
-
begin
|
30
|
-
at = AliasTable.new(x, probs)
|
31
|
-
nvars.times { counts[at.generate] += 1 }
|
32
|
-
puts 'All values should be in range almost always:'
|
33
|
-
counts.each_key do |k|
|
34
|
-
printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
|
35
|
-
k, expected_counts[k][1], expected_counts[k][0] - counts[k]
|
36
|
-
end
|
37
|
-
puts
|
38
|
-
rescue RuntimeError => e
|
39
|
-
p e
|
40
|
-
puts
|
41
|
-
end
|
42
|
-
end
|