aliastable 3.1.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/aliastable.gemspec +6 -13
- data/lib/aliastable.rb +40 -21
- metadata +11 -20
- data/Rakefile +0 -8
- data/test/infile.bad.1 +0 -5
- data/test/infile.bad.2 +0 -4
- data/test/infile.bad.3 +0 -3
- data/test/infile.good.1 +0 -4
- data/test/infile.good.2 +0 -4
- data/test/infile.good.3 +0 -22
- data/test/infile.good.4 +0 -3
- data/test/test_alias.rb +0 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88cae2e262098e88436d41789056946a36ec55270d4e52c33242f5018b687595
|
4
|
+
data.tar.gz: e6b9c4befb4950137807729b171178b0210c794693a1045dbd94ea10606aa593
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 294e9171f62c9602f1eb3fe9886daeb1fb899431d906e26a33d95c0af4398bad147a62b7e2eee8f7d52a736dd4aa57a67bbf2e5b767ed847817eae59409b7bcf
|
7
|
+
data.tar.gz: 545e97af416b1d7ad20f81444811343eafb0d0ec74a30ce2b5b754cfb4336d8aefb337b907fa4b1b7453da796b5d4b48da941b9840bfeb878c3bff4bc7f614e8
|
data/aliastable.gemspec
CHANGED
@@ -1,27 +1,20 @@
|
|
1
1
|
# -*- ruby -*-
|
2
|
-
_VERSION = "3.1.0"
|
2
|
+
_VERSION = "4.0.0"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "aliastable"
|
6
6
|
s.version = _VERSION
|
7
|
-
s.date = "
|
7
|
+
s.date = "2023-01-21"
|
8
8
|
s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
|
9
9
|
s.email = "pjs@alum.mit.edu"
|
10
10
|
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities. Table generation requires O(k) time, but subsequent generation is done in O(1) time."
|
11
11
|
s.author = "Paul J Sanchez"
|
12
|
+
s.homepage = "https://bitbucket.org/paul_j_sanchez/aliastable"
|
12
13
|
s.files = %w[
|
13
14
|
aliastable.gemspec
|
14
15
|
lib/aliastable.rb
|
15
|
-
Rakefile
|
16
|
-
test/infile.bad.1
|
17
|
-
test/infile.bad.2
|
18
|
-
test/infile.bad.3
|
19
|
-
test/infile.good.1
|
20
|
-
test/infile.good.2
|
21
|
-
test/infile.good.3
|
22
|
-
test/infile.good.4
|
23
|
-
test/test_alias.rb
|
24
16
|
]
|
25
|
-
s.required_ruby_version = '>= 2.
|
26
|
-
s.license = '
|
17
|
+
s.required_ruby_version = '>= 2.6.0'
|
18
|
+
s.license = 'MIT'
|
19
|
+
s.metadata["homepage_uri"] = s.homepage
|
27
20
|
end
|
data/lib/aliastable.rb
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
# probabilities.
|
10
10
|
#
|
11
11
|
class AliasTable
|
12
|
+
include Enumerable
|
12
13
|
# Construct an alias table from a set of values and their associated
|
13
14
|
# probabilities. Values and their probabilities must be synchronized,
|
14
15
|
# i.e., they must be arrays of the same length. Values can be
|
@@ -28,34 +29,52 @@ class AliasTable
|
|
28
29
|
def initialize(x_set, p_value)
|
29
30
|
fail 'x_set & p_value must have same length.' if x_set.size != p_value.size
|
30
31
|
fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
|
31
|
-
|
32
|
-
fail 'p_values must sum to 1' unless
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
p_primary = p_value.map(&:rationalize)
|
33
|
+
fail 'p_values must sum to 1' unless p_primary.reduce(:+) == Rational(1)
|
34
|
+
x = x_set.clone.freeze
|
35
|
+
len = x.length
|
36
|
+
col_alias = Array.new(len)
|
37
|
+
parity = Rational(1, len)
|
38
|
+
group = p_primary.each_index.group_by { |i| p_primary[i] <=> parity }
|
37
39
|
deficit_set = group[-1]
|
38
40
|
surplus_set = group[1]
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
41
|
+
if deficit_set.nil?
|
42
|
+
@enum = Enumerator.new { |y| loop { y << x[rand(len)] } }.lazy
|
43
|
+
else
|
44
|
+
until deficit_set.empty?
|
45
|
+
deficit = deficit_set.pop
|
46
|
+
surplus = surplus_set.pop
|
47
|
+
p_primary[surplus] -= parity - p_primary[deficit]
|
48
|
+
p_primary[deficit] /= parity
|
49
|
+
col_alias[deficit] = x[surplus]
|
50
|
+
if p_primary[surplus] == parity
|
51
|
+
p_primary[surplus] = Rational(1)
|
52
|
+
else
|
53
|
+
(p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
|
54
|
+
end
|
49
55
|
end
|
56
|
+
@enum = Enumerator.new do |y|
|
57
|
+
loop do
|
58
|
+
column = rand(len)
|
59
|
+
y << ((rand <= p_primary[column]) ? x[column] : col_alias[column])
|
60
|
+
end
|
61
|
+
end.lazy
|
50
62
|
end
|
51
63
|
end
|
52
64
|
|
53
|
-
|
54
|
-
|
65
|
+
def each(&block)
|
66
|
+
@enum.each(&block)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Return a random outcome from this object's distribution. The
|
70
|
+
# next (aka generate) method is O(1) time, but is not an inversion
|
55
71
|
# since two uniforms are used for each value that gets generated.
|
72
|
+
# The exception is that when all probabilities are equal, it is
|
73
|
+
# a true inversion.
|
56
74
|
#
|
57
|
-
def generate
|
58
|
-
|
59
|
-
rand <= @p_primary[column] ? @x[column] : @alias[column]
|
75
|
+
def next
|
76
|
+
@enum.next
|
60
77
|
end
|
78
|
+
|
79
|
+
alias generate next # for backwards compatibility
|
61
80
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aliastable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.0
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul J Sanchez
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: If a categorical distribution has k distinct values, traditional approaches
|
14
14
|
will require O(k) work to pick an outcome with the correct probabilities. This
|
@@ -20,22 +20,14 @@ executables: []
|
|
20
20
|
extensions: []
|
21
21
|
extra_rdoc_files: []
|
22
22
|
files:
|
23
|
-
- Rakefile
|
24
23
|
- aliastable.gemspec
|
25
24
|
- lib/aliastable.rb
|
26
|
-
|
27
|
-
- test/infile.bad.2
|
28
|
-
- test/infile.bad.3
|
29
|
-
- test/infile.good.1
|
30
|
-
- test/infile.good.2
|
31
|
-
- test/infile.good.3
|
32
|
-
- test/infile.good.4
|
33
|
-
- test/test_alias.rb
|
34
|
-
homepage:
|
25
|
+
homepage: https://bitbucket.org/paul_j_sanchez/aliastable
|
35
26
|
licenses:
|
36
|
-
-
|
37
|
-
metadata:
|
38
|
-
|
27
|
+
- MIT
|
28
|
+
metadata:
|
29
|
+
homepage_uri: https://bitbucket.org/paul_j_sanchez/aliastable
|
30
|
+
post_install_message:
|
39
31
|
rdoc_options: []
|
40
32
|
require_paths:
|
41
33
|
- lib
|
@@ -43,16 +35,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
43
35
|
requirements:
|
44
36
|
- - ">="
|
45
37
|
- !ruby/object:Gem::Version
|
46
|
-
version: 2.
|
38
|
+
version: 2.6.0
|
47
39
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
40
|
requirements:
|
49
41
|
- - ">="
|
50
42
|
- !ruby/object:Gem::Version
|
51
43
|
version: '0'
|
52
44
|
requirements: []
|
53
|
-
|
54
|
-
|
55
|
-
signing_key:
|
45
|
+
rubygems_version: 3.4.3
|
46
|
+
signing_key:
|
56
47
|
specification_version: 4
|
57
48
|
summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
|
58
49
|
test_files: []
|
data/Rakefile
DELETED
data/test/infile.bad.1
DELETED
data/test/infile.bad.2
DELETED
data/test/infile.bad.3
DELETED
data/test/infile.good.1
DELETED
data/test/infile.good.2
DELETED
data/test/infile.good.3
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
1,1/253
|
2
|
-
2,2/253
|
3
|
-
3,3/253
|
4
|
-
4,4/253
|
5
|
-
5,5/253
|
6
|
-
6,6/253
|
7
|
-
7,7/253
|
8
|
-
8,8/253
|
9
|
-
9,9/253
|
10
|
-
10,10/253
|
11
|
-
11,11/253
|
12
|
-
12,12/253
|
13
|
-
13,13/253
|
14
|
-
14,14/253
|
15
|
-
15,15/253
|
16
|
-
16,16/253
|
17
|
-
17,17/253
|
18
|
-
18,18/253
|
19
|
-
19,19/253
|
20
|
-
20,20/253
|
21
|
-
21,21/253
|
22
|
-
22,22/253
|
data/test/infile.good.4
DELETED
data/test/test_alias.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby -w
|
2
|
-
|
3
|
-
require_relative '../lib/aliastable.rb'
|
4
|
-
|
5
|
-
nvars = 1_000_000
|
6
|
-
begin
|
7
|
-
at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
|
8
|
-
nvars.times { print at.generate, "\n" }
|
9
|
-
rescue RuntimeError => e
|
10
|
-
p e
|
11
|
-
puts
|
12
|
-
end
|
13
|
-
Dir['test/infile.*'].each do |f_name|
|
14
|
-
x = []
|
15
|
-
probs = []
|
16
|
-
f = File.open(f_name, 'r')
|
17
|
-
counts = {}
|
18
|
-
expected_counts = {}
|
19
|
-
while line = f.gets
|
20
|
-
inputs = line.strip.split(/[\s,;:]+/)
|
21
|
-
x << inputs[0]
|
22
|
-
counts[inputs[0]] = 0
|
23
|
-
probs << Rational(inputs[1])
|
24
|
-
n_hat = probs[-1] * nvars
|
25
|
-
half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
|
26
|
-
expected_counts[inputs[0]] = [n_hat, half_width]
|
27
|
-
end
|
28
|
-
f.close
|
29
|
-
p x
|
30
|
-
p probs
|
31
|
-
begin
|
32
|
-
at = AliasTable.new(x, probs)
|
33
|
-
nvars.times { counts[at.generate] += 1 }
|
34
|
-
puts 'All values should be in range almost always:'
|
35
|
-
counts.each_key do |k|
|
36
|
-
printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
|
37
|
-
k, expected_counts[k][1], expected_counts[k][0] - counts[k]
|
38
|
-
end
|
39
|
-
puts
|
40
|
-
rescue RuntimeError => e
|
41
|
-
p e
|
42
|
-
puts
|
43
|
-
end
|
44
|
-
end
|