aliastable 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/aliastable.gemspec +4 -4
- data/lib/alias.rb +40 -54
- data/test/infile.good.3 +22 -0
- data/test/test_alias.rb +9 -4
- metadata +9 -23
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0122f41cb149120e7213250d548fb0ac909484e6
|
|
4
|
+
data.tar.gz: d7c7376ef3985fa43f7f59afe817b9ff1d38d1eb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0dc5359f8618ff2d58d9b88b0be91d60ef62a3dc9b36eb47779ac88ea4ecf54e37e31ab825502794391a894d4f3f1a1c0b28eeafe02fab3e32bbd40b18b6c65a
|
|
7
|
+
data.tar.gz: 196f3c2cd4a1d82bdac8234e0ca169d70e4036179ed34d60b25ed2efd72c0ac29612988120d24eee5824f7a7a924be5b2580750e641d6488a88d1c19ee779eb2
|
data/aliastable.gemspec
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# -*- ruby -*-
|
|
2
|
-
_VERSION = "
|
|
2
|
+
_VERSION = "3.0.0"
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "aliastable"
|
|
6
6
|
s.version = _VERSION
|
|
7
|
-
s.date = "
|
|
7
|
+
s.date = "2015-04-19"
|
|
8
8
|
s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
|
|
9
9
|
s.email = "pjs@alum.mit.edu"
|
|
10
10
|
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
|
|
@@ -19,9 +19,9 @@ Gem::Specification.new do |s|
|
|
|
19
19
|
test/infile.bad.3
|
|
20
20
|
test/infile.good.1
|
|
21
21
|
test/infile.good.2
|
|
22
|
+
test/infile.good.3
|
|
22
23
|
test/test_alias.rb
|
|
23
24
|
]
|
|
24
|
-
s.
|
|
25
|
-
s.required_ruby_version = '>= 1.8.1'
|
|
25
|
+
s.required_ruby_version = '>= 1.9.3'
|
|
26
26
|
s.license = 'LGPL'
|
|
27
27
|
end
|
data/lib/alias.rb
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby -w
|
|
2
2
|
|
|
3
|
-
require 'skewheap'
|
|
4
|
-
|
|
5
3
|
# Generate values from a categorical distribution in constant
|
|
6
4
|
# time, regardless of the number of categories. This clever algorithm
|
|
7
5
|
# uses conditional probability to construct a table comprised of columns
|
|
@@ -11,7 +9,7 @@ require 'skewheap'
|
|
|
11
9
|
# probabilities.
|
|
12
10
|
#
|
|
13
11
|
class AliasTable
|
|
14
|
-
|
|
12
|
+
TOLERANCE = Rational(1,1E15)
|
|
15
13
|
# Construct an alias table from a set of values and their associated
|
|
16
14
|
# probabilities. Values and their probabilities must be synchronized,
|
|
17
15
|
# i.e., they must be arrays of the same length. Values can be
|
|
@@ -19,67 +17,55 @@ class AliasTable
|
|
|
19
17
|
# sum to one.
|
|
20
18
|
#
|
|
21
19
|
# *Arguments*::
|
|
22
|
-
# - +
|
|
23
|
-
# - +
|
|
24
|
-
# with the
|
|
20
|
+
# - +x_values+ -> the set of values to generate from.
|
|
21
|
+
# - +p_values+ -> the synchronized set of probabilities associated
|
|
22
|
+
# with the value set.
|
|
25
23
|
# *Raises*::
|
|
26
|
-
# - RuntimeError if +
|
|
27
|
-
# - RuntimeError if any +
|
|
28
|
-
# - RuntimeError if +
|
|
24
|
+
# - RuntimeError if +x_values+ and +p_values+ are different lengths.
|
|
25
|
+
# - RuntimeError if any of the +p_values+ is zero or negative.
|
|
26
|
+
# - RuntimeError if the +p_values+ don't sum to one (within TOLERANCE).
|
|
29
27
|
#
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if values.length != p_values.length
|
|
28
|
+
def initialize(x_values, p_values)
|
|
29
|
+
if x_values.length != p_values.length
|
|
33
30
|
raise "Args to AliasTable must be vectors of the same length."
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
|
|
31
|
+
end
|
|
32
|
+
p = p_values.map do |current_p|
|
|
33
|
+
raise "p_values must be positive" if current_p <= 0.0
|
|
34
|
+
Rational(current_p)
|
|
35
|
+
end
|
|
36
|
+
unless (p.reduce(:+) - Rational(1)).abs < TOLERANCE
|
|
37
37
|
raise "p_values must sum to 1.0"
|
|
38
38
|
end
|
|
39
|
-
@
|
|
40
|
-
@
|
|
41
|
-
@
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
@
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
39
|
+
@x = x_values.clone.freeze
|
|
40
|
+
@alias = Array.new(@x.length)
|
|
41
|
+
@p_primary = Array.new(@x.length, 1.0)
|
|
42
|
+
equiprob = Rational(1, @x.length)
|
|
43
|
+
deficit_set = []
|
|
44
|
+
surplus_set = []
|
|
45
|
+
@x.each_index do |i|
|
|
46
|
+
unless (p[i] - equiprob).abs < TOLERANCE
|
|
47
|
+
(p[i] < equiprob ? deficit_set : surplus_set) << i
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
until deficit_set.empty? do
|
|
51
|
+
deficit = deficit_set.pop
|
|
52
|
+
surplus = surplus_set.pop
|
|
53
|
+
@p_primary[deficit] = p[deficit] / equiprob
|
|
54
|
+
@alias[deficit] = @x[surplus]
|
|
55
|
+
p[surplus] -= equiprob - p[deficit]
|
|
56
|
+
unless (p[surplus] - equiprob).abs < TOLERANCE
|
|
57
|
+
(p[surplus] < equiprob ? deficit_set : surplus_set) << surplus
|
|
58
|
+
end
|
|
54
59
|
end
|
|
55
60
|
end
|
|
56
61
|
|
|
57
|
-
# Returns a random outcome from
|
|
58
|
-
#
|
|
62
|
+
# Returns a random outcome from this object's distribution.
|
|
63
|
+
# The generate method is O(1) time, but is not an inversion
|
|
59
64
|
# since two uniforms are used for each value that gets generated.
|
|
60
|
-
#
|
|
65
|
+
#
|
|
61
66
|
def generate
|
|
62
|
-
column = rand(@
|
|
63
|
-
rand
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
private
|
|
67
|
-
def classify(i)
|
|
68
|
-
if @p_values[i].not_close_enough(@equiprob)
|
|
69
|
-
if @p_values[i] < @equiprob
|
|
70
|
-
@deficit_set.push i
|
|
71
|
-
else
|
|
72
|
-
@surplus_set << i
|
|
73
|
-
end
|
|
74
|
-
end
|
|
67
|
+
column = rand(@x.length)
|
|
68
|
+
rand <= @p_primary[column] ? @x[column] : @alias[column]
|
|
75
69
|
end
|
|
76
70
|
|
|
77
71
|
end
|
|
78
|
-
|
|
79
|
-
class Numeric
|
|
80
|
-
# Expand class Numeric to detect whether two values are within a
|
|
81
|
-
# tolerance of 10^-15 of each other.
|
|
82
|
-
def not_close_enough(n)
|
|
83
|
-
((self - n) / self).abs > 1E-15
|
|
84
|
-
end
|
|
85
|
-
end
|
data/test/infile.good.3
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
1,0.003952569169960474
|
|
2
|
+
2,0.007905138339920948
|
|
3
|
+
3,0.011857707509881422
|
|
4
|
+
4,0.015810276679841896
|
|
5
|
+
5,0.019762845849802372
|
|
6
|
+
6,0.023715415019762844
|
|
7
|
+
7,0.027667984189723320
|
|
8
|
+
8,0.031620553359683792
|
|
9
|
+
9,0.035573122529644272
|
|
10
|
+
10,0.039525691699604744
|
|
11
|
+
11,0.043478260869565216
|
|
12
|
+
12,0.047430830039525688
|
|
13
|
+
13,0.051383399209486168
|
|
14
|
+
14,0.055335968379446640
|
|
15
|
+
15,0.059288537549407112
|
|
16
|
+
16,0.063241106719367585
|
|
17
|
+
17,0.067193675889328064
|
|
18
|
+
18,0.071146245059288543
|
|
19
|
+
19,0.075098814229249009
|
|
20
|
+
20,0.079051383399209488
|
|
21
|
+
21,0.083003952569169967
|
|
22
|
+
22,0.086956521739130432
|
data/test/test_alias.rb
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
require 'alias'
|
|
4
4
|
|
|
5
|
-
nvars =
|
|
5
|
+
nvars = 1_000_000
|
|
6
6
|
begin
|
|
7
7
|
at = AliasTable.new(["yes", "no"], [0.3, 0.3, 0.4])
|
|
8
8
|
nvars.times {print at.generate, "\n"}
|
|
9
9
|
rescue Exception => e
|
|
10
10
|
puts e.message
|
|
11
|
+
puts
|
|
11
12
|
end
|
|
12
13
|
Dir["test/infile.*"].each do |f_name|
|
|
13
14
|
x = []
|
|
@@ -22,17 +23,21 @@ Dir["test/infile.*"].each do |f_name|
|
|
|
22
23
|
probs << inputs[1].to_f
|
|
23
24
|
n_hat = probs[-1] * nvars
|
|
24
25
|
half_width = 2.5 * Math::sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
|
|
25
|
-
expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
|
|
26
|
+
# expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
|
|
27
|
+
expected_counts[inputs[0]] = [n_hat, half_width]
|
|
26
28
|
end
|
|
27
29
|
f.close
|
|
28
30
|
begin
|
|
29
31
|
at = AliasTable.new(x, probs)
|
|
30
32
|
nvars.times {counts[at.generate] += 1}
|
|
31
|
-
puts "
|
|
33
|
+
puts "All four values should be in range 95\% of the time:"
|
|
32
34
|
counts.each_key do |k|
|
|
33
|
-
printf "%s:
|
|
35
|
+
printf "%s: Half-width = %d, Expected - Observed = %d\n",
|
|
36
|
+
k, expected_counts[k][1], expected_counts[k][0] - counts[k]
|
|
34
37
|
end
|
|
38
|
+
puts
|
|
35
39
|
rescue Exception => e
|
|
36
40
|
puts e.message
|
|
41
|
+
puts
|
|
37
42
|
end
|
|
38
43
|
end
|
metadata
CHANGED
|
@@ -1,29 +1,15 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: aliastable
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 3.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Paul J Sanchez
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
12
|
-
dependencies:
|
|
13
|
-
- !ruby/object:Gem::Dependency
|
|
14
|
-
name: skewheap
|
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - ~>
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: '1.0'
|
|
20
|
-
type: :runtime
|
|
21
|
-
prerelease: false
|
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
-
requirements:
|
|
24
|
-
- - ~>
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: '1.0'
|
|
11
|
+
date: 2015-04-19 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
27
13
|
description: If a categorical distribution has k distinct values, traditional approaches
|
|
28
14
|
will require O(k) work to pick an outcome with the correct probabilities. This
|
|
29
15
|
algorithm uses conditional probability to construct a table which will yield outcomes
|
|
@@ -33,15 +19,16 @@ executables: []
|
|
|
33
19
|
extensions: []
|
|
34
20
|
extra_rdoc_files: []
|
|
35
21
|
files:
|
|
22
|
+
- Rakefile
|
|
36
23
|
- aliastable.gemspec
|
|
37
24
|
- lgpl.txt
|
|
38
25
|
- lib/alias.rb
|
|
39
|
-
- Rakefile
|
|
40
26
|
- test/infile.bad.1
|
|
41
27
|
- test/infile.bad.2
|
|
42
28
|
- test/infile.bad.3
|
|
43
29
|
- test/infile.good.1
|
|
44
30
|
- test/infile.good.2
|
|
31
|
+
- test/infile.good.3
|
|
45
32
|
- test/test_alias.rb
|
|
46
33
|
homepage:
|
|
47
34
|
licenses:
|
|
@@ -53,19 +40,18 @@ require_paths:
|
|
|
53
40
|
- lib
|
|
54
41
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
55
42
|
requirements:
|
|
56
|
-
- -
|
|
43
|
+
- - ">="
|
|
57
44
|
- !ruby/object:Gem::Version
|
|
58
|
-
version: 1.
|
|
45
|
+
version: 1.9.3
|
|
59
46
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
47
|
requirements:
|
|
61
|
-
- -
|
|
48
|
+
- - ">="
|
|
62
49
|
- !ruby/object:Gem::Version
|
|
63
50
|
version: '0'
|
|
64
51
|
requirements: []
|
|
65
52
|
rubyforge_project:
|
|
66
|
-
rubygems_version: 2.
|
|
53
|
+
rubygems_version: 2.4.5
|
|
67
54
|
signing_key:
|
|
68
55
|
specification_version: 4
|
|
69
56
|
summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
|
|
70
57
|
test_files: []
|
|
71
|
-
has_rdoc:
|