aliastable 1.0.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/aliastable.gemspec +4 -4
- data/lib/alias.rb +40 -54
- data/test/infile.good.3 +22 -0
- data/test/test_alias.rb +9 -4
- metadata +9 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0122f41cb149120e7213250d548fb0ac909484e6
|
4
|
+
data.tar.gz: d7c7376ef3985fa43f7f59afe817b9ff1d38d1eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0dc5359f8618ff2d58d9b88b0be91d60ef62a3dc9b36eb47779ac88ea4ecf54e37e31ab825502794391a894d4f3f1a1c0b28eeafe02fab3e32bbd40b18b6c65a
|
7
|
+
data.tar.gz: 196f3c2cd4a1d82bdac8234e0ca169d70e4036179ed34d60b25ed2efd72c0ac29612988120d24eee5824f7a7a924be5b2580750e641d6488a88d1c19ee779eb2
|
data/aliastable.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# -*- ruby -*-
|
2
|
-
_VERSION = "
|
2
|
+
_VERSION = "3.0.0"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "aliastable"
|
6
6
|
s.version = _VERSION
|
7
|
-
s.date = "
|
7
|
+
s.date = "2015-04-19"
|
8
8
|
s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
|
9
9
|
s.email = "pjs@alum.mit.edu"
|
10
10
|
s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
|
@@ -19,9 +19,9 @@ Gem::Specification.new do |s|
|
|
19
19
|
test/infile.bad.3
|
20
20
|
test/infile.good.1
|
21
21
|
test/infile.good.2
|
22
|
+
test/infile.good.3
|
22
23
|
test/test_alias.rb
|
23
24
|
]
|
24
|
-
s.
|
25
|
-
s.required_ruby_version = '>= 1.8.1'
|
25
|
+
s.required_ruby_version = '>= 1.9.3'
|
26
26
|
s.license = 'LGPL'
|
27
27
|
end
|
data/lib/alias.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby -w
|
2
2
|
|
3
|
-
require 'skewheap'
|
4
|
-
|
5
3
|
# Generate values from a categorical distribution in constant
|
6
4
|
# time, regardless of the number of categories. This clever algorithm
|
7
5
|
# uses conditional probability to construct a table comprised of columns
|
@@ -11,7 +9,7 @@ require 'skewheap'
|
|
11
9
|
# probabilities.
|
12
10
|
#
|
13
11
|
class AliasTable
|
14
|
-
|
12
|
+
TOLERANCE = Rational(1,1E15)
|
15
13
|
# Construct an alias table from a set of values and their associated
|
16
14
|
# probabilities. Values and their probabilities must be synchronized,
|
17
15
|
# i.e., they must be arrays of the same length. Values can be
|
@@ -19,67 +17,55 @@ class AliasTable
|
|
19
17
|
# sum to one.
|
20
18
|
#
|
21
19
|
# *Arguments*::
|
22
|
-
# - +
|
23
|
-
# - +
|
24
|
-
# with the
|
20
|
+
# - +x_values+ -> the set of values to generate from.
|
21
|
+
# - +p_values+ -> the synchronized set of probabilities associated
|
22
|
+
# with the value set.
|
25
23
|
# *Raises*::
|
26
|
-
# - RuntimeError if +
|
27
|
-
# - RuntimeError if any +
|
28
|
-
# - RuntimeError if +
|
24
|
+
# - RuntimeError if +x_values+ and +p_values+ are different lengths.
|
25
|
+
# - RuntimeError if any of the +p_values+ are zero or negative.
|
26
|
+
# - RuntimeError if the +p_values+ don't sum to one.
|
29
27
|
#
|
30
|
-
|
31
|
-
|
32
|
-
if values.length != p_values.length
|
28
|
+
def initialize(x_values, p_values)
|
29
|
+
if x_values.length != p_values.length
|
33
30
|
raise "Args to AliasTable must be vectors of the same length."
|
34
|
-
end
|
35
|
-
|
36
|
-
|
31
|
+
end
|
32
|
+
p = p_values.map do |current_p|
|
33
|
+
raise "p_values must be positive" if current_p <= 0.0
|
34
|
+
Rational(current_p)
|
35
|
+
end
|
36
|
+
unless (p.reduce(:+) - Rational(1)).abs < TOLERANCE
|
37
37
|
raise "p_values must sum to 1.0"
|
38
38
|
end
|
39
|
-
@
|
40
|
-
@
|
41
|
-
@
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
@
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
39
|
+
@x = x_values.clone.freeze
|
40
|
+
@alias = Array.new(@x.length)
|
41
|
+
@p_primary = Array.new(@x.length, 1.0)
|
42
|
+
equiprob = Rational(1, @x.length)
|
43
|
+
deficit_set = []
|
44
|
+
surplus_set = []
|
45
|
+
@x.each_index do |i|
|
46
|
+
unless (p[i] - equiprob).abs < TOLERANCE
|
47
|
+
(p[i] < equiprob ? deficit_set : surplus_set) << i
|
48
|
+
end
|
49
|
+
end
|
50
|
+
until deficit_set.empty? do
|
51
|
+
deficit = deficit_set.pop
|
52
|
+
surplus = surplus_set.pop
|
53
|
+
@p_primary[deficit] = p[deficit] / equiprob
|
54
|
+
@alias[deficit] = @x[surplus]
|
55
|
+
p[surplus] -= equiprob - p[deficit]
|
56
|
+
unless (p[surplus] - equiprob).abs < TOLERANCE
|
57
|
+
(p[surplus] < equiprob ? deficit_set : surplus_set) << surplus
|
58
|
+
end
|
54
59
|
end
|
55
60
|
end
|
56
61
|
|
57
|
-
# Returns a random outcome from
|
58
|
-
#
|
62
|
+
# Returns a random outcome from this object's distribution.
|
63
|
+
# The generate method is O(1) time, but is not an inversion
|
59
64
|
# since two uniforms are used for each value that gets generated.
|
60
|
-
#
|
65
|
+
#
|
61
66
|
def generate
|
62
|
-
column = rand(@
|
63
|
-
rand
|
64
|
-
end
|
65
|
-
|
66
|
-
private
|
67
|
-
def classify(i)
|
68
|
-
if @p_values[i].not_close_enough(@equiprob)
|
69
|
-
if @p_values[i] < @equiprob
|
70
|
-
@deficit_set.push i
|
71
|
-
else
|
72
|
-
@surplus_set << i
|
73
|
-
end
|
74
|
-
end
|
67
|
+
column = rand(@x.length)
|
68
|
+
rand <= @p_primary[column] ? @x[column] : @alias[column]
|
75
69
|
end
|
76
70
|
|
77
71
|
end
|
78
|
-
|
79
|
-
class Numeric
|
80
|
-
# Expand class Numeric to detect whether two values are within a
|
81
|
-
# tolerance of 10^-15 of each other.
|
82
|
-
def not_close_enough(n)
|
83
|
-
((self - n) / self).abs > 1E-15
|
84
|
-
end
|
85
|
-
end
|
data/test/infile.good.3
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
1,0.003952569169960474
|
2
|
+
2,0.007905138339920948
|
3
|
+
3,0.011857707509881422
|
4
|
+
4,0.015810276679841896
|
5
|
+
5,0.019762845849802372
|
6
|
+
6,0.023715415019762844
|
7
|
+
7,0.027667984189723320
|
8
|
+
8,0.031620553359683792
|
9
|
+
9,0.035573122529644272
|
10
|
+
10,0.039525691699604744
|
11
|
+
11,0.043478260869565216
|
12
|
+
12,0.047430830039525688
|
13
|
+
13,0.051383399209486168
|
14
|
+
14,0.055335968379446640
|
15
|
+
15,0.059288537549407112
|
16
|
+
16,0.063241106719367585
|
17
|
+
17,0.067193675889328064
|
18
|
+
18,0.071146245059288543
|
19
|
+
19,0.075098814229249009
|
20
|
+
20,0.079051383399209488
|
21
|
+
21,0.083003952569169967
|
22
|
+
22,0.086956521739130432
|
data/test/test_alias.rb
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
require 'alias'
|
4
4
|
|
5
|
-
nvars =
|
5
|
+
nvars = 1_000_000
|
6
6
|
begin
|
7
7
|
at = AliasTable.new(["yes", "no"], [0.3, 0.3, 0.4])
|
8
8
|
nvars.times {print at.generate, "\n"}
|
9
9
|
rescue Exception => e
|
10
10
|
puts e.message
|
11
|
+
puts
|
11
12
|
end
|
12
13
|
Dir["test/infile.*"].each do |f_name|
|
13
14
|
x = []
|
@@ -22,17 +23,21 @@ Dir["test/infile.*"].each do |f_name|
|
|
22
23
|
probs << inputs[1].to_f
|
23
24
|
n_hat = probs[-1] * nvars
|
24
25
|
half_width = 2.5 * Math::sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
|
25
|
-
expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
|
26
|
+
# expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
|
27
|
+
expected_counts[inputs[0]] = [n_hat, half_width]
|
26
28
|
end
|
27
29
|
f.close
|
28
30
|
begin
|
29
31
|
at = AliasTable.new(x, probs)
|
30
32
|
nvars.times {counts[at.generate] += 1}
|
31
|
-
puts "
|
33
|
+
puts "All four values should be in range 95\% of the time:"
|
32
34
|
counts.each_key do |k|
|
33
|
-
printf "%s:
|
35
|
+
printf "%s: Half-width = %d, Expected - Observed = %d\n",
|
36
|
+
k, expected_counts[k][1], expected_counts[k][0] - counts[k]
|
34
37
|
end
|
38
|
+
puts
|
35
39
|
rescue Exception => e
|
36
40
|
puts e.message
|
41
|
+
puts
|
37
42
|
end
|
38
43
|
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aliastable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul J Sanchez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: skewheap
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ~>
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '1.0'
|
11
|
+
date: 2015-04-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
27
13
|
description: If a categorical distribution has k distinct values, traditional approaches
|
28
14
|
will require O(k) work to pick an outcome with the correct probabilities. This
|
29
15
|
algorithm uses conditional probability to construct a table which will yield outcomes
|
@@ -33,15 +19,16 @@ executables: []
|
|
33
19
|
extensions: []
|
34
20
|
extra_rdoc_files: []
|
35
21
|
files:
|
22
|
+
- Rakefile
|
36
23
|
- aliastable.gemspec
|
37
24
|
- lgpl.txt
|
38
25
|
- lib/alias.rb
|
39
|
-
- Rakefile
|
40
26
|
- test/infile.bad.1
|
41
27
|
- test/infile.bad.2
|
42
28
|
- test/infile.bad.3
|
43
29
|
- test/infile.good.1
|
44
30
|
- test/infile.good.2
|
31
|
+
- test/infile.good.3
|
45
32
|
- test/test_alias.rb
|
46
33
|
homepage:
|
47
34
|
licenses:
|
@@ -53,19 +40,18 @@ require_paths:
|
|
53
40
|
- lib
|
54
41
|
required_ruby_version: !ruby/object:Gem::Requirement
|
55
42
|
requirements:
|
56
|
-
- -
|
43
|
+
- - ">="
|
57
44
|
- !ruby/object:Gem::Version
|
58
|
-
version: 1.
|
45
|
+
version: 1.9.3
|
59
46
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
47
|
requirements:
|
61
|
-
- -
|
48
|
+
- - ">="
|
62
49
|
- !ruby/object:Gem::Version
|
63
50
|
version: '0'
|
64
51
|
requirements: []
|
65
52
|
rubyforge_project:
|
66
|
-
rubygems_version: 2.
|
53
|
+
rubygems_version: 2.4.5
|
67
54
|
signing_key:
|
68
55
|
specification_version: 4
|
69
56
|
summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
|
70
57
|
test_files: []
|
71
|
-
has_rdoc:
|