aliastable 1.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 795e9607a67a1039e1714f38208a8b9ec8a55151
4
- data.tar.gz: 10c976abff2116ea8e7df9606a8dc0cbb97031de
3
+ metadata.gz: 0122f41cb149120e7213250d548fb0ac909484e6
4
+ data.tar.gz: d7c7376ef3985fa43f7f59afe817b9ff1d38d1eb
5
5
  SHA512:
6
- metadata.gz: c8e932de5ef8453d29c2381ff2bf3c2346d5c756494b4f4ed6298f7f3cbafb00014e99f559e0e9dec6f6de290293e3bf675b038fac433ba1bb3c0ae797d1b4e7
7
- data.tar.gz: 38cb5fb9db393fc76f5b1ff945f61330d166e706b9a957e4d511ac69e31138b065aed07a7ebaf0b412889f376569d31e135ab154e4356ace39a5cdc3cc8e9c20
6
+ metadata.gz: 0dc5359f8618ff2d58d9b88b0be91d60ef62a3dc9b36eb47779ac88ea4ecf54e37e31ab825502794391a894d4f3f1a1c0b28eeafe02fab3e32bbd40b18b6c65a
7
+ data.tar.gz: 196f3c2cd4a1d82bdac8234e0ca169d70e4036179ed34d60b25ed2efd72c0ac29612988120d24eee5824f7a7a924be5b2580750e641d6488a88d1c19ee779eb2
@@ -1,10 +1,10 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "1.0.0"
2
+ _VERSION = "3.0.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "aliastable"
6
6
  s.version = _VERSION
7
- s.date = "2013-03-23"
7
+ s.date = "2015-04-19"
8
8
  s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
9
9
  s.email = "pjs@alum.mit.edu"
10
10
  s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
@@ -19,9 +19,9 @@ Gem::Specification.new do |s|
19
19
  test/infile.bad.3
20
20
  test/infile.good.1
21
21
  test/infile.good.2
22
+ test/infile.good.3
22
23
  test/test_alias.rb
23
24
  ]
24
- s.add_runtime_dependency 'skewheap', '~> 1.0'
25
- s.required_ruby_version = '>= 1.8.1'
25
+ s.required_ruby_version = '>= 1.9.3'
26
26
  s.license = 'LGPL'
27
27
  end
@@ -1,7 +1,5 @@
1
1
  #!/usr/bin/env ruby -w
2
2
 
3
- require 'skewheap'
4
-
5
3
  # Generate values from a categorical distribution in constant
6
4
  # time, regardless of the number of categories. This clever algorithm
7
5
  # uses conditional probability to construct a table comprised of columns
@@ -11,7 +9,7 @@ require 'skewheap'
11
9
  # probabilities.
12
10
  #
13
11
  class AliasTable
14
-
12
+ TOLERANCE = Rational(1,1E15)
15
13
  # Construct an alias table from a set of values and their associated
16
14
  # probabilities. Values and their probabilities must be synchronized,
17
15
  # i.e., they must be arrays of the same length. Values can be
@@ -19,67 +17,55 @@ class AliasTable
19
17
  # sum to one.
20
18
  #
21
19
  # *Arguments*::
22
- # - +values+ -> the set of values to generate from.
23
- # - +p_values+ -> the synchronized set of probabilities associated
24
- # with the values set.
20
+ # - +x_values+ -> the set of values to generate from.
21
+ # - +p_values+ -> the synchronized set of probabilities associated
22
+ # with the value set.
25
23
  # *Raises*::
26
- # - RuntimeError if +values+ and +p_values+ are different lengths.
27
- # - RuntimeError if any +p_values+ are negative.
28
- # - RuntimeError if +p_values+ don't sum to one.
24
+ # - RuntimeError if +x_values+ and +p_values+ are different lengths.
25
+ # - RuntimeError if any of the +p_values+ are zero or negative.
26
+ # - RuntimeError if the +p_values+ don't sum to one.
29
27
  #
30
-
31
- def initialize(values, p_values)
32
- if values.length != p_values.length
28
+ def initialize(x_values, p_values)
29
+ if x_values.length != p_values.length
33
30
  raise "Args to AliasTable must be vectors of the same length."
34
- end
35
- p_values.each {|p| raise "p_values must be positive" if p <= 0.0}
36
- if p_values.reduce(:+).not_close_enough(1.0)
31
+ end
32
+ p = p_values.map do |current_p|
33
+ raise "p_values must be positive" if current_p <= 0.0
34
+ Rational(current_p)
35
+ end
36
+ unless (p.reduce(:+) - Rational(1)).abs < TOLERANCE
37
37
  raise "p_values must sum to 1.0"
38
38
  end
39
- @values = values.clone.freeze
40
- @p_values = p_values
41
- @alias = Array.new(values.length)
42
- @p_primary = Array.new(values.length, 1.0)
43
- @equiprob = 1.0 / values.length
44
- @deficit_set = SkewHeap.new
45
- @surplus_set = []
46
- @values.each_index {|i| classify(i) }
47
- until @deficit_set.empty? do
48
- deficit_column = @deficit_set.pop
49
- surplus_column = @surplus_set.shift
50
- @p_primary[deficit_column] = @p_values[deficit_column] / @equiprob
51
- @alias[deficit_column] = @values[surplus_column]
52
- @p_values[surplus_column] -= @equiprob - @p_values[deficit_column]
53
- classify(surplus_column)
39
+ @x = x_values.clone.freeze
40
+ @alias = Array.new(@x.length)
41
+ @p_primary = Array.new(@x.length, 1.0)
42
+ equiprob = Rational(1, @x.length)
43
+ deficit_set = []
44
+ surplus_set = []
45
+ @x.each_index do |i|
46
+ unless (p[i] - equiprob).abs < TOLERANCE
47
+ (p[i] < equiprob ? deficit_set : surplus_set) << i
48
+ end
49
+ end
50
+ until deficit_set.empty? do
51
+ deficit = deficit_set.pop
52
+ surplus = surplus_set.pop
53
+ @p_primary[deficit] = p[deficit] / equiprob
54
+ @alias[deficit] = @x[surplus]
55
+ p[surplus] -= equiprob - p[deficit]
56
+ unless (p[surplus] - equiprob).abs < TOLERANCE
57
+ (p[surplus] < equiprob ? deficit_set : surplus_set) << surplus
58
+ end
54
59
  end
55
60
  end
56
61
 
57
- # Returns a random outcome from the distribution provided to the constructor.
58
- # This process requires constant time, but is not an inversion
62
+ # Returns a random outcome from this object's distribution.
63
+ # The generate method is O(1) time, but is not an inversion
59
64
  # since two uniforms are used for each value that gets generated.
60
- #
65
+ #
61
66
  def generate
62
- column = rand(@values.length)
63
- rand < @p_primary[column] ? @values[column] : @alias[column]
64
- end
65
-
66
- private
67
- def classify(i)
68
- if @p_values[i].not_close_enough(@equiprob)
69
- if @p_values[i] < @equiprob
70
- @deficit_set.push i
71
- else
72
- @surplus_set << i
73
- end
74
- end
67
+ column = rand(@x.length)
68
+ rand <= @p_primary[column] ? @x[column] : @alias[column]
75
69
  end
76
70
 
77
71
  end
78
-
79
- class Numeric
80
- # Expand class Numeric to detect whether two values are within a
81
- # tolerance of 10^-15 of each other.
82
- def not_close_enough(n)
83
- ((self - n) / self).abs > 1E-15
84
- end
85
- end
@@ -0,0 +1,22 @@
1
+ 1,0.003952569169960474
2
+ 2,0.007905138339920948
3
+ 3,0.011857707509881422
4
+ 4,0.015810276679841896
5
+ 5,0.019762845849802372
6
+ 6,0.023715415019762844
7
+ 7,0.027667984189723320
8
+ 8,0.031620553359683792
9
+ 9,0.035573122529644272
10
+ 10,0.039525691699604744
11
+ 11,0.043478260869565216
12
+ 12,0.047430830039525688
13
+ 13,0.051383399209486168
14
+ 14,0.055335968379446640
15
+ 15,0.059288537549407112
16
+ 16,0.063241106719367585
17
+ 17,0.067193675889328064
18
+ 18,0.071146245059288543
19
+ 19,0.075098814229249009
20
+ 20,0.079051383399209488
21
+ 21,0.083003952569169967
22
+ 22,0.086956521739130432
@@ -2,12 +2,13 @@
2
2
 
3
3
  require 'alias'
4
4
 
5
- nvars = 1000000
5
+ nvars = 1_000_000
6
6
  begin
7
7
  at = AliasTable.new(["yes", "no"], [0.3, 0.3, 0.4])
8
8
  nvars.times {print at.generate, "\n"}
9
9
  rescue Exception => e
10
10
  puts e.message
11
+ puts
11
12
  end
12
13
  Dir["test/infile.*"].each do |f_name|
13
14
  x = []
@@ -22,17 +23,21 @@ Dir["test/infile.*"].each do |f_name|
22
23
  probs << inputs[1].to_f
23
24
  n_hat = probs[-1] * nvars
24
25
  half_width = 2.5 * Math::sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
25
- expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
26
+ # expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
27
+ expected_counts[inputs[0]] = [n_hat, half_width]
26
28
  end
27
29
  f.close
28
30
  begin
29
31
  at = AliasTable.new(x, probs)
30
32
  nvars.times {counts[at.generate] += 1}
31
- puts "\nAll four values should be in range 95\% of the time:"
33
+ puts "All four values should be in range 95\% of the time:"
32
34
  counts.each_key do |k|
33
- printf "%s: Expected %s, got %d\n", k, expected_counts[k], counts[k]
35
+ printf "%s: Half-width = %d, Expected - Observed = %d\n",
36
+ k, expected_counts[k][1], expected_counts[k][0] - counts[k]
34
37
  end
38
+ puts
35
39
  rescue Exception => e
36
40
  puts e.message
41
+ puts
37
42
  end
38
43
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aliastable
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: skewheap
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ~>
25
- - !ruby/object:Gem::Version
26
- version: '1.0'
11
+ date: 2015-04-19 00:00:00.000000000 Z
12
+ dependencies: []
27
13
  description: If a categorical distribution has k distinct values, traditional approaches
28
14
  will require O(k) work to pick an outcome with the correct probabilities. This
29
15
  algorithm uses conditional probability to construct a table which will yield outcomes
@@ -33,15 +19,16 @@ executables: []
33
19
  extensions: []
34
20
  extra_rdoc_files: []
35
21
  files:
22
+ - Rakefile
36
23
  - aliastable.gemspec
37
24
  - lgpl.txt
38
25
  - lib/alias.rb
39
- - Rakefile
40
26
  - test/infile.bad.1
41
27
  - test/infile.bad.2
42
28
  - test/infile.bad.3
43
29
  - test/infile.good.1
44
30
  - test/infile.good.2
31
+ - test/infile.good.3
45
32
  - test/test_alias.rb
46
33
  homepage:
47
34
  licenses:
@@ -53,19 +40,18 @@ require_paths:
53
40
  - lib
54
41
  required_ruby_version: !ruby/object:Gem::Requirement
55
42
  requirements:
56
- - - '>='
43
+ - - ">="
57
44
  - !ruby/object:Gem::Version
58
- version: 1.8.1
45
+ version: 1.9.3
59
46
  required_rubygems_version: !ruby/object:Gem::Requirement
60
47
  requirements:
61
- - - '>='
48
+ - - ">="
62
49
  - !ruby/object:Gem::Version
63
50
  version: '0'
64
51
  requirements: []
65
52
  rubyforge_project:
66
- rubygems_version: 2.0.3
53
+ rubygems_version: 2.4.5
67
54
  signing_key:
68
55
  specification_version: 4
69
56
  summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
70
57
  test_files: []
71
- has_rdoc: