aliastable 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 795e9607a67a1039e1714f38208a8b9ec8a55151
4
- data.tar.gz: 10c976abff2116ea8e7df9606a8dc0cbb97031de
3
+ metadata.gz: 0122f41cb149120e7213250d548fb0ac909484e6
4
+ data.tar.gz: d7c7376ef3985fa43f7f59afe817b9ff1d38d1eb
5
5
  SHA512:
6
- metadata.gz: c8e932de5ef8453d29c2381ff2bf3c2346d5c756494b4f4ed6298f7f3cbafb00014e99f559e0e9dec6f6de290293e3bf675b038fac433ba1bb3c0ae797d1b4e7
7
- data.tar.gz: 38cb5fb9db393fc76f5b1ff945f61330d166e706b9a957e4d511ac69e31138b065aed07a7ebaf0b412889f376569d31e135ab154e4356ace39a5cdc3cc8e9c20
6
+ metadata.gz: 0dc5359f8618ff2d58d9b88b0be91d60ef62a3dc9b36eb47779ac88ea4ecf54e37e31ab825502794391a894d4f3f1a1c0b28eeafe02fab3e32bbd40b18b6c65a
7
+ data.tar.gz: 196f3c2cd4a1d82bdac8234e0ca169d70e4036179ed34d60b25ed2efd72c0ac29612988120d24eee5824f7a7a924be5b2580750e641d6488a88d1c19ee779eb2
@@ -1,10 +1,10 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "1.0.0"
2
+ _VERSION = "3.0.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "aliastable"
6
6
  s.version = _VERSION
7
- s.date = "2013-03-23"
7
+ s.date = "2015-04-19"
8
8
  s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
9
9
  s.email = "pjs@alum.mit.edu"
10
10
  s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
@@ -19,9 +19,9 @@ Gem::Specification.new do |s|
19
19
  test/infile.bad.3
20
20
  test/infile.good.1
21
21
  test/infile.good.2
22
+ test/infile.good.3
22
23
  test/test_alias.rb
23
24
  ]
24
- s.add_runtime_dependency 'skewheap', '~> 1.0'
25
- s.required_ruby_version = '>= 1.8.1'
25
+ s.required_ruby_version = '>= 1.9.3'
26
26
  s.license = 'LGPL'
27
27
  end
@@ -1,7 +1,5 @@
1
1
  #!/usr/bin/env ruby -w
2
2
 
3
- require 'skewheap'
4
-
5
3
  # Generate values from a categorical distribution in constant
6
4
  # time, regardless of the number of categories. This clever algorithm
7
5
  # uses conditional probability to construct a table comprised of columns
@@ -11,7 +9,7 @@ require 'skewheap'
11
9
  # probabilities.
12
10
  #
13
11
  class AliasTable
14
-
12
+ TOLERANCE = Rational(1,1E15)
15
13
  # Construct an alias table from a set of values and their associated
16
14
  # probabilities. Values and their probabilities must be synchronized,
17
15
  # i.e., they must be arrays of the same length. Values can be
@@ -19,67 +17,55 @@ class AliasTable
19
17
  # sum to one.
20
18
  #
21
19
  # *Arguments*::
22
- # - +values+ -> the set of values to generate from.
23
- # - +p_values+ -> the synchronized set of probabilities associated
24
- # with the values set.
20
+ # - +x_values+ -> the set of values to generate from.
21
+ # - +p_values+ -> the synchronized set of probabilities associated
22
+ # with the value set.
25
23
  # *Raises*::
26
- # - RuntimeError if +values+ and +p_values+ are different lengths.
27
- # - RuntimeError if any +p_values+ are negative.
28
- # - RuntimeError if +p_values+ don't sum to one.
24
+ # - RuntimeError if +x_values+ and +p_values+ are different lengths.
25
+ # - RuntimeError if any of the +p_values+ are zero or negative.
26
+ # - RuntimeError if the +p_values+ don't sum to one.
29
27
  #
30
-
31
- def initialize(values, p_values)
32
- if values.length != p_values.length
28
+ def initialize(x_values, p_values)
29
+ if x_values.length != p_values.length
33
30
  raise "Args to AliasTable must be vectors of the same length."
34
- end
35
- p_values.each {|p| raise "p_values must be positive" if p <= 0.0}
36
- if p_values.reduce(:+).not_close_enough(1.0)
31
+ end
32
+ p = p_values.map do |current_p|
33
+ raise "p_values must be positive" if current_p <= 0.0
34
+ Rational(current_p)
35
+ end
36
+ unless (p.reduce(:+) - Rational(1)).abs < TOLERANCE
37
37
  raise "p_values must sum to 1.0"
38
38
  end
39
- @values = values.clone.freeze
40
- @p_values = p_values
41
- @alias = Array.new(values.length)
42
- @p_primary = Array.new(values.length, 1.0)
43
- @equiprob = 1.0 / values.length
44
- @deficit_set = SkewHeap.new
45
- @surplus_set = []
46
- @values.each_index {|i| classify(i) }
47
- until @deficit_set.empty? do
48
- deficit_column = @deficit_set.pop
49
- surplus_column = @surplus_set.shift
50
- @p_primary[deficit_column] = @p_values[deficit_column] / @equiprob
51
- @alias[deficit_column] = @values[surplus_column]
52
- @p_values[surplus_column] -= @equiprob - @p_values[deficit_column]
53
- classify(surplus_column)
39
+ @x = x_values.clone.freeze
40
+ @alias = Array.new(@x.length)
41
+ @p_primary = Array.new(@x.length, 1.0)
42
+ equiprob = Rational(1, @x.length)
43
+ deficit_set = []
44
+ surplus_set = []
45
+ @x.each_index do |i|
46
+ unless (p[i] - equiprob).abs < TOLERANCE
47
+ (p[i] < equiprob ? deficit_set : surplus_set) << i
48
+ end
49
+ end
50
+ until deficit_set.empty? do
51
+ deficit = deficit_set.pop
52
+ surplus = surplus_set.pop
53
+ @p_primary[deficit] = p[deficit] / equiprob
54
+ @alias[deficit] = @x[surplus]
55
+ p[surplus] -= equiprob - p[deficit]
56
+ unless (p[surplus] - equiprob).abs < TOLERANCE
57
+ (p[surplus] < equiprob ? deficit_set : surplus_set) << surplus
58
+ end
54
59
  end
55
60
  end
56
61
 
57
- # Returns a random outcome from the distribution provided to the constructor.
58
- # This process requires constant time, but is not an inversion
62
+ # Returns a random outcome from this object's distribution.
63
+ # The generate method is O(1) time, but is not an inversion
59
64
  # since two uniforms are used for each value that gets generated.
60
- #
65
+ #
61
66
  def generate
62
- column = rand(@values.length)
63
- rand < @p_primary[column] ? @values[column] : @alias[column]
64
- end
65
-
66
- private
67
- def classify(i)
68
- if @p_values[i].not_close_enough(@equiprob)
69
- if @p_values[i] < @equiprob
70
- @deficit_set.push i
71
- else
72
- @surplus_set << i
73
- end
74
- end
67
+ column = rand(@x.length)
68
+ rand <= @p_primary[column] ? @x[column] : @alias[column]
75
69
  end
76
70
 
77
71
  end
78
-
79
- class Numeric
80
- # Expand class Numeric to detect whether two values are within a
81
- # tolerance of 10^-15 of each other.
82
- def not_close_enough(n)
83
- ((self - n) / self).abs > 1E-15
84
- end
85
- end
@@ -0,0 +1,22 @@
1
+ 1,0.003952569169960474
2
+ 2,0.007905138339920948
3
+ 3,0.011857707509881422
4
+ 4,0.015810276679841896
5
+ 5,0.019762845849802372
6
+ 6,0.023715415019762844
7
+ 7,0.027667984189723320
8
+ 8,0.031620553359683792
9
+ 9,0.035573122529644272
10
+ 10,0.039525691699604744
11
+ 11,0.043478260869565216
12
+ 12,0.047430830039525688
13
+ 13,0.051383399209486168
14
+ 14,0.055335968379446640
15
+ 15,0.059288537549407112
16
+ 16,0.063241106719367585
17
+ 17,0.067193675889328064
18
+ 18,0.071146245059288543
19
+ 19,0.075098814229249009
20
+ 20,0.079051383399209488
21
+ 21,0.083003952569169967
22
+ 22,0.086956521739130432
@@ -2,12 +2,13 @@
2
2
 
3
3
  require 'alias'
4
4
 
5
- nvars = 1000000
5
+ nvars = 1_000_000
6
6
  begin
7
7
  at = AliasTable.new(["yes", "no"], [0.3, 0.3, 0.4])
8
8
  nvars.times {print at.generate, "\n"}
9
9
  rescue Exception => e
10
10
  puts e.message
11
+ puts
11
12
  end
12
13
  Dir["test/infile.*"].each do |f_name|
13
14
  x = []
@@ -22,17 +23,21 @@ Dir["test/infile.*"].each do |f_name|
22
23
  probs << inputs[1].to_f
23
24
  n_hat = probs[-1] * nvars
24
25
  half_width = 2.5 * Math::sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
25
- expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
26
+ # expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
27
+ expected_counts[inputs[0]] = [n_hat, half_width]
26
28
  end
27
29
  f.close
28
30
  begin
29
31
  at = AliasTable.new(x, probs)
30
32
  nvars.times {counts[at.generate] += 1}
31
- puts "\nAll four values should be in range 95\% of the time:"
33
+ puts "All four values should be in range 95\% of the time:"
32
34
  counts.each_key do |k|
33
- printf "%s: Expected %s, got %d\n", k, expected_counts[k], counts[k]
35
+ printf "%s: Half-width = %d, Expected - Observed = %d\n",
36
+ k, expected_counts[k][1], expected_counts[k][0] - counts[k]
34
37
  end
38
+ puts
35
39
  rescue Exception => e
36
40
  puts e.message
41
+ puts
37
42
  end
38
43
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aliastable
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: skewheap
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ~>
25
- - !ruby/object:Gem::Version
26
- version: '1.0'
11
+ date: 2015-04-19 00:00:00.000000000 Z
12
+ dependencies: []
27
13
  description: If a categorical distribution has k distinct values, traditional approaches
28
14
  will require O(k) work to pick an outcome with the correct probabilities. This
29
15
  algorithm uses conditional probability to construct a table which will yield outcomes
@@ -33,15 +19,16 @@ executables: []
33
19
  extensions: []
34
20
  extra_rdoc_files: []
35
21
  files:
22
+ - Rakefile
36
23
  - aliastable.gemspec
37
24
  - lgpl.txt
38
25
  - lib/alias.rb
39
- - Rakefile
40
26
  - test/infile.bad.1
41
27
  - test/infile.bad.2
42
28
  - test/infile.bad.3
43
29
  - test/infile.good.1
44
30
  - test/infile.good.2
31
+ - test/infile.good.3
45
32
  - test/test_alias.rb
46
33
  homepage:
47
34
  licenses:
@@ -53,19 +40,18 @@ require_paths:
53
40
  - lib
54
41
  required_ruby_version: !ruby/object:Gem::Requirement
55
42
  requirements:
56
- - - '>='
43
+ - - ">="
57
44
  - !ruby/object:Gem::Version
58
- version: 1.8.1
45
+ version: 1.9.3
59
46
  required_rubygems_version: !ruby/object:Gem::Requirement
60
47
  requirements:
61
- - - '>='
48
+ - - ">="
62
49
  - !ruby/object:Gem::Version
63
50
  version: '0'
64
51
  requirements: []
65
52
  rubyforge_project:
66
- rubygems_version: 2.0.3
53
+ rubygems_version: 2.4.5
67
54
  signing_key:
68
55
  specification_version: 4
69
56
  summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
70
57
  test_files: []
71
- has_rdoc: