aliastable 3.0.3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 64997be896934a0e70ba0c4776e26f08a68fae50
4
- data.tar.gz: 7e7d8a38a21f8355be9c0e83559fb3e5689ea556
2
+ SHA256:
3
+ metadata.gz: 88cae2e262098e88436d41789056946a36ec55270d4e52c33242f5018b687595
4
+ data.tar.gz: e6b9c4befb4950137807729b171178b0210c794693a1045dbd94ea10606aa593
5
5
  SHA512:
6
- metadata.gz: e49595d9a6da320b2e22db5672f5fc4546a4919edc3915dbd6e9b1cec69beff947b16006f76a55727c147946dd08cec4f2aeaad4be31df6e9a606de27190ed6d
7
- data.tar.gz: f696a2b2f0b5de5eb7b9932f4a121f0b1e94e988284eb0123e79c45bef8e59dee1c3b089ab5e24ddd448ab17e22acfe367cbb52e8b3130e7745307c3bd4ae638
6
+ metadata.gz: 294e9171f62c9602f1eb3fe9886daeb1fb899431d906e26a33d95c0af4398bad147a62b7e2eee8f7d52a736dd4aa57a67bbf2e5b767ed847817eae59409b7bcf
7
+ data.tar.gz: 545e97af416b1d7ad20f81444811343eafb0d0ec74a30ce2b5b754cfb4336d8aefb337b907fa4b1b7453da796b5d4b48da941b9840bfeb878c3bff4bc7f614e8
data/aliastable.gemspec CHANGED
@@ -1,26 +1,20 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "3.0.3"
2
+ _VERSION = "4.0.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "aliastable"
6
6
  s.version = _VERSION
7
- s.date = "2015-09-10"
7
+ s.date = "2023-01-21"
8
8
  s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
9
9
  s.email = "pjs@alum.mit.edu"
10
- s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
10
+ s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities. Table generation requires O(k) time, but subsequent generation is done in O(1) time."
11
11
  s.author = "Paul J Sanchez"
12
+ s.homepage = "https://bitbucket.org/paul_j_sanchez/aliastable"
12
13
  s.files = %w[
13
14
  aliastable.gemspec
14
15
  lib/aliastable.rb
15
- Rakefile
16
- test/infile.bad.1
17
- test/infile.bad.2
18
- test/infile.bad.3
19
- test/infile.good.1
20
- test/infile.good.2
21
- test/infile.good.3
22
- test/test_alias.rb
23
16
  ]
24
- s.required_ruby_version = '>= 1.9.3'
25
- s.license = 'LGPL'
17
+ s.required_ruby_version = '>= 2.6.0'
18
+ s.license = 'MIT'
19
+ s.metadata["homepage_uri"] = s.homepage
26
20
  end
data/lib/aliastable.rb CHANGED
@@ -9,6 +9,7 @@
9
9
  # probabilities.
10
10
  #
11
11
  class AliasTable
12
+ include Enumerable
12
13
  # Construct an alias table from a set of values and their associated
13
14
  # probabilities. Values and their probabilities must be synchronized,
14
15
  # i.e., they must be arrays of the same length. Values can be
@@ -26,38 +27,54 @@ class AliasTable
26
27
  # - RuntimeError if +p_value+s don't sum to one. Rationals will avoid this.
27
28
  #
28
29
  def initialize(x_set, p_value)
29
- if x_set.length != p_value.length
30
- fail 'Args to AliasTable must be vectors of the same length.'
31
- end
30
+ fail 'x_set & p_value must have same length.' if x_set.size != p_value.size
32
31
  fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
33
- @p_primary = p_value.map(&:to_r)
34
- fail 'p_values must sum to 1' unless @p_primary.reduce(:+) == Rational(1)
35
- @x = x_set.clone.freeze
36
- @alias = Array.new(@x.length)
37
- parity = Rational(1, @x.length)
38
- group = @p_primary.each_index.group_by { |i| @p_primary[i] <=> parity }
32
+ p_primary = p_value.map(&:rationalize)
33
+ fail 'p_values must sum to 1' unless p_primary.reduce(:+) == Rational(1)
34
+ x = x_set.clone.freeze
35
+ len = x.length
36
+ col_alias = Array.new(len)
37
+ parity = Rational(1, len)
38
+ group = p_primary.each_index.group_by { |i| p_primary[i] <=> parity }
39
39
  deficit_set = group[-1]
40
40
  surplus_set = group[1]
41
- until deficit_set.empty?
42
- deficit = deficit_set.pop
43
- surplus = surplus_set.pop
44
- @p_primary[surplus] -= parity - @p_primary[deficit]
45
- @p_primary[deficit] /= parity
46
- @alias[deficit] = @x[surplus]
47
- if @p_primary[surplus] == parity
48
- @p_primary[surplus] = Rational(1)
49
- else
50
- (@p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
41
+ if deficit_set.nil?
42
+ @enum = Enumerator.new { |y| loop { y << x[rand(len)] } }.lazy
43
+ else
44
+ until deficit_set.empty?
45
+ deficit = deficit_set.pop
46
+ surplus = surplus_set.pop
47
+ p_primary[surplus] -= parity - p_primary[deficit]
48
+ p_primary[deficit] /= parity
49
+ col_alias[deficit] = x[surplus]
50
+ if p_primary[surplus] == parity
51
+ p_primary[surplus] = Rational(1)
52
+ else
53
+ (p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
54
+ end
51
55
  end
56
+ @enum = Enumerator.new do |y|
57
+ loop do
58
+ column = rand(len)
59
+ y << ((rand <= p_primary[column]) ? x[column] : col_alias[column])
60
+ end
61
+ end.lazy
52
62
  end
53
63
  end
54
64
 
55
- # Return a random outcome from this object's distribution.
56
- # The generate method is O(1) time, but is not an inversion
65
+ def each(&block)
66
+ @enum.each(&block)
67
+ end
68
+
69
+ # Return a random outcome from this object's distribution. The
70
+ # next (aka generate) method is O(1) time, but is not an inversion
57
71
  # since two uniforms are used for each value that gets generated.
72
+ # The exception is that when all probabilities are equal, it is
73
+ # a true inversion.
58
74
  #
59
- def generate
60
- column = rand(@x.length)
61
- rand <= @p_primary[column] ? @x[column] : @alias[column]
75
+ def next
76
+ @enum.next
62
77
  end
78
+
79
+ alias generate next # for backwards compatibility
63
80
  end
metadata CHANGED
@@ -1,39 +1,33 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aliastable
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.3
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-10 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: If a categorical distribution has k distinct values, traditional approaches
14
14
  will require O(k) work to pick an outcome with the correct probabilities. This
15
15
  algorithm uses conditional probability to construct a table which will yield outcomes
16
- with the correct probabilities, but in O(1) time.
16
+ with the correct probabilities. Table generation requires O(k) time, but subsequent
17
+ generation is done in O(1) time.
17
18
  email: pjs@alum.mit.edu
18
19
  executables: []
19
20
  extensions: []
20
21
  extra_rdoc_files: []
21
22
  files:
22
- - Rakefile
23
23
  - aliastable.gemspec
24
24
  - lib/aliastable.rb
25
- - test/infile.bad.1
26
- - test/infile.bad.2
27
- - test/infile.bad.3
28
- - test/infile.good.1
29
- - test/infile.good.2
30
- - test/infile.good.3
31
- - test/test_alias.rb
32
- homepage:
25
+ homepage: https://bitbucket.org/paul_j_sanchez/aliastable
33
26
  licenses:
34
- - LGPL
35
- metadata: {}
36
- post_install_message:
27
+ - MIT
28
+ metadata:
29
+ homepage_uri: https://bitbucket.org/paul_j_sanchez/aliastable
30
+ post_install_message:
37
31
  rdoc_options: []
38
32
  require_paths:
39
33
  - lib
@@ -41,16 +35,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
41
35
  requirements:
42
36
  - - ">="
43
37
  - !ruby/object:Gem::Version
44
- version: 1.9.3
38
+ version: 2.6.0
45
39
  required_rubygems_version: !ruby/object:Gem::Requirement
46
40
  requirements:
47
41
  - - ">="
48
42
  - !ruby/object:Gem::Version
49
43
  version: '0'
50
44
  requirements: []
51
- rubyforge_project:
52
- rubygems_version: 2.4.5.1
53
- signing_key:
45
+ rubygems_version: 3.4.3
46
+ signing_key:
54
47
  specification_version: 4
55
48
  summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
56
49
  test_files: []
data/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- require 'rake/testtask'
2
-
3
- Rake::TestTask.new do |t|
4
- t.libs << 'test'
5
- end
6
-
7
- desc "Run tests"
8
- task :default => :test
data/test/infile.bad.1 DELETED
@@ -1,5 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,-0.3
4
- 42,0.4
5
- 8096,0.6
data/test/infile.bad.2 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.3
data/test/infile.bad.3 DELETED
@@ -1,3 +0,0 @@
1
- 1,0.333333
2
- 3,0.333333
3
- 7,0.333333
data/test/infile.good.1 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.4
data/test/infile.good.2 DELETED
@@ -1,4 +0,0 @@
1
- aardvark,0.01
2
- baboon,0.02
3
- coati,0.07
4
- doggie,0.9
data/test/infile.good.3 DELETED
@@ -1,22 +0,0 @@
1
- 1,1/253
2
- 2,2/253
3
- 3,3/253
4
- 4,4/253
5
- 5,5/253
6
- 6,6/253
7
- 7,7/253
8
- 8,8/253
9
- 9,9/253
10
- 10,10/253
11
- 11,1/23
12
- 12,12/253
13
- 13,13/253
14
- 14,14/253
15
- 15,15/253
16
- 16,16/253
17
- 17,17/253
18
- 18,18/253
19
- 19,19/253
20
- 20,20/253
21
- 21,21/253
22
- 22,2/23
data/test/test_alias.rb DELETED
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env ruby -w
2
-
3
- require_relative '../lib/aliastable.rb'
4
-
5
- nvars = 1_000_000
6
- begin
7
- at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
8
- nvars.times { print at.generate, "\n" }
9
- rescue RuntimeError => e
10
- p e
11
- puts
12
- end
13
- Dir['test/infile.*'].each do |f_name|
14
- x = []
15
- probs = []
16
- f = File.open(f_name, 'r')
17
- counts = {}
18
- expected_counts = {}
19
- while line = f.gets
20
- inputs = line.strip.split(/[\s,;:]+/)
21
- x << inputs[0]
22
- counts[inputs[0]] = 0
23
- probs << inputs[1].to_r
24
- n_hat = probs[-1] * nvars
25
- half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
26
- expected_counts[inputs[0]] = [n_hat, half_width]
27
- end
28
- f.close
29
- begin
30
- at = AliasTable.new(x, probs)
31
- nvars.times { counts[at.generate] += 1 }
32
- puts 'All values should be in range almost always:'
33
- counts.each_key do |k|
34
- printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
35
- k, expected_counts[k][1], expected_counts[k][0] - counts[k]
36
- end
37
- puts
38
- rescue RuntimeError => e
39
- p e
40
- puts
41
- end
42
- end