aliastable 3.1.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: daff6ffd485a6eabe1a2341625be2ffd7b43e821b66d6b0a50363f5822f22cdd
4
- data.tar.gz: cffa205ec028890811239ab49dfaf36111d53c749a8b90971fd658da5ad0fe5c
3
+ metadata.gz: 88cae2e262098e88436d41789056946a36ec55270d4e52c33242f5018b687595
4
+ data.tar.gz: e6b9c4befb4950137807729b171178b0210c794693a1045dbd94ea10606aa593
5
5
  SHA512:
6
- metadata.gz: f227b5d8d874414e8ff7ab1dea7295cf1cd212f9a7cdebd8df90cb2e74ff95ff07bda966df03ccc205abdf13397990de16988ccc011d9a025ab2964ac3e08a5e
7
- data.tar.gz: 4b2742b75bc5e1d8c089bcf653bb7b37c2c0a6757a754d97cace15b1b13a1dc2744dbc294641bf4790d238a7e9832d3f01c031d0f74fad043086fef251cb1148
6
+ metadata.gz: 294e9171f62c9602f1eb3fe9886daeb1fb899431d906e26a33d95c0af4398bad147a62b7e2eee8f7d52a736dd4aa57a67bbf2e5b767ed847817eae59409b7bcf
7
+ data.tar.gz: 545e97af416b1d7ad20f81444811343eafb0d0ec74a30ce2b5b754cfb4336d8aefb337b907fa4b1b7453da796b5d4b48da941b9840bfeb878c3bff4bc7f614e8
data/aliastable.gemspec CHANGED
@@ -1,27 +1,20 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "3.1.0"
2
+ _VERSION = "4.0.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "aliastable"
6
6
  s.version = _VERSION
7
- s.date = "2018-01-16"
7
+ s.date = "2023-01-21"
8
8
  s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
9
9
  s.email = "pjs@alum.mit.edu"
10
10
  s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities. Table generation requires O(k) time, but subsequent generation is done in O(1) time."
11
11
  s.author = "Paul J Sanchez"
12
+ s.homepage = "https://bitbucket.org/paul_j_sanchez/aliastable"
12
13
  s.files = %w[
13
14
  aliastable.gemspec
14
15
  lib/aliastable.rb
15
- Rakefile
16
- test/infile.bad.1
17
- test/infile.bad.2
18
- test/infile.bad.3
19
- test/infile.good.1
20
- test/infile.good.2
21
- test/infile.good.3
22
- test/infile.good.4
23
- test/test_alias.rb
24
16
  ]
25
- s.required_ruby_version = '>= 2.0.0'
26
- s.license = 'LGPL'
17
+ s.required_ruby_version = '>= 2.6.0'
18
+ s.license = 'MIT'
19
+ s.metadata["homepage_uri"] = s.homepage
27
20
  end
data/lib/aliastable.rb CHANGED
@@ -9,6 +9,7 @@
9
9
  # probabilities.
10
10
  #
11
11
  class AliasTable
12
+ include Enumerable
12
13
  # Construct an alias table from a set of values and their associated
13
14
  # probabilities. Values and their probabilities must be synchronized,
14
15
  # i.e., they must be arrays of the same length. Values can be
@@ -28,34 +29,52 @@ class AliasTable
28
29
  def initialize(x_set, p_value)
29
30
  fail 'x_set & p_value must have same length.' if x_set.size != p_value.size
30
31
  fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
31
- @p_primary = p_value.map(&:rationalize)
32
- fail 'p_values must sum to 1' unless @p_primary.reduce(:+) == Rational(1)
33
- @x = x_set.clone.freeze
34
- @alias = Array.new(@x.length)
35
- parity = Rational(1, @x.length)
36
- group = @p_primary.each_index.group_by { |i| @p_primary[i] <=> parity }
32
+ p_primary = p_value.map(&:rationalize)
33
+ fail 'p_values must sum to 1' unless p_primary.reduce(:+) == Rational(1)
34
+ x = x_set.clone.freeze
35
+ len = x.length
36
+ col_alias = Array.new(len)
37
+ parity = Rational(1, len)
38
+ group = p_primary.each_index.group_by { |i| p_primary[i] <=> parity }
37
39
  deficit_set = group[-1]
38
40
  surplus_set = group[1]
39
- until deficit_set.empty?
40
- deficit = deficit_set.pop
41
- surplus = surplus_set.pop
42
- @p_primary[surplus] -= parity - @p_primary[deficit]
43
- @p_primary[deficit] /= parity
44
- @alias[deficit] = @x[surplus]
45
- if @p_primary[surplus] == parity
46
- @p_primary[surplus] = Rational(1)
47
- else
48
- (@p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
41
+ if deficit_set.nil?
42
+ @enum = Enumerator.new { |y| loop { y << x[rand(len)] } }.lazy
43
+ else
44
+ until deficit_set.empty?
45
+ deficit = deficit_set.pop
46
+ surplus = surplus_set.pop
47
+ p_primary[surplus] -= parity - p_primary[deficit]
48
+ p_primary[deficit] /= parity
49
+ col_alias[deficit] = x[surplus]
50
+ if p_primary[surplus] == parity
51
+ p_primary[surplus] = Rational(1)
52
+ else
53
+ (p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
54
+ end
49
55
  end
56
+ @enum = Enumerator.new do |y|
57
+ loop do
58
+ column = rand(len)
59
+ y << ((rand <= p_primary[column]) ? x[column] : col_alias[column])
60
+ end
61
+ end.lazy
50
62
  end
51
63
  end
52
64
 
53
- # Return a random outcome from this object's distribution.
54
- # The generate method is O(1) time, but is not an inversion
65
+ def each(&block)
66
+ @enum.each(&block)
67
+ end
68
+
69
+ # Return a random outcome from this object's distribution. The
70
+ # next (aka generate) method is O(1) time, but is not an inversion
55
71
  # since two uniforms are used for each value that gets generated.
72
+ # The exception is that when all probabilities are equal, it is
73
+ # a true inversion.
56
74
  #
57
- def generate
58
- column = rand(@x.length)
59
- rand <= @p_primary[column] ? @x[column] : @alias[column]
75
+ def next
76
+ @enum.next
60
77
  end
78
+
79
+ alias generate next # for backwards compatibility
61
80
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aliastable
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-16 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: If a categorical distribution has k distinct values, traditional approaches
14
14
  will require O(k) work to pick an outcome with the correct probabilities. This
@@ -20,22 +20,14 @@ executables: []
20
20
  extensions: []
21
21
  extra_rdoc_files: []
22
22
  files:
23
- - Rakefile
24
23
  - aliastable.gemspec
25
24
  - lib/aliastable.rb
26
- - test/infile.bad.1
27
- - test/infile.bad.2
28
- - test/infile.bad.3
29
- - test/infile.good.1
30
- - test/infile.good.2
31
- - test/infile.good.3
32
- - test/infile.good.4
33
- - test/test_alias.rb
34
- homepage:
25
+ homepage: https://bitbucket.org/paul_j_sanchez/aliastable
35
26
  licenses:
36
- - LGPL
37
- metadata: {}
38
- post_install_message:
27
+ - MIT
28
+ metadata:
29
+ homepage_uri: https://bitbucket.org/paul_j_sanchez/aliastable
30
+ post_install_message:
39
31
  rdoc_options: []
40
32
  require_paths:
41
33
  - lib
@@ -43,16 +35,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
43
35
  requirements:
44
36
  - - ">="
45
37
  - !ruby/object:Gem::Version
46
- version: 2.0.0
38
+ version: 2.6.0
47
39
  required_rubygems_version: !ruby/object:Gem::Requirement
48
40
  requirements:
49
41
  - - ">="
50
42
  - !ruby/object:Gem::Version
51
43
  version: '0'
52
44
  requirements: []
53
- rubyforge_project:
54
- rubygems_version: 2.7.4
55
- signing_key:
45
+ rubygems_version: 3.4.3
46
+ signing_key:
56
47
  specification_version: 4
57
48
  summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
58
49
  test_files: []
data/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- require 'rake/testtask'
2
-
3
- Rake::TestTask.new do |t|
4
- t.libs << 'test'
5
- end
6
-
7
- desc "Run tests"
8
- task :default => :test
data/test/infile.bad.1 DELETED
@@ -1,5 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,-0.3
4
- 42,0.4
5
- 8096,0.6
data/test/infile.bad.2 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.3
data/test/infile.bad.3 DELETED
@@ -1,3 +0,0 @@
1
- 1,0.333333
2
- 3,0.333333
3
- 7,0.333333
data/test/infile.good.1 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.4
data/test/infile.good.2 DELETED
@@ -1,4 +0,0 @@
1
- aardvark,0.01
2
- baboon,0.02
3
- coati,0.07
4
- doggie,0.9
data/test/infile.good.3 DELETED
@@ -1,22 +0,0 @@
1
- 1,1/253
2
- 2,2/253
3
- 3,3/253
4
- 4,4/253
5
- 5,5/253
6
- 6,6/253
7
- 7,7/253
8
- 8,8/253
9
- 9,9/253
10
- 10,10/253
11
- 11,11/253
12
- 12,12/253
13
- 13,13/253
14
- 14,14/253
15
- 15,15/253
16
- 16,16/253
17
- 17,17/253
18
- 18,18/253
19
- 19,19/253
20
- 20,20/253
21
- 21,21/253
22
- 22,22/253
data/test/infile.good.4 DELETED
@@ -1,3 +0,0 @@
1
- 1,0.333333
2
- 3,0.333333
3
- 7,0.333334
data/test/test_alias.rb DELETED
@@ -1,44 +0,0 @@
1
- #!/usr/bin/env ruby -w
2
-
3
- require_relative '../lib/aliastable.rb'
4
-
5
- nvars = 1_000_000
6
- begin
7
- at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
8
- nvars.times { print at.generate, "\n" }
9
- rescue RuntimeError => e
10
- p e
11
- puts
12
- end
13
- Dir['test/infile.*'].each do |f_name|
14
- x = []
15
- probs = []
16
- f = File.open(f_name, 'r')
17
- counts = {}
18
- expected_counts = {}
19
- while line = f.gets
20
- inputs = line.strip.split(/[\s,;:]+/)
21
- x << inputs[0]
22
- counts[inputs[0]] = 0
23
- probs << Rational(inputs[1])
24
- n_hat = probs[-1] * nvars
25
- half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
26
- expected_counts[inputs[0]] = [n_hat, half_width]
27
- end
28
- f.close
29
- p x
30
- p probs
31
- begin
32
- at = AliasTable.new(x, probs)
33
- nvars.times { counts[at.generate] += 1 }
34
- puts 'All values should be in range almost always:'
35
- counts.each_key do |k|
36
- printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
37
- k, expected_counts[k][1], expected_counts[k][0] - counts[k]
38
- end
39
- puts
40
- rescue RuntimeError => e
41
- p e
42
- puts
43
- end
44
- end