aliastable 3.1.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: daff6ffd485a6eabe1a2341625be2ffd7b43e821b66d6b0a50363f5822f22cdd
4
- data.tar.gz: cffa205ec028890811239ab49dfaf36111d53c749a8b90971fd658da5ad0fe5c
3
+ metadata.gz: 88cae2e262098e88436d41789056946a36ec55270d4e52c33242f5018b687595
4
+ data.tar.gz: e6b9c4befb4950137807729b171178b0210c794693a1045dbd94ea10606aa593
5
5
  SHA512:
6
- metadata.gz: f227b5d8d874414e8ff7ab1dea7295cf1cd212f9a7cdebd8df90cb2e74ff95ff07bda966df03ccc205abdf13397990de16988ccc011d9a025ab2964ac3e08a5e
7
- data.tar.gz: 4b2742b75bc5e1d8c089bcf653bb7b37c2c0a6757a754d97cace15b1b13a1dc2744dbc294641bf4790d238a7e9832d3f01c031d0f74fad043086fef251cb1148
6
+ metadata.gz: 294e9171f62c9602f1eb3fe9886daeb1fb899431d906e26a33d95c0af4398bad147a62b7e2eee8f7d52a736dd4aa57a67bbf2e5b767ed847817eae59409b7bcf
7
+ data.tar.gz: 545e97af416b1d7ad20f81444811343eafb0d0ec74a30ce2b5b754cfb4336d8aefb337b907fa4b1b7453da796b5d4b48da941b9840bfeb878c3bff4bc7f614e8
data/aliastable.gemspec CHANGED
@@ -1,27 +1,20 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "3.1.0"
2
+ _VERSION = "4.0.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "aliastable"
6
6
  s.version = _VERSION
7
- s.date = "2018-01-16"
7
+ s.date = "2023-01-21"
8
8
  s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
9
9
  s.email = "pjs@alum.mit.edu"
10
10
  s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities. Table generation requires O(k) time, but subsequent generation is done in O(1) time."
11
11
  s.author = "Paul J Sanchez"
12
+ s.homepage = "https://bitbucket.org/paul_j_sanchez/aliastable"
12
13
  s.files = %w[
13
14
  aliastable.gemspec
14
15
  lib/aliastable.rb
15
- Rakefile
16
- test/infile.bad.1
17
- test/infile.bad.2
18
- test/infile.bad.3
19
- test/infile.good.1
20
- test/infile.good.2
21
- test/infile.good.3
22
- test/infile.good.4
23
- test/test_alias.rb
24
16
  ]
25
- s.required_ruby_version = '>= 2.0.0'
26
- s.license = 'LGPL'
17
+ s.required_ruby_version = '>= 2.6.0'
18
+ s.license = 'MIT'
19
+ s.metadata["homepage_uri"] = s.homepage
27
20
  end
data/lib/aliastable.rb CHANGED
@@ -9,6 +9,7 @@
9
9
  # probabilities.
10
10
  #
11
11
  class AliasTable
12
+ include Enumerable
12
13
  # Construct an alias table from a set of values and their associated
13
14
  # probabilities. Values and their probabilities must be synchronized,
14
15
  # i.e., they must be arrays of the same length. Values can be
@@ -28,34 +29,52 @@ class AliasTable
28
29
  def initialize(x_set, p_value)
29
30
  fail 'x_set & p_value must have same length.' if x_set.size != p_value.size
30
31
  fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
31
- @p_primary = p_value.map(&:rationalize)
32
- fail 'p_values must sum to 1' unless @p_primary.reduce(:+) == Rational(1)
33
- @x = x_set.clone.freeze
34
- @alias = Array.new(@x.length)
35
- parity = Rational(1, @x.length)
36
- group = @p_primary.each_index.group_by { |i| @p_primary[i] <=> parity }
32
+ p_primary = p_value.map(&:rationalize)
33
+ fail 'p_values must sum to 1' unless p_primary.reduce(:+) == Rational(1)
34
+ x = x_set.clone.freeze
35
+ len = x.length
36
+ col_alias = Array.new(len)
37
+ parity = Rational(1, len)
38
+ group = p_primary.each_index.group_by { |i| p_primary[i] <=> parity }
37
39
  deficit_set = group[-1]
38
40
  surplus_set = group[1]
39
- until deficit_set.empty?
40
- deficit = deficit_set.pop
41
- surplus = surplus_set.pop
42
- @p_primary[surplus] -= parity - @p_primary[deficit]
43
- @p_primary[deficit] /= parity
44
- @alias[deficit] = @x[surplus]
45
- if @p_primary[surplus] == parity
46
- @p_primary[surplus] = Rational(1)
47
- else
48
- (@p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
41
+ if deficit_set.nil?
42
+ @enum = Enumerator.new { |y| loop { y << x[rand(len)] } }.lazy
43
+ else
44
+ until deficit_set.empty?
45
+ deficit = deficit_set.pop
46
+ surplus = surplus_set.pop
47
+ p_primary[surplus] -= parity - p_primary[deficit]
48
+ p_primary[deficit] /= parity
49
+ col_alias[deficit] = x[surplus]
50
+ if p_primary[surplus] == parity
51
+ p_primary[surplus] = Rational(1)
52
+ else
53
+ (p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
54
+ end
49
55
  end
56
+ @enum = Enumerator.new do |y|
57
+ loop do
58
+ column = rand(len)
59
+ y << ((rand <= p_primary[column]) ? x[column] : col_alias[column])
60
+ end
61
+ end.lazy
50
62
  end
51
63
  end
52
64
 
53
- # Return a random outcome from this object's distribution.
54
- # The generate method is O(1) time, but is not an inversion
65
+ def each(&block)
66
+ @enum.each(&block)
67
+ end
68
+
69
+ # Return a random outcome from this object's distribution. The
70
+ # next (aka generate) method is O(1) time, but is not an inversion
55
71
  # since two uniforms are used for each value that gets generated.
72
+ # The exception is that when all probabilities are equal, it is
73
+ # a true inversion.
56
74
  #
57
- def generate
58
- column = rand(@x.length)
59
- rand <= @p_primary[column] ? @x[column] : @alias[column]
75
+ def next
76
+ @enum.next
60
77
  end
78
+
79
+ alias generate next # for backwards compatibility
61
80
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aliastable
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-16 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: If a categorical distribution has k distinct values, traditional approaches
14
14
  will require O(k) work to pick an outcome with the correct probabilities. This
@@ -20,22 +20,14 @@ executables: []
20
20
  extensions: []
21
21
  extra_rdoc_files: []
22
22
  files:
23
- - Rakefile
24
23
  - aliastable.gemspec
25
24
  - lib/aliastable.rb
26
- - test/infile.bad.1
27
- - test/infile.bad.2
28
- - test/infile.bad.3
29
- - test/infile.good.1
30
- - test/infile.good.2
31
- - test/infile.good.3
32
- - test/infile.good.4
33
- - test/test_alias.rb
34
- homepage:
25
+ homepage: https://bitbucket.org/paul_j_sanchez/aliastable
35
26
  licenses:
36
- - LGPL
37
- metadata: {}
38
- post_install_message:
27
+ - MIT
28
+ metadata:
29
+ homepage_uri: https://bitbucket.org/paul_j_sanchez/aliastable
30
+ post_install_message:
39
31
  rdoc_options: []
40
32
  require_paths:
41
33
  - lib
@@ -43,16 +35,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
43
35
  requirements:
44
36
  - - ">="
45
37
  - !ruby/object:Gem::Version
46
- version: 2.0.0
38
+ version: 2.6.0
47
39
  required_rubygems_version: !ruby/object:Gem::Requirement
48
40
  requirements:
49
41
  - - ">="
50
42
  - !ruby/object:Gem::Version
51
43
  version: '0'
52
44
  requirements: []
53
- rubyforge_project:
54
- rubygems_version: 2.7.4
55
- signing_key:
45
+ rubygems_version: 3.4.3
46
+ signing_key:
56
47
  specification_version: 4
57
48
  summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
58
49
  test_files: []
data/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- require 'rake/testtask'
2
-
3
- Rake::TestTask.new do |t|
4
- t.libs << 'test'
5
- end
6
-
7
- desc "Run tests"
8
- task :default => :test
data/test/infile.bad.1 DELETED
@@ -1,5 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,-0.3
4
- 42,0.4
5
- 8096,0.6
data/test/infile.bad.2 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.3
data/test/infile.bad.3 DELETED
@@ -1,3 +0,0 @@
1
- 1,0.333333
2
- 3,0.333333
3
- 7,0.333333
data/test/infile.good.1 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.4
data/test/infile.good.2 DELETED
@@ -1,4 +0,0 @@
1
- aardvark,0.01
2
- baboon,0.02
3
- coati,0.07
4
- doggie,0.9
data/test/infile.good.3 DELETED
@@ -1,22 +0,0 @@
1
- 1,1/253
2
- 2,2/253
3
- 3,3/253
4
- 4,4/253
5
- 5,5/253
6
- 6,6/253
7
- 7,7/253
8
- 8,8/253
9
- 9,9/253
10
- 10,10/253
11
- 11,11/253
12
- 12,12/253
13
- 13,13/253
14
- 14,14/253
15
- 15,15/253
16
- 16,16/253
17
- 17,17/253
18
- 18,18/253
19
- 19,19/253
20
- 20,20/253
21
- 21,21/253
22
- 22,22/253
data/test/infile.good.4 DELETED
@@ -1,3 +0,0 @@
1
- 1,0.333333
2
- 3,0.333333
3
- 7,0.333334
data/test/test_alias.rb DELETED
@@ -1,44 +0,0 @@
1
- #!/usr/bin/env ruby -w
2
-
3
- require_relative '../lib/aliastable.rb'
4
-
5
- nvars = 1_000_000
6
- begin
7
- at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
8
- nvars.times { print at.generate, "\n" }
9
- rescue RuntimeError => e
10
- p e
11
- puts
12
- end
13
- Dir['test/infile.*'].each do |f_name|
14
- x = []
15
- probs = []
16
- f = File.open(f_name, 'r')
17
- counts = {}
18
- expected_counts = {}
19
- while line = f.gets
20
- inputs = line.strip.split(/[\s,;:]+/)
21
- x << inputs[0]
22
- counts[inputs[0]] = 0
23
- probs << Rational(inputs[1])
24
- n_hat = probs[-1] * nvars
25
- half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
26
- expected_counts[inputs[0]] = [n_hat, half_width]
27
- end
28
- f.close
29
- p x
30
- p probs
31
- begin
32
- at = AliasTable.new(x, probs)
33
- nvars.times { counts[at.generate] += 1 }
34
- puts 'All values should be in range almost always:'
35
- counts.each_key do |k|
36
- printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
37
- k, expected_counts[k][1], expected_counts[k][0] - counts[k]
38
- end
39
- puts
40
- rescue RuntimeError => e
41
- p e
42
- puts
43
- end
44
- end