aliastable 3.0.3 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 64997be896934a0e70ba0c4776e26f08a68fae50
4
- data.tar.gz: 7e7d8a38a21f8355be9c0e83559fb3e5689ea556
2
+ SHA256:
3
+ metadata.gz: 88cae2e262098e88436d41789056946a36ec55270d4e52c33242f5018b687595
4
+ data.tar.gz: e6b9c4befb4950137807729b171178b0210c794693a1045dbd94ea10606aa593
5
5
  SHA512:
6
- metadata.gz: e49595d9a6da320b2e22db5672f5fc4546a4919edc3915dbd6e9b1cec69beff947b16006f76a55727c147946dd08cec4f2aeaad4be31df6e9a606de27190ed6d
7
- data.tar.gz: f696a2b2f0b5de5eb7b9932f4a121f0b1e94e988284eb0123e79c45bef8e59dee1c3b089ab5e24ddd448ab17e22acfe367cbb52e8b3130e7745307c3bd4ae638
6
+ metadata.gz: 294e9171f62c9602f1eb3fe9886daeb1fb899431d906e26a33d95c0af4398bad147a62b7e2eee8f7d52a736dd4aa57a67bbf2e5b767ed847817eae59409b7bcf
7
+ data.tar.gz: 545e97af416b1d7ad20f81444811343eafb0d0ec74a30ce2b5b754cfb4336d8aefb337b907fa4b1b7453da796b5d4b48da941b9840bfeb878c3bff4bc7f614e8
data/aliastable.gemspec CHANGED
@@ -1,26 +1,20 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "3.0.3"
2
+ _VERSION = "4.0.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "aliastable"
6
6
  s.version = _VERSION
7
- s.date = "2015-09-10"
7
+ s.date = "2023-01-21"
8
8
  s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
9
9
  s.email = "pjs@alum.mit.edu"
10
- s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
10
+ s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities. This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities. Table generation requires O(k) time, but subsequent generation is done in O(1) time."
11
11
  s.author = "Paul J Sanchez"
12
+ s.homepage = "https://bitbucket.org/paul_j_sanchez/aliastable"
12
13
  s.files = %w[
13
14
  aliastable.gemspec
14
15
  lib/aliastable.rb
15
- Rakefile
16
- test/infile.bad.1
17
- test/infile.bad.2
18
- test/infile.bad.3
19
- test/infile.good.1
20
- test/infile.good.2
21
- test/infile.good.3
22
- test/test_alias.rb
23
16
  ]
24
- s.required_ruby_version = '>= 1.9.3'
25
- s.license = 'LGPL'
17
+ s.required_ruby_version = '>= 2.6.0'
18
+ s.license = 'MIT'
19
+ s.metadata["homepage_uri"] = s.homepage
26
20
  end
data/lib/aliastable.rb CHANGED
@@ -9,6 +9,7 @@
9
9
  # probabilities.
10
10
  #
11
11
  class AliasTable
12
+ include Enumerable
12
13
  # Construct an alias table from a set of values and their associated
13
14
  # probabilities. Values and their probabilities must be synchronized,
14
15
  # i.e., they must be arrays of the same length. Values can be
@@ -26,38 +27,54 @@ class AliasTable
26
27
  # - RuntimeError if +p_value+s don't sum to one. Rationals will avoid this.
27
28
  #
28
29
  def initialize(x_set, p_value)
29
- if x_set.length != p_value.length
30
- fail 'Args to AliasTable must be vectors of the same length.'
31
- end
30
+ fail 'x_set & p_value must have same length.' if x_set.size != p_value.size
32
31
  fail 'p_values must be positive' unless p_value.all? { |value| value > 0 }
33
- @p_primary = p_value.map(&:to_r)
34
- fail 'p_values must sum to 1' unless @p_primary.reduce(:+) == Rational(1)
35
- @x = x_set.clone.freeze
36
- @alias = Array.new(@x.length)
37
- parity = Rational(1, @x.length)
38
- group = @p_primary.each_index.group_by { |i| @p_primary[i] <=> parity }
32
+ p_primary = p_value.map(&:rationalize)
33
+ fail 'p_values must sum to 1' unless p_primary.reduce(:+) == Rational(1)
34
+ x = x_set.clone.freeze
35
+ len = x.length
36
+ col_alias = Array.new(len)
37
+ parity = Rational(1, len)
38
+ group = p_primary.each_index.group_by { |i| p_primary[i] <=> parity }
39
39
  deficit_set = group[-1]
40
40
  surplus_set = group[1]
41
- until deficit_set.empty?
42
- deficit = deficit_set.pop
43
- surplus = surplus_set.pop
44
- @p_primary[surplus] -= parity - @p_primary[deficit]
45
- @p_primary[deficit] /= parity
46
- @alias[deficit] = @x[surplus]
47
- if @p_primary[surplus] == parity
48
- @p_primary[surplus] = Rational(1)
49
- else
50
- (@p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
41
+ if deficit_set.nil?
42
+ @enum = Enumerator.new { |y| loop { y << x[rand(len)] } }.lazy
43
+ else
44
+ until deficit_set.empty?
45
+ deficit = deficit_set.pop
46
+ surplus = surplus_set.pop
47
+ p_primary[surplus] -= parity - p_primary[deficit]
48
+ p_primary[deficit] /= parity
49
+ col_alias[deficit] = x[surplus]
50
+ if p_primary[surplus] == parity
51
+ p_primary[surplus] = Rational(1)
52
+ else
53
+ (p_primary[surplus] < parity ? deficit_set : surplus_set) << surplus
54
+ end
51
55
  end
56
+ @enum = Enumerator.new do |y|
57
+ loop do
58
+ column = rand(len)
59
+ y << ((rand <= p_primary[column]) ? x[column] : col_alias[column])
60
+ end
61
+ end.lazy
52
62
  end
53
63
  end
54
64
 
55
- # Return a random outcome from this object's distribution.
56
- # The generate method is O(1) time, but is not an inversion
65
+ def each(&block)
66
+ @enum.each(&block)
67
+ end
68
+
69
+ # Return a random outcome from this object's distribution. The
70
+ # next (aka generate) method is O(1) time, but is not an inversion
57
71
  # since two uniforms are used for each value that gets generated.
72
+ # The exception is that when all probabilities are equal, it is
73
+ # a true inversion.
58
74
  #
59
- def generate
60
- column = rand(@x.length)
61
- rand <= @p_primary[column] ? @x[column] : @alias[column]
75
+ def next
76
+ @enum.next
62
77
  end
78
+
79
+ alias generate next # for backwards compatibility
63
80
  end
metadata CHANGED
@@ -1,39 +1,33 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aliastable
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.3
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-10 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: If a categorical distribution has k distinct values, traditional approaches
14
14
  will require O(k) work to pick an outcome with the correct probabilities. This
15
15
  algorithm uses conditional probability to construct a table which will yield outcomes
16
- with the correct probabilities, but in O(1) time.
16
+ with the correct probabilities. Table generation requires O(k) time, but subsequent
17
+ generation is done in O(1) time.
17
18
  email: pjs@alum.mit.edu
18
19
  executables: []
19
20
  extensions: []
20
21
  extra_rdoc_files: []
21
22
  files:
22
- - Rakefile
23
23
  - aliastable.gemspec
24
24
  - lib/aliastable.rb
25
- - test/infile.bad.1
26
- - test/infile.bad.2
27
- - test/infile.bad.3
28
- - test/infile.good.1
29
- - test/infile.good.2
30
- - test/infile.good.3
31
- - test/test_alias.rb
32
- homepage:
25
+ homepage: https://bitbucket.org/paul_j_sanchez/aliastable
33
26
  licenses:
34
- - LGPL
35
- metadata: {}
36
- post_install_message:
27
+ - MIT
28
+ metadata:
29
+ homepage_uri: https://bitbucket.org/paul_j_sanchez/aliastable
30
+ post_install_message:
37
31
  rdoc_options: []
38
32
  require_paths:
39
33
  - lib
@@ -41,16 +35,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
41
35
  requirements:
42
36
  - - ">="
43
37
  - !ruby/object:Gem::Version
44
- version: 1.9.3
38
+ version: 2.6.0
45
39
  required_rubygems_version: !ruby/object:Gem::Requirement
46
40
  requirements:
47
41
  - - ">="
48
42
  - !ruby/object:Gem::Version
49
43
  version: '0'
50
44
  requirements: []
51
- rubyforge_project:
52
- rubygems_version: 2.4.5.1
53
- signing_key:
45
+ rubygems_version: 3.4.3
46
+ signing_key:
54
47
  specification_version: 4
55
48
  summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
56
49
  test_files: []
data/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- require 'rake/testtask'
2
-
3
- Rake::TestTask.new do |t|
4
- t.libs << 'test'
5
- end
6
-
7
- desc "Run tests"
8
- task :default => :test
data/test/infile.bad.1 DELETED
@@ -1,5 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,-0.3
4
- 42,0.4
5
- 8096,0.6
data/test/infile.bad.2 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.3
data/test/infile.bad.3 DELETED
@@ -1,3 +0,0 @@
1
- 1,0.333333
2
- 3,0.333333
3
- 7,0.333333
data/test/infile.good.1 DELETED
@@ -1,4 +0,0 @@
1
- 1,0.1
2
- 3,0.2
3
- 7,0.3
4
- 42,0.4
data/test/infile.good.2 DELETED
@@ -1,4 +0,0 @@
1
- aardvark,0.01
2
- baboon,0.02
3
- coati,0.07
4
- doggie,0.9
data/test/infile.good.3 DELETED
@@ -1,22 +0,0 @@
1
- 1,1/253
2
- 2,2/253
3
- 3,3/253
4
- 4,4/253
5
- 5,5/253
6
- 6,6/253
7
- 7,7/253
8
- 8,8/253
9
- 9,9/253
10
- 10,10/253
11
- 11,1/23
12
- 12,12/253
13
- 13,13/253
14
- 14,14/253
15
- 15,15/253
16
- 16,16/253
17
- 17,17/253
18
- 18,18/253
19
- 19,19/253
20
- 20,20/253
21
- 21,21/253
22
- 22,2/23
data/test/test_alias.rb DELETED
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env ruby -w
2
-
3
- require_relative '../lib/aliastable.rb'
4
-
5
- nvars = 1_000_000
6
- begin
7
- at = AliasTable.new(%w(yes no), [0.3, 0.3, 0.4])
8
- nvars.times { print at.generate, "\n" }
9
- rescue RuntimeError => e
10
- p e
11
- puts
12
- end
13
- Dir['test/infile.*'].each do |f_name|
14
- x = []
15
- probs = []
16
- f = File.open(f_name, 'r')
17
- counts = {}
18
- expected_counts = {}
19
- while line = f.gets
20
- inputs = line.strip.split(/[\s,;:]+/)
21
- x << inputs[0]
22
- counts[inputs[0]] = 0
23
- probs << inputs[1].to_r
24
- n_hat = probs[-1] * nvars
25
- half_width = 2.5 * Math.sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
26
- expected_counts[inputs[0]] = [n_hat, half_width]
27
- end
28
- f.close
29
- begin
30
- at = AliasTable.new(x, probs)
31
- nvars.times { counts[at.generate] += 1 }
32
- puts 'All values should be in range almost always:'
33
- counts.each_key do |k|
34
- printf "%s: Allowable Range = %d, Expected - Observed = %d\n",
35
- k, expected_counts[k][1], expected_counts[k][0] - counts[k]
36
- end
37
- puts
38
- rescue RuntimeError => e
39
- p e
40
- puts
41
- end
42
- end