RubyGems - aliastable - Versions diffs - 1.0.0 → 3.0.0 - Mend

aliastable 1.0.0 → 3.0.0

Files changed (6) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 795e9607a67a1039e1714f38208a8b9ec8a55151
-  data.tar.gz: 10c976abff2116ea8e7df9606a8dc0cbb97031de
+  metadata.gz: 0122f41cb149120e7213250d548fb0ac909484e6
+  data.tar.gz: d7c7376ef3985fa43f7f59afe817b9ff1d38d1eb
 SHA512:
-  metadata.gz: c8e932de5ef8453d29c2381ff2bf3c2346d5c756494b4f4ed6298f7f3cbafb00014e99f559e0e9dec6f6de290293e3bf675b038fac433ba1bb3c0ae797d1b4e7
-  data.tar.gz: 38cb5fb9db393fc76f5b1ff945f61330d166e706b9a957e4d511ac69e31138b065aed07a7ebaf0b412889f376569d31e135ab154e4356ace39a5cdc3cc8e9c20
+  metadata.gz: 0dc5359f8618ff2d58d9b88b0be91d60ef62a3dc9b36eb47779ac88ea4ecf54e37e31ab825502794391a894d4f3f1a1c0b28eeafe02fab3e32bbd40b18b6c65a
+  data.tar.gz: 196f3c2cd4a1d82bdac8234e0ca169d70e4036179ed34d60b25ed2efd72c0ac29612988120d24eee5824f7a7a924be5b2580750e641d6488a88d1c19ee779eb2

data/aliastable.gemspec CHANGED

@@ -1,10 +1,10 @@
 # -*- ruby -*-
-_VERSION = "1.0.0"
+_VERSION = "3.0.0"
 Gem::Specification.new do |s|
   s.name = "aliastable"
   s.version = _VERSION
-  s.date = "2013-03-23"
+  s.date = "2015-04-19"
   s.summary = "Efficiently generate random outcomes from an arbitrary categorical distribution."
   s.email = "pjs@alum.mit.edu"
   s.description = "If a categorical distribution has k distinct values, traditional approaches will require O(k) work to pick an outcome with the correct probabilities.  This algorithm uses conditional probability to construct a table which will yield outcomes with the correct probabilities, but in O(1) time."
@@ -19,9 +19,9 @@ Gem::Specification.new do |s|
     test/infile.bad.3
     test/infile.good.1
     test/infile.good.2
+    test/infile.good.3
     test/test_alias.rb
   ]
-  s.add_runtime_dependency 'skewheap', '~> 1.0'
-  s.required_ruby_version = '>= 1.8.1'
+  s.required_ruby_version = '>= 1.9.3'
   s.license = 'LGPL'
 end

data/lib/alias.rb CHANGED

@@ -1,7 +1,5 @@
 #!/usr/bin/env ruby -w
-require 'skewheap'
 # Generate values from a categorical distribution in constant
 # time, regardless of the number of categories.  This clever algorithm
 # uses conditional probability to construct a table comprised of columns
@@ -11,7 +9,7 @@ require 'skewheap'
 # probabilities.
 #
 class AliasTable
+  TOLERANCE = Rational(1,1E15)
   # Construct an alias table from a set of values and their associated
   # probabilities.  Values and their probabilities must be synchronized,
   # i.e., they must be arrays of the same length.  Values can be
@@ -19,67 +17,55 @@ class AliasTable
   # sum to one.
   #
   # *Arguments*::
-  #   - +values+ -> the set of values to generate from.
-  #   - +p_values+ -> the synchronized set of probabilities associated
-  #     with the values set.
+  #   - +x_values+ -> the set of values to generate from.
+  #   - +p_values+ -> the synchronized set of probabilities associated
+  #     with the values in +x_values+.
   # *Raises*::
-  #   - RuntimeError if +values+ and +p_values+ are different lengths.
-  #   - RuntimeError if any +p_values+ are negative.
-  #   - RuntimeError if +p_values+ don't sum to one.
+  #   - RuntimeError if +x_values+ and +p_values+ are different lengths.
+  #   - RuntimeError if any of the +p_values+ is not positive.
+  #   - RuntimeError if the +p_values+ don't sum to one.
   #
-  def initialize(values, p_values)
-    if values.length != p_values.length
+  def initialize(x_values, p_values)
+    if x_values.length != p_values.length
       raise "Args to AliasTable must be vectors of the same length."
-    end
-    p_values.each {|p| raise "p_values must be positive" if p <= 0.0}
-    if p_values.reduce(:+).not_close_enough(1.0)
+    end
+    p = p_values.map do |current_p|
+      raise "p_values must be positive" if current_p <= 0.0
+      Rational(current_p)
+    end
+    unless (p.reduce(:+) - Rational(1)).abs < TOLERANCE
       raise "p_values must sum to 1.0"
     end
-    @values = values.clone.freeze
-    @p_values = p_values
-    @alias = Array.new(values.length)
-    @p_primary = Array.new(values.length, 1.0)
-    @equiprob = 1.0 / values.length
-    @deficit_set = SkewHeap.new
-    @surplus_set = []
-    @values.each_index {|i| classify(i) }
-    until @deficit_set.empty? do
-      deficit_column = @deficit_set.pop
-      surplus_column = @surplus_set.shift
-      @p_primary[deficit_column] = @p_values[deficit_column] / @equiprob
-      @alias[deficit_column] = @values[surplus_column]
-      @p_values[surplus_column] -= @equiprob - @p_values[deficit_column]
-      classify(surplus_column)
+    @x = x_values.clone.freeze
+    @alias = Array.new(@x.length)
+    @p_primary = Array.new(@x.length, 1.0)
+    equiprob = Rational(1, @x.length)
+    deficit_set = []
+    surplus_set = []
+    @x.each_index do |i|
+      unless (p[i] - equiprob).abs < TOLERANCE
+        (p[i] < equiprob ? deficit_set : surplus_set) << i
+      end
+    end
+    until deficit_set.empty? do
+      deficit = deficit_set.pop
+      surplus = surplus_set.pop
+      @p_primary[deficit] = p[deficit] / equiprob
+      @alias[deficit] = @x[surplus]
+      p[surplus] -= equiprob - p[deficit]
+      unless (p[surplus] - equiprob).abs < TOLERANCE
+        (p[surplus] < equiprob ? deficit_set : surplus_set) << surplus
+      end
     end
   end
-  # Returns a random outcome from the distribution provided to the constructor.
-  # This process requires constant time, but is not an inversion
+  # Returns a random outcome from this object's distribution.
+  # The generate method is O(1) time, but is not an inversion
   # since two uniforms are used for each value that gets generated.
-  #
+  #
   def generate
-    column = rand(@values.length)
-    rand < @p_primary[column] ? @values[column] : @alias[column]
-  end
-  private
-  def classify(i)
-    if @p_values[i].not_close_enough(@equiprob)
-      if @p_values[i] < @equiprob
-        @deficit_set.push i
-      else
-        @surplus_set << i
-      end
-    end
+    column = rand(@x.length)
+    rand <= @p_primary[column] ? @x[column] : @alias[column]
   end
 end
-class Numeric
-  # Expand class Numeric to detect whether two values are within a
-  # tolerance of 10^-15 of each other.
-  def not_close_enough(n)
-    ((self - n) / self).abs > 1E-15
-  end
-end

data/test/infile.good.3 ADDED

@@ -0,0 +1,22 @@
+1,0.003952569169960474
+2,0.007905138339920948
+3,0.011857707509881422
+4,0.015810276679841896
+5,0.019762845849802372
+6,0.023715415019762844
+7,0.027667984189723320
+8,0.031620553359683792
+9,0.035573122529644272
+10,0.039525691699604744
+11,0.043478260869565216
+12,0.047430830039525688
+13,0.051383399209486168
+14,0.055335968379446640
+15,0.059288537549407112
+16,0.063241106719367585
+17,0.067193675889328064
+18,0.071146245059288543
+19,0.075098814229249009
+20,0.079051383399209488
+21,0.083003952569169967
+22,0.086956521739130432

data/test/test_alias.rb CHANGED

@@ -2,12 +2,13 @@
 require 'alias'
-nvars = 1000000
+nvars = 1_000_000
 begin
   at = AliasTable.new(["yes", "no"], [0.3, 0.3, 0.4])
   nvars.times {print at.generate, "\n"}
 rescue Exception => e
   puts e.message
+  puts
 end
 Dir["test/infile.*"].each do |f_name|
   x = []
@@ -22,17 +23,21 @@ Dir["test/infile.*"].each do |f_name|
     probs << inputs[1].to_f
     n_hat = probs[-1] * nvars
     half_width = 2.5 * Math::sqrt(n_hat * (1.0 - probs[-1])) if n_hat > 0
-    expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
+    # expected_counts[inputs[0]] = "%d +/- %d" % [n_hat, half_width]
+    expected_counts[inputs[0]] = [n_hat, half_width]
   end
   f.close
   begin
     at = AliasTable.new(x, probs)
     nvars.times {counts[at.generate] += 1}
-    puts "\nAll four values should be in range 95\% of the time:"
+    puts "All four values should be in range 95\% of the time:"
     counts.each_key do |k|
-      printf "%s: Expected %s, got %d\n", k, expected_counts[k], counts[k]
+      printf "%s: Half-width = %d, Expected - Observed = %d\n",
+        k, expected_counts[k][1], expected_counts[k][0] - counts[k]
     end
+    puts
   rescue Exception => e
     puts e.message
+    puts
   end
 end

metadata CHANGED

@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: aliastable
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 3.0.0
 platform: ruby
 authors:
 - Paul J Sanchez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-23 00:00:00.000000000 Z
-dependencies:
-- !ruby/object:Gem::Dependency
-  name: skewheap
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: '1.0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: '1.0'
+date: 2015-04-19 00:00:00.000000000 Z
+dependencies: []
 description: If a categorical distribution has k distinct values, traditional approaches
   will require O(k) work to pick an outcome with the correct probabilities.  This
   algorithm uses conditional probability to construct a table which will yield outcomes
@@ -33,15 +19,16 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- Rakefile
 - aliastable.gemspec
 - lgpl.txt
 - lib/alias.rb
-- Rakefile
 - test/infile.bad.1
 - test/infile.bad.2
 - test/infile.bad.3
 - test/infile.good.1
 - test/infile.good.2
+- test/infile.good.3
 - test/test_alias.rb
 homepage:
 licenses:
@@ -53,19 +40,18 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
-      version: 1.8.1
+      version: 1.9.3
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.3
+rubygems_version: 2.4.5
 signing_key:
 specification_version: 4
 summary: Efficiently generate random outcomes from an arbitrary categorical distribution.
 test_files: []
-has_rdoc: