splashy 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +6 -0
- data/Gemfile.lock +18 -0
- data/LICENSE.txt +20 -0
- data/README.markdown +60 -0
- data/Rakefile +33 -0
- data/VERSION +1 -0
- data/buckets.gemspec +57 -0
- data/lib/splashy.rb +7 -0
- data/lib/splashy/bucket.rb +29 -0
- data/lib/splashy/buckets.rb +156 -0
- data/test/helper.rb +18 -0
- data/test/test_splashy_buckets.rb +243 -0
- metadata +124 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
git (1.2.5)
|
5
|
+
jeweler (1.6.4)
|
6
|
+
bundler (~> 1.0)
|
7
|
+
git (>= 1.2.5)
|
8
|
+
rake
|
9
|
+
minitest (2.0.2)
|
10
|
+
rake (0.9.2.2)
|
11
|
+
|
12
|
+
PLATFORMS
|
13
|
+
ruby
|
14
|
+
|
15
|
+
DEPENDENCIES
|
16
|
+
bundler (~> 1.0.0)
|
17
|
+
jeweler (~> 1.6.4)
|
18
|
+
minitest
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Tyson Tate
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
Splashy
|
2
|
+
=======
|
3
|
+
|
4
|
+
Simple distribution-based sampling of arbitrary objects from pools. Splashy.
|
5
|
+
Pools. Get it!?
|
6
|
+
|
7
|
+
Disclaimer: I have a BFA in Art, so I'm convinced there's a far better algorithm
|
8
|
+
than the awful one I came up with. Pull request button is thataway.
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
# Initialize with a desired final distribution.
|
12
|
+
buckets = Splashy::Buckets.new( :easy => 0.3, :hard => 0.7 )
|
13
|
+
|
14
|
+
# You can also specify a limit on elements in the final selection, no matter
|
15
|
+
# how many objects you collect.
|
16
|
+
buckets = Splashy::Buckets.new( {:easy => 0.3, :hard => 0.7}, 5 )
|
17
|
+
|
18
|
+
# Fill one-by-one:
|
19
|
+
buckets.add( :easy, obj1 )
|
20
|
+
buckets.add( :hard, obj2 )
|
21
|
+
|
22
|
+
# Fill using blocks:
|
23
|
+
i = 0
|
24
|
+
buckets.fill do |total_count|
|
25
|
+
bucket = [:easy, :hard][total_count % 2]
|
26
|
+
total_count < 100 ? [bucket, object] : nil
|
27
|
+
end
|
28
|
+
buckets.fill( :easy ) do |total_count|
|
29
|
+
total_count < 105 ? object : nil
|
30
|
+
end
|
31
|
+
|
32
|
+
# Get a distribution of objects:
|
33
|
+
buckets = Splashy::Buckets.new( :a => 0.01, :b => 0.19, :c => 0.80 )
|
34
|
+
10.times { |i| buckets.add( :a, "1#{i}") }
|
35
|
+
2.times { |i| buckets.add( :b, "2#{i}") }
|
36
|
+
40.times { |i| buckets.add( :c, "3#{i}") }
|
37
|
+
buckets.select
|
38
|
+
# Returns:
|
39
|
+
# {
|
40
|
+
# :a => ["10"],
|
41
|
+
# :b => ["20", "21"],
|
42
|
+
# :c => ["30", "31", "32", "33", "34", "35", "36", "37"]
|
43
|
+
# }
|
44
|
+
```
|
45
|
+
|
46
|
+
Contributing
|
47
|
+
============
|
48
|
+
|
49
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
50
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
51
|
+
* Fork the project
|
52
|
+
* Start a feature/bugfix branch
|
53
|
+
* Commit and push until you are happy with your contribution
|
54
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
55
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
56
|
+
|
57
|
+
Copyright
|
58
|
+
=========
|
59
|
+
|
60
|
+
Copyright (c) 2011 Tyson Tate. See LICENSE.txt for further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
gem.name = "splashy"
|
17
|
+
gem.homepage = "http://github.com/tysontate/splashy"
|
18
|
+
gem.license = "MIT"
|
19
|
+
gem.summary = "Simple distribution-based sampling of arbitrary objects from buckets."
|
20
|
+
gem.description = "Simple distribution-based sampling of arbitrary objects from any number of buckets. Splashy. Buckets. Get it!?"
|
21
|
+
gem.email = "tyson@tysontate.com"
|
22
|
+
gem.authors = ["Tyson Tate"]
|
23
|
+
end
|
24
|
+
Jeweler::RubygemsDotOrgTasks.new
|
25
|
+
|
26
|
+
require 'rake/testtask'
|
27
|
+
Rake::TestTask.new(:test) do |test|
|
28
|
+
test.libs << 'lib' << 'test'
|
29
|
+
test.pattern = 'test/**/test_*.rb'
|
30
|
+
test.verbose = true
|
31
|
+
end
|
32
|
+
|
33
|
+
task :default => :test
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/buckets.gemspec
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{buckets}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Tyson Tate"]
|
12
|
+
s.date = %q{2011-12-08}
|
13
|
+
s.description = %q{Simple distribution-based sampling of arbitrary objects via the use of, well, buckets.}
|
14
|
+
s.email = %q{tyson@tysontate.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.markdown"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"Gemfile",
|
21
|
+
"Gemfile.lock",
|
22
|
+
"LICENSE.txt",
|
23
|
+
"README.markdown",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"buckets.gemspec",
|
27
|
+
"lib/splashy.rb",
|
28
|
+
"lib/splashy/bucket.rb",
|
29
|
+
"lib/splashy/buckets.rb",
|
30
|
+
"test/helper.rb",
|
31
|
+
"test/test_splashy_buckets.rb"
|
32
|
+
]
|
33
|
+
s.homepage = %q{http://github.com/tysontate/buckets}
|
34
|
+
s.licenses = ["MIT"]
|
35
|
+
s.require_paths = ["lib"]
|
36
|
+
s.rubygems_version = %q{1.4.2}
|
37
|
+
s.summary = %q{Simple distribution-based sampling of arbitrary objects.}
|
38
|
+
|
39
|
+
if s.respond_to? :specification_version then
|
40
|
+
s.specification_version = 3
|
41
|
+
|
42
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
43
|
+
s.add_development_dependency(%q<minitest>, [">= 0"])
|
44
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
45
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
46
|
+
else
|
47
|
+
s.add_dependency(%q<minitest>, [">= 0"])
|
48
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
49
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
50
|
+
end
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<minitest>, [">= 0"])
|
53
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
54
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
data/lib/splashy.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module Splashy
  # Internal: Ordered collector of elements for a single named bucket.
  #
  # NOTE: a bare `private` in the module body does not hide a class
  # definition, so none is used here; this class is an implementation detail
  # by convention only.
  class Bucket
    # Public: The name this bucket was created with.
    attr_reader :name

    # name - The identifier of this bucket (any object usable as a Hash key).
    def initialize( name )
      @name = name
      @elements = []
    end

    # Public: Append an element to the bucket.
    #
    # element - Any object.
    #
    # Returns the bucket's internal Array of elements.
    def <<( element )
      @elements << element
    end

    # Public: Fetch elements in insertion order.
    #
    # count - The maximum number of elements to return.
    #
    # Returns a new Array holding at most `count` elements, taken from the
    # front of the bucket. The bucket itself is not modified.
    def elements( count )
      @elements[0, count]
    end

    # Public: Returns true if no element has been added yet.
    def empty?
      @elements.empty?
    end

    # Public: Returns the Integer number of elements collected so far.
    def count
      @elements.size
    end
  end
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
module Splashy
  # Public: Collects elements into named buckets and selects a subset of them
  # that approximates a desired distribution across those buckets.
  class Buckets
    # Allowed deviation when checking that the wanted distribution sums to
    # 1.0. Sums of Floats are rarely exact (ten shares of 0.1 add up to
    # 0.9999999999999999), so an exact `== 1.0` comparison rejects valid input.
    DISTRIBUTION_TOLERANCE = 1e-9

    # wanted_distribution - A Hash of desired distributions, keyed by bucket
    #                       name: { :a => 0.2, :b => 0.5, :c => 0.3 }
    # wanted_count        - (optional) Maximum total elements to be selected.
    #                       When omitted, the largest selection that the
    #                       collected elements allow is returned.
    #
    # Raises ArgumentError if the distribution does not sum to 1.0.
    def initialize( wanted_distribution, wanted_count=nil )
      total = wanted_distribution.values.inject(0){ |sum, share| sum + share }
      unless ( total - 1.0 ).abs <= DISTRIBUTION_TOLERANCE
        raise ArgumentError, "Distribution must sum to 1.0"
      end
      @wanted_distribution = wanted_distribution
      @wanted_count = wanted_count
      # Buckets are created lazily the first time a name is looked up.
      @buckets = Hash.new do |hash, name|
        hash[name] = Bucket.new( name )
      end
      @total_count = 0
    end

    # Public: Put elements into buckets.
    #
    # bucket_name - If supplied, all yielded elements will be added to that
    #               bucket.
    # &block      - A block that returns (if `bucket_name` is not supplied)
    #               an Array: [bucket_name, element]. If `bucket_name` is
    #               supplied, only the element needs to be returned. The block
    #               receives the number of elements added so far and is called
    #               repeatedly until it returns nil or false.
    #
    # Examples
    #
    #   fill { [bucket_name, element] }
    #   fill( :bucket_name ) { element }
    def fill( bucket_name = nil, &block )
      if bucket_name
        while element = yield( @total_count )
          add( bucket_name, element )
        end
      else
        while pair = yield( @total_count )
          add( *pair )
        end
      end
    end

    # Public: Add a single element to a bucket.
    #
    # Elements added under a name that is not part of the wanted distribution
    # are counted but never selected, because selection only walks the names
    # in the wanted distribution.
    #
    # Returns the Integer count of all elements added so far.
    def add( bucket_name, element )
      @buckets[bucket_name] << element
      @total_count += 1
    end

    # Public: Returns true if the conditions are satisfied enough to select.
    def satisfied?
      assert_satisfied!
      true
    rescue DistributionUnsatisfiedError
      false
    end

    # Public: Return a distribution of elements based on the desired
    # distribution. If a satisfactory distribution is not possible, a
    # DistributionUnsatisfiedError is raised.
    #
    # Returns a Hash of elements based on the desired distribution, keyed by
    # the bucket names.
    def select
      assert_satisfied!

      total_count = estimated_final_count

      selected = @wanted_distribution.keys.inject({}) do |memo, bucket_name|
        count = ( total_count * @wanted_distribution[bucket_name] ).round
        # Every bucket contributes at least one element, however small its
        # share of the distribution is.
        memo[bucket_name] = @buckets[bucket_name].elements( [1, count].max )
        memo
      end

      # Sometimes we need to fudge by a few to meet the `@wanted_count`
      selected = trim( selected ) if @wanted_count

      selected
    end

    protected

    # Trim a given Hash of Arrays keyed by bucket names until it meets
    # @wanted_count. One element is removed per pass from the bucket whose
    # "wanted share / current share" ratio is smallest, i.e. the bucket that
    # is most over-represented. Buckets holding a single element are never
    # trimmed. When several buckets share the smallest ratio, the one that
    # comes last in `selected` is trimmed, which keeps the result
    # deterministic.
    #
    # Returns the same Hash, with its Arrays shortened in place.
    # Raises ArgumentError if no @wanted_count was configured.
    def trim( selected )
      raise ArgumentError, "Can't trim to a nil @wanted_count" unless @wanted_count

      while self.class.elements_count( selected ) > @wanted_count
        trim_bucket_name = nil
        smallest_variance = nil

        selected.each do |bucket_name, elements|
          next unless elements.size > 1
          current_percent = elements.size / @wanted_count.to_f
          variance = @wanted_distribution[bucket_name] / current_percent
          if smallest_variance.nil? || variance <= smallest_variance
            trim_bucket_name = bucket_name
            smallest_variance = variance
          end
        end

        break if trim_bucket_name.nil? # All have one element. Can't trim.
        selected[trim_bucket_name].pop
      end

      selected
    end

    # Returns count of all elements in the Hash's Array values.
    #
    # NOTE: `protected` above does not apply to singleton methods, so this
    # class method is public.
    def self.elements_count( hash )
      hash.values.inject(0){ |memo, array| memo + array.count }
    end

    # Returns projected final number of elements that will be returned to
    # satisfy the requirements. If this is less than `@wanted_count`, when
    # supplied, we can't meet the requirements.
    def estimated_final_count
      limiter = limiter_bucket
      final_count = ( limiter.count / @wanted_distribution[limiter.name] ).floor
      final_count = [@wanted_count, final_count].min if @wanted_count
      final_count
    end

    # Raises DistributionUnsatisfiedError when the collected elements cannot
    # produce a selection: too few elements overall, a wanted bucket with no
    # elements at all, or (with a wanted count) too few elements to reach it.
    def assert_satisfied!
      if @total_count < @wanted_distribution.size
        raise DistributionUnsatisfiedError,
          "Not enough elements (#{@total_count})."
      end

      # `select` takes at least one element from every wanted bucket, so an
      # empty wanted bucket can never be satisfied.
      empty_names = @wanted_distribution.keys.select do |bucket_name|
        @buckets[bucket_name].empty?
      end
      unless empty_names.empty?
        raise DistributionUnsatisfiedError,
          "No elements in bucket(s): #{empty_names.map { |name| name.inspect }.join( ', ' )}."
      end

      return unless @wanted_count

      if @total_count < @wanted_count
        raise DistributionUnsatisfiedError,
          "Not enough elements (#{@total_count}) to satisfy your desired count (#{@wanted_count})."
      end

      if estimated_final_count < @wanted_count
        raise DistributionUnsatisfiedError,
          "Distribution prevents the satisfaction of your desired count (#{@wanted_count})."
      end
    end

    # Return the bucket that is the limiter in the distribution.
    #
    # Only buckets named in the wanted distribution are considered, so
    # elements added under other names cannot break the calculation.
    def limiter_bucket
      # Smallest value of "count / desired percent" is the limiter.
      candidates = @wanted_distribution.keys.map { |bucket_name| @buckets[bucket_name] }
      candidates.min_by { |bucket| bucket.count / @wanted_distribution[bucket.name] }
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'minitest/autorun'
|
11
|
+
# require 'minitest/unit'
|
12
|
+
require 'minitest/benchmark'
|
13
|
+
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
+
require 'splashy'
|
17
|
+
|
18
|
+
MiniTest::Unit.autorun
|
@@ -0,0 +1,243 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
module BucketsSpecHelpers
  # Adds `a` elements to bucket :a, `b` elements to :b and `c` elements to :c
  # of `@buckets`. Elements are Strings made of the bucket's ordinal (1, 2 or
  # 3) followed by the element index, e.g. "10", "11", "20", "30".
  #
  # Returns `c`.
  def fill_with_counts( a, b, c )
    0.upto( a - 1 ) { |index| @buckets.add( :a, "1#{index}" ) }
    0.upto( b - 1 ) { |index| @buckets.add( :b, "2#{index}" ) }
    0.upto( c - 1 ) { |index| @buckets.add( :c, "3#{index}" ) }
    c
  end
end
|
10
|
+
|
11
|
+
describe Splashy::Buckets do
|
12
|
+
include BucketsSpecHelpers
|
13
|
+
|
14
|
+
describe "failure" do
|
15
|
+
it "fails with bad distribution" do
|
16
|
+
assert_raises( ArgumentError ) do
|
17
|
+
Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.33 )
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it "fails with empty pool" do
|
22
|
+
@buckets = Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.34 )
|
23
|
+
assert !@buckets.satisfied?
|
24
|
+
assert_raises( Splashy::DistributionUnsatisfiedError ) do
|
25
|
+
@buckets.select
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
it "fails with one empty pool" do
|
30
|
+
@buckets = Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.34 )
|
31
|
+
fill_with_counts( 0, 1, 1 )
|
32
|
+
assert !@buckets.satisfied?
|
33
|
+
assert_raises( Splashy::DistributionUnsatisfiedError ) do
|
34
|
+
@buckets.select
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it "fails with not enough for the desired count" do
|
39
|
+
@buckets = Splashy::Buckets.new({ :a => 0.33, :b => 0.33, :c => 0.34 }, 4 )
|
40
|
+
fill_with_counts( 1, 1, 1 )
|
41
|
+
assert !@buckets.satisfied?
|
42
|
+
assert_raises( Splashy::DistributionUnsatisfiedError ) do
|
43
|
+
@buckets.select
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it "fails with an empty bucket" do
|
48
|
+
@buckets = Splashy::Buckets.new({ :a => 0.33, :b => 0.33, :c => 0.34 } )
|
49
|
+
fill_with_counts( 1, 1, 0 )
|
50
|
+
assert !@buckets.satisfied?
|
51
|
+
assert_raises( Splashy::DistributionUnsatisfiedError ) do
|
52
|
+
@buckets.select
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
it "fails with distribution being such that desired count can't be met" do
|
57
|
+
@buckets = Splashy::Buckets.new({ :a => 0.80, :b => 0.1, :c => 0.10 }, 10 )
|
58
|
+
fill_with_counts( 2, 20, 20 )
|
59
|
+
assert !@buckets.satisfied?
|
60
|
+
assert_raises( Splashy::DistributionUnsatisfiedError ) do
|
61
|
+
@buckets.select
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "success" do
|
67
|
+
it "fills a single bucket, which is dumb" do
|
68
|
+
@buckets = Splashy::Buckets.new( :a => 1 )
|
69
|
+
@buckets.add( :a, "1" )
|
70
|
+
assert @buckets.satisfied?
|
71
|
+
assert_equal( {:a=>["1"]}, @buckets.select )
|
72
|
+
end
|
73
|
+
|
74
|
+
it "selects from a small pool" do
|
75
|
+
@buckets = Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.34 )
|
76
|
+
fill_with_counts( 1, 1, 1 )
|
77
|
+
assert @buckets.satisfied?
|
78
|
+
assert_equal( {:a=>["10"], :b=>["20"], :c=>["30"]}, @buckets.select )
|
79
|
+
end
|
80
|
+
|
81
|
+
it "selects from a small pool with more than enough in one bucket" do
|
82
|
+
@buckets = Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.34 )
|
83
|
+
fill_with_counts( 1, 1, 2 )
|
84
|
+
assert @buckets.satisfied?
|
85
|
+
assert_equal( {:a=>["10"], :b=>["20"], :c=>["30"]}, @buckets.select )
|
86
|
+
end
|
87
|
+
|
88
|
+
it "selects from a small pool with a limiter bucket" do
|
89
|
+
@buckets = Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.34 )
|
90
|
+
fill_with_counts( 1, 3, 3 )
|
91
|
+
assert @buckets.satisfied?
|
92
|
+
assert_equal( {:a=>["10"], :b=>["20"], :c=>["30"]}, @buckets.select )
|
93
|
+
end
|
94
|
+
|
95
|
+
it "selects from a larger pool" do
|
96
|
+
@buckets = Splashy::Buckets.new( :a => 0.33, :b => 0.33, :c => 0.34 )
|
97
|
+
fill_with_counts( 3, 3, 3 )
|
98
|
+
assert @buckets.satisfied?
|
99
|
+
assert_equal(
|
100
|
+
{:a=>["10", "11", "12"], :b=>["20", "21", "22"], :c=>["30", "31", "32"]},
|
101
|
+
@buckets.select
|
102
|
+
)
|
103
|
+
end
|
104
|
+
|
105
|
+
it "selects from a pool with a unequal distribution" do
|
106
|
+
@buckets = Splashy::Buckets.new( :a => 0.10, :b => 0.10, :c => 0.80 )
|
107
|
+
fill_with_counts( 3, 3, 3 )
|
108
|
+
assert @buckets.satisfied?
|
109
|
+
assert_equal(
|
110
|
+
{:a=>["10"], :b=>["20"], :c=>["30", "31"]},
|
111
|
+
@buckets.select
|
112
|
+
)
|
113
|
+
end
|
114
|
+
|
115
|
+
it "selects from a pool with an \"opposite\" distribution" do
|
116
|
+
@buckets = Splashy::Buckets.new( :a => 0.10, :b => 0.10, :c => 0.80 )
|
117
|
+
fill_with_counts( 5, 5, 2 )
|
118
|
+
assert @buckets.satisfied?
|
119
|
+
assert_equal(
|
120
|
+
{:a=>["10"], :b=>["20"], :c=>["30", "31"]},
|
121
|
+
@buckets.select
|
122
|
+
)
|
123
|
+
end
|
124
|
+
|
125
|
+
it "selects from a pool with a skewed distribution" do
|
126
|
+
@buckets = Splashy::Buckets.new( :a => 0.10, :b => 0.10, :c => 0.80 )
|
127
|
+
fill_with_counts( 10, 10, 1 )
|
128
|
+
assert @buckets.satisfied?
|
129
|
+
assert_equal(
|
130
|
+
{:a=>["10"], :b=>["20"], :c=>["30"]},
|
131
|
+
@buckets.select
|
132
|
+
)
|
133
|
+
end
|
134
|
+
|
135
|
+
it "selects from a pool with another skewed distribution" do
|
136
|
+
@buckets = Splashy::Buckets.new( :a => 0.01, :b => 0.19, :c => 0.80 )
|
137
|
+
fill_with_counts( 10, 10, 1 )
|
138
|
+
assert @buckets.satisfied?
|
139
|
+
assert_equal(
|
140
|
+
{:a=>["10"], :b=>["20"], :c=>["30"]},
|
141
|
+
@buckets.select
|
142
|
+
)
|
143
|
+
end
|
144
|
+
|
145
|
+
it "selects from a pool with yet another skewed distribution" do
|
146
|
+
@buckets = Splashy::Buckets.new( :a => 0.01, :b => 0.19, :c => 0.80 )
|
147
|
+
fill_with_counts( 10, 2, 40 )
|
148
|
+
assert @buckets.satisfied?
|
149
|
+
assert_equal(
|
150
|
+
{:a=>["10"], :b=>["20", "21"], :c=>["30", "31", "32", "33", "34", "35", "36", "37"]},
|
151
|
+
@buckets.select
|
152
|
+
)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
describe "filling buckets" do
|
157
|
+
it "accepts blocks" do
|
158
|
+
@buckets = Splashy::Buckets.new( :a => 0.01, :b => 0.19, :c => 0.80 )
|
159
|
+
a = [[:a, "1"]]
|
160
|
+
b = [[:b, "2"]]
|
161
|
+
c = [[:c, "3"]]
|
162
|
+
@buckets.fill { a.pop }
|
163
|
+
@buckets.fill { b.pop }
|
164
|
+
@buckets.fill { c.pop }
|
165
|
+
assert @buckets.satisfied?
|
166
|
+
assert_equal(
|
167
|
+
{:a=>["1"], :b=>["2"], :c=>["3"]},
|
168
|
+
@buckets.select
|
169
|
+
)
|
170
|
+
end
|
171
|
+
|
172
|
+
it "accepts blocks with specified buckets" do
|
173
|
+
@buckets = Splashy::Buckets.new( :a => 0.01, :b => 0.19, :c => 0.80 )
|
174
|
+
a = ["1"]
|
175
|
+
b = ["2"]
|
176
|
+
c = ["3"]
|
177
|
+
@buckets.fill( :a ) { a.pop }
|
178
|
+
@buckets.fill( :b ) { b.pop }
|
179
|
+
@buckets.fill( :c ) { c.pop }
|
180
|
+
assert @buckets.satisfied?
|
181
|
+
assert_equal(
|
182
|
+
{:a=>["1"], :b=>["2"], :c=>["3"]},
|
183
|
+
@buckets.select
|
184
|
+
)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
describe "success with an enforced count" do
|
189
|
+
it "selects from a pool with an even distribution" do
|
190
|
+
@buckets = Splashy::Buckets.new( {:a => 0.33, :b => 0.33, :c => 0.34}, 6 )
|
191
|
+
fill_with_counts( 10, 2, 40 )
|
192
|
+
assert @buckets.satisfied?
|
193
|
+
assert_equal(
|
194
|
+
{:a=>["10", "11"], :b=>["20", "21"], :c=>["30", "31"]},
|
195
|
+
@buckets.select
|
196
|
+
)
|
197
|
+
end
|
198
|
+
|
199
|
+
it "selects from a pool with an uneven distribution" do
|
200
|
+
@buckets = Splashy::Buckets.new( {:a => 0.33, :b => 0.33, :c => 0.34}, 5 )
|
201
|
+
fill_with_counts( 10, 2, 40 )
|
202
|
+
assert @buckets.satisfied?
|
203
|
+
assert_equal(
|
204
|
+
{:a=>["10", "11"], :b=>["20"], :c=>["30", "31"]},
|
205
|
+
@buckets.select
|
206
|
+
)
|
207
|
+
end
|
208
|
+
|
209
|
+
it "selects from a pool with a skewed distribution" do
|
210
|
+
@buckets = Splashy::Buckets.new( {:a => 0.01, :b => 0.19, :c => 0.80}, 8 )
|
211
|
+
fill_with_counts( 10, 2, 40 )
|
212
|
+
assert @buckets.satisfied?
|
213
|
+
assert_equal(
|
214
|
+
{:a=>["10"], :b=>["20"], :c=>["30", "31", "32", "33", "34", "35"]},
|
215
|
+
@buckets.select
|
216
|
+
)
|
217
|
+
end
|
218
|
+
|
219
|
+
it "selects from a pool with a wacky distribution" do
|
220
|
+
@buckets = Splashy::Buckets.new( {:a => 0.01, :b => 0.01, :c => 0.98}, 3 )
|
221
|
+
fill_with_counts( 3, 3, 3 )
|
222
|
+
assert @buckets.satisfied?
|
223
|
+
assert_equal(
|
224
|
+
{:a=>["10"], :b=>["20"], :c=>["30"]},
|
225
|
+
@buckets.select
|
226
|
+
)
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
describe "performance" do
|
231
|
+
it "grows linearly with more elements" do
  puts
  # Cycle over every slot so that all three buckets receive elements
  # (`i % 3` never reached the last slot, leaving :c empty).
  bucket_cycle = [:a, :a, :b, :c]
  assert_performance_linear 0.999 do |n|
    @buckets = Splashy::Buckets.new( :a => 0.20, :b => 0.30, :c => 0.50 )
    n.times do |i|
      @buckets.add( bucket_cycle[i % bucket_cycle.size], i.to_s )
    end
    begin
      @buckets.select
    rescue Splashy::DistributionUnsatisfiedError
      # Small values of n cannot satisfy the distribution; only the timing
      # matters here, so that specific failure is ignored.
    end
  end
end
|
242
|
+
end
|
243
|
+
end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: splashy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Tyson Tate
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-12-08 00:00:00 -08:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
prerelease: false
|
23
|
+
name: minitest
|
24
|
+
type: :development
|
25
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
requirement: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
prerelease: false
|
37
|
+
name: bundler
|
38
|
+
type: :development
|
39
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 23
|
45
|
+
segments:
|
46
|
+
- 1
|
47
|
+
- 0
|
48
|
+
- 0
|
49
|
+
version: 1.0.0
|
50
|
+
requirement: *id002
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
prerelease: false
|
53
|
+
name: jeweler
|
54
|
+
type: :development
|
55
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ~>
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 7
|
61
|
+
segments:
|
62
|
+
- 1
|
63
|
+
- 6
|
64
|
+
- 4
|
65
|
+
version: 1.6.4
|
66
|
+
requirement: *id003
|
67
|
+
description: Simple distribution-based sampling of arbitrary objects from any number of buckets. Splashy. Buckets. Get it!?
|
68
|
+
email: tyson@tysontate.com
|
69
|
+
executables: []
|
70
|
+
|
71
|
+
extensions: []
|
72
|
+
|
73
|
+
extra_rdoc_files:
|
74
|
+
- LICENSE.txt
|
75
|
+
- README.markdown
|
76
|
+
files:
|
77
|
+
- Gemfile
|
78
|
+
- Gemfile.lock
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.markdown
|
81
|
+
- Rakefile
|
82
|
+
- VERSION
|
83
|
+
- buckets.gemspec
|
84
|
+
- lib/splashy.rb
|
85
|
+
- lib/splashy/bucket.rb
|
86
|
+
- lib/splashy/buckets.rb
|
87
|
+
- test/helper.rb
|
88
|
+
- test/test_splashy_buckets.rb
|
89
|
+
has_rdoc: true
|
90
|
+
homepage: http://github.com/tysontate/splashy
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 3
|
104
|
+
segments:
|
105
|
+
- 0
|
106
|
+
version: "0"
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
hash: 3
|
113
|
+
segments:
|
114
|
+
- 0
|
115
|
+
version: "0"
|
116
|
+
requirements: []
|
117
|
+
|
118
|
+
rubyforge_project:
|
119
|
+
rubygems_version: 1.4.2
|
120
|
+
signing_key:
|
121
|
+
specification_version: 3
|
122
|
+
summary: Simple distribution-based sampling of arbitrary objects from buckets.
|
123
|
+
test_files: []
|
124
|
+
|