splashy 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -46,6 +46,12 @@ buckets.select
46
46
  Changelog
47
47
  =========
48
48
 
49
+ * 0.1.0 - Several bug fixes, add "neediest_buckets" method to Buckets to allow
50
+ you to choose which buckets to add to first if an element can be put in
51
+ multiple buckets, final distributions can now have empty buckets if it means
52
+ we meet the wanted distribution better (e.g. a 99% / 1% distribution with 5
53
+ and 1 elements, respectively, which will now select 4 and 0 elements if your
54
+ wanted count is 4).
49
55
  * 0.0.2 - Raise `ArgumentError` when trying to add to a bucket that doesn't
50
56
  exist, don't consider an empty bucket "satisfied".
51
57
  * 0.0.1 - Initial release.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.1.0
@@ -14,8 +14,12 @@ module Splashy
14
14
  @elements << element
15
15
  end
16
16
 
17
- def elements( count )
18
- @elements[0, count]
17
+ def elements( count = nil )
18
+ if count
19
+ @elements[0, count]
20
+ else
21
+ @elements
22
+ end
19
23
  end
20
24
 
21
25
  def empty?
@@ -17,13 +17,13 @@ module Splashy
17
17
  @total_count = 0
18
18
  end
19
19
 
20
- # Public: Put elements into buckets.
20
+ # Public: Put elements into buckets with a block.
21
21
  #
22
22
  # bucket_name - If supplied, all yielded elements will be added to that
23
23
  # bucket.
24
- # &block - A block that returns (if `bucket_name` is not supplied)
25
- # an Array: [bucket_name, element]. If `bucket_name` is
26
- # supplied, only the element needs to be returned.
24
+ # &block - A block that returns (if `bucket_name` is not supplied) an
25
+ # Array: [bucket_name, element]. If `bucket_name` is supplied, only
26
+ # the element needs to be returned.
27
27
  #
28
28
  # Examples
29
29
  #
@@ -50,7 +50,10 @@ module Splashy
50
50
  @total_count += 1
51
51
  end
52
52
 
53
- # Returns true if the conditions are satisfied enough to select.
53
+ # Public
54
+ #
55
+ # Returns true if the conditions (distribution and, optionally, count) are
56
+ # satisfied enough to do a final selection of elements.
54
57
  def satisfied?
55
58
  begin
56
59
  self.assert_satisfied!
@@ -64,61 +67,123 @@ module Splashy
64
67
  # distribution. If a satisfactory distribution is not possible, a
65
68
  # DistributionUnsatisfiedError is raised.
66
69
  #
67
- # Returns a Hash of elements based on the desired distribution, keyed by
70
+ # Returns a Hash of elements matching the desired distribution, keyed by
68
71
  # the bucket names.
69
72
  def select
70
73
  self.assert_satisfied!
71
-
72
- total_count = estimated_final_count
73
-
74
- selected = @wanted_distribution.keys.inject({}) do |memo, bucket_name|
75
- bucket = @buckets[bucket_name]
76
- count = total_count * @wanted_distribution[bucket_name]
77
- count = [1, count.round].max
78
- memo[bucket_name] = bucket.elements( count )
74
+ selected = self._select_wanted
75
+ # Sometimes we need to fudge by a few to meet the `@wanted_count`
76
+ selected = self.trim( selected, @wanted_count ) if @wanted_count
77
+ selected
78
+ end
79
+
80
+ # Array of the buckets that need more elements to match the desired
81
+ # distribution, sorted descending by how much more they need.
82
+ def neediest_buckets
83
+ multipliers = self.needed_multipliers( self._select_all, @wanted_distribution ).to_a
84
+ multipliers.sort! { |a, b| b[1] <=> a[1] } # Sort on multiplier descending
85
+ multipliers.map{ |bucket_name, multiplier| bucket_name }
86
+ end
87
+
88
+ protected
89
+
90
+ # Protected
91
+ #
92
+ # Returns Hash of all bucket elements, keyed by bucket name.
93
+ def _select_all
94
+ @buckets.values.inject({}) do |memo, bucket|
95
+ memo[bucket.name] = bucket.elements
79
96
  memo
80
97
  end
98
+ end
99
+
100
+ # Protected
101
+ #
102
+ # Returns Hash of bucket elements, matching the wanted distribution as
103
+ # closely as possible.
104
+ def _select_wanted
105
+ final_count = self.estimated_final_count
81
106
 
82
- # Sometimes we need to fudge by a few to meet the `@wanted_count`
83
- selected = self.trim( selected ) if @wanted_count
107
+ @buckets.values.inject({}) do |memo, bucket|
108
+ count = ( final_count * @wanted_distribution[bucket.name] ).round
109
+ count = [1, count].max # Ensure every bucket has at least one element
110
+ memo[bucket.name] = bucket.elements( count )
111
+ memo
112
+ end
113
+ end
114
+
115
+ # Protected: Trim a given Hash of Arrays -- keyed by bucket names -- until
116
+ # it satisfies @wanted_count.
117
+ #
118
+ # selected - A Hash of selected elements, keyed by the bucket names. All
119
+ # values must be Arrays (or respond to `size`).
120
+ # size - The desired total size of `selected`.
121
+ def trim( selected, size )
122
+ raise ArgumentError.new( "Can't trim to a nil size" ) unless size
123
+ while self.class.elements_count( selected ) > size
124
+ candidates = self.trim_candidates( selected, @wanted_distribution )
125
+ selected[candidates.first].pop
126
+ end
84
127
 
85
128
  selected
86
129
  end
87
130
 
88
- protected
131
+ # Protected
132
+ #
133
+ # current_selections - Hash of element Arrays, keyed by bucket name.
134
+ # wanted_distribution - The wanted distribution as a hash of percentage
135
+ # Floats.
136
+ #
137
+ # Returns Array of bucket names for buckets that are good trim candidates,
138
+ # ordered by best candidates first.
139
+ def trim_candidates( current_selections, wanted_distribution )
140
+ multipliers = self.needed_multipliers( current_selections, wanted_distribution ).to_a
141
+ multipliers.select do |bucket_name, multiplier|
142
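+ # Can't trim empty buckets. NOTE(review): `select` returns a new Array that is discarded here, so this filter currently has no effect; `select!` or reassignment appears to be intended.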
143
+ @buckets[bucket_name].count != 0
144
+ end
145
+ return multipliers if multipliers.empty?
146
+ multipliers.sort! { |a, b| a[1] <=> b[1] } # Sort on multiplier ascending
147
+ multipliers.map{ |bucket_name, multiplier| bucket_name }
148
+ end
89
149
 
90
- # Trim a given Hash of Arrays keyed by bucket names until it meets
91
- # @wanted_count.
92
- def trim( selected )
93
- raise ArgumentError.new( "Can't trip to a nil @wanted_count" ) unless @wanted_count
150
+ # Protected
151
+ #
152
+ # current_selections - Hash of element Arrays, keyed by bucket name.
153
+ # wanted_distribution - The wanted distribution as a hash of percentage
154
+ # Floats.
155
+ #
156
+ # Returns Hash of multipliers needed for each bucket to reach its current
157
+ # wanted distribution.
158
+ def needed_multipliers( current_selections, wanted_distribution )
159
+ total_size = self.class.elements_count( current_selections )
94
160
 
95
- while self.class.elements_count( selected ) > @wanted_count
96
- # Calculate current variances from desired distribution. Ignore
97
- # buckets with only one element, too.
98
- variances = selected.keys.inject([]) do |memo, bucket_name|
99
- size = selected[bucket_name].size
100
- if size > 1
101
- current_percent = size / @wanted_count.to_f
102
- variance = @wanted_distribution[bucket_name] / current_percent
103
- memo << [bucket_name, variance]
104
- end
105
- memo
161
+ current_selections.keys.inject({}) do |memo, bucket_name|
162
+ bucket_size = current_selections[bucket_name].size
163
+ desired_pct = wanted_distribution[bucket_name]
164
+ current_pct = bucket_size.to_f / total_size
165
+ if current_pct > 0
166
+ memo[bucket_name] = desired_pct / current_pct
167
+ else
168
+ memo[bucket_name] = 1 / 0.0 # Infinity
106
169
  end
107
- break if variances.empty? # All have one element. Can't trim.
108
- trim_bucket_name = variances.sort{ |a, b| a[1] }[0][0] # Smallest variance
109
- selected[trim_bucket_name].pop
170
+ memo
110
171
  end
111
-
112
- selected
113
172
  end
114
173
 
174
+ # Protected
175
+ #
176
+ # hash - Hash of Objects that respond to `count` (usually Arrays).
177
+ #
115
178
  # Returns count of all elements in the Hash's Array values.
116
179
  def self.elements_count( hash )
117
180
  hash.values.inject(0){ |memo, array| memo + array.count }
118
181
  end
119
182
 
183
+ # Protected
184
+ #
120
185
  # Returns projected final number of elements that will be returned to
121
- # satisfy the requirements. If this is less than `@wanted_count`, when
186
+ # satisfy the requirements. If this is less than `@wanted_count`, when
122
187
  # supplied, we can't meet the requirements.
123
188
  def estimated_final_count
124
189
  limiter_bucket = self.limiter_bucket
@@ -127,6 +192,10 @@ module Splashy
127
192
  final_count
128
193
  end
129
194
 
195
+ # Protected
196
+ #
197
+ # Raises a DistributionUnsatisfiedError if we can't meet the wanted
198
+ # distribution or count (or both).
130
199
  def assert_satisfied!
131
200
  if @total_count < @wanted_distribution.size
132
201
  raise DistributionUnsatisfiedError.new(
@@ -156,7 +225,11 @@ module Splashy
156
225
  end
157
226
  end
158
227
 
159
- # Return the bucket that is the limiter in the distribution.
228
+ # Protected
229
+ #
230
+ # Return the Bucket that is the current limiter in the distribution. In
231
+ # other words, this bucket is limiting the total size of the final
232
+ # selection.
160
233
  def limiter_bucket
161
234
  # Smallest value of "count / desired percent" is the limiter.
162
235
  @buckets.values.map do |bucket|
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{splashy}
8
- s.version = "0.0.2"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Tyson Tate"]
@@ -227,7 +227,7 @@ describe Splashy::Buckets do
227
227
  fill_with_counts( 10, 2, 40 )
228
228
  assert @buckets.satisfied?
229
229
  assert_equal(
230
- {:a=>["10"], :b=>["20"], :c=>["30", "31", "32", "33", "34", "35"]},
230
+ {:a=>[], :b=>["20", "21"], :c=>["30", "31", "32", "33", "34", "35"]},
231
231
  @buckets.select
232
232
  )
233
233
  end
@@ -237,15 +237,41 @@ describe Splashy::Buckets do
237
237
  fill_with_counts( 3, 3, 3 )
238
238
  assert @buckets.satisfied?
239
239
  assert_equal(
240
- {:a=>["10"], :b=>["20"], :c=>["30"]},
240
+ {:a=>[], :b=>[], :c=>["30", "31", "32"]},
241
241
  @buckets.select
242
242
  )
243
243
  end
244
244
  end
245
245
 
246
+ describe "variances" do
247
+ it "reports on a pool with an even distribution" do
248
+ @buckets = Splashy::Buckets.new( {:a => 0.33, :b => 0.33, :c => 0.34} )
249
+ fill_with_counts( 10, 2, 40 )
250
+ assert_equal( [:b, :a, :c], @buckets.neediest_buckets )
251
+ end
252
+
253
+ it "reports on a pool with an uneven distribution" do
254
+ @buckets = Splashy::Buckets.new( {:a => 0.33, :b => 0.33, :c => 0.34}, 3 )
255
+ fill_with_counts( 10, 2, 40 )
256
+ assert_equal( [:b, :a, :c], @buckets.neediest_buckets )
257
+ end
258
+
259
+ it "reports on a pool with a skewed distribution" do
260
+ @buckets = Splashy::Buckets.new( {:a => 0.01, :b => 0.19, :c => 0.80} )
261
+ fill_with_counts( 10, 2, 1 )
262
+ assert_equal( [:c, :b, :a], @buckets.neediest_buckets )
263
+ end
264
+
265
+ it "reports on a pool with a wacky distribution" do
266
+ @buckets = Splashy::Buckets.new( {:a => 0.01, :b => 0.01, :c => 0.98} )
267
+ fill_with_counts( 3, 3, 3 )
268
+ assert_equal( [:c, :a, :b], @buckets.neediest_buckets )
269
+ end
270
+ end
271
+
246
272
  describe "performance" do
247
273
  it "grows linearly with more elements" do
248
- puts
274
+ puts # Formatting...
249
275
  assert_performance_linear 0.999 do |n|
250
276
  @buckets = Splashy::Buckets.new( :a => 0.20, :b => 0.30, :c => 0.50 )
251
277
  n.times do |i|
@@ -257,3 +283,4 @@ describe Splashy::Buckets do
257
283
  end
258
284
  end
259
285
  end
286
+
metadata CHANGED
@@ -5,9 +5,9 @@ version: !ruby/object:Gem::Version
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 2
10
- version: 0.0.2
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Tyson Tate