splashy 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,6 +46,12 @@ buckets.select
46
46
  Changelog
47
47
  =========
48
48
 
49
+ * 0.1.0 - Several bug fixes, add "neediest_buckets" method to Buckets to allow
50
+ you to choose which buckets to add to first if an element can be put in
51
+ multiple buckets, final distributions can now have empty buckets if it means
52
+ we meet the wanted distribution better (e.g. a 99% / 1% distribution with 5
53
+ and 1 elements, respectively, which will now select 4 and 0 elements if your
54
+ wanted count is 4).
49
55
  * 0.0.2 - Raise `ArgumentError` when trying to add to a bucket that doesn't
50
56
  exist, don't consider an empty bucket "satisfied".
51
57
  * 0.0.1 - Initial release.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.1.0
@@ -14,8 +14,12 @@ module Splashy
14
14
  @elements << element
15
15
  end
16
16
 
17
- def elements( count )
18
- @elements[0, count]
17
+ def elements( count = nil )
18
+ if count
19
+ @elements[0, count]
20
+ else
21
+ @elements
22
+ end
19
23
  end
20
24
 
21
25
  def empty?
@@ -17,13 +17,13 @@ module Splashy
17
17
  @total_count = 0
18
18
  end
19
19
 
20
- # Public: Put elements into buckets.
20
+ # Public: Put elements into buckets with a block.
21
21
  #
22
22
  # bucket_name - If supplied, all yielded elements will be added to that
23
23
  # bucket.
24
- # &block - A block that returns (if `bucket_name` is not supplied)
25
- # an Array: [bucket_name, element]. If `bucket_name` is
26
- # supplied, only the element needs to be returned.
24
+ # &block - A block that returns (if `bucket_name` is not supplied) an
25
+ # Array: [bucket_name, element]. If `bucket_name` is supplied, only
26
+ # the element needs to be returned.
27
27
  #
28
28
  # Examples
29
29
  #
@@ -50,7 +50,10 @@ module Splashy
50
50
  @total_count += 1
51
51
  end
52
52
 
53
- # Returns true if the conditions are satisfied enough to select.
53
+ # Public
54
+ #
55
+ # Returns true if the conditions (distribution and, optionally, count) are
56
+ # satisfied enough to do a final selection of elements.
54
57
  def satisfied?
55
58
  begin
56
59
  self.assert_satisfied!
@@ -64,61 +67,123 @@ module Splashy
64
67
  # distribution. If a satisfactory distribution is not possible, a
65
68
  # DistributionUnsatisfiedError is raised.
66
69
  #
67
- # Returns a Hash of elements based on the desired distribution, keyed by
70
+ # Returns a Hash of elements matching the desired distribution, keyed by
68
71
  # the bucket names.
69
72
  def select
70
73
  self.assert_satisfied!
71
-
72
- total_count = estimated_final_count
73
-
74
- selected = @wanted_distribution.keys.inject({}) do |memo, bucket_name|
75
- bucket = @buckets[bucket_name]
76
- count = total_count * @wanted_distribution[bucket_name]
77
- count = [1, count.round].max
78
- memo[bucket_name] = bucket.elements( count )
74
+ selected = self._select_wanted
75
+ # Sometimes we need to fudge by a few to meet the `@wanted_count`
76
+ selected = self.trim( selected, @wanted_count ) if @wanted_count
77
+ selected
78
+ end
79
+
80
+ # Array of the buckets that need more elements to match the desired
81
+ # distribution, sorted descending by how much more they need.
82
+ def neediest_buckets
83
+ multipliers = self.needed_multipliers( self._select_all, @wanted_distribution ).to_a
84
+ multipliers.sort! { |a, b| b[1] <=> a[1] } # Sort on multiplier descending
85
+ multipliers.map{ |bucket_name, multiplier| bucket_name }
86
+ end
87
+
88
+ protected
89
+
90
+ # Protected
91
+ #
92
+ # Returns Hash of all bucket elements, keyed by bucket name.
93
+ def _select_all
94
+ @buckets.values.inject({}) do |memo, bucket|
95
+ memo[bucket.name] = bucket.elements
79
96
  memo
80
97
  end
98
+ end
99
+
100
+ # Protected
101
+ #
102
+ # Returns Hash of bucket elements, matching the wanted distribution as
103
+ # closely as possible.
104
+ def _select_wanted
105
+ final_count = self.estimated_final_count
81
106
 
82
- # Sometimes we need to fudge by a few to meet the `@wanted_count`
83
- selected = self.trim( selected ) if @wanted_count
107
+ @buckets.values.inject({}) do |memo, bucket|
108
+ count = ( final_count * @wanted_distribution[bucket.name] ).round
109
+ count = [1, count].max # Ensure every bucket has at least one element
110
+ memo[bucket.name] = bucket.elements( count )
111
+ memo
112
+ end
113
+ end
114
+
115
+ # Protected: Trim a given Hash of Arrays -- keyed by bucket names -- until
116
+ # it satisfies @wanted_count.
117
+ #
118
+ # selected - A Hash of selected elements, keyed by the bucket names. All
119
+ # values must be Arrays (or respond to `size`).
120
+ # size - The desired total size of `selected`.
121
+ def trim( selected, size )
122
+ raise ArgumentError.new( "Can't trim to a nil size" ) unless size
123
+ while self.class.elements_count( selected ) > size
124
+ candidates = self.trim_candidates( selected, @wanted_distribution )
125
+ selected[candidates.first].pop
126
+ end
84
127
 
85
128
  selected
86
129
  end
87
130
 
88
- protected
131
+ # Protected
132
+ #
133
+ # current_selections - Hash of element Arrays, keyed by bucket name.
134
+ # wanted_distribution - The wanted distribution as a hash of percentage
135
+ # Floats.
136
+ #
137
+ # Returns Array of bucket names for buckets that are good trim candidates,
138
+ # ordered by best candidates first.
139
+ def trim_candidates( current_selections, wanted_distribution )
140
+ multipliers = self.needed_multipliers( current_selections, wanted_distribution ).to_a
141
+ multipliers.select do |bucket_name, multiplier|
142
+ # Can't trim empty buckets
143
+ @buckets[bucket_name].count != 0
144
+ end
145
+ return multipliers if multipliers.empty?
146
+ multipliers.sort! { |a, b| a[1] <=> b[1] } # Sort on multiplier ascending
147
+ multipliers.map{ |bucket_name, multiplier| bucket_name }
148
+ end
89
149
 
90
- # Trim a given Hash of Arrays keyed by bucket names until it meets
91
- # @wanted_count.
92
- def trim( selected )
93
- raise ArgumentError.new( "Can't trip to a nil @wanted_count" ) unless @wanted_count
150
+ # Protected
151
+ #
152
+ # current_selections - Hash of element Arrays, keyed by bucket name.
153
+ # wanted_distribution - The wanted distribution as a hash of percentage
154
+ # Floats.
155
+ #
156
+ # Returns Hash of multipliers needed for each bucket to reach its current
157
+ # wanted distribution.
158
+ def needed_multipliers( current_selections, wanted_distribution )
159
+ total_size = self.class.elements_count( current_selections )
94
160
 
95
- while self.class.elements_count( selected ) > @wanted_count
96
- # Calculate current variances from desired distribution. Ignore
97
- # buckets with only one element, too.
98
- variances = selected.keys.inject([]) do |memo, bucket_name|
99
- size = selected[bucket_name].size
100
- if size > 1
101
- current_percent = size / @wanted_count.to_f
102
- variance = @wanted_distribution[bucket_name] / current_percent
103
- memo << [bucket_name, variance]
104
- end
105
- memo
161
+ current_selections.keys.inject({}) do |memo, bucket_name|
162
+ bucket_size = current_selections[bucket_name].size
163
+ desired_pct = wanted_distribution[bucket_name]
164
+ current_pct = bucket_size.to_f / total_size
165
+ if current_pct > 0
166
+ memo[bucket_name] = desired_pct / current_pct
167
+ else
168
+ memo[bucket_name] = 1 / 0.0 # Infinity
106
169
  end
107
- break if variances.empty? # All have one element. Can't trim.
108
- trim_bucket_name = variances.sort{ |a, b| a[1] }[0][0] # Smallest variance
109
- selected[trim_bucket_name].pop
170
+ memo
110
171
  end
111
-
112
- selected
113
172
  end
114
173
 
174
+ # Protected
175
+ #
176
+ # hash - Hash of Objects that respond to `count` (usually Arrays).
177
+ #
115
178
  # Returns count of all elements in the Hash's Array values.
116
179
  def self.elements_count( hash )
117
180
  hash.values.inject(0){ |memo, array| memo + array.count }
118
181
  end
119
182
 
183
+ # Protected
184
+ #
120
185
  # Returns projected final number of elements that will be returned to
121
- # satisfy the requirements. If this is less than `@wanted_count`, when
186
+ # satisfy the requirements. If this is less than `@wanted_count`, if
122
187
  # supplied, we can't meet the requirements.
123
188
  def estimated_final_count
124
189
  limiter_bucket = self.limiter_bucket
@@ -127,6 +192,10 @@ module Splashy
127
192
  final_count
128
193
  end
129
194
 
195
+ # Protected
196
+ #
197
+ # Raises a DistributionUnsatisfiedError if we can't meet the wanted
198
+ # distribution or count (or both).
130
199
  def assert_satisfied!
131
200
  if @total_count < @wanted_distribution.size
132
201
  raise DistributionUnsatisfiedError.new(
@@ -156,7 +225,11 @@ module Splashy
156
225
  end
157
226
  end
158
227
 
159
- # Return the bucket that is the limiter in the distribution.
228
+ # Protected
229
+ #
230
+ # Return the Bucket that is the current limiter in the distribution. In
231
+ # other words, this bucket is limiting the total size of the final
232
+ # selection.
160
233
  def limiter_bucket
161
234
  # Smallest value of "count / desired percent" is the limiter.
162
235
  @buckets.values.map do |bucket|
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{splashy}
8
- s.version = "0.0.2"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Tyson Tate"]
@@ -227,7 +227,7 @@ describe Splashy::Buckets do
227
227
  fill_with_counts( 10, 2, 40 )
228
228
  assert @buckets.satisfied?
229
229
  assert_equal(
230
- {:a=>["10"], :b=>["20"], :c=>["30", "31", "32", "33", "34", "35"]},
230
+ {:a=>[], :b=>["20", "21"], :c=>["30", "31", "32", "33", "34", "35"]},
231
231
  @buckets.select
232
232
  )
233
233
  end
@@ -237,15 +237,41 @@ describe Splashy::Buckets do
237
237
  fill_with_counts( 3, 3, 3 )
238
238
  assert @buckets.satisfied?
239
239
  assert_equal(
240
- {:a=>["10"], :b=>["20"], :c=>["30"]},
240
+ {:a=>[], :b=>[], :c=>["30", "31", "32"]},
241
241
  @buckets.select
242
242
  )
243
243
  end
244
244
  end
245
245
 
246
+ describe "variances" do
247
+ it "reports on a pool with an even distribution" do
248
+ @buckets = Splashy::Buckets.new( {:a => 0.33, :b => 0.33, :c => 0.34} )
249
+ fill_with_counts( 10, 2, 40 )
250
+ assert_equal( [:b, :a, :c], @buckets.neediest_buckets )
251
+ end
252
+
253
+ it "reports on a pool with an uneven distribution" do
254
+ @buckets = Splashy::Buckets.new( {:a => 0.33, :b => 0.33, :c => 0.34}, 3 )
255
+ fill_with_counts( 10, 2, 40 )
256
+ assert_equal( [:b, :a, :c], @buckets.neediest_buckets )
257
+ end
258
+
259
+ it "reports on a pool with a skewed distribution" do
260
+ @buckets = Splashy::Buckets.new( {:a => 0.01, :b => 0.19, :c => 0.80} )
261
+ fill_with_counts( 10, 2, 1 )
262
+ assert_equal( [:c, :b, :a], @buckets.neediest_buckets )
263
+ end
264
+
265
+ it "reports on a pool with a wacky distribution" do
266
+ @buckets = Splashy::Buckets.new( {:a => 0.01, :b => 0.01, :c => 0.98} )
267
+ fill_with_counts( 3, 3, 3 )
268
+ assert_equal( [:c, :a, :b], @buckets.neediest_buckets )
269
+ end
270
+ end
271
+
246
272
  describe "performance" do
247
273
  it "grows linearly with more elements" do
248
- puts
274
+ puts # Formatting...
249
275
  assert_performance_linear 0.999 do |n|
250
276
  @buckets = Splashy::Buckets.new( :a => 0.20, :b => 0.30, :c => 0.50 )
251
277
  n.times do |i|
@@ -257,3 +283,4 @@ describe Splashy::Buckets do
257
283
  end
258
284
  end
259
285
  end
286
+
metadata CHANGED
@@ -5,9 +5,9 @@ version: !ruby/object:Gem::Version
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 2
10
- version: 0.0.2
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Tyson Tate