pick_me_too 1.1.0 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52db253007776ea2ede6caa43361e3e260660accdd4a0b1eb1439532f1ef92d3
4
- data.tar.gz: 7f370bb747440c33062b8193a3edffb2fe0ac752493735366ea4d7161ded0c4a
3
+ metadata.gz: 070a1c66bfe69fe534fc449c39a0f187a7b46e3bbcf3bc9827953993cfa30e28
4
+ data.tar.gz: 41ea453f24ccb5a1602aca774f488295e68280ece7b44c0d12f060295c645d58
5
5
  SHA512:
6
- metadata.gz: 4f70d7864e25ca65ea5ca660c1964f0e79229da429e2414539ffaf3a65da50b2a30f70f186266532adbf6115b4e7e7a9bbd7b0b8727994c690876e9648ff0ab7
7
- data.tar.gz: bcf37dedb54b3304367799a16c3521541fa57c0db4dd4e819220dcb9ca85ff804c37e1b026e7867cf90a8990575d10e2b7ac0b452433d74760a358301b1eec8f
6
+ metadata.gz: 7f15740e07eaf7c91548df5cae303e598a13c0e03c9f2af514b6368a497ff60741dfb6b957ee9b4b72cc222575be7ece4faaa5582753e1486fd5f84d2256c734
7
+ data.tar.gz: 751c64c9aece6e27fd53ecd52af26eec09883542b78077259487804e056594a8f5ab95e4c614fbc8b6dfa206d91e2d4271bec570153487f1da8d4968f0c53030
data/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
1
  .byebug_history
2
2
  *.swp
3
3
  .ruby-version
4
-
4
+ pkg
data/CHANGES.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Change Log
2
2
 
3
+ ## 1.1.2 *2022-11-6*
4
+ * optimized ternarization code still further
5
+ ## 1.1.1 *2022-10-30*
6
+ * simplified, and perhaps improved, the code to compile the frequency list into a nested ternary expression; ***NOTE***: I am not considering this a breaking change, though it may change the sequence of items picked by a fixed random number sequence
3
7
  ## 1.1.0 *2022-8-21*
4
8
  * added the `randomize!` method
5
9
  ## 1.0.0 *2022-8-14*
data/README.md CHANGED
@@ -18,7 +18,7 @@ picker = PickMeToo.new([["prevention", 1], ["cure", 16]], -> { rng.rand })
18
18
  counter = Hash.new 0
19
19
  32.times { counter[picker.pick] += 1 }
20
20
  counter
21
- # => {"cure"=>29, "prevention"=>3}
21
+ # => {"cure"=>31, "prevention"=>1}
22
22
 
23
23
  # you can also use a hash to map items to frequencies
24
24
  # frequencies don't need to be whole numbers
@@ -29,7 +29,7 @@ picker = PickMeToo.new({foo: 1, bar: 2, baz: 0.5}, -> { rng.rand })
29
29
  counter = Hash.new 0
30
30
  32.times { counter[picker.pick] += 1 }
31
31
  counter
32
- # => {:foo=>13, :bar=>12, :baz=>7}
32
+ # => {:bar=>22, :foo=>5, :baz=>5}
33
33
 
34
34
  # you don't need to provide your own random number sequence
35
35
  picker = PickMeToo({a: 1, b: 2, c: 3})
data/lib/pick_me_too.rb CHANGED
@@ -10,14 +10,13 @@
10
10
  # # => [:goblin, :orc, :bugbear, :orc, :goblin, :bugbear, :goblin, :goblin, :orc, :goblin]
11
11
  #
12
12
  # irrational = PickMeToo.new({e: Math::E, pi: Math::PI})
13
- # to.times.map { irrational.pick }
13
+ # 10.times.map { irrational.pick }
14
14
  # # => [:e, :e, :e, :pi, :e, :e, :e, :pi, :pi, :e]
15
15
  #
16
16
  # Items once picked are "placed back in the urn", so if you pick a cat this doesn't reduce the
17
- # probability the next thing you pick is also a cat, and the urn will never be picked empty. (And of course
18
- # this is all a metaphor.)
17
+ # probability the next thing you pick is also a cat, and the urn will never be picked empty.
19
18
  class PickMeToo
20
- VERSION = '1.1.0'
19
+ VERSION = '1.1.2'
21
20
 
22
21
  class Error < StandardError; end
23
22
 
@@ -42,11 +41,9 @@ class PickMeToo
42
41
  if @objects.length == 1
43
42
  @picker = ->(_p) { 0 }
44
43
  else
45
- frequencies = frequencies.map(&:last)
46
- balanced_binary_tree = bifurcate(frequencies.dup)
47
- probability_tree = probabilities(frequencies, balanced_binary_tree)
44
+ root = optimize(frequencies)
48
45
  # compile everything into a nested ternary expression
49
- @picker = eval "->(p) { #{ternarize(probability_tree)} }"
46
+ @picker = eval "->(p) { #{ternarize(root)} }"
50
47
  end
51
48
  end
52
49
 
@@ -82,94 +79,74 @@ class PickMeToo
82
79
  raise Error, "the following have non-positive frequencies: #{bad.inspect}" if bad.any?
83
80
 
84
81
  total = good.map(&:last).sum.to_f
85
- good.map { |o, n| [o, n / total] }
82
+ # sort by size of probability interval
83
+ # in general we will want to consider wide intervals before narrow ones
84
+ good.sort_by(&:last).reverse.map { |o, n| [o, n / total] }
86
85
  end
87
86
 
88
- # reduce the probability tree to nested ternary expressions
89
- def ternarize(ptree)
90
- p, left, right = ptree.values_at :p, :left, :right
91
- left = left.is_a?(Numeric) ? left : ternarize(left)
92
- right = right.is_a?(Numeric) ? right : ternarize(right)
93
- "(p > #{p} ? #{right} : #{left})"
94
- end
95
-
96
- def probabilities(frequencies, tree)
97
- tree = sum_probabilities(tree, 0)
98
- replace_frequencies_with_indices(tree, frequencies.each_with_index.to_a)
99
- tree
87
+ # optimize the order of threshold comparisons to map a random number to an index in the array
88
+ # of choices
89
+ def optimize(frequencies)
90
+ frequencies = frequencies.each_with_index.map { |(*, i), idx| { interval: i, index: idx } }
91
+ root = build_branch(frequencies)
92
+ add_thresholds(root, 0)
93
+ root
100
94
  end
101
95
 
102
- def replace_frequencies_with_indices(tree, frequencies)
103
- left, right = tree.values_at :left, :right
104
- if left.is_a?(Numeric)
105
- i = frequencies.index { |v,| v == left }
106
- *, i = frequencies.slice!(i)
107
- tree[:left] = i
108
- else
109
- replace_frequencies_with_indices(left, frequencies)
96
+ def add_thresholds(node, acc)
97
+ # acc represents the accumulated probability mass known to be before anything in the tree
98
+ # currently under consideration
99
+ if (l = node[:left])
100
+ add_thresholds(l, acc)
101
+ node[:left_threshold] = acc += l[:sum]
110
102
  end
111
- if right.is_a?(Numeric)
112
- i = frequencies.index { |v,| v == right }
113
- *, i = frequencies.slice!(i)
114
- tree[:right] = i
115
- else
116
- replace_frequencies_with_indices(right, frequencies)
103
+ if (r = node[:right])
104
+ acc = node[:right_threshold] = acc + node[:interval]
105
+ add_thresholds(r, acc)
117
106
  end
118
107
  end
119
108
 
120
- # convert the frequency numbers to probabilities
121
- def sum_probabilities(tree, base)
122
- left, right = tree
123
- p = left.flatten.sum + base
124
- left = left.length == 1 ? left.first : sum_probabilities(left, base)
125
- right = right.length == 1 ? right.first : sum_probabilities(right, p)
126
- { p: p, left: left, right: right }
127
- end
128
-
129
- # distribute the frequencies so they're as balanced as possible
130
- # the better to reduce expected length of the binary search
131
- def bifurcate(nums)
132
- return nums if nums.length < 2
133
-
134
- max = total = 0
135
- max_index = -1
136
- # make one loop find all these things
137
- nums.each_with_index do |n, i|
138
- total += n
139
- if n > max
140
- max = n
141
- max_index = i
109
+ def build_branch(frequencies)
110
+ sum = frequencies.sum { |o| o[:interval] }
111
+ node = frequencies.shift
112
+ if frequencies.any?
113
+ if node[:interval] * 3 >= sum
114
+ # a binary search would be wasteful because the frequencies are so skewed
115
+ node[:right] = build_branch(frequencies)
116
+ else
117
+ # build a binary-branching search tree
118
+ left, right = frequencies.each_with_index.partition { |*, i| left_branch?(i + 1) }
119
+ node[:left] = build_branch(left.map(&:first))
120
+ node[:right] = build_branch(right.map(&:first)) if right.any?
142
121
  end
143
122
  end
144
- half = total / 2.0
145
- right = [nums.slice!(max_index)]
146
- if max >= half
147
- [bifurcate(nums), right]
123
+ node[:sum] = sum
124
+ node
125
+ end
126
+
127
+ # this implements the heap rule for matching a node to its parent
128
+ # our binary-branching trees are heaps with wider intervals towards the root
129
+ def left_branch?(index)
130
+ if index == 1
131
+ true
132
+ elsif index < 1
133
+ false
148
134
  else
149
- gap = half - max
150
- while rv = fit_gap(gap, nums)
151
- removed, remaining_gap = rv
152
- right << removed
153
- break unless gap = remaining_gap
154
- end
155
- [bifurcate(nums), bifurcate(right)]
135
+ left_branch?((index - 1) / 2)
156
136
  end
157
137
  end
158
138
 
159
- # look for the frequency best suited to balance the two branches
160
- def fit_gap(gap, nums)
161
- best_index = 0
162
- best_fit = (gap - nums[0]).abs
163
- nums.each_with_index.drop(1).each do |n, i|
164
- fit = (gap - n).abs
165
- if fit < best_fit
166
- best_index = i
167
- best_fit = fit
168
- end
169
- end
170
- if nums[best_index] < gap * 2
171
- n = nums.slice!(best_index)
172
- [n, n < gap ? gap - n : nil]
139
+ # reduce the probability tree to nested ternary expressions
140
+ def ternarize(node)
141
+ l, r = node.values_at :left, :right
142
+ if l && r
143
+ "(p > #{node[:left_threshold]} ? (p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]}) : #{ternarize(l)})"
144
+ elsif l
145
+ "(p > #{node[:left_threshold]} ? #{node[:index]} : #{ternarize(l)})"
146
+ elsif r
147
+ "(p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]})"
148
+ else
149
+ node[:index]
173
150
  end
174
151
  end
175
152
  end
data/test/basic_test.rb CHANGED
@@ -71,6 +71,30 @@ class BasicTest < Minitest::Test
71
71
  end
72
72
  end
73
73
 
74
+ # force unary branching
75
+ def test_big
76
+ rnd = Random.new 1
77
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, i| [k, 2**i] }
78
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
79
+ counter = Hash.new(0)
80
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
81
+ frequencies.each do |key, n|
82
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
83
+ end
84
+ end
85
+
86
+ # force binary branching
87
+ def test_small
88
+ rnd = Random.new 1
89
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, _i| [k, 1] }
90
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
91
+ counter = Hash.new(0)
92
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
93
+ frequencies.each do |key, n|
94
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
95
+ end
96
+ end
97
+
74
98
  def test_randomize
75
99
  rnd1 = Random.new 1
76
100
  rnd2 = Random.new 1
@@ -13,7 +13,7 @@ class BasicTest < Minitest::Test
13
13
  picker = PickMeToo.new([['prevention', 1], ['cure', 16]], -> { rng.rand })
14
14
  counter = Hash.new 0
15
15
  32.times { counter[picker.pick] += 1 }
16
- assert_equal({ 'cure' => 29, 'prevention' => 3 }, counter)
16
+ assert_equal({ 'cure' => 31, 'prevention' => 1 }, counter)
17
17
  end
18
18
 
19
19
  def test_synopsis_hash
@@ -21,6 +21,6 @@ class BasicTest < Minitest::Test
21
21
  picker = PickMeToo.new({ foo: 1, bar: 2, baz: 0.5 }, -> { rng.rand })
22
22
  counter = Hash.new 0
23
23
  32.times { counter[picker.pick] += 1 }
24
- assert_equal({ foo: 13, bar: 12, baz: 7 }, counter)
24
+ assert_equal({ bar: 22, foo: 5, baz: 5 }, counter)
25
25
  end
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pick_me_too
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David F. Houghton
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-21 00:00:00.000000000 Z
11
+ date: 2022-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -107,7 +107,7 @@ homepage: https://github.com/dfhoughton/pick_me_too
107
107
  licenses:
108
108
  - MIT
109
109
  metadata: {}
110
- post_install_message:
110
+ post_install_message:
111
111
  rdoc_options: []
112
112
  require_paths:
113
113
  - lib
@@ -122,9 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0'
124
124
  requirements: []
125
- rubyforge_project:
126
- rubygems_version: 2.7.6
127
- signing_key:
125
+ rubygems_version: 3.3.7
126
+ signing_key:
128
127
  specification_version: 4
129
128
  summary: Randomly select items from a list with specified frequencies
130
129
  test_files: