pick_me_too 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52db253007776ea2ede6caa43361e3e260660accdd4a0b1eb1439532f1ef92d3
4
- data.tar.gz: 7f370bb747440c33062b8193a3edffb2fe0ac752493735366ea4d7161ded0c4a
3
+ metadata.gz: 070a1c66bfe69fe534fc449c39a0f187a7b46e3bbcf3bc9827953993cfa30e28
4
+ data.tar.gz: 41ea453f24ccb5a1602aca774f488295e68280ece7b44c0d12f060295c645d58
5
5
  SHA512:
6
- metadata.gz: 4f70d7864e25ca65ea5ca660c1964f0e79229da429e2414539ffaf3a65da50b2a30f70f186266532adbf6115b4e7e7a9bbd7b0b8727994c690876e9648ff0ab7
7
- data.tar.gz: bcf37dedb54b3304367799a16c3521541fa57c0db4dd4e819220dcb9ca85ff804c37e1b026e7867cf90a8990575d10e2b7ac0b452433d74760a358301b1eec8f
6
+ metadata.gz: 7f15740e07eaf7c91548df5cae303e598a13c0e03c9f2af514b6368a497ff60741dfb6b957ee9b4b72cc222575be7ece4faaa5582753e1486fd5f84d2256c734
7
+ data.tar.gz: 751c64c9aece6e27fd53ecd52af26eec09883542b78077259487804e056594a8f5ab95e4c614fbc8b6dfa206d91e2d4271bec570153487f1da8d4968f0c53030
data/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
1
  .byebug_history
2
2
  *.swp
3
3
  .ruby-version
4
-
4
+ pkg
data/CHANGES.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Change Log
2
2
 
3
+ ## 1.1.2 *2022-11-6*
4
+ * optimized ternarization code still further
5
+ ## 1.1.1 *2022-10-30*
6
+ * simplified, and perhaps improved, the code to compile the frequency list into a nested ternary expression; ***NOTE***: I am not considering this a breaking change, though it may change the sequence of items picked by a fixed random number sequence
3
7
  ## 1.1.0 *2022-8-21*
4
8
  * added the `randomize!` method
5
9
  ## 1.0.0 *2022-8-14*
data/README.md CHANGED
@@ -18,7 +18,7 @@ picker = PickMeToo.new([["prevention", 1], ["cure", 16]], -> { rng.rand })
18
18
  counter = Hash.new 0
19
19
  32.times { counter[picker.pick] += 1 }
20
20
  counter
21
- # => {"cure"=>29, "prevention"=>3}
21
+ # => {"cure"=>31, "prevention"=>1}
22
22
 
23
23
  # you can also use a hash to map items to frequencies
24
24
  # frequencies don't need to be whole numbers
@@ -29,7 +29,7 @@ picker = PickMeToo.new({foo: 1, bar: 2, baz: 0.5}, -> { rng.rand })
29
29
  counter = Hash.new 0
30
30
  32.times { counter[picker.pick] += 1 }
31
31
  counter
32
- # => {:foo=>13, :bar=>12, :baz=>7}
32
+ # => {:bar=>22, :foo=>5, :baz=>5}
33
33
 
34
34
  # you don't need to provide your own random number sequence
35
35
  picker = PickMeToo({a: 1, b: 2, c: 3})
data/lib/pick_me_too.rb CHANGED
@@ -10,14 +10,13 @@
10
10
  # # => [:goblin, :orc, :bugbear, :orc, :goblin, :bugbear, :goblin, :goblin, :orc, :goblin]
11
11
  #
12
12
  # irrational = PickMeToo.new({e: Math::E, pi: Math::PI})
13
- # to.times.map { irrational.pick }
13
+ # 10.times.map { irrational.pick }
14
14
  # # => [:e, :e, :e, :pi, :e, :e, :e, :pi, :pi, :e]
15
15
  #
16
16
  # Items once picked are "placed back in the urn", so if you pick a cat this doesn't reduce the
17
- # probability the next thing you pick is also a cat, and the urn will never be picked empty. (And of course
18
- # this is all a metaphor.)
17
+ # probability the next thing you pick is also a cat, and the urn will never be picked empty.
19
18
  class PickMeToo
20
- VERSION = '1.1.0'
19
+ VERSION = '1.1.2'
21
20
 
22
21
  class Error < StandardError; end
23
22
 
@@ -42,11 +41,9 @@ class PickMeToo
42
41
  if @objects.length == 1
43
42
  @picker = ->(_p) { 0 }
44
43
  else
45
- frequencies = frequencies.map(&:last)
46
- balanced_binary_tree = bifurcate(frequencies.dup)
47
- probability_tree = probabilities(frequencies, balanced_binary_tree)
44
+ root = optimize(frequencies)
48
45
  # compile everything into a nested ternary expression
49
- @picker = eval "->(p) { #{ternarize(probability_tree)} }"
46
+ @picker = eval "->(p) { #{ternarize(root)} }"
50
47
  end
51
48
  end
52
49
 
@@ -82,94 +79,74 @@ class PickMeToo
82
79
  raise Error, "the following have non-positive frequencies: #{bad.inspect}" if bad.any?
83
80
 
84
81
  total = good.map(&:last).sum.to_f
85
- good.map { |o, n| [o, n / total] }
82
+ # sort by size of probability interval
83
+ # in general we will want to consider wide intervals before narrow ones
84
+ good.sort_by(&:last).reverse.map { |o, n| [o, n / total] }
86
85
  end
87
86
 
88
- # reduce the probability tree to nested ternary expressions
89
- def ternarize(ptree)
90
- p, left, right = ptree.values_at :p, :left, :right
91
- left = left.is_a?(Numeric) ? left : ternarize(left)
92
- right = right.is_a?(Numeric) ? right : ternarize(right)
93
- "(p > #{p} ? #{right} : #{left})"
94
- end
95
-
96
- def probabilities(frequencies, tree)
97
- tree = sum_probabilities(tree, 0)
98
- replace_frequencies_with_indices(tree, frequencies.each_with_index.to_a)
99
- tree
87
+ # optimize the order of threshold comparisons to map a random number to an index in the array
88
+ # of choices
89
+ def optimize(frequencies)
90
+ frequencies = frequencies.each_with_index.map { |(*, i), idx| { interval: i, index: idx } }
91
+ root = build_branch(frequencies)
92
+ add_thresholds(root, 0)
93
+ root
100
94
  end
101
95
 
102
- def replace_frequencies_with_indices(tree, frequencies)
103
- left, right = tree.values_at :left, :right
104
- if left.is_a?(Numeric)
105
- i = frequencies.index { |v,| v == left }
106
- *, i = frequencies.slice!(i)
107
- tree[:left] = i
108
- else
109
- replace_frequencies_with_indices(left, frequencies)
96
+ def add_thresholds(node, acc)
97
+ # acc represents the accumulated probability mass known to be before anything in the tree
98
+ # currently under consideration
99
+ if (l = node[:left])
100
+ add_thresholds(l, acc)
101
+ node[:left_threshold] = acc += l[:sum]
110
102
  end
111
- if right.is_a?(Numeric)
112
- i = frequencies.index { |v,| v == right }
113
- *, i = frequencies.slice!(i)
114
- tree[:right] = i
115
- else
116
- replace_frequencies_with_indices(right, frequencies)
103
+ if (r = node[:right])
104
+ acc = node[:right_threshold] = acc + node[:interval]
105
+ add_thresholds(r, acc)
117
106
  end
118
107
  end
119
108
 
120
- # convert the frequency numbers to probabilities
121
- def sum_probabilities(tree, base)
122
- left, right = tree
123
- p = left.flatten.sum + base
124
- left = left.length == 1 ? left.first : sum_probabilities(left, base)
125
- right = right.length == 1 ? right.first : sum_probabilities(right, p)
126
- { p: p, left: left, right: right }
127
- end
128
-
129
- # distribute the frequencies so they're as balanced as possible
130
- # the better to reduce expected length of the binary search
131
- def bifurcate(nums)
132
- return nums if nums.length < 2
133
-
134
- max = total = 0
135
- max_index = -1
136
- # make one loop find all these things
137
- nums.each_with_index do |n, i|
138
- total += n
139
- if n > max
140
- max = n
141
- max_index = i
109
+ def build_branch(frequencies)
110
+ sum = frequencies.sum { |o| o[:interval] }
111
+ node = frequencies.shift
112
+ if frequencies.any?
113
+ if node[:interval] * 3 >= sum
114
+ # a binary search would be wasteful because the frequencies are so skewed
115
+ node[:right] = build_branch(frequencies)
116
+ else
117
+ # build a binary-branching search tree
118
+ left, right = frequencies.each_with_index.partition { |*, i| left_branch?(i + 1) }
119
+ node[:left] = build_branch(left.map(&:first))
120
+ node[:right] = build_branch(right.map(&:first)) if right.any?
142
121
  end
143
122
  end
144
- half = total / 2.0
145
- right = [nums.slice!(max_index)]
146
- if max >= half
147
- [bifurcate(nums), right]
123
+ node[:sum] = sum
124
+ node
125
+ end
126
+
127
+ # this implements the heap rule for matching a node to its parent
128
+ # our binary-branching trees are heaps with wider intervals towards the root
129
+ def left_branch?(index)
130
+ if index == 1
131
+ true
132
+ elsif index < 1
133
+ false
148
134
  else
149
- gap = half - max
150
- while rv = fit_gap(gap, nums)
151
- removed, remaining_gap = rv
152
- right << removed
153
- break unless gap = remaining_gap
154
- end
155
- [bifurcate(nums), bifurcate(right)]
135
+ left_branch?((index - 1) / 2)
156
136
  end
157
137
  end
158
138
 
159
- # look for the frequency best suited to balance the two branches
160
- def fit_gap(gap, nums)
161
- best_index = 0
162
- best_fit = (gap - nums[0]).abs
163
- nums.each_with_index.drop(1).each do |n, i|
164
- fit = (gap - n).abs
165
- if fit < best_fit
166
- best_index = i
167
- best_fit = fit
168
- end
169
- end
170
- if nums[best_index] < gap * 2
171
- n = nums.slice!(best_index)
172
- [n, n < gap ? gap - n : nil]
139
+ # reduce the probability tree to nested ternary expressions
140
+ def ternarize(node)
141
+ l, r = node.values_at :left, :right
142
+ if l && r
143
+ "(p > #{node[:left_threshold]} ? (p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]}) : #{ternarize(l)})"
144
+ elsif l
145
+ "(p > #{node[:left_threshold]} ? #{node[:index]} : #{ternarize(l)})"
146
+ elsif r
147
+ "(p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]})"
148
+ else
149
+ node[:index]
173
150
  end
174
151
  end
175
152
  end
data/test/basic_test.rb CHANGED
@@ -71,6 +71,30 @@ class BasicTest < Minitest::Test
71
71
  end
72
72
  end
73
73
 
74
+ # force unary branching
75
+ def test_big
76
+ rnd = Random.new 1
77
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, i| [k, 2**i] }
78
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
79
+ counter = Hash.new(0)
80
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
81
+ frequencies.each do |key, n|
82
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
83
+ end
84
+ end
85
+
86
+ # force binary branching
87
+ def test_small
88
+ rnd = Random.new 1
89
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, _i| [k, 1] }
90
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
91
+ counter = Hash.new(0)
92
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
93
+ frequencies.each do |key, n|
94
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
95
+ end
96
+ end
97
+
74
98
  def test_randomize
75
99
  rnd1 = Random.new 1
76
100
  rnd2 = Random.new 1
@@ -13,7 +13,7 @@ class BasicTest < Minitest::Test
13
13
  picker = PickMeToo.new([['prevention', 1], ['cure', 16]], -> { rng.rand })
14
14
  counter = Hash.new 0
15
15
  32.times { counter[picker.pick] += 1 }
16
- assert_equal({ 'cure' => 29, 'prevention' => 3 }, counter)
16
+ assert_equal({ 'cure' => 31, 'prevention' => 1 }, counter)
17
17
  end
18
18
 
19
19
  def test_synopsis_hash
@@ -21,6 +21,6 @@ class BasicTest < Minitest::Test
21
21
  picker = PickMeToo.new({ foo: 1, bar: 2, baz: 0.5 }, -> { rng.rand })
22
22
  counter = Hash.new 0
23
23
  32.times { counter[picker.pick] += 1 }
24
- assert_equal({ foo: 13, bar: 12, baz: 7 }, counter)
24
+ assert_equal({ bar: 22, foo: 5, baz: 5 }, counter)
25
25
  end
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pick_me_too
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David F. Houghton
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-21 00:00:00.000000000 Z
11
+ date: 2022-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -107,7 +107,7 @@ homepage: https://github.com/dfhoughton/pick_me_too
107
107
  licenses:
108
108
  - MIT
109
109
  metadata: {}
110
- post_install_message:
110
+ post_install_message:
111
111
  rdoc_options: []
112
112
  require_paths:
113
113
  - lib
@@ -122,9 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0'
124
124
  requirements: []
125
- rubyforge_project:
126
- rubygems_version: 2.7.6
127
- signing_key:
125
+ rubygems_version: 3.3.7
126
+ signing_key:
128
127
  specification_version: 4
129
128
  summary: Randomly select items from a list with specified frequencies
130
129
  test_files: