pick_me_too 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b52966a03f64a0e479d224c5c93abed4ca6e3316ae05b88a86f1c9742da799c
4
- data.tar.gz: 912417eb4f4c70b4d5211ed0e706f6fbd5be43b518ae4f3be8b198e52d9072b5
3
+ metadata.gz: 070a1c66bfe69fe534fc449c39a0f187a7b46e3bbcf3bc9827953993cfa30e28
4
+ data.tar.gz: 41ea453f24ccb5a1602aca774f488295e68280ece7b44c0d12f060295c645d58
5
5
  SHA512:
6
- metadata.gz: f9391158418a645aa5886a86934d695251e22168870c7df05946dfbc9d4a2a64253f8476574dbe44e3bf40fc6f9f9b4e6c9e1d8a466586baef29c4c729c41a03
7
- data.tar.gz: 3e77cab8f0ce92f2ef30434ad5d45eb99003eae157aad21be287636b71ec16a1f373e3018941c8a8094882053806a1d5749667fec9ae9734a137392806a9075d
6
+ metadata.gz: 7f15740e07eaf7c91548df5cae303e598a13c0e03c9f2af514b6368a497ff60741dfb6b957ee9b4b72cc222575be7ece4faaa5582753e1486fd5f84d2256c734
7
+ data.tar.gz: 751c64c9aece6e27fd53ecd52af26eec09883542b78077259487804e056594a8f5ab95e4c614fbc8b6dfa206d91e2d4271bec570153487f1da8d4968f0c53030
data/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
1
  .byebug_history
2
2
  *.swp
3
3
  .ruby-version
4
-
4
+ pkg
data/CHANGES.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Change Log
2
2
 
3
+ ## 1.1.2 *2022-11-6*
4
+ * optimized ternarization code still further
3
5
  ## 1.1.1 *2022-10-30*
4
6
  * simplified, and perhaps improved, the code to compile the frequency list into a nested ternary expression; ***NOTE***: I am not considering this a breaking change, though it may change the sequence of items picked by a fixed random number sequence
5
7
  ## 1.1.0 *2022-8-21*
data/README.md CHANGED
@@ -18,7 +18,7 @@ picker = PickMeToo.new([["prevention", 1], ["cure", 16]], -> { rng.rand })
18
18
  counter = Hash.new 0
19
19
  32.times { counter[picker.pick] += 1 }
20
20
  counter
21
- # => {"cure"=>29, "prevention"=>3}
21
+ # => {"cure"=>31, "prevention"=>1}
22
22
 
23
23
  # you can also use a hash to map items to frequencies
24
24
  # frequencies don't need to be whole numbers
@@ -29,7 +29,7 @@ picker = PickMeToo.new({foo: 1, bar: 2, baz: 0.5}, -> { rng.rand })
29
29
  counter = Hash.new 0
30
30
  32.times { counter[picker.pick] += 1 }
31
31
  counter
32
- # => {:bar=>15, :foo=>12, :baz=>5}
32
+ # => {:bar=>22, :foo=>5, :baz=>5}
33
33
 
34
34
  # you don't need to provide your own random number sequence
35
35
  picker = PickMeToo.new({a: 1, b: 2, c: 3})
data/lib/pick_me_too.rb CHANGED
@@ -10,14 +10,13 @@
10
10
  # # => [:goblin, :orc, :bugbear, :orc, :goblin, :bugbear, :goblin, :goblin, :orc, :goblin]
11
11
  #
12
12
  # irrational = PickMeToo.new({e: Math::E, pi: Math::PI})
13
- # to.times.map { irrational.pick }
13
+ # 10.times.map { irrational.pick }
14
14
  # # => [:e, :e, :e, :pi, :e, :e, :e, :pi, :pi, :e]
15
15
  #
16
16
  # Items once picked are "placed back in the urn", so if you pick a cat this doesn't reduce the
17
- # probability the next thing you pick is also a cat, and the urn will never be picked empty. (And of course
18
- # this is all a metaphor.)
17
+ # probability the next thing you pick is also a cat, and the urn will never be picked empty.
19
18
  class PickMeToo
20
- VERSION = '1.1.1'
19
+ VERSION = '1.1.2'
21
20
 
22
21
  class Error < StandardError; end
23
22
 
@@ -42,9 +41,9 @@ class PickMeToo
42
41
  if @objects.length == 1
43
42
  @picker = ->(_p) { 0 }
44
43
  else
45
- root = balanced_binary_tree(frequencies)
44
+ root = optimize(frequencies)
46
45
  # compile everything into a nested ternary expression
47
- @picker = eval "->(p) { #{ternarize(root, 0)} }"
46
+ @picker = eval "->(p) { #{ternarize(root)} }"
48
47
  end
49
48
  end
50
49
 
@@ -80,49 +79,74 @@ class PickMeToo
80
79
  raise Error, "the following have non-positive frequencies: #{bad.inspect}" if bad.any?
81
80
 
82
81
  total = good.map(&:last).sum.to_f
83
- # sort by size of probability interval -- optimization step
82
+ # sort by size of probability interval
83
+ # in general we will want to consider wide intervals before narrow ones
84
84
  good.sort_by(&:last).reverse.map { |o, n| [o, n / total] }
85
85
  end
86
86
 
87
- # treat the frequencies as a heap
88
- # returns the root of this binary tree
89
- def balanced_binary_tree(frequencies)
87
+ # optimize the order of threshold comparisons to map a random number to an index in the array
88
+ # of choices
89
+ def optimize(frequencies)
90
90
  frequencies = frequencies.each_with_index.map { |(*, i), idx| { interval: i, index: idx } }
91
- frequencies.each do |obj|
92
- left_idx = obj[:index] * 2 + 1
93
- next unless (left = frequencies[left_idx])
91
+ root = build_branch(frequencies)
92
+ add_thresholds(root, 0)
93
+ root
94
+ end
94
95
 
95
- obj[:left] = left
96
- right_idx = left_idx + 1
97
- if (right = frequencies[right_idx])
98
- obj[:right] = right
99
- end
96
+ def add_thresholds(node, acc)
97
+ # acc represents the accumulated probability mass known to be before anything in the tree
98
+ # currently under consideration
99
+ if (l = node[:left])
100
+ add_thresholds(l, acc)
101
+ node[:left_threshold] = acc += l[:sum]
102
+ end
103
+ if (r = node[:right])
104
+ acc = node[:right_threshold] = acc + node[:interval]
105
+ add_thresholds(r, acc)
100
106
  end
101
- frequencies[0]
102
107
  end
103
108
 
104
- # what is the sum of all intervals under this node?
105
- def sum(obj)
106
- return 0 unless obj
109
+ def build_branch(frequencies)
110
+ sum = frequencies.sum { |o| o[:interval] }
111
+ node = frequencies.shift
112
+ if frequencies.any?
113
+ if node[:interval] * 3 >= sum
114
+ # a binary search would be wasteful because the frequencies are so skewed
115
+ node[:right] = build_branch(frequencies)
116
+ else
117
+ # build a binary-branching search tree
118
+ left, right = frequencies.each_with_index.partition { |*, i| left_branch?(i + 1) }
119
+ node[:left] = build_branch(left.map(&:first))
120
+ node[:right] = build_branch(right.map(&:first)) if right.any?
121
+ end
122
+ end
123
+ node[:sum] = sum
124
+ node
125
+ end
107
126
 
108
- obj[:sum] ||= begin
109
- left = sum(obj[:left])
110
- right = sum(obj[:right])
111
- left + right + obj[:interval]
127
+ # this applies the heap parent rule, (index - 1) / 2, repeatedly to decide whether a node falls in the left subtree of the root
128
+ # our binary-branching trees are heaps with wider intervals towards the root
129
+ def left_branch?(index)
130
+ if index == 1
131
+ true
132
+ elsif index < 1
133
+ false
134
+ else
135
+ left_branch?((index - 1) / 2)
112
136
  end
113
137
  end
114
138
 
115
139
  # reduce the probability tree to nested ternary expressions
116
- def ternarize(node, acc)
117
- left = sum(node[:left])
118
- return node[:index] if left == 0 # this is a leaf
119
-
120
- right = if (r = node[:right])
121
- increment = acc + left + node[:interval]
122
- "(p < #{increment} ? #{node[:index]} : #{ternarize(r, increment)})"
123
- else
124
- node[:index]
140
+ def ternarize(node)
141
+ l, r = node.values_at :left, :right
142
+ if l && r
143
+ "(p > #{node[:left_threshold]} ? (p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]}) : #{ternarize(l)})"
144
+ elsif l
145
+ "(p > #{node[:left_threshold]} ? #{node[:index]} : #{ternarize(l)})"
146
+ elsif r
147
+ "(p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]})"
148
+ else
149
+ node[:index]
125
150
  end
126
- "(p < #{left + acc} ? #{ternarize(node[:left], acc)} : #{right})"
127
151
  end
128
152
  end
data/test/basic_test.rb CHANGED
@@ -71,6 +71,30 @@ class BasicTest < Minitest::Test
71
71
  end
72
72
  end
73
73
 
74
+ # force unary branching
75
+ def test_big
76
+ rnd = Random.new 1
77
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, i| [k, 2**i] }
78
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
79
+ counter = Hash.new(0)
80
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
81
+ frequencies.each do |key, n|
82
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
83
+ end
84
+ end
85
+
86
+ # force binary branching
87
+ def test_small
88
+ rnd = Random.new 1
89
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, _i| [k, 1] }
90
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
91
+ counter = Hash.new(0)
92
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
93
+ frequencies.each do |key, n|
94
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
95
+ end
96
+ end
97
+
74
98
  def test_randomize
75
99
  rnd1 = Random.new 1
76
100
  rnd2 = Random.new 1
@@ -13,7 +13,7 @@ class BasicTest < Minitest::Test
13
13
  picker = PickMeToo.new([['prevention', 1], ['cure', 16]], -> { rng.rand })
14
14
  counter = Hash.new 0
15
15
  32.times { counter[picker.pick] += 1 }
16
- assert_equal({ 'cure' => 29, 'prevention' => 3 }, counter)
16
+ assert_equal({ 'cure' => 31, 'prevention' => 1 }, counter)
17
17
  end
18
18
 
19
19
  def test_synopsis_hash
@@ -21,6 +21,6 @@ class BasicTest < Minitest::Test
21
21
  picker = PickMeToo.new({ foo: 1, bar: 2, baz: 0.5 }, -> { rng.rand })
22
22
  counter = Hash.new 0
23
23
  32.times { counter[picker.pick] += 1 }
24
- assert_equal({:bar=>15, :foo=>12, :baz=>5}, counter)
24
+ assert_equal({ bar: 22, foo: 5, baz: 5 }, counter)
25
25
  end
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pick_me_too
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David F. Houghton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-30 00:00:00.000000000 Z
11
+ date: 2022-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler