pick_me_too 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b52966a03f64a0e479d224c5c93abed4ca6e3316ae05b88a86f1c9742da799c
4
- data.tar.gz: 912417eb4f4c70b4d5211ed0e706f6fbd5be43b518ae4f3be8b198e52d9072b5
3
+ metadata.gz: 070a1c66bfe69fe534fc449c39a0f187a7b46e3bbcf3bc9827953993cfa30e28
4
+ data.tar.gz: 41ea453f24ccb5a1602aca774f488295e68280ece7b44c0d12f060295c645d58
5
5
  SHA512:
6
- metadata.gz: f9391158418a645aa5886a86934d695251e22168870c7df05946dfbc9d4a2a64253f8476574dbe44e3bf40fc6f9f9b4e6c9e1d8a466586baef29c4c729c41a03
7
- data.tar.gz: 3e77cab8f0ce92f2ef30434ad5d45eb99003eae157aad21be287636b71ec16a1f373e3018941c8a8094882053806a1d5749667fec9ae9734a137392806a9075d
6
+ metadata.gz: 7f15740e07eaf7c91548df5cae303e598a13c0e03c9f2af514b6368a497ff60741dfb6b957ee9b4b72cc222575be7ece4faaa5582753e1486fd5f84d2256c734
7
+ data.tar.gz: 751c64c9aece6e27fd53ecd52af26eec09883542b78077259487804e056594a8f5ab95e4c614fbc8b6dfa206d91e2d4271bec570153487f1da8d4968f0c53030
data/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
1
  .byebug_history
2
2
  *.swp
3
3
  .ruby-version
4
-
4
+ pkg
data/CHANGES.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Change Log
2
2
 
3
+ ## 1.1.2 *2022-11-6*
4
+ * optimized ternarization code still further
3
5
  ## 1.1.1 *2022-10-30*
4
6
  * simplified, and perhaps improved, the code to compile the frequency list into a nested ternary expression; ***NOTE***: I am not considering this a breaking change, though it may change the sequence of items picked by a fixed random number sequence
5
7
  ## 1.1.0 *2022-8-21*
data/README.md CHANGED
@@ -18,7 +18,7 @@ picker = PickMeToo.new([["prevention", 1], ["cure", 16]], -> { rng.rand })
18
18
  counter = Hash.new 0
19
19
  32.times { counter[picker.pick] += 1 }
20
20
  counter
21
- # => {"cure"=>29, "prevention"=>3}
21
+ # => {"cure"=>31, "prevention"=>1}
22
22
 
23
23
  # you can also use a hash to map items to frequencies
24
24
  # frequencies don't need to be whole numbers
@@ -29,7 +29,7 @@ picker = PickMeToo.new({foo: 1, bar: 2, baz: 0.5}, -> { rng.rand })
29
29
  counter = Hash.new 0
30
30
  32.times { counter[picker.pick] += 1 }
31
31
  counter
32
- # => {:bar=>15, :foo=>12, :baz=>5}
32
+ # => {:bar=>22, :foo=>5, :baz=>5}
33
33
 
34
34
  # you don't need to provide your own random number sequence
35
35
  picker = PickMeToo.new({a: 1, b: 2, c: 3})
data/lib/pick_me_too.rb CHANGED
@@ -10,14 +10,13 @@
10
10
  # # => [:goblin, :orc, :bugbear, :orc, :goblin, :bugbear, :goblin, :goblin, :orc, :goblin]
11
11
  #
12
12
  # irrational = PickMeToo.new({e: Math::E, pi: Math::PI})
13
- # to.times.map { irrational.pick }
13
+ # 10.times.map { irrational.pick }
14
14
  # # => [:e, :e, :e, :pi, :e, :e, :e, :pi, :pi, :e]
15
15
  #
16
16
  # Items once picked are "placed back in the urn", so if you pick a cat this doesn't reduce the
17
- # probability the next thing you pick is also a cat, and the urn will never be picked empty. (And of course
18
- # this is all a metaphor.)
17
+ # probability the next thing you pick is also a cat, and the urn will never be picked empty.
19
18
  class PickMeToo
20
- VERSION = '1.1.1'
19
+ VERSION = '1.1.2'
21
20
 
22
21
  class Error < StandardError; end
23
22
 
@@ -42,9 +41,9 @@ class PickMeToo
42
41
  if @objects.length == 1
43
42
  @picker = ->(_p) { 0 }
44
43
  else
45
- root = balanced_binary_tree(frequencies)
44
+ root = optimize(frequencies)
46
45
  # compile everything into a nested ternary expression
47
- @picker = eval "->(p) { #{ternarize(root, 0)} }"
46
+ @picker = eval "->(p) { #{ternarize(root)} }"
48
47
  end
49
48
  end
50
49
 
@@ -80,49 +79,74 @@ class PickMeToo
80
79
  raise Error, "the following have non-positive frequencies: #{bad.inspect}" if bad.any?
81
80
 
82
81
  total = good.map(&:last).sum.to_f
83
- # sort by size of probability interval -- optimization step
82
+ # sort by size of probability interval
83
+ # in general we will want to consider wide intervals before narrow ones
84
84
  good.sort_by(&:last).reverse.map { |o, n| [o, n / total] }
85
85
  end
86
86
 
87
- # treat the frequencies as a heap
88
- # returns the root of this binary tree
89
- def balanced_binary_tree(frequencies)
87
+ # optimize the order of threshold comparisons to map a random number to an index in the array
88
+ # of choices
89
+ def optimize(frequencies)
90
90
  frequencies = frequencies.each_with_index.map { |(*, i), idx| { interval: i, index: idx } }
91
- frequencies.each do |obj|
92
- left_idx = obj[:index] * 2 + 1
93
- next unless (left = frequencies[left_idx])
91
+ root = build_branch(frequencies)
92
+ add_thresholds(root, 0)
93
+ root
94
+ end
94
95
 
95
- obj[:left] = left
96
- right_idx = left_idx + 1
97
- if (right = frequencies[right_idx])
98
- obj[:right] = right
99
- end
96
+ def add_thresholds(node, acc)
97
+ # acc represents the accumulated probability mass known to be before anything in the tree
98
+ # currently under consideration
99
+ if (l = node[:left])
100
+ add_thresholds(l, acc)
101
+ node[:left_threshold] = acc += l[:sum]
102
+ end
103
+ if (r = node[:right])
104
+ acc = node[:right_threshold] = acc + node[:interval]
105
+ add_thresholds(r, acc)
100
106
  end
101
- frequencies[0]
102
107
  end
103
108
 
104
- # what is the sum of all intervals under this node?
105
- def sum(obj)
106
- return 0 unless obj
109
+ def build_branch(frequencies)
110
+ sum = frequencies.sum { |o| o[:interval] }
111
+ node = frequencies.shift
112
+ if frequencies.any?
113
+ if node[:interval] * 3 >= sum
114
+ # a binary search would be wasteful because the frequencies are so skewed
115
+ node[:right] = build_branch(frequencies)
116
+ else
117
+ # build a binary-branching search tree
118
+ left, right = frequencies.each_with_index.partition { |*, i| left_branch?(i + 1) }
119
+ node[:left] = build_branch(left.map(&:first))
120
+ node[:right] = build_branch(right.map(&:first)) if right.any?
121
+ end
122
+ end
123
+ node[:sum] = sum
124
+ node
125
+ end
107
126
 
108
- obj[:sum] ||= begin
109
- left = sum(obj[:left])
110
- right = sum(obj[:right])
111
- left + right + obj[:interval]
127
+ # this implements the heap rule for matching a node to its parent
128
+ # our binary-branching trees are heaps with wider intervals towards the root
129
+ def left_branch?(index)
130
+ if index == 1
131
+ true
132
+ elsif index < 1
133
+ false
134
+ else
135
+ left_branch?((index - 1) / 2)
112
136
  end
113
137
  end
114
138
 
115
139
  # reduce the probability tree to nested ternary expressions
116
- def ternarize(node, acc)
117
- left = sum(node[:left])
118
- return node[:index] if left == 0 # this is a leaf
119
-
120
- right = if (r = node[:right])
121
- increment = acc + left + node[:interval]
122
- "(p < #{increment} ? #{node[:index]} : #{ternarize(r, increment)})"
123
- else
124
- node[:index]
140
+ def ternarize(node)
141
+ l, r = node.values_at :left, :right
142
+ if l && r
143
+ "(p > #{node[:left_threshold]} ? (p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]}) : #{ternarize(l)})"
144
+ elsif l
145
+ "(p > #{node[:left_threshold]} ? #{node[:index]} : #{ternarize(l)})"
146
+ elsif r
147
+ "(p > #{node[:right_threshold]} ? #{ternarize(r)} : #{node[:index]})"
148
+ else
149
+ node[:index]
125
150
  end
126
- "(p < #{left + acc} ? #{ternarize(node[:left], acc)} : #{right})"
127
151
  end
128
152
  end
data/test/basic_test.rb CHANGED
@@ -71,6 +71,30 @@ class BasicTest < Minitest::Test
71
71
  end
72
72
  end
73
73
 
74
+ # force unary branching
75
+ def test_big
76
+ rnd = Random.new 1
77
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, i| [k, 2**i] }
78
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
79
+ counter = Hash.new(0)
80
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
81
+ frequencies.each do |key, n|
82
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
83
+ end
84
+ end
85
+
86
+ # force binary branching
87
+ def test_small
88
+ rnd = Random.new 1
89
+ frequencies = %w[a b c d e f g h].each_with_index.map { |k, _i| [k, 1] }
90
+ picker = PickMeToo.new(frequencies, -> { rnd.rand })
91
+ counter = Hash.new(0)
92
+ (frequencies.map(&:last).sum * 6000).times { counter[picker.pick] += 1 }
93
+ frequencies.each do |key, n|
94
+ assert_equal n, (counter[key] / 6000.0).round, "right number of #{key}"
95
+ end
96
+ end
97
+
74
98
  def test_randomize
75
99
  rnd1 = Random.new 1
76
100
  rnd2 = Random.new 1
@@ -13,7 +13,7 @@ class BasicTest < Minitest::Test
13
13
  picker = PickMeToo.new([['prevention', 1], ['cure', 16]], -> { rng.rand })
14
14
  counter = Hash.new 0
15
15
  32.times { counter[picker.pick] += 1 }
16
- assert_equal({ 'cure' => 29, 'prevention' => 3 }, counter)
16
+ assert_equal({ 'cure' => 31, 'prevention' => 1 }, counter)
17
17
  end
18
18
 
19
19
  def test_synopsis_hash
@@ -21,6 +21,6 @@ class BasicTest < Minitest::Test
21
21
  picker = PickMeToo.new({ foo: 1, bar: 2, baz: 0.5 }, -> { rng.rand })
22
22
  counter = Hash.new 0
23
23
  32.times { counter[picker.pick] += 1 }
24
- assert_equal({:bar=>15, :foo=>12, :baz=>5}, counter)
24
+ assert_equal({ bar: 22, foo: 5, baz: 5 }, counter)
25
25
  end
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pick_me_too
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David F. Houghton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-30 00:00:00.000000000 Z
11
+ date: 2022-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler