data_structures_rmolinari 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,676 @@
|
|
1
|
+
# A priority search tree (PST) stores points in two dimensions (x,y) and can efficiently answer certain questions about the set of
|
2
|
+
# points.
|
3
|
+
#
|
4
|
+
# The structure was introduced by McCreight [1].
|
5
|
+
#
|
6
|
+
# It is a binary search tree which is a max-heap by the y-coordinate, and, for a non-leaf node N storing (x, y), all the nodes in
|
7
|
+
# the left subtree of N have smaller x values than any of the nodes in the right subtree of N. Note, though, that the x-value at N
|
8
|
+
# has no particular property relative to the x values in its subtree. It is thus _almost_ a binary search tree in the x coordinate.
|
9
|
+
#
|
10
|
+
# See more: https://en.wikipedia.org/wiki/Priority_search_tree
|
11
|
+
#
|
12
|
+
# It is possible to build such a tree in place, given an array of pairs. See [2]. In a follow-up paper, [3], the authors show how to
|
13
|
+
# construct a more flexible data structure,
|
14
|
+
#
|
15
|
+
# "[T]he Min-Max Priority Search tree for a set P of n points in R^2. It is a binary tree T with the following properties:
|
16
|
+
#
|
17
|
+
# * For each internal node u, all points in the left subtree of u have an x-coordinate which is less than the x-coordinate of any
|
18
|
+
# point in the right subtree of u.
|
19
|
+
# * The y-coordinate values of the nodes on even (resp. odd) levels are smaller (resp. greater) than the y-coordinate values of
|
20
|
+
# their descendants (if any), where the root is at level zero.
|
21
|
+
#
|
22
|
+
# "The first property implies that T is a binary search tree on the x-coordinates of the points in P, except that there is no
|
23
|
+
# relation between the x-coordinates of the points stored at u and any of its children. The second property implies that T is a
|
24
|
+
# min-max heap on the y-coordinates of the points in P."
|
25
|
+
#
|
26
|
+
# I started implementing the in-place PST. Then, finding the follow-up paper [3], decided to do that one instead, as the paper says
|
27
|
+
# it is more flexible. The point is to learn a new data structure and its associated algorithms.
|
28
|
+
#
|
29
|
+
# Hmmm. The algorithms are rather bewildering. Highest3SidedUp is complicated, and only two of the functions CheckLeft, CheckLeftIn,
|
30
|
+
# CheckRight, CheckRightIn are given; the other two are "symmetric". But it's not really clear what the first are actually doing, so
|
31
|
+
# it's hard to know what the others actually do.
|
32
|
+
#
|
33
|
+
# I either need to go back to MaxPST until I understand things better, or spend quite a lot of time going through the algorithms
|
34
|
+
# here on paper.
|
35
|
+
|
36
|
+
# [1] E. McCreight, _Priority Search Trees_, SIAM J. Computing, v14, no 3, May 1985, pp 257-276.
|
37
|
+
# [2] De, Maheshwari, Nandy, Smid, _An in-place priority search tree_, 23rd Annual Canadian Conference on Computational Geometry.
|
38
|
+
# [3] De, Maheshwari, Nandy, Smid, _An in-place min-max priority search tree_, Computational Geometry, v46 (2013), pp 310-327.
|
39
|
+
# [4] Atkinson, Sack, Santoro, Strothotte, _Min-max heaps and generalized priority queues_, Commun. ACM 29 (10) (1986), pp 996-1000.
|
40
|
+
|
41
|
+
require 'must_be'
|
42
|
+
|
43
|
+
# A point in the plane with +x+ and +y+ accessors.
#
# +fmt+ renders the point compactly, e.g. "(1,2)", for debugging output.
Pair = Struct.new(:x, :y) do
  def fmt
    format("(%s,%s)", x, y)
  end
end
|
48
|
+
|
49
|
+
class MinmaxPrioritySearchTreeInternal
|
50
|
+
INFINITY = Float::INFINITY
|
51
|
+
|
52
|
+
# The array of pairs is turned into a minmax PST in-place without cloning. So clone before passing it in, if you care.
#
# Each element must respond to #x and #y. Use Pair (above) if you like.
#
# @param data [Array] the points; mutated in place into the min-max PST layout.
# @param verify [Boolean] when true, check the PST invariants after construction;
#   verify_properties raises if they are violated.
def initialize(data, verify: false)
  @data = data
  @size = @data.size

  construct_pst
  return unless verify

  # puts "Validating tree structure..."
  verify_properties
end
|
65
|
+
|
66
|
+
# Let Q = [x0, infty) X [y0, infty) be the northeast "quadrant" defined by the point (x0, y0) and let P be the points in this data
# structure. Define p* as
#
# - (infty, infty) if Q \intersect P is empty and
# - the leftmost (i.e., min-x) point in Q \intersect P otherwise
#
# This method returns p*.
#
# From De et al:
#
#   [t]he variables best, p, and q satisfy the following invariant:
#
#   - if Q \intersect P is nonempty then p* \in {best} \union T(p) \union T(q)
#   - if Q \intersect P is empty then p* = best
#   - p and q are at the same level of T and x(p) <= x(q)
#
# Here T(x) is the subtree rooted at x
def leftmost_ne(x0, y0)
  best = Pair.new(INFINITY, INFINITY)
  p = q = root

  # Membership test for the query quadrant Q.
  in_q = ->(pair) { pair.x >= x0 && pair.y >= y0 }

  # From the paper:
  #
  #   takes as input a point t \in P and updates best as follows: if t \in Q and x(t) < x(best) then it assigns best = t
  #
  # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
  update_leftmost = lambda do |node|
    t = val_at(node)
    if in_q.call(t) && t.x < best.x
      best = t
    end
  end

  # Generalize the c1,...,c4 idea from the paper in line with the BUG 2 IN PAPER notes, below.
  #
  # Given: 0 or more nodes n1, ..., nk in the tree. All are at the same level, which is a "max level" in our MinmaxPST, such that
  # x(n1) <= x(n2) <= ... <= x(nk). (Note: it is expected that the nj are either children or grandchildren of p and q, though we
  # don't check that.)
  #
  # If k = 0 return nil. Otherwise...
  #
  # We return two values p_goal, q_goal (possibly equal) from among the nj such that
  #
  # - p_goal is not to the right of q_goal in the tree and so, in particular x(p_goal) <= x(q_goal)
  # - if and when the search reaches p = p_goal and q = q_goal the algorithm invariant will be satisfied.
  #
  # As a special case, we return nil if we detect that none of the subtrees T(nj) contain any points in Q. This is a sign to
  # terminate the algorithm.
  #
  # See the notes at "BUG 2 IN PAPER" below for more details about what is going on.
  determine_goal_nodes = lambda do |nodes|
    node_count = nodes.size
    return nil if node_count.zero?

    if val_at(nodes.last).x <= x0
      # Only the rightmost subtree can possibly have anything in Q, assuming that all the x-values are distinct.
      return [nodes.last, nodes.last]
    end

    if val_at(nodes.first).x > x0
      # All subtrees have x-values large enough to provide elements of Q. Since we are at a max-level the y-values help us work
      # out which subtree to focus on.
      leftmost = nodes.find { |node| val_at(node).y >= y0 }

      return nil unless leftmost # nothing left to find

      # Otherwise we explore the leftmost subtree. Its root is in Q and can't be beaten by anything to its right.
      return [leftmost, leftmost]
    end

    values = nodes.map { |n| val_at(n) }

    # Otherwise x(n1) <= x0 < x(nk). Thus i is well-defined.
    # (values[i + 1] is never out of range here: the two early returns above guarantee some j < k - 1 satisfies the condition.)
    i = (0...node_count).select { |j| values[j].x <= x0 && x0 < values[j + 1].x }.min

    # these nodes all have large-enough x-values and so this finds the ones in the set Q.
    # .min picks the leftmost qualifying node, since node indices increase left-to-right within a level.
    new_q = nodes[(i + 1)..].select { |node| val_at(node).y >= y0 }.min # could be nil
    new_p = nodes[i] if values[i].y >= y0 # The leftmost subtree is worth exploring if the y-value is big enough. Otherwise not
    new_p ||= new_q # if nodes[i] is no good we send p along with q
    new_q ||= new_p # but if there was no worthwhile value for q we should send it along with p

    return nil unless new_p

    [new_p, new_q]
  end

  until leaf?(p)
    level = Math.log2(p).floor # TODO: don't calculate log every time!

    update_leftmost.call(p)
    update_leftmost.call(q)

    if p == q
      if one_child?(p)
        p = q = left(p)
      else
        q = right(p)
        p = left(p)
      end
    else
      # p != q
      if leaf?(q)
        q = p # p itself is just one layer above the leaves, or is itself a leaf
      elsif one_child?(q)
        # Note that p has two children
        if val_at(left(q)).x < x0
          # x-values below p are too small
          p = q = left(q)
        elsif val_at(right(p)).x <= x0
          # x-values in T(right(p)) are too small. DISTINCT-X
          p = right(p)
          q = left(q)
        else
          # BUG 1 IN PAPER.
          #
          # So, x(q_l) >= x0 and x(p_r) > x0. But how can we be sure that the child of q isn't the winner?. Should we be trying
          # it in this case?
          #
          # Yes: otherwise it never gets checked.

          update_leftmost.call(left(q))
          q = right(p)
          p = left(p)
        end
      else
        # p and q both have two children

        # BUG 2 IN PAPER.
        #
        # Define c as the paper does:
        #
        #   (c1, c2, c3, c4) = (left(p), right(p), left(q), right(q))
        #
        # Because of the PST property on x and the invariant x(p) <= x(q) we know that
        #
        #   x(c1) <= x(c2) <= x(c3) <= x(c4)
        #
        # Similarly, the sets of values x(T(ci)) are pairwise ordered in the same sense.
        #
        # Suppose further that x(ci) <= x0 <= x(c(i+1)). Then we know several things
        #
        # - there might be a "winner" (point in Q) in T(ci), perhaps ci itself.
        # - there are not any winners in T(cj) for j < i, because the x-values there aren't big enough
        # - any winner in ck, for k >= i, will be to the left of and thus beat any winner in c(k+1), because of the ordering of
        #   x-values
        #
        # If x(c4) <= x0 then the rightmost subtree T(c4) is the only one worth checking and we set p = q = c4.
        # If x(c1) > x0 then we take i = 0 and ignore the logic on ci in what follows and setting p = q.
        #
        # Pretend for the moment that we are using a MaxPST instead of a MinmaxPST. Then we can look at y values to learn more.
        #
        # - if y(ci) >= y0 then we need to search T(ci), so we will update p = ci
        # - but if y(ci) < y0 then there are no winners in T(ci) because the y-values are too small.
        # - similarly, if y(c(i+1)) >= y0 then we need to search T(c(i+1)). Indeed c(i+1) itself is in Q and beats any winner in
        #   subtrees further to the right
        # - so, let k > i be minimal such that y(ck) >= y0, if there is any. Note that ck is itself a winner. Then
        #   - if y(ci) >= y0,
        #     - set p = ci, and q = ck (or q = ci if there is no such k)
        #   - otherwise (T(ci) has no winners because its y-values are too small)
        #     - if k is defined set p = q = ck. Otherwise HALT (there are no more winners)
        #
        # But we are working with a MinmaxPST rather than a MaxPST, so we have to work harder. If c1, ..., c4 (the children of p
        # and q) are in a "max-level" of the tree - that is, an even level - then the logic above still applies. But if they are
        # at a min level things are trickier and we need to go another layer down.
        #
        # The paper knows that we need to look a further layer down, but the logic is too simplistic. It looks at cj for j > i and
        # checks if cj or either of its children are in Q. But that's not good enough. For the same reason that in a MaxPST we may
        # need to explore below T(ci) even if ci isn't in Q, we may need to descend through one of the grandchildren of p or q even
        # if that grandchild isn't in Q.
        #
        # Getting a bit handwavey especially over what happens near the leaves...
        #
        # Consider the children d1, d2, ..., dm, of ci, ..., c4 (and so grandchildren of p and q). They are at a max-level and so
        # the logic described applies to the dk. If ci happens to be a winner we can set p = ci and work out what to do with q by
        # looking at the children of c(i+1), ..., c4. Otherwise we look at all the dj values (up to 8 of them), apply the logic
        # above to work out that we want to head for, say, p = ds and q = dt, and in this cycle update p = parent(ds), q =
        # parent(dt). (We also need to submit the values c(i+1)..c4 to UpdateLeftmost.)
        #
        # In other words, we can use the MaxPST logic on d1,...,dm to decide where we need to go, and then step to the relevant
        # parents among the cj.

        c = [left(p), right(p), left(q), right(q)]
        if level.odd?
          # the elements of c are at an even level, and hence their y values are maxima for the subtrees. We can learn what we
          # need to know from them
          p, q = determine_goal_nodes.call(c)
          if p && !q
            # byebug
            # determine_goal_nodes.call(c)
            raise 'bad logic'
          end
        else
          # They are at an odd level and so aren't helpful in working out what to do next: we look at their children, which are in
          # a max-level. We need to check the elements of c against best since we are otherwise ignoring them.
          c.each { |n| update_leftmost.call(n) }

          d = c.map { [left(_1), right(_1)]}.flatten.select { |n| n <= @size }

          # Note that we are jumping down two levels here!
          p, q = determine_goal_nodes.call(d)
          if p && !q
            # byebug
            # determine_goal_nodes.call(c)
            raise 'bad logic'
          end

          p # NOTE(review): this bare expression is a no-op (just the local variable p); harmless leftover
        end

        return best unless p # nothing more to do
      end
    end
  end
  update_leftmost.call(p)
  update_leftmost.call(q)
  best
end
|
285
|
+
|
286
|
+
# Let Q be the "three-sided query range" [x0, x1] X [y0, infty) and let P_Q be P \intersect Q.
|
287
|
+
#
|
288
|
+
# If P_Q is empty then p* = (infty, -infty).
|
289
|
+
# Otherwise, p* is the point in P_Q with maximal y value.
|
290
|
+
#
|
291
|
+
# This method returns p*
|
292
|
+
# def highest_3_sided_up(x0, x1, y0)
|
293
|
+
# best = Pair.new(INFINITY, -INFINITY)
|
294
|
+
|
295
|
+
# in_q = lambda do |pair|
|
296
|
+
# pair.x >= x0 && pair.x <= x1 && pair.y >= y0
|
297
|
+
# end
|
298
|
+
|
299
|
+
# # From the paper:
|
300
|
+
# #
|
301
|
+
# # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assignes best = t
|
302
|
+
# #
|
303
|
+
# # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
|
304
|
+
# #
|
305
|
+
# # The algorithm is complicated. From the paper:
|
306
|
+
# #
|
307
|
+
# # Since Q is bounded by two vertical sides, we use four index variables p, p', q and q' to guide the search path. In addition,
|
308
|
+
# # we use four bits L, L', R and R'; these correspond to the subtrees of T rooted at the nodes p, p', q, and q', respectively;
|
309
|
+
# # if a bit is equal to one, then the corresonding node is referred to as an _active node_ (for example, if L = 1 then p is an
|
310
|
+
# # active node), and the subtree rooted at that node may contain a candidate point for p*. So the search is required to be
|
311
|
+
# # performed in the subtree rooted at all active nodes. More formally, at any instant of time the variables satisfy the folling
|
312
|
+
# # invariants:
|
313
|
+
# #
|
314
|
+
# # - If L = 1 the x(p) < x0.
|
315
|
+
# # - If L' = 1 then x0 <= x(p') <= x1.
|
316
|
+
# # - If R = 1 then x(q) > x1.
|
317
|
+
# # - If R' = 1 then x0 <= x(q') <= x1.
|
318
|
+
# # - If L' = 1 and R' = 1 then x(p') <= x(q').
|
319
|
+
# # - If P_Q is non-empty then p* = best or p* is in the subtree rooted at any one of the active nodes.
|
320
|
+
# #
|
321
|
+
# # There are more details in the paper
|
322
|
+
# update_highest = lambda do |node|
|
323
|
+
# t = val_at(node)
|
324
|
+
# if in_q.call(t) && t.y > best.y
|
325
|
+
# best = t
|
326
|
+
# end
|
327
|
+
# end
|
328
|
+
|
329
|
+
# ex_update_highest = lambda do |node|
|
330
|
+
# update_highest.call(node)
|
331
|
+
# update_highest.call(left(node)) unless leaf?(node)
|
332
|
+
# update_highest.call(right(node)) unless one_child?(node)
|
333
|
+
# end
|
334
|
+
|
335
|
+
# if val_at(root).x < x0
|
336
|
+
# p = root
|
337
|
+
# l = true
|
338
|
+
# l_prime = r = r_prime = false
|
339
|
+
# elsif val_at(root).x < x1
|
340
|
+
# p_prime = root
|
341
|
+
# l_prime = true
|
342
|
+
# l = r = r_prime = false
|
343
|
+
# else
|
344
|
+
# q = root
|
345
|
+
# r = true
|
346
|
+
# l = l_prime = r_prime = false
|
347
|
+
# end
|
348
|
+
|
349
|
+
# set_z = lambda do
|
350
|
+
# r = []
|
351
|
+
# r << p if l
|
352
|
+
# r << p_prime if l_prime
|
353
|
+
# r << q if r
|
354
|
+
# r << q_prime if r_prime
|
355
|
+
# r
|
356
|
+
# end
|
357
|
+
|
358
|
+
# check_left = lambda do
|
359
|
+
# if leaf?(p)
|
360
|
+
# l = false
|
361
|
+
# elsif one_child?(p)
|
362
|
+
# p_l_x = val_at(left(p))
|
363
|
+
# if x0 <= p_l_x && p_l_x <= x1
|
364
|
+
# update_highest.call(left(p))
|
365
|
+
# if l_prime && r_prime
|
366
|
+
# ex_update_highest.call(p_prime)
|
367
|
+
# elsif l_prime
|
368
|
+
# q_prime = p_prime
|
369
|
+
# r_prime = true
|
370
|
+
# end
|
371
|
+
# p_prime = left(p)
|
372
|
+
# l_prime = true
|
373
|
+
# l = false
|
374
|
+
# elsif p_l_x < x0
|
375
|
+
# p = left(p)
|
376
|
+
# else
|
377
|
+
# q = left(p)
|
378
|
+
# r = true
|
379
|
+
# l = false
|
380
|
+
# end
|
381
|
+
# else
|
382
|
+
# # p has two children
|
383
|
+
|
384
|
+
# end
|
385
|
+
|
386
|
+
# while l || l_prime || r || r_prime
|
387
|
+
# z_star = set_z.call.min_by(4) { level(_1) }
|
388
|
+
# if z_star.include? p_prime
|
389
|
+
# check_left_in(p_prime)
|
390
|
+
# elsif z_star.include? q_prime
|
391
|
+
# check_right_in(q_prime)
|
392
|
+
# elsif z_star.include? p
|
393
|
+
# check_left(p)
|
394
|
+
# else
|
395
|
+
# check_right(q)
|
396
|
+
# end
|
397
|
+
# end
|
398
|
+
# end
|
399
|
+
|
400
|
+
# Find the "highest" (max-y) point that is "northeast" of (x, y).
#
# That is, the point p* in Q = [x, infty) X [y, infty) with the largest y value, or (infty, -infty) if there is no point in that
# quadrant.
#
# Algorithm is from De et al. section 3.1
#
# NOTE(review): the raise on the first line makes everything below it unreachable — the method is deliberately disabled.
# The code that follows is the MaxPST form of the algorithm, but this class is a Min-max PST, where the max-heap property
# on y holds only on even levels; presumably that is why it is disabled. Confirm the algorithm before enabling.
def highest_ne(x0, y0)
  raise "Write me"
  # From the paper:
  #
  #   The algorithm uses two variables best and p, which satisfy the following invariant
  #
  #   - If Q intersect P is nonempty then p* in {best} union T_p
  #   - If Q intersect P is empty then p* = best
  #
  # Here, P is the set of points in our data structure and T_p is the subtree rooted at p
  best = Pair.new(INFINITY, -INFINITY)
  p = root # root of the whole tree AND the pair stored there

  # Membership test for the query quadrant Q.
  in_q = lambda do |pair|
    pair.x >= x0 && pair.y >= y0
  end

  # From the paper:
  #
  #   takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
  #
  # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
  update_highest = lambda do |node|
    t = val_at(node)
    if in_q.call(t) && t.y > best.y
      best = t
    end
  end

  # We could make this code more efficient. But since we only have O(log n) steps we won't actually gain much so let's keep it
  # readable and close to the paper's pseudocode for now.
  until leaf?(p)
    p_val = val_at(p)
    if in_q.call(p_val)
      # p \in Q and nothing in its subtree can beat it because of the max-heap
      update_highest.call(p)
      return best

      # p = left(p) <- from paper
    elsif p_val.y < y0
      # p is too low for Q, so the entire subtree is too low as well
      return best

      # p = left(p)
    elsif one_child?(p)
      # With just one child we need to check it
      p = left(p)
    elsif val_at(right(p)).x <= x0
      # right(p) might be in Q, but nothing in the left subtree can be, by the PST property on x.
      p = right(p)
    elsif val_at(left(p)).x >= x0
      # Both children are in Q, so try the higher of them. Note that nothing in either subtree will beat this one.
      higher = left(p)
      if val_at(right(p)).y > val_at(left(p)).y
        higher = right(p)
      end
      p = higher
    elsif val_at(right(p)).y < y0
      # Nothing in the right subtree is in Q, but maybe we'll find something in the left
      p = left(p)
    else
      # At this point we know that right(p) \in Q so we need to check it. Nothing in its subtree can beat it so we don't need to
      # look there. But there might be something better in the left subtree.
      update_highest.call(right(p))
      p = left(p)
    end
  end
  update_highest.call(p) # try the leaf
  best
end
|
476
|
+
|
477
|
+
# O(n log^2 n)
#
# Build the min-max PST from @data in place by repeatedly selecting the extremal-y element for each node of the current
# level and re-sorting the remaining tail by x.
private def construct_pst
  # We follow the algorithm in [3]. Indexing is from 1 there and we follow that here. The algorithm is almost exactly the same as
  # for the (max) PST.
  # NOTE(review): Math.log2 raises for @size == 0 — assumes a non-empty data array; confirm against callers.
  h = Math.log2(@size).floor
  a = @size - (2**h - 1) # the paper calls it A: the number of elements on the (possibly partial) bottom level
  sort_subarray(1, @size)
  level = 0 # TODO: isn't level always equal to i in the loop?

  (0...h).each do |i|
    # Even levels store subtree y-maxima, odd levels y-minima.
    sense = level.even? ? :max : :min
    pow_of_2 = 2**i

    # Block-size arithmetic from [3]; not re-derived here — see the paper for the meaning of k, k1, k2, k3.
    k = a / (2**(h - i))
    k1 = 2**(h + 1 - i) - 1
    k2 = (1 - k) * 2**(h - i) - 1 + a
    k3 = 2**(h - i) - 1
    # For each "full" block, pull its extremal-y element up into position on the current level.
    (1..k).each do |j|
      l = index_with_extremal_y_in(pow_of_2 + (j - 1) * k1, pow_of_2 + j * k1 - 1, sense:)
      swap(l, pow_of_2 + j - 1)
    end

    if k < pow_of_2
      # Handle the one irregularly-sized block straddling the end of the bottom level...
      l = index_with_extremal_y_in(pow_of_2 + k * k1, pow_of_2 + k * k1 + k2 - 1, sense:)
      swap(l, pow_of_2 + k)

      # ...and then the remaining (smaller) blocks after it.
      m = pow_of_2 + k * k1 + k2
      (1..(pow_of_2 - k - 1)).each do |j|
        l = index_with_extremal_y_in(m + (j - 1) * k3, m + j * k3 - 1, sense:)
        swap(l, pow_of_2 + k + j)
      end
    end
    # Restore x-order among everything below the level just fixed.
    sort_subarray(2 * pow_of_2, @size)
    level += 1
  end
end
|
513
|
+
|
514
|
+
########################################
|
515
|
+
# Indexing the data structure as though it were from 1, even though the underlying @data is indexed from zero.
|
516
|
+
|
517
|
+
# First element and root of the tree structure (the tree is indexed from 1).
private def root = 1
|
521
|
+
|
522
|
+
# The pair stored at 1-based tree index idx (the underlying @data is 0-based).
private def val_at(idx) = @data[idx - 1]
|
525
|
+
|
526
|
+
# Indexing is from 1
#
# Parent of node i in the implicit binary-heap layout.
private def parent(i) = i / 2
|
530
|
+
|
531
|
+
# Left child of node i in the implicit binary-heap layout.
private def left(i) = 2 * i
|
534
|
+
|
535
|
+
# Right child of node i in the implicit binary-heap layout.
private def right(i) = 2 * i + 1
|
538
|
+
|
539
|
+
# A node is a leaf when its (would-be) left child lies beyond the array.
private def leaf?(i) = (i << 1) > @size
|
542
|
+
|
543
|
+
# A node has exactly one child iff its left child is the very last element
# (left <= @size and right = left + 1 > @size collapses to left == @size).
private def one_child?(i) = (i << 1) == @size
|
546
|
+
|
547
|
+
# Exchange the elements at 1-based tree indices index1 and index2. No-op when they coincide.
private def swap(index1, index2)
  return if index1 == index2

  i = index1 - 1
  j = index2 - 1
  @data[i], @data[j] = @data[j], @data[i]
end
|
552
|
+
|
553
|
+
# Depth of node i below the root (the root itself is at level 0): the number
# of halvings needed to reach the root.
private def level(i)
  steps = 0
  steps += 1 while (i >>= 1) >= root
  steps
end
|
561
|
+
|
562
|
+
# The index in @data[l..r] having the largest/smallest value for y.
# The sense argument should be :min or :max. Returns nil for an empty range.
private def index_with_extremal_y_in(l, r, sense:)
  return nil if r < l

  chooser =
    case sense
    when :min then :min_by
    when :max then :max_by
    else
      raise "Bad comparison sense #{sense}"
    end

  (l..r).send(chooser) { |idx| val_at(idx).y }
end
|
576
|
+
|
577
|
+
# Sort the subarray @data[l..r] (1-based, inclusive) by x. This is much faster than a
# Ruby-layer heapsort because the work mostly happens in C.
private def sort_subarray(l, r)
  return if l == r # a single element is already sorted

  lo = l - 1
  hi = r - 1
  @data[lo..hi] = @data[lo..hi].sort_by(&:x)
end
|
586
|
+
|
587
|
+
########################################
|
588
|
+
# Debugging support
|
589
|
+
#
|
590
|
+
# These methods are not written for speed
|
591
|
+
|
592
|
+
# Check that our data satisfies the requirements of a min-max Priority Search Tree:
# - min-max heap in y: a parent on an even ("max") level is above everything in its subtrees; a parent on an odd
#   ("min") level is below everything in its subtrees
# - all the x values in the left subtree are less than all the x values in the right subtree
#
# Raises RuntimeError on the first violation found; returns nil when all checks pass.
# Note: comparisons are strict, so duplicate x or y values will be reported as violations.
def verify_properties
  # It's a min-max heap in y
  (2..@size).each do |node|
    level = Math.log2(node).floor
    parent_level = level - 1

    _, _, min_y, max_y = minmax_in_subtree(node)
    parent_y = val_at(parent(node)).y

    it_is_fine = if parent_level.even?
                   # max level: parent dominates the whole subtree rooted at this child
                   parent_y > max_y
                 else
                   # min level: parent is below the whole subtree rooted at this child
                   parent_y < min_y
                 end

    raise "Heap property violated at child #{node}" unless it_is_fine
  end

  # Left subtree has x values less than all of the right subtree
  (1..@size).each do |node|
    # Skip only nodes with no right child. This must be a strict comparison: with 1-based indexing,
    # right(node) == @size means the right child IS the last element and still needs checking
    # (the previous ">=" test incorrectly skipped that case).
    next if right(node) > @size

    left_max = max_x_in_subtree(left(node))
    right_min = min_x_in_subtree(right(node))

    raise "Left-right property of x-values violated at #{node}" unless left_max < right_min
  end

  nil
end
|
626
|
+
|
627
|
+
# Largest x value in the subtree rooted at (and including) root.
private def max_x_in_subtree(root)
  _min_x, max_x, = minmax_in_subtree(root)
  max_x
end
|
630
|
+
|
631
|
+
# Smallest x value in the subtree rooted at (and including) root.
private def min_x_in_subtree(root)
  minmax_in_subtree(root).first
end
|
634
|
+
|
635
|
+
# Return min_x, max_x, min_y, max_y in the subtree rooted at and including root.
# Results are memoized (and frozen) per node, since verification queries each subtree repeatedly.
private def minmax_in_subtree(root)
  cache = (@minmax_vals ||= [])
  cache[root] ||= calc_minmax_at(root).freeze
end
|
640
|
+
|
641
|
+
# No memoization
#
# Recursively compute [min_x, max_x, min_y, max_y] for the subtree rooted at (and including) root.
# A node past the end of the array contributes the identity values for min/max.
private def calc_minmax_at(root)
  return [INFINITY, -INFINITY, INFINITY, -INFINITY] if root > @size

  pair = val_at(root)

  return [pair.x, pair.x, pair.y, pair.y] if leaf?(root)

  left = left(root)
  # Recursion goes through the memoized wrapper, so shared subtrees are computed once.
  left_min_max = minmax_in_subtree(left)
  # With one child, the node's own pair still matters... but a single-child node's subtree extrema are combined below
  # only in the two-child case. NOTE(review): here the node's own x/y are NOT merged in for the one-child case —
  # presumably safe because of the heap/x ordering, but confirm.
  return left_min_max if one_child?(root)

  right = right(root)
  right_min_max = minmax_in_subtree(right)

  # Combine this node's pair with both children's extrema.
  [
    [pair.x, left_min_max[0], right_min_max[0]].min,
    [pair.x, left_min_max[1], right_min_max[1]].max,
    [pair.y, left_min_max[2], right_min_max[2]].min,
    [pair.y, left_min_max[3], right_min_max[3]].max
  ]
end
|
663
|
+
|
664
|
+
# Render the tree as a newline-separated list of "parent -- child" edges
# (quasi-GraphViz "dot" format), walking nodes from the last down to 2.
private def output_quasi_dot
  @size.downto(2).map do |node|
    "#{val_at(parent(node)).fmt} -- #{val_at(node).fmt}"
  end.join("\n")
end
|
669
|
+
|
670
|
+
# Unfinished stub: currently a no-op returning nil. Presumably intended to format a Pair for
# debugging output — TODO: implement or remove.
private def pair_to_s
end
|
672
|
+
|
673
|
+
########################################
|
674
|
+
# Dead code
|
675
|
+
|
676
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require_relative 'data_structures_rmolinari/max_priority_search_tree_internal'
|
2
|
+
require_relative 'data_structures_rmolinari/minmax_priority_search_tree_internal'
|
3
|
+
|
4
|
+
# Public namespace for the gem. The *Internal classes are implemented in the files required
# above; these constants are the supported public names for them.
module DataStructuresRMolinari
  MaxPrioritySearchTree = MaxPrioritySearchTreeInternal
  MinmaxPrioritySearchTree = MinmaxPrioritySearchTreeInternal
end
|