data_structures_rmolinari 0.3.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/data_structures_rmolinari/disjoint_union.rb +29 -13
- data/lib/data_structures_rmolinari/generic_segment_tree.rb +3 -3
- data/lib/data_structures_rmolinari/max_priority_search_tree.rb +50 -99
- data/lib/data_structures_rmolinari.rb +5 -4
- metadata +2 -3
- data/lib/data_structures_rmolinari/minmax_priority_search_tree.rb +0 -668
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb25e49219167201208f45a402b202180466202bc071940da418b2f84d281f6d
|
4
|
+
data.tar.gz: f43c1614c2a433d7a4e1148eb90121fc4b1d61c807afeb78c456d77b66935adb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2e77397f790e8fe8d650d7727be55b464aa8f19d928e215b824820b712df1f5762d74ed304196e1c773960f02d73cd29bc07486f0cc28eb5ddcd5dbd422d691
|
7
|
+
data.tar.gz: 56328269625f88b5119b64696f792a2e567d6327605dc9a6cc391edd33e5e49f293adb304ef3b819094b860299f904b79862eaac2a8eaeff0d249350bce280db
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,23 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [0.4.0] 2023-01-12
|
6
|
+
|
7
|
+
### Changed
|
8
|
+
|
9
|
+
- MaxPrioritySearchTree
|
10
|
+
- Duplicate y values are now allowed. Ties are broken with a preference for smaller values of x.
|
11
|
+
- Method names have changed
|
12
|
+
- Instead of "highest", "leftmost", "rightmost" we use "largest_y", "smallest_x", "largest_x"
|
13
|
+
- For example, +highest_ne+ is now +largest_y_in_ne+
|
14
|
+
- DisjointUnion
|
15
|
+
- the size argument to initializer is optional. The default value is 0.
|
16
|
+
- elements can be added to the "universe" of known values with +make_set+
|
17
|
+
|
18
|
+
### Removed
|
19
|
+
- MinmaxPrioritySearchTree is no longer available
|
20
|
+
- it was only a partial implementation anyway
|
21
|
+
|
5
22
|
## [0.3.0] 2023-01-06
|
6
23
|
|
7
24
|
### Added
|
@@ -10,34 +10,48 @@
|
|
10
10
|
# See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
|
11
11
|
#
|
12
12
|
# The code uses several ideas from Tarjan and van Leeuwen for efficiency. We use "union by rank" in +unite+ and path-halving in
|
13
|
-
# +find+. Together, these make the amortized cost
|
13
|
+
# +find+. Together, these make the amortized cost of each operation effectively constant.
|
14
14
|
#
|
15
|
-
# - Tarjan, Robert E., van Leeuwen, Jan (1984).
|
15
|
+
# - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
|
16
16
|
#
|
17
17
|
# @todo
|
18
18
|
# - allow caller to expand the size of the universe. This operation is called "make set".
|
19
19
|
# - All we need to do is increase the size of @d, set the parent pointers, define the new ranks (zero), and update @size.
|
20
20
|
class DataStructuresRMolinari::DisjointUnion
|
21
|
+
include Shared
|
22
|
+
|
21
23
|
# The number of subsets in the partition.
|
22
24
|
attr_reader :subset_count
|
23
25
|
|
24
|
-
# @param
|
25
|
-
#
|
26
|
-
def initialize(
|
27
|
-
@size = size
|
26
|
+
# @param initial_size the initial size of the universe. The elements 0, 1, ..., initial_size - 1 start out in disjoint singleton
|
27
|
+
# subsets.
|
28
|
+
def initialize(initial_size = 0)
|
28
29
|
# Initialize to
|
29
|
-
@d = (0...
|
30
|
-
@rank = [0] *
|
30
|
+
@d = (0...initial_size).to_a
|
31
|
+
@rank = [0] * initial_size
|
32
|
+
|
33
|
+
@subset_count = initial_size
|
34
|
+
end
|
35
|
+
|
36
|
+
# Add a new subset to the universe containing the element +new_v+
|
37
|
+
# @param new_v the new element, starting in its own singleton subset
|
38
|
+
# - it must be a non-negative integer, not already part of the universe of elements.
|
39
|
+
def make_set(new_v)
|
40
|
+
raise DataError, "Element #{new_v} must be a non-negative integer" unless new_v.is_a?(Integer) && !new_v.negative?
|
41
|
+
raise DataError, "Element #{new_v} is already present" if @d[new_v]
|
31
42
|
|
32
|
-
@
|
43
|
+
@d[new_v] = new_v
|
44
|
+
@rank[new_v] = 0
|
45
|
+
@subset_count += 1
|
33
46
|
end
|
34
47
|
|
35
48
|
# Declare that e and f are equivalent, i.e., in the same subset. If they are already in the same subset this is a no-op.
|
36
49
|
#
|
37
|
-
# Each argument must be
|
50
|
+
# Each argument must be in the universe of elements
|
38
51
|
def unite(e, f)
|
39
52
|
check_value(e)
|
40
53
|
check_value(f)
|
54
|
+
|
41
55
|
raise 'Uniting an element with itself is meaningless' if e == f
|
42
56
|
|
43
57
|
e_root = find(e)
|
@@ -50,9 +64,11 @@ class DataStructuresRMolinari::DisjointUnion
|
|
50
64
|
|
51
65
|
# The canonical representative of the subset containing e. Two elements d and e are in the same subset exactly when find(d) ==
|
52
66
|
# find(e).
|
53
|
-
# @param e must be
|
54
|
-
# @return (Integer) one of
|
67
|
+
# @param e must be in the universe of elements
|
68
|
+
# @return (Integer) one of the universe of elements
|
55
69
|
def find(e)
|
70
|
+
check_value(e)
|
71
|
+
|
56
72
|
# We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
|
57
73
|
x = e
|
58
74
|
x = @d[x] = @d[@d[x]] while @d[@d[x]] != @d[x]
|
@@ -60,7 +76,7 @@ class DataStructuresRMolinari::DisjointUnion
|
|
60
76
|
end
|
61
77
|
|
62
78
|
private def check_value(v)
|
63
|
-
raise DataError, "Value
|
79
|
+
raise Shared::DataError, "Value #{v} is not part of the univserse." unless @d[v]
|
64
80
|
end
|
65
81
|
|
66
82
|
private def link(e, f)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative 'shared'
|
2
2
|
|
3
|
-
#
|
4
|
-
# arbitrary subarray of a given array.
|
3
|
+
# The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
|
4
|
+
# max) on an arbitrary subarray of a given array.
|
5
5
|
#
|
6
6
|
# There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
|
7
7
|
# Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
|
@@ -16,7 +16,7 @@ require_relative 'shared'
|
|
16
16
|
# initializer and the definitions of concrete realisations like MaxValSegmentTree.
|
17
17
|
#
|
18
18
|
# We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
|
19
|
-
class DataStructuresRMolinari::
|
19
|
+
class DataStructuresRMolinari::SegmentTreeTemplate
|
20
20
|
include Shared::BinaryTreeArithmetic
|
21
21
|
|
22
22
|
# Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'must_be'
|
1
2
|
require 'set'
|
2
3
|
require_relative 'shared'
|
3
4
|
|
@@ -9,18 +10,18 @@ require_relative 'shared'
|
|
9
10
|
# operations. It is their approach that we have implemented.
|
10
11
|
#
|
11
12
|
# The PST structure is an implicit, balanced binary tree with the following properties:
|
12
|
-
# * The tree is a _max-heap_ in the y coordinate. That is, the point at each node has a y-value
|
13
|
+
# * The tree is a _max-heap_ in the y coordinate. That is, the point at each node has a y-value no greater than its parent.
|
13
14
|
# * For each node p, the x-values of all the nodes in the left subtree of p are less than the x-values of all the nodes in the right
|
14
15
|
# subtree of p. Note that this says nothing about the x-value at the node p itself. The tree is thus _almost_ a binary search tree
|
15
16
|
# in the x coordinate.
|
16
17
|
#
|
17
18
|
# Given a set of n points, we can answer the following questions quickly:
|
18
19
|
#
|
19
|
-
# - +
|
20
|
-
# - +
|
21
|
-
# - +
|
22
|
-
# - +
|
23
|
-
# - +
|
20
|
+
# - +smallest_x_in_ne+: for x0 and y0, what is the leftmost point (x, y) in P satisfying x >= x0 and y >= y0?
|
21
|
+
# - +largest_x_in_nw+: for x0 and y0, what is the rightmost point (x, y) in P satisfying x <= x0 and y >= y0?
|
22
|
+
# - +largest_y_in_ne+: for x0 and y0, what is the highest point (x, y) in P satisfying x >= x0 and y >= y0?
|
23
|
+
# - +largest_y_in_nw+: for x0 and y0, what is the highest point (x, y) in P satisfying x <= x0 and y >= y0?
|
24
|
+
# - +largest_y_in_3_sided+: for x0, x1, and y0, what is the highest point (x, y) in P satisfying x >= x0, x <= x1 and y >= y0?
|
24
25
|
# - +enumerate_3_sided+: for x0, x1, and y0, enumerate all points in P satisfying x >= x0, x <= x1 and y >= y0.
|
25
26
|
#
|
26
27
|
# (Here, "leftmost/rightmost" means "minimal/maximal x", and "highest" means "maximal y".)
|
@@ -29,8 +30,8 @@ require_relative 'shared'
|
|
29
30
|
#
|
30
31
|
# The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
|
31
32
|
#
|
32
|
-
# In the current implementation no two points can share an x-value
|
33
|
-
#
|
33
|
+
# In the current implementation no two points can share an x-value. This (rather severe) restriction can be relaxed with some more
|
34
|
+
# complicated code, but it hasn't been written yet. See issue #9.
|
34
35
|
#
|
35
36
|
#
|
36
37
|
# There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
|
@@ -49,7 +50,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
49
50
|
# @param data [Array] the set P of points presented as an array. The tree is built in the array in-place without cloning.
|
50
51
|
# - Each element of the array must respond to +#x+ and +#y+.
|
51
52
|
# - This is not checked explicitly but a missing method exception will be thrown when we try to call one of them.
|
52
|
-
# - The +x+ values must be distinct
|
53
|
+
# - The +x+ values must be distinct. We raise a +Shared::DataError+ if this isn't the case.
|
53
54
|
# - This is a restriction that simplifies some of the algorithm code. It can be removed at the cost of some extra work. Issue
|
54
55
|
# #9.
|
55
56
|
#
|
@@ -60,9 +61,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
60
61
|
@size = @data.size
|
61
62
|
|
62
63
|
construct_pst
|
63
|
-
return unless verify
|
64
64
|
|
65
|
-
verify_properties
|
65
|
+
verify_properties if verify
|
66
66
|
end
|
67
67
|
|
68
68
|
########################################
|
@@ -74,11 +74,11 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
74
74
|
# structure. Define p* as
|
75
75
|
#
|
76
76
|
# - (infty, -infty) if Q \intersect P is empty and
|
77
|
-
# - the highest (max-
|
77
|
+
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
78
78
|
#
|
79
79
|
# This method returns p* in O(log n) time and O(1) extra space.
|
80
|
-
def
|
81
|
-
|
80
|
+
def largest_y_in_ne(x0, y0)
|
81
|
+
largest_y_in_quadrant(x0, y0, :ne)
|
82
82
|
end
|
83
83
|
|
84
84
|
# Return the highest point in P to the "northwest" of (x0, y0).
|
@@ -87,17 +87,17 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
87
87
|
# structure. Define p* as
|
88
88
|
#
|
89
89
|
# - (-infty, -infty) if Q \intersect P is empty and
|
90
|
-
# - the highest (max-y) point in Q \intersect P otherwise
|
90
|
+
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
91
91
|
#
|
92
92
|
# This method returns p* in O(log n) time and O(1) extra space.
|
93
|
-
def
|
94
|
-
|
93
|
+
def largest_y_in_nw(x0, y0)
|
94
|
+
largest_y_in_quadrant(x0, y0, :nw)
|
95
95
|
end
|
96
96
|
|
97
|
-
# The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both
|
98
|
-
#
|
97
|
+
# The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both largest_y_in_ne and
|
98
|
+
# largest_y_in_nw
|
99
99
|
#
|
100
|
-
# Note that
|
100
|
+
# Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinity, y0) so we don't really need this. But it's a bit faster than the
|
101
101
|
# general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
|
102
102
|
#
|
103
103
|
# From the paper:
|
@@ -108,7 +108,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
108
108
|
# - If Q intersect P is empty then p* = best
|
109
109
|
#
|
110
110
|
# Here, P is the set of points in our data structure and T_p is the subtree rooted at p
|
111
|
-
private def
|
111
|
+
private def largest_y_in_quadrant(x0, y0, quadrant)
|
112
112
|
quadrant.must_be_in [:ne, :nw]
|
113
113
|
|
114
114
|
p = root
|
@@ -135,10 +135,10 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
135
135
|
#
|
136
136
|
# takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
|
137
137
|
#
|
138
|
-
#
|
138
|
+
# We break ties by preferring points with smaller x values
|
139
139
|
update_highest = lambda do |node|
|
140
140
|
t = @data[node]
|
141
|
-
if in_q.call(t) && t.y > best.y
|
141
|
+
if in_q.call(t) && (t.y > best.y || (t.y == best.y && t.x < best.x))
|
142
142
|
best = t
|
143
143
|
end
|
144
144
|
end
|
@@ -194,7 +194,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
194
194
|
# - the leftmost (min-x) point in Q \intersect P otherwise.
|
195
195
|
#
|
196
196
|
# This method returns p* in O(log n) time and O(1) extra space.
|
197
|
-
def
|
197
|
+
def smallest_x_in_ne(x0, y0)
|
198
198
|
extremal_in_x_dimension(x0, y0, :ne)
|
199
199
|
end
|
200
200
|
|
@@ -207,14 +207,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
207
207
|
# - the rightmost (max-x) point in Q \intersect P otherwise.
|
208
208
|
#
|
209
209
|
# This method returns p* in O(log n) time and O(1) extra space.
|
210
|
-
def
|
210
|
+
def largest_x_in_nw(x0, y0)
|
211
211
|
extremal_in_x_dimension(x0, y0, :nw)
|
212
212
|
end
|
213
213
|
|
214
|
-
# A genericized version of the paper's
|
214
|
+
# A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
|
215
215
|
# parameter.
|
216
216
|
#
|
217
|
-
# Quadrant is either :ne (which gives
|
217
|
+
# Quadrant is either :ne (which gives smallest_x_in_ne) or :nw (which gives largest_x_in_nw).
|
218
218
|
#
|
219
219
|
# From De et al:
|
220
220
|
#
|
@@ -245,7 +245,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
245
245
|
# takes as input a point t and does the following: if t \in Q and x(t) < x(best) then it assigns best = t
|
246
246
|
#
|
247
247
|
# Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
|
248
|
-
|
248
|
+
update_best = lambda do |node|
|
249
249
|
t = @data[node]
|
250
250
|
if in_q.call(t) && sign * t.x < sign * best.x
|
251
251
|
best = t
|
@@ -261,13 +261,13 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
261
261
|
#
|
262
262
|
# - If x0 <= x(c1) then all subtrees have large enough x values and we look for the leftmost node in c with a large enough y
|
263
263
|
# value. Both p and q are sent into that subtree.
|
264
|
-
# - If x0 >= x(ck) the the rightmost subtree is our only hope
|
264
|
+
# - If x0 >= x(ck) then the rightmost subtree is our only hope
|
265
265
|
# - Otherwise, x(c1) < x0 < x(ck) and we let i be least so that x(ci) <= x0 < x(c(i+1)). Then q becomes the leftmost cj in c not
|
266
266
|
# to the left of ci such that y(cj) >= y0, if any. p becomes ci if y(ci) >= y0 and q otherwise. If there is no such j, we put
|
267
267
|
# q = p. This may leave both of p, q undefined which means there is no useful way forward and we return nils to signal this to
|
268
268
|
# calling code.
|
269
269
|
#
|
270
|
-
# The same logic applies to
|
270
|
+
# The same logic applies to largest_x_in_nw, though everything is "backwards"
|
271
271
|
# - membership of Q depends on having a small-enough value of x, rather than a large-enough one
|
272
272
|
# - among the ci, values towards the end of the array tend not to be in Q while values towards the start of the array tend to be
|
273
273
|
# in Q
|
@@ -302,14 +302,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
302
302
|
new_p ||= new_q # if nodes[i] is no good, send p along with q
|
303
303
|
new_q ||= new_p # but if there is no worthwhile value for q we should send it along with p
|
304
304
|
|
305
|
-
return [new_q, new_p] if quadrant == :nw # swap for the
|
305
|
+
return [new_q, new_p] if quadrant == :nw # swap for the largest_x_in_nw case.
|
306
306
|
|
307
307
|
[new_p, new_q]
|
308
308
|
end
|
309
309
|
|
310
310
|
until leaf?(p)
|
311
|
-
|
312
|
-
|
311
|
+
update_best.call(p)
|
312
|
+
update_best.call(q)
|
313
313
|
|
314
314
|
if p == q
|
315
315
|
if one_child?(p)
|
@@ -324,7 +324,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
324
324
|
q = p # p itself is just one layer above the leaves, or is itself a leaf
|
325
325
|
elsif one_child?(q)
|
326
326
|
# This generic approach is not as fast as the bespoke checks described in the paper. But it is easier to maintain the code
|
327
|
-
# this way and allows easy implementation of
|
327
|
+
# this way and allows easy implementation of largest_x_in_nw
|
328
328
|
p, q = determine_next_nodes.call(left(p), right(p), left(q))
|
329
329
|
else
|
330
330
|
p, q = determine_next_nodes.call(left(p), right(p), left(q), right(q))
|
@@ -332,8 +332,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
332
332
|
break unless p # we've run out of useful nodes
|
333
333
|
end
|
334
334
|
end
|
335
|
-
|
336
|
-
|
335
|
+
update_best.call(p) if p
|
336
|
+
update_best.call(q) if q
|
337
337
|
best
|
338
338
|
end
|
339
339
|
|
@@ -346,10 +346,10 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
346
346
|
# MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
|
347
347
|
#
|
348
348
|
# - (infty, -infty) if Q \intersect P is empty and
|
349
|
-
# - the highest (max-
|
349
|
+
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
|
350
350
|
#
|
351
351
|
# This method returns p* in O(log n) time and O(1) extra space.
|
352
|
-
def
|
352
|
+
def largest_y_in_3_sided(x0, x1, y0)
|
353
353
|
# From the paper:
|
354
354
|
#
|
355
355
|
# The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P is not \empty,
|
@@ -389,7 +389,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
389
389
|
# Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
|
390
390
|
update_highest = lambda do |node|
|
391
391
|
t = @data[node]
|
392
|
-
if in_q.call(t) && t.y > best.y
|
392
|
+
if in_q.call(t) && (t.y > best.y || (t.y == best.y && t.x < best.x))
|
393
393
|
best = t
|
394
394
|
end
|
395
395
|
end
|
@@ -570,7 +570,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
570
570
|
# My high-level understanding of the algorithm
|
571
571
|
# --------------------------------------------
|
572
572
|
#
|
573
|
-
# We need to find all elements of Q \intersect P, so it isn't enough, as it was in
|
573
|
+
# We need to find all elements of Q \intersect P, so it isn't enough, as it was in largest_y_in_3_sided simply to keep track of p and
|
574
574
|
# q. We need to track four nodes, p, p', q', and q which are (with a little handwaving) respectively
|
575
575
|
#
|
576
576
|
# - the rightmost node to the left of Q' = [x0, x1] X [-infinity, infinity],
|
@@ -692,8 +692,6 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
692
692
|
# The four key helpers described in the paper
|
693
693
|
|
694
694
|
# Handle the next step of the subtree at p
|
695
|
-
#
|
696
|
-
# I need to go through this with paper, pencil, and some diagrams.
|
697
695
|
enumerate_left = lambda do
|
698
696
|
if leaf?(p)
|
699
697
|
left = false
|
@@ -999,13 +997,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
999
997
|
|
1000
998
|
private def construct_pst
|
1001
999
|
raise DataError, 'Duplicate x values are not supported' if contains_duplicates?(@data, by: :x)
|
1002
|
-
raise DataError, 'Duplicate y values are not supported' if contains_duplicates?(@data, by: :y)
|
1003
1000
|
|
1004
|
-
# We follow the algorithm in the paper by De, Maheshwari et al.
|
1001
|
+
# We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
|
1002
|
+
# defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
|
1003
|
+
# worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
|
1005
1004
|
|
1006
|
-
# Since we are building an implicit binary tree, things are simpler if the array is 1-based. This
|
1007
|
-
#
|
1008
|
-
# construction.
|
1005
|
+
# Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
|
1006
|
+
# and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
|
1007
|
+
# construction.
|
1009
1008
|
@data.unshift nil
|
1010
1009
|
|
1011
1010
|
h = Math.log2(@size).floor
|
@@ -1052,63 +1051,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
1052
1051
|
end
|
1053
1052
|
end
|
1054
1053
|
|
1055
|
-
########################################
|
1056
|
-
# Tree arithmetic
|
1057
|
-
|
1058
|
-
# # First element and root of the tree structure
|
1059
|
-
# private def root
|
1060
|
-
# 1
|
1061
|
-
# end
|
1062
|
-
|
1063
|
-
# # Indexing is from 1
|
1064
|
-
# private def parent(i)
|
1065
|
-
# i >> 1
|
1066
|
-
# end
|
1067
|
-
|
1068
|
-
# private def left(i)
|
1069
|
-
# i << 1
|
1070
|
-
# end
|
1071
|
-
|
1072
|
-
# private def right(i)
|
1073
|
-
# 1 + (i << 1)
|
1074
|
-
# end
|
1075
|
-
|
1076
|
-
# private def level(i)
|
1077
|
-
# l = 0
|
1078
|
-
# while i > root
|
1079
|
-
# i >>= 1
|
1080
|
-
# l += 1
|
1081
|
-
# end
|
1082
|
-
# l
|
1083
|
-
# end
|
1084
|
-
|
1085
|
-
# # i has no children
|
1086
|
-
# private def leaf?(i)
|
1087
|
-
# i > @last_non_leaf
|
1088
|
-
# end
|
1089
|
-
|
1090
|
-
# # i has exactly one child (the left)
|
1091
|
-
# private def one_child?(i)
|
1092
|
-
# i == @parent_of_one_child
|
1093
|
-
# end
|
1094
|
-
|
1095
|
-
# # i has two children
|
1096
|
-
# private def two_children?(i)
|
1097
|
-
# i <= @last_parent_of_two_children
|
1098
|
-
# end
|
1099
|
-
|
1100
|
-
# # i is the left child of its parent.
|
1101
|
-
# private def left_child?(i)
|
1102
|
-
# (i & 1).zero?
|
1103
|
-
# end
|
1104
|
-
|
1105
1054
|
private def swap(index1, index2)
|
1106
1055
|
return if index1 == index2
|
1107
1056
|
|
1108
1057
|
@data[index1], @data[index2] = @data[index2], @data[index1]
|
1109
1058
|
end
|
1110
1059
|
|
1111
|
-
# The index in @data[l..r] having the largest value for y
|
1060
|
+
# The index in @data[l..r] having the largest value for y, breaking ties with the smaller x value. Since we are already sorted by
|
1061
|
+
# x we don't actually need to check this.
|
1112
1062
|
private def index_with_largest_y_in(l, r)
|
1113
1063
|
return nil if r < l
|
1114
1064
|
|
@@ -1134,7 +1084,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
1134
1084
|
private def verify_properties
|
1135
1085
|
# It's a max-heap in y
|
1136
1086
|
(2..@size).each do |node|
|
1137
|
-
|
1087
|
+
byebug unless @data[node].y <= @data[parent(node)].y
|
1088
|
+
raise InternalLogicError, "Heap property violated at child #{node}" unless @data[node].y <= @data[parent(node)].y
|
1138
1089
|
end
|
1139
1090
|
|
1140
1091
|
# Left subtree has x values less than all of the right subtree
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
1
3
|
require_relative 'data_structures_rmolinari/shared'
|
2
4
|
|
3
5
|
module DataStructuresRMolinari
|
@@ -10,14 +12,13 @@ require_relative 'data_structures_rmolinari/disjoint_union'
|
|
10
12
|
require_relative 'data_structures_rmolinari/generic_segment_tree'
|
11
13
|
require_relative 'data_structures_rmolinari/heap'
|
12
14
|
require_relative 'data_structures_rmolinari/max_priority_search_tree'
|
13
|
-
require_relative 'data_structures_rmolinari/minmax_priority_search_tree'
|
14
15
|
|
15
16
|
# A namespace to hold the provided classes. We want to avoid polluting the global namespace with names like "Heap"
|
16
17
|
module DataStructuresRMolinari
|
17
18
|
########################################
|
18
19
|
# Concrete instances of Segment Tree
|
19
20
|
#
|
20
|
-
# @todo consider moving these into generic_segment_tree.rb
|
21
|
+
# @todo consider moving these into generic_segment_tree.rb and renaming that file
|
21
22
|
|
22
23
|
# A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
|
23
24
|
# in O(log n) time.
|
@@ -30,7 +31,7 @@ module DataStructuresRMolinari
|
|
30
31
|
# @param data an object that contains values at integer indices based at 0, via +data[i]+.
|
31
32
|
# - This will usually be an Array, but it could also be a hash or a proc.
|
32
33
|
def initialize(data)
|
33
|
-
@structure =
|
34
|
+
@structure = SegmentTreeTemplate.new(
|
34
35
|
combine: ->(a, b) { [a, b].max },
|
35
36
|
single_cell_array_val: ->(i) { data[i] },
|
36
37
|
size: data.size,
|
@@ -57,7 +58,7 @@ module DataStructuresRMolinari
|
|
57
58
|
|
58
59
|
# @param (see MaxValSegmentTree#initialize)
|
59
60
|
def initialize(data)
|
60
|
-
@structure =
|
61
|
+
@structure = SegmentTreeTemplate.new(
|
61
62
|
combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
|
62
63
|
single_cell_array_val: ->(i) { [i, data[i]] },
|
63
64
|
size: data.size,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_structures_rmolinari
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rory Molinari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: must_be
|
@@ -86,7 +86,6 @@ files:
|
|
86
86
|
- lib/data_structures_rmolinari/generic_segment_tree.rb
|
87
87
|
- lib/data_structures_rmolinari/heap.rb
|
88
88
|
- lib/data_structures_rmolinari/max_priority_search_tree.rb
|
89
|
-
- lib/data_structures_rmolinari/minmax_priority_search_tree.rb
|
90
89
|
- lib/data_structures_rmolinari/shared.rb
|
91
90
|
homepage: https://github.com/rmolinari/data_structures
|
92
91
|
licenses:
|
@@ -1,668 +0,0 @@
|
|
1
|
-
require 'must_be'
|
2
|
-
|
3
|
-
require_relative 'shared'
|
4
|
-
|
5
|
-
# THIS CLASS IS INCOMPLETE AND NOT USABLE
|
6
|
-
#
|
7
|
-
# A priority search tree (PST) stores points in two dimensions (x,y) and can efficiently answer certain questions about the set of
|
8
|
-
# point.
|
9
|
-
#
|
10
|
-
# The structure was introduced by McCreight [1].
|
11
|
-
#
|
12
|
-
# See more: https://en.wikipedia.org/wiki/Priority_search_tree
|
13
|
-
#
|
14
|
-
# It is possible to build such a tree in place, given an array of pairs. See [2]. In a follow-up paper, [3], the authors show how to
|
15
|
-
# construct a more flexible data structure,
|
16
|
-
#
|
17
|
-
# "[T]he Min-Max Priority Search tree for a set P of n points in R^2. It is a binary tree T with the following properties:
|
18
|
-
#
|
19
|
-
# * For each internal node u, all points in the left subtree of u have an x-coordinate which is less than the x-coordinate of any
|
20
|
-
# point in the right subtree of u.
|
21
|
-
# * The y-coordinate values of the nodes on even (resp. odd) levels are smaller (resp. greater) than the y-coordinate values of
|
22
|
-
# their descendants (if any), where the root is at level zero.
|
23
|
-
#
|
24
|
-
# "The first property implies that T is a binary search three on the x-coordinates of the points in P, excepts that there is no
|
25
|
-
# relation between the x-coordinates of the points stored at u and any of its children. The second property implies that T is a
|
26
|
-
# min-max heap on the y-coordinates of the points in P."
|
27
|
-
#
|
28
|
-
# I started implementing the in-place PST. Then, finding the follow-up paper [3], decided to do that one instead, as the paper says
|
29
|
-
# it is more flexible. The point is to learn a new data structure and its associated algorithms.
|
30
|
-
#
|
31
|
-
# The algorithms are rather bewildering. Highest3SidedUp is complicated, and only two of the functions CheckLeft, CheckLeftIn,
|
32
|
-
# CheckRight, CheckRightIn are given; the other two are "symmetric". But it's not really clear what the first are actually doing, so
|
33
|
-
# it's hard to know what the others actually do.
|
34
|
-
#
|
35
|
-
# The implementation is incomplete. The pseduo-code in the paper is buggy (see the code below), which makes progress difficult.
|
36
|
-
#
|
37
|
-
# [1] E. McCreight, _Priority Search Trees_, SIAM J. Computing, v14, no 3, May 1985, pp 257-276.
|
38
|
-
# [2] De, Maheshwari, Nandy, Smid, _An in-place priority search tree_, 23rd Annual Canadian Conference on Computational Geometry.
|
39
|
-
# [3] De, Maheshwari, Nandy, Smid, _An in-place min-max priority search tree_, Computational Geometry, v46 (2013), pp 310-327.
|
40
|
-
# [4] Atkinson, Sack, Santoro, Strothotte, _Min-max heaps and generalized priority queues_, Commun. ACM 29 (10) (1986), pp 996-1000.
|
41
|
-
class DataStructuresRMolinari::MinmaxPrioritySearchTree
|
42
|
-
include Shared
|
43
|
-
|
44
|
-
# The array of pairs is turned into a minmax PST in-place without cloning. So clone before passing it in, if you care.
|
45
|
-
#
|
46
|
-
# Each element must respond to #x and #y. Use Point (above) if you like.
|
47
|
-
def initialize(data, verify: false)
|
48
|
-
@data = data
|
49
|
-
@size = @data.size
|
50
|
-
|
51
|
-
construct_pst
|
52
|
-
return unless verify
|
53
|
-
|
54
|
-
# puts "Validating tree structure..."
|
55
|
-
verify_properties
|
56
|
-
end
|
57
|
-
|
58
|
-
# Let Q = [x0, infty) X [y0, infty) be the northeast "quadrant" defined by the point (x0, y0) and let P be the points in this data
|
59
|
-
# structure. Define p* as
|
60
|
-
#
|
61
|
-
# - (infty, infty) if Q \intersect P is empty and
|
62
|
-
# - the leftmost (i.e., min-x) point in Q \intersect P otherwise
|
63
|
-
#
|
64
|
-
# This method returns p*.
|
65
|
-
#
|
66
|
-
# From De et al:
|
67
|
-
#
|
68
|
-
# [t]he variables best, p, and q satisfy the folling invariant:
|
69
|
-
#
|
70
|
-
# - if Q \intersect P is nonempty then p* \in {best} \union T(p) \union T(q)
|
71
|
-
# - if Q \intersect P is empty then p* = best
|
72
|
-
# - p and q are at the same level of T and x(p) <= x(q)
|
73
|
-
#
|
74
|
-
# Here T(x) is the subtree rooted at x
|
75
|
-
def leftmost_ne(x0, y0)
|
76
|
-
best = Point.new(INFINITY, INFINITY)
|
77
|
-
p = q = root
|
78
|
-
|
79
|
-
in_q = ->(pair) { pair.x >= x0 && pair.y >= y0 }
|
80
|
-
|
81
|
-
# From the paper:
|
82
|
-
#
|
83
|
-
# takes as input a point t \in P and updates best as follows: if t \in Q and x(t) < x(best) then it assigns best = t
|
84
|
-
#
|
85
|
-
# Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
|
86
|
-
update_leftmost = lambda do |node|
|
87
|
-
t = val_at(node)
|
88
|
-
if in_q.call(t) && t.x < best.x
|
89
|
-
best = t
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Generalize the c1,...,c4 idea from the paper in line with the BUG 2 IN PAPER notes, below.
|
94
|
-
#
|
95
|
-
# Given: 0 or more nodes n1, ..., nk in the tree. All are at the same level, which is a "max level" in our MinmaxPST, such that
|
96
|
-
# x(n1) <= x(n2) <= ... <= x(nk). (Note: it is expected that the nj are either children or grandchildren of p and q, though we
|
97
|
-
# don't check that.)
|
98
|
-
#
|
99
|
-
# If k = 0 return nil. Otherwise...
|
100
|
-
#
|
101
|
-
# We return two values p_goal, q_goal (possibly equal) from among the nj such that
|
102
|
-
#
|
103
|
-
# - p_goal is not to the right of q_goal in the tree and so, in particular x(p_goal) <= x(q_goal)
|
104
|
-
# - if and when the search reaches p = p_goal and q = q_goal the algorithm invariant will be satisfied.
|
105
|
-
#
|
106
|
-
# As a special case, we return nil if we detect that none of the subtrees T(nj) contain any points in Q. This is a sign to
|
107
|
-
# terminate the algorithm.
|
108
|
-
#
|
109
|
-
# See the notes at "BUG 2 IN PAPER" below for more details about what is going on.
|
110
|
-
determine_goal_nodes = lambda do |nodes|
|
111
|
-
node_count = nodes.size
|
112
|
-
return nil if node_count.zero?
|
113
|
-
|
114
|
-
if val_at(nodes.last).x <= x0
|
115
|
-
# Only the rightmost subtree can possibly have anything in Q, assuming that all the x-values are distinct.
|
116
|
-
return [nodes.last, nodes.last]
|
117
|
-
end
|
118
|
-
|
119
|
-
if val_at(nodes.first).x > x0
|
120
|
-
# All subtrees have x-values large enough to provide elements of Q. Since we are at a max-level the y-values help us work
|
121
|
-
# out which subtree to focus on.
|
122
|
-
leftmost = nodes.find { |node| val_at(node).y >= y0 }
|
123
|
-
|
124
|
-
return nil unless leftmost # nothing left to find
|
125
|
-
|
126
|
-
# Otherwise we explore the leftmost subtree. Its root is in Q and can't be beaten by anything to its right.
|
127
|
-
return [leftmost, leftmost]
|
128
|
-
end
|
129
|
-
|
130
|
-
values = nodes.map { |n| val_at(n) }
|
131
|
-
|
132
|
-
# Otherwise x(n1) <= x0 < x(nk). Thus i is well-defined.
|
133
|
-
i = (0...node_count).select { |j| values[j].x <= x0 && x0 < values[j + 1].x }.min
|
134
|
-
|
135
|
-
# these nodes all have large-enough x-values and so this finds the ones in the set Q.
|
136
|
-
new_q = nodes[(i + 1)..].select { |node| val_at(node).y >= y0 }.min # could be nil
|
137
|
-
new_p = nodes[i] if values[i].y >= y0 # The leftmost subtree is worth exploring if the y-value is big enough. Otherwise not
|
138
|
-
new_p ||= new_q # if nodes[i] is no good we send p along with q
|
139
|
-
new_q ||= new_p # but if there was no worthwhile value for q we should send it along with p
|
140
|
-
|
141
|
-
return nil unless new_p
|
142
|
-
|
143
|
-
[new_p, new_q]
|
144
|
-
end
|
145
|
-
|
146
|
-
until leaf?(p)
|
147
|
-
level = Math.log2(p).floor # TODO: don't calculate log every time!
|
148
|
-
|
149
|
-
update_leftmost.call(p)
|
150
|
-
update_leftmost.call(q)
|
151
|
-
|
152
|
-
if p == q
|
153
|
-
if one_child?(p)
|
154
|
-
p = q = left(p)
|
155
|
-
else
|
156
|
-
q = right(p)
|
157
|
-
p = left(p)
|
158
|
-
end
|
159
|
-
else
|
160
|
-
# p != q
|
161
|
-
if leaf?(q)
|
162
|
-
q = p # p itself is just one layer above the leaves, or is itself a leaf
|
163
|
-
elsif one_child?(q)
|
164
|
-
# Note that p has two children
|
165
|
-
if val_at(left(q)).x < x0
|
166
|
-
# x-values below p are too small
|
167
|
-
p = q = left(q)
|
168
|
-
elsif val_at(right(p)).x <= x0
|
169
|
-
# x-values in T(right(p)) are too small. DISTINCT-X
|
170
|
-
p = right(p)
|
171
|
-
q = left(q)
|
172
|
-
else
|
173
|
-
# BUG 1 IN PAPER.
|
174
|
-
#
|
175
|
-
# So, x(q_l) >= x0 and x(p_r) > x0. But how can we be sure that the child of q isn't the winner? Should we be trying
|
176
|
-
# it in this case?
|
177
|
-
#
|
178
|
-
# Yes: otherwise it never gets checked.
|
179
|
-
|
180
|
-
update_leftmost.call(left(q))
|
181
|
-
q = right(p)
|
182
|
-
p = left(p)
|
183
|
-
end
|
184
|
-
else
|
185
|
-
# p and q both have two children
|
186
|
-
|
187
|
-
# BUG 2 IN PAPER.
|
188
|
-
#
|
189
|
-
# Define c as the paper does:
|
190
|
-
#
|
191
|
-
# (c1, c2, c3, c4) = (left(p), right(p), left(q), right(q))
|
192
|
-
#
|
193
|
-
# Because of the PST property on x and the invariant x(p) <= x(q) we know that
|
194
|
-
#
|
195
|
-
# x(c1) <= x(c2) <= x(c3) <= x(c4)
|
196
|
-
#
|
197
|
-
# Similarly, the sets of values x(T(ci)) are pairwise ordered in the same sense.
|
198
|
-
#
|
199
|
-
# Suppose further that x(ci) <= x0 <= x(c(i+1)). Then we know several things
|
200
|
-
#
|
201
|
-
# - there might be a "winner" (point in Q) in T(ci), perhaps ci itself.
|
202
|
-
# - there are not any winners in T(cj) for j < i, because the x-values there aren't big enough
|
203
|
-
# - any winner in ck, for k >= i, will be to the left of and thus beat any winner in c(k+1), because of the ordering of
|
204
|
-
# x-values
|
205
|
-
#
|
206
|
-
# If x(c4) <= x0 then the rightmost subtree T(c4) is the only one worth checking and we set p = q = c4.
|
207
|
-
# If x(c1) > x0 then we take i = 0 and ignore the logic on ci in what follows and setting p = q.
|
208
|
-
#
|
209
|
-
# Pretend for the moment that we are using a MaxPST instead of a MinmaxPST. Then we can look at y values to learn more.
|
210
|
-
#
|
211
|
-
# - if y(ci) >= y0 then we need to search T(ci), so we will update p = ci
|
212
|
-
# - but if y(ci) < y0 then there are no winners in T(ci) because the y-values are too small.
|
213
|
-
# - similarly, if y(c(i+1)) >= y0 then we need to search T(c(i+1)). Indeed c(i+1) itself is in Q and beats any winner in
|
214
|
-
# subtrees further to the right
|
215
|
-
# - so, let k > i be minimal such that y(ck) >= y0, if there is any. Note that ck is itself a winner. Then
|
216
|
-
# - if y(ci) >= y0,
|
217
|
-
# - set p = ci, and q = ck (or q = ci if there is no such k)
|
218
|
-
# - otherwise (T(ci) has no winners because its y-values are too small)
|
219
|
-
# - if k is defined set p = q = ck. Otherwise HALT (there are no more winners)
|
220
|
-
#
|
221
|
-
# But we are working with a MinmaxPST rather than a MaxPST, so we have to work harder. If c1, ..., c4 (the children of p
|
222
|
-
# and q) are in a "max-level" of the tree - that is, an even level - then the logic above still applies. But if they are
|
223
|
-
# at a min level things are trickier and we need to go another layer down.
|
224
|
-
#
|
225
|
-
# The paper knows that we need to look a further layer down, but the logic is too simplistic. It looks at cj for j > i and
|
226
|
-
# checks if cj or either of its children are in Q. But that's not good enough. For the same reason that in a MaxPST we may
|
227
|
-
# need to explore below T(ci) even if ci isn't in Q, we may need to descend through one of the grandchildren of p or q even
|
228
|
-
# if that grandchild isn't in Q.
|
229
|
-
#
|
230
|
-
# Getting a bit handwavey especially over what happens near the leaves...
|
231
|
-
#
|
232
|
-
# Consider the children d1, d2, ..., dm, of ci, ..., c4 (and so grandchildren of p and q). They are at a max-level and so
|
233
|
-
# the logic described applies to the dk. If ci happens to be a winner we can set p = ci and work out what to do with q by
|
234
|
-
# looking at the children of c(i+1), ..., c4. Otherwise we look at all the dj values (up to 8 of them), apply the logic
|
235
|
-
# above to work out that we want to head for, say, p = ds and q = dt, and in this cycle update p = parent(ds), q =
|
236
|
-
# parent(dt). (We also need to submit the values c(i+1)..c4 to UpdateLeftmost.)
|
237
|
-
#
|
238
|
-
# In other words, we can use the MaxPST logic on d1,...,dm to decide where we need to go, and then step to the relevant
|
239
|
-
# parents among the cj.
|
240
|
-
|
241
|
-
c = [left(p), right(p), left(q), right(q)]
|
242
|
-
if level.odd?
|
243
|
-
# the elements of c are at an even level, and hence their y values are maxima for the subtrees. We can learn what we
|
244
|
-
# need to know from them
|
245
|
-
p, q = determine_goal_nodes.call(c)
|
246
|
-
if p && !q
|
247
|
-
# byebug
|
248
|
-
# determine_goal_nodes.call(c)
|
249
|
-
raise 'bad logic'
|
250
|
-
end
|
251
|
-
else
|
252
|
-
# They are at an odd level and so aren't helpful in working out what to do next: we look at their children, which are in
|
253
|
-
# a max-level. We need to check the elements of c against best since we are otherwise ignoring them.
|
254
|
-
c.each { |n| update_leftmost.call(n) }
|
255
|
-
|
256
|
-
d = c.map { [left(_1), right(_1)]}.flatten.select { |n| n <= @size }
|
257
|
-
|
258
|
-
# Note that we are jumping down two levels here!
|
259
|
-
p, q = determine_goal_nodes.call(d)
|
260
|
-
if p && !q
|
261
|
-
# byebug
|
262
|
-
# determine_goal_nodes.call(c)
|
263
|
-
raise 'bad logic'
|
264
|
-
end
|
265
|
-
|
266
|
-
p
|
267
|
-
end
|
268
|
-
|
269
|
-
return best unless p # nothing more to do
|
270
|
-
end
|
271
|
-
end
|
272
|
-
end
|
273
|
-
update_leftmost.call(p)
|
274
|
-
update_leftmost.call(q)
|
275
|
-
best
|
276
|
-
end
|
277
|
-
|
278
|
-
# Let Q be the "three-sided query range" [x0, x1] X [y0, infty) and let P_Q be P \intersect Q.
|
279
|
-
#
|
280
|
-
# If P_Q is empty then p* = (infty, -infty).
|
281
|
-
# Otherwise, p* is the point in P_Q with maximal y value.
|
282
|
-
#
|
283
|
-
# This method returns p*
|
284
|
-
# def highest_3_sided_up(x0, x1, y0)
|
285
|
-
# best = Point.new(INFINITY, -INFINITY)
|
286
|
-
|
287
|
-
# in_q = lambda do |pair|
|
288
|
-
# pair.x >= x0 && pair.x <= x1 && pair.y >= y0
|
289
|
-
# end
|
290
|
-
|
291
|
-
# # From the paper:
|
292
|
-
# #
|
293
|
-
# # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
|
294
|
-
# #
|
295
|
-
# # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
|
296
|
-
# #
|
297
|
-
# # The algorithm is complicated. From the paper:
|
298
|
-
# #
|
299
|
-
# # Since Q is bounded by two vertical sides, we use four index variables p, p', q and q' to guide the search path. In addition,
|
300
|
-
# # we use four bits L, L', R and R'; these correspond to the subtrees of T rooted at the nodes p, p', q, and q', respectively;
|
301
|
-
# # if a bit is equal to one, then the corresponding node is referred to as an _active node_ (for example, if L = 1 then p is an
|
302
|
-
# # active node), and the subtree rooted at that node may contain a candidate point for p*. So the search is required to be
|
303
|
-
# # performed in the subtree rooted at all active nodes. More formally, at any instant of time the variables satisfy the following
|
304
|
-
# # invariants:
|
305
|
-
# #
|
306
|
-
# # - If L = 1 then x(p) < x0.
|
307
|
-
# # - If L' = 1 then x0 <= x(p') <= x1.
|
308
|
-
# # - If R = 1 then x(q) > x1.
|
309
|
-
# # - If R' = 1 then x0 <= x(q') <= x1.
|
310
|
-
# # - If L' = 1 and R' = 1 then x(p') <= x(q').
|
311
|
-
# # - If P_Q is non-empty then p* = best or p* is in the subtree rooted at any one of the active nodes.
|
312
|
-
# #
|
313
|
-
# # There are more details in the paper
|
314
|
-
# update_highest = lambda do |node|
|
315
|
-
# t = val_at(node)
|
316
|
-
# if in_q.call(t) && t.y > best.y
|
317
|
-
# best = t
|
318
|
-
# end
|
319
|
-
# end
|
320
|
-
|
321
|
-
# ex_update_highest = lambda do |node|
|
322
|
-
# update_highest.call(node)
|
323
|
-
# update_highest.call(left(node)) unless leaf?(node)
|
324
|
-
# update_highest.call(right(node)) unless one_child?(node)
|
325
|
-
# end
|
326
|
-
|
327
|
-
# if val_at(root).x < x0
|
328
|
-
# p = root
|
329
|
-
# l = true
|
330
|
-
# l_prime = r = r_prime = false
|
331
|
-
# elsif val_at(root).x < x1
|
332
|
-
# p_prime = root
|
333
|
-
# l_prime = true
|
334
|
-
# l = r = r_prime = false
|
335
|
-
# else
|
336
|
-
# q = root
|
337
|
-
# r = true
|
338
|
-
# l = l_prime = r_prime = false
|
339
|
-
# end
|
340
|
-
|
341
|
-
# set_z = lambda do
|
342
|
-
# r = []
|
343
|
-
# r << p if l
|
344
|
-
# r << p_prime if l_prime
|
345
|
-
# r << q if r
|
346
|
-
# r << q_prime if r_primg
|
347
|
-
# r
|
348
|
-
# end
|
349
|
-
|
350
|
-
# check_left = lambda do
|
351
|
-
# if leaf?(p)
|
352
|
-
# l = false
|
353
|
-
# elsif one_child?(p)
|
354
|
-
# p_l_x = val_at(left(p))
|
355
|
-
# if x0 <= p_l_x && p_l_x <= x1
|
356
|
-
# update_highest.call(left(p))
|
357
|
-
# if l_prime && r_prime
|
358
|
-
# ex_update_highest.call(p_prime)
|
359
|
-
# elsif l_prime
|
360
|
-
# q_prime = p_prime
|
361
|
-
# r_prime = true
|
362
|
-
# end
|
363
|
-
# p_prime = left(p)
|
364
|
-
# l_prime = true
|
365
|
-
# l = false
|
366
|
-
# elsif p_l_x < x0
|
367
|
-
# p = left(p)
|
368
|
-
# else
|
369
|
-
# q = left(p)
|
370
|
-
# r = true
|
371
|
-
# l = false
|
372
|
-
# end
|
373
|
-
# else
|
374
|
-
# # p has two children
|
375
|
-
|
376
|
-
# end
|
377
|
-
|
378
|
-
# while l || l_prime || r || r_prime
|
379
|
-
# z_star = set_z.call.min_by(4) { level(_1) }
|
380
|
-
# if z_star.include? p_prime
|
381
|
-
# check_left_in(p_prime)
|
382
|
-
# elsif z_star.include? q_prime
|
383
|
-
# check_right_in(q_prime)
|
384
|
-
# elsif z_star.include? p
|
385
|
-
# check_left(p)
|
386
|
-
# else
|
387
|
-
# check_right(q)
|
388
|
-
# end
|
389
|
-
# end
|
390
|
-
# end
|
391
|
-
|
392
|
-
# Find the "highest" (max-y) point that is "northeast" of (x, y).
|
393
|
-
#
|
394
|
-
# That is, the point p* in Q = [x, infty) X [y, infty) with the largest y value, or (infty, -infty) if there is no point in that
|
395
|
-
# quadrant.
|
396
|
-
#
|
397
|
-
# Algorithm is from De et al. section 3.1
|
398
|
-
def highest_ne(x0, y0)
|
399
|
-
raise "Write me"
|
400
|
-
# From the paper:
|
401
|
-
#
|
402
|
-
# The algorithm uses two variables best and p, which satisfy the following invariant
|
403
|
-
#
|
404
|
-
# - If Q intersect P is nonempty then p* in {best} union T_p
|
405
|
-
# - If Q intersect P is empty then p* = best
|
406
|
-
#
|
407
|
-
# Here, P is the set of points in our data structure and T_p is the subtree rooted at p
|
408
|
-
best = Point.new(INFINITY, -INFINITY)
|
409
|
-
p = root # root of the whole tree AND the pair stored there
|
410
|
-
|
411
|
-
in_q = lambda do |pair|
|
412
|
-
pair.x >= x0 && pair.y >= y0
|
413
|
-
end
|
414
|
-
|
415
|
-
# From the paper:
|
416
|
-
#
|
417
|
-
# takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
|
418
|
-
#
|
419
|
-
# Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
|
420
|
-
update_highest = lambda do |node|
|
421
|
-
t = val_at(node)
|
422
|
-
if in_q.call(t) && t.y > best.y
|
423
|
-
best = t
|
424
|
-
end
|
425
|
-
end
|
426
|
-
|
427
|
-
# We could make this code more efficient. But since we only have O(log n) steps we won't actually gain much so let's keep it
|
428
|
-
# readable and close to the paper's pseudocode for now.
|
429
|
-
until leaf?(p)
|
430
|
-
p_val = val_at(p)
|
431
|
-
if in_q.call(p_val)
|
432
|
-
# p \in Q and nothing in its subtree can beat it because of the max-heap
|
433
|
-
update_highest.call(p)
|
434
|
-
return best
|
435
|
-
|
436
|
-
# p = left(p) <- from paper
|
437
|
-
elsif p_val.y < y0
|
438
|
-
# p is too low for Q, so the entire subtree is too low as well
|
439
|
-
return best
|
440
|
-
|
441
|
-
# p = left(p)
|
442
|
-
elsif one_child?(p)
|
443
|
-
# With just one child we need to check it
|
444
|
-
p = left(p)
|
445
|
-
elsif val_at(right(p)).x <= x0
|
446
|
-
# right(p) might be in Q, but nothing in the left subtree can be, by the PST property on x.
|
447
|
-
p = right(p)
|
448
|
-
elsif val_at(left(p)).x >= x0
|
449
|
-
# Both children are in Q, so try the higher of them. Note that nothing in either subtree will beat this one.
|
450
|
-
higher = left(p)
|
451
|
-
if val_at(right(p)).y > val_at(left(p)).y
|
452
|
-
higher = right(p)
|
453
|
-
end
|
454
|
-
p = higher
|
455
|
-
elsif val_at(right(p)).y < y0
|
456
|
-
# Nothing in the right subtree is in Q, but maybe we'll find something in the left
|
457
|
-
p = left(p)
|
458
|
-
else
|
459
|
-
# At this point we know that right(p) \in Q so we need to check it. Nothing in its subtree can beat it so we don't need to
|
460
|
-
# look there. But there might be something better in the left subtree.
|
461
|
-
update_highest.call(right(p))
|
462
|
-
p = left(p)
|
463
|
-
end
|
464
|
-
end
|
465
|
-
update_highest.call(p) # try the leaf
|
466
|
-
best
|
467
|
-
end
|
468
|
-
|
469
|
-
# O(n log^2 n)
|
470
|
-
private def construct_pst
|
471
|
-
# We follow the algorithm in [3]. Indexing is from 1 there and we follow that here. The algorithm is almost exactly the same as
|
472
|
-
# for the (max) PST.
|
473
|
-
h = Math.log2(@size).floor
|
474
|
-
a = @size - (2**h - 1) # the paper calls it A
|
475
|
-
sort_subarray(1, @size)
|
476
|
-
level = 0 # TODO: isn't level always equal to i in the loop?
|
477
|
-
|
478
|
-
(0...h).each do |i|
|
479
|
-
sense = level.even? ? :max : :min
|
480
|
-
pow_of_2 = 2**i
|
481
|
-
|
482
|
-
k = a / (2**(h - i))
|
483
|
-
k1 = 2**(h + 1 - i) - 1
|
484
|
-
k2 = (1 - k) * 2**(h - i) - 1 + a
|
485
|
-
k3 = 2**(h - i) - 1
|
486
|
-
(1..k).each do |j|
|
487
|
-
l = index_with_extremal_y_in(pow_of_2 + (j - 1) * k1, pow_of_2 + j * k1 - 1, sense:)
|
488
|
-
swap(l, pow_of_2 + j - 1)
|
489
|
-
end
|
490
|
-
|
491
|
-
if k < pow_of_2
|
492
|
-
l = index_with_extremal_y_in(pow_of_2 + k * k1, pow_of_2 + k * k1 + k2 - 1, sense:)
|
493
|
-
swap(l, pow_of_2 + k)
|
494
|
-
|
495
|
-
m = pow_of_2 + k * k1 + k2
|
496
|
-
(1..(pow_of_2 - k - 1)).each do |j|
|
497
|
-
l = index_with_extremal_y_in(m + (j - 1) * k3, m + j * k3 - 1, sense:)
|
498
|
-
swap(l, pow_of_2 + k + j)
|
499
|
-
end
|
500
|
-
end
|
501
|
-
sort_subarray(2 * pow_of_2, @size)
|
502
|
-
level += 1
|
503
|
-
end
|
504
|
-
end
|
505
|
-
|
506
|
-
########################################
|
507
|
-
# Indexing the data structure as though it were from 1, even though the underlying @data is indexed from zero.
|
508
|
-
|
509
|
-
# First element and root of the tree structure
|
510
|
-
private def root
|
511
|
-
1
|
512
|
-
end
|
513
|
-
|
514
|
-
private def val_at(idx)
|
515
|
-
@data[idx - 1]
|
516
|
-
end
|
517
|
-
|
518
|
-
# Indexing is from 1
|
519
|
-
private def parent(i)
|
520
|
-
i >> 1
|
521
|
-
end
|
522
|
-
|
523
|
-
private def left(i)
|
524
|
-
i << 1
|
525
|
-
end
|
526
|
-
|
527
|
-
private def right(i)
|
528
|
-
1 + (i << 1)
|
529
|
-
end
|
530
|
-
|
531
|
-
private def leaf?(i)
|
532
|
-
left(i) > @size
|
533
|
-
end
|
534
|
-
|
535
|
-
private def one_child?(i)
|
536
|
-
left(i) <= @size && right(i) > @size
|
537
|
-
end
|
538
|
-
|
539
|
-
private def swap(index1, index2)
|
540
|
-
return if index1 == index2
|
541
|
-
|
542
|
-
@data[index1 - 1], @data[index2 - 1] = @data[index2 - 1], @data[index1 - 1]
|
543
|
-
end
|
544
|
-
|
545
|
-
private def level(i)
|
546
|
-
count = 0
|
547
|
-
while i > root
|
548
|
-
i >>= 1
|
549
|
-
count += 1
|
550
|
-
end
|
551
|
-
count
|
552
|
-
end
|
553
|
-
|
554
|
-
# The index in @data[l..r] having the largest/smallest value for y
|
555
|
-
# The sense argument should be :min or :max
|
556
|
-
private def index_with_extremal_y_in(l, r, sense:)
|
557
|
-
return nil if r < l
|
558
|
-
|
559
|
-
case sense
|
560
|
-
when :min
|
561
|
-
(l..r).min_by { |idx| val_at(idx).y }
|
562
|
-
when :max
|
563
|
-
(l..r).max_by { |idx| val_at(idx).y }
|
564
|
-
else
|
565
|
-
raise "Bad comparison sense #{sense}"
|
566
|
-
end
|
567
|
-
end
|
568
|
-
|
569
|
-
# Sort the subarray @data[l..r]. This is much faster than a Ruby-layer heapsort because it is mostly happening in C.
|
570
|
-
private def sort_subarray(l, r)
|
571
|
-
# heapsort_subarray(l, r)
|
572
|
-
return if l == r # 1-array already sorted!
|
573
|
-
|
574
|
-
l -= 1
|
575
|
-
r -= 1
|
576
|
-
@data[l..r] = @data[l..r].sort_by(&:x)
|
577
|
-
end
|
578
|
-
|
579
|
-
########################################
|
580
|
-
# Debugging support
|
581
|
-
#
|
582
|
-
# These methods are not written for speed
|
583
|
-
|
584
|
-
# Check that our data satisfies the requirements of a Priority Search Tree:
|
585
|
-
# - min-max heap in y (even levels are max levels, odd levels are min levels)
|
586
|
-
# - all the x values in the left subtree are less than all the x values in the right subtree
|
587
|
-
def verify_properties
|
588
|
-
# It's a min-max heap in y
|
589
|
-
(2..@size).each do |node|
|
590
|
-
level = Math.log2(node).floor
|
591
|
-
parent_level = level - 1
|
592
|
-
|
593
|
-
_, _, min_y, max_y = minmax_in_subtree(node)
|
594
|
-
parent_y = val_at(parent(node)).y
|
595
|
-
|
596
|
-
it_is_fine = if parent_level.even?
|
597
|
-
# max!
|
598
|
-
parent_y > max_y
|
599
|
-
else
|
600
|
-
parent_y < min_y
|
601
|
-
end
|
602
|
-
|
603
|
-
raise "Heap property violated at child #{node}" unless it_is_fine
|
604
|
-
end
|
605
|
-
|
606
|
-
# Left subtree has x values less than all of the right subtree
|
607
|
-
(1..@size).each do |node|
|
608
|
-
next if right(node) >= @size
|
609
|
-
|
610
|
-
left_max = max_x_in_subtree(left(node))
|
611
|
-
right_min = min_x_in_subtree(right(node))
|
612
|
-
|
613
|
-
raise "Left-right property of x-values violated at #{node}" unless left_max < right_min
|
614
|
-
end
|
615
|
-
|
616
|
-
nil
|
617
|
-
end
|
618
|
-
|
619
|
-
private def max_x_in_subtree(root)
|
620
|
-
minmax_in_subtree(root)[1]
|
621
|
-
end
|
622
|
-
|
623
|
-
private def min_x_in_subtree(root)
|
624
|
-
minmax_in_subtree(root)[0]
|
625
|
-
end
|
626
|
-
|
627
|
-
# Return min_x, max_x, min_y, max_y in subtree rooted at and including root
|
628
|
-
private def minmax_in_subtree(root)
|
629
|
-
@minmax_vals ||= []
|
630
|
-
@minmax_vals[root] ||= calc_minmax_at(root).freeze
|
631
|
-
end
|
632
|
-
|
633
|
-
# No memoization
|
634
|
-
private def calc_minmax_at(root)
|
635
|
-
return [INFINITY, -INFINITY, INFINITY, -INFINITY] if root > @size
|
636
|
-
|
637
|
-
pair = val_at(root)
|
638
|
-
|
639
|
-
return [pair.x, pair.x, pair.y, pair.y] if leaf?(root)
|
640
|
-
|
641
|
-
left = left(root)
|
642
|
-
left_min_max = minmax_in_subtree(left)
|
643
|
-
return left_min_max if one_child?(root)
|
644
|
-
|
645
|
-
right = right(root)
|
646
|
-
right_min_max = minmax_in_subtree(right)
|
647
|
-
|
648
|
-
[
|
649
|
-
[pair.x, left_min_max[0], right_min_max[0]].min,
|
650
|
-
[pair.x, left_min_max[1], right_min_max[1]].max,
|
651
|
-
[pair.y, left_min_max[2], right_min_max[2]].min,
|
652
|
-
[pair.y, left_min_max[3], right_min_max[3]].max
|
653
|
-
]
|
654
|
-
end
|
655
|
-
|
656
|
-
private def output_quasi_dot
|
657
|
-
(2..@size).to_a.reverse.map do |node|
|
658
|
-
"#{val_at(parent(node)).fmt} -- #{val_at(node).fmt}"
|
659
|
-
end.join("\n")
|
660
|
-
end
|
661
|
-
|
662
|
-
private def pair_to_s
|
663
|
-
end
|
664
|
-
|
665
|
-
########################################
|
666
|
-
# Dead code
|
667
|
-
|
668
|
-
end
|