data_structures_rmolinari 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f006234ee3b216d5607e9b10bb1958a6107ccfa0cc8c359f98383dc7fde14ee
4
- data.tar.gz: f281ab0768e24e7c983cd046ba7b185dab8fd972fb3065fd73ff575782bf5486
3
+ metadata.gz: 87a44faaaa62f555546867230df704981671491f040f6be29eeed7db7eb22a0a
4
+ data.tar.gz: 0a0f1f6cf22bdde5d0510a818af9d8a6dbdbf11a6e69ce2e178bf6f336bb3d92
5
5
  SHA512:
6
- metadata.gz: e274a97f177fad44bad20ecf24ecca1385fee3c217e7e42aac076c24377970c6444dfdbadc6fd3e1e201555177429c9f8eddaee211e463dd60f6b36e74004eec
7
- data.tar.gz: 293fc0b2973a8d851c27f4e64177dbf7b9a25b2bb7eb9efb4b33abdb07c4e006f80f4450996ef99da7e8bb1516ca8aa89ab893258960d9127d101995906254ed
6
+ metadata.gz: 990fc38cbc64c20290317bf2858ff6f2813f832d0046f249faea32c7f88f389e8c8c2db892f8288a0747aa9446181864a3e62435e4846a230411b6afa4b75faf
7
+ data.tar.gz: f1e641b03d30c4726268c1c8da6d6364f635251152230f89aba2b551f0355d37ce843dba8e631c2fbd4a20e87ae94c78cf30b46dc3d472f1a1b55add258de32a
data/CHANGELOG.md CHANGED
@@ -2,6 +2,21 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ### Changed
6
+
7
+ - MaxPrioritySearchTree
8
+ - Duplicate y values are now allowed. Ties are broken with a preference for smaller values of x.
9
+ - Method names have changed
10
+ - Instead of "highest", "leftmost", "rightmost" we use "largest_y", "smallest_x", "largest_x"
11
+ - For example, +highest_ne+ is now +largest_y_in_ne+
12
+ - DisjointUnion
13
+ - the size argument to initializer is optional. The default value is 0.
14
+ - elements can be added to the "universe" of known values with +make_set+
15
+
16
+ ### Removed
17
+ - MinmaxPrioritySearchTree is no longer available
18
+ - it was only a partial implementation anyway
19
+
5
20
  ## [0.3.0] 2023-01-06
6
21
 
7
22
  ### Added
@@ -10,34 +10,48 @@
10
10
  # See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
11
11
  #
12
12
  # The code uses several ideas from Tarjan and van Leeuwen for efficiency. We use "union by rank" in +unite+ and path-halving in
13
- # +find+. Together, these make the amortized cost for each of n such operations effectively constant.
13
+ # +find+. Together, these make the amortized cost of each operation effectively constant.
14
14
  #
15
- # - Tarjan, Robert E., van Leeuwen, Jan (1984). "Worst-case analysis of set union algorithms". Journal of the ACM. 31 (2): 245–281.
15
+ # - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
16
16
  #
17
17
  # @todo
18
18
  # - allow caller to expand the size of the universe. This operation is called "make set".
19
19
  # - All we need to do is increase the size of @d, set the parent pointers, define the new ranks (zero), and update @size.
20
20
  class DataStructuresRMolinari::DisjointUnion
21
+ include Shared
22
+
21
23
  # The number of subsets in the partition.
22
24
  attr_reader :subset_count
23
25
 
24
- # @param size the size of the universe, which must be known at the time of construction. The elements 0, 1, ..., size - 1 start
25
- # out in disjoint singleton subsets.
26
- def initialize(size)
27
- @size = size
26
+ # @param initial_size the initial size of the universe. The elements 0, 1, ..., initial_size - 1 start out in disjoint singleton
27
+ # subsets.
28
+ def initialize(initial_size = 0)
28
29
  # Initialize to
29
- @d = (0...size).to_a
30
- @rank = [0] * size
30
+ @d = (0...initial_size).to_a
31
+ @rank = [0] * initial_size
32
+
33
+ @subset_count = initial_size
34
+ end
35
+
36
+ # Add a new subset to the universe containing the element +new_v+
37
+ # @param new_v the new element, starting in its own singleton subset
38
+ # - it must be a non-negative integer, not already part of the universe of elements.
39
+ def make_set(new_v)
40
+ raise DataError, "Element #{new_v} must be a non-negative integer" unless new_v.is_a?(Integer) && !new_v.negative?
41
+ raise DataError, "Element #{new_v} is already present" if @d[new_v]
31
42
 
32
- @subset_count = size
43
+ @d[new_v] = new_v
44
+ @rank[new_v] = 0
45
+ @subset_count += 1
33
46
  end
34
47
 
35
48
  # Declare that e and f are equivalent, i.e., in the same subset. If they are already in the same subset this is a no-op.
36
49
  #
37
- # Each argument must be one of 0, 1, ..., size-1.
50
+ # Each argument must be in the universe of elements
38
51
  def unite(e, f)
39
52
  check_value(e)
40
53
  check_value(f)
54
+
41
55
  raise 'Uniting an element with itself is meaningless' if e == f
42
56
 
43
57
  e_root = find(e)
@@ -50,9 +64,11 @@ class DataStructuresRMolinari::DisjointUnion
50
64
 
51
65
  # The canonical representative of the subset containing e. Two elements d and e are in the same subset exactly when find(d) ==
52
66
  # find(e).
53
- # @param e must be one of 0, 1, ..., size-1.
54
- # @return (Integer) one of 0, 1, ..., size-1.
67
+ # @param e must be in the universe of elements
68
+ # @return (Integer) one of the universe of elements
55
69
  def find(e)
70
+ check_value(e)
71
+
56
72
  # We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
57
73
  x = e
58
74
  x = @d[x] = @d[@d[x]] while @d[@d[x]] != @d[x]
@@ -60,7 +76,7 @@ class DataStructuresRMolinari::DisjointUnion
60
76
  end
61
77
 
62
78
  private def check_value(v)
63
- raise DataError, "Value must be given and be in (0..#{@size - 1})" unless v && v.between?(0, @size - 1)
79
+ raise Shared::DataError, "Value #{v} is not part of the univserse." unless @d[v]
64
80
  end
65
81
 
66
82
  private def link(e, f)
@@ -1,7 +1,7 @@
1
1
  require_relative 'shared'
2
2
 
3
- # A Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or max) on a
4
- # arbitrary subarray of a given array.
3
+ # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
4
+ # max) on an arbitrary subarray of a given array.
5
5
  #
6
6
  # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
7
7
  # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
@@ -16,7 +16,7 @@ require_relative 'shared'
16
16
  # initializer and the definitions of concrete realisations like MaxValSegmentTree.
17
17
  #
18
18
  # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
19
- class DataStructuresRMolinari::GenericSegmentTree
19
+ class DataStructuresRMolinari::SegmentTreeTemplate
20
20
  include Shared::BinaryTreeArithmetic
21
21
 
22
22
  # Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
@@ -1,3 +1,4 @@
1
+ require 'must_be'
1
2
  require 'set'
2
3
  require_relative 'shared'
3
4
 
@@ -9,18 +10,18 @@ require_relative 'shared'
9
10
  # operations. It is their approach that we have implemented.
10
11
  #
11
12
  # The PST structure is an implicit, balanced binary tree with the following properties:
12
- # * The tree is a _max-heap_ in the y coordinate. That is, the point at each node has a y-value less than its parent.
13
+ # * The tree is a _max-heap_ in the y coordinate. That is, the point at each node has a y-value no greater than its parent.
13
14
  # * For each node p, the x-values of all the nodes in the left subtree of p are less than the x-values of all the nodes in the right
14
15
  # subtree of p. Note that this says nothing about the x-value at the node p itself. The tree is thus _almost_ a binary search tree
15
16
  # in the x coordinate.
16
17
  #
17
18
  # Given a set of n points, we can answer the following questions quickly:
18
19
  #
19
- # - +leftmost_ne+: for x0 and y0, what is the leftmost point (x, y) in P satisfying x >= x0 and y >= y0?
20
- # - +rightmost_nw+: for x0 and y0, what is the rightmost point (x, y) in P satisfying x <= x0 and y >= y0?
21
- # - +highest_ne+: for x0 and y0, what is the highest point (x, y) in P satisfying x >= x0 and y >= y0?
22
- # - +highest_nw+: for x0 and y0, what is the highest point (x, y) in P satisfying x <= x0 and y >= y0?
23
- # - +highest_3_sided+: for x0, x1, and y0, what is the highest point (x, y) in P satisfying x >= x0, x <= x1 and y >= y0?
20
+ # - +smallest_x_in_ne+: for x0 and y0, what is the leftmost point (x, y) in P satisfying x >= x0 and y >= y0?
21
+ # - +largest_x_in_nw+: for x0 and y0, what is the rightmost point (x, y) in P satisfying x <= x0 and y >= y0?
22
+ # - +largest_y_in_ne+: for x0 and y0, what is the highest point (x, y) in P satisfying x >= x0 and y >= y0?
23
+ # - +largest_y_in_nw+: for x0 and y0, what is the highest point (x, y) in P satisfying x <= x0 and y >= y0?
24
+ # - +largest_y_in_3_sided+: for x0, x1, and y0, what is the highest point (x, y) in P satisfying x >= x0, x <= x1 and y >= y0?
24
25
  # - +enumerate_3_sided+: for x0, x1, and y0, enumerate all points in P satisfying x >= x0, x <= x1 and y >= y0.
25
26
  #
26
27
  # (Here, "leftmost/rightmost" means "minimal/maximal x", and "highest" means "maximal y".)
@@ -29,8 +30,8 @@ require_relative 'shared'
29
30
  #
30
31
  # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
31
32
  #
32
- # In the current implementation no two points can share an x-value and no two points can share a y-value. This (rather severe)
33
- # restriction can be relaxed with some more complicated code.
33
+ # In the current implementation no two points can share an x-value. This (rather severe) restriction can be relaxed with some more
34
+ # complicated code, but it hasn't been written yet. See issue #9.
34
35
  #
35
36
  #
36
37
  # There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
@@ -49,7 +50,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
49
50
  # @param data [Array] the set P of points presented as an array. The tree is built in the array in-place without cloning.
50
51
  # - Each element of the array must respond to +#x+ and +#y+.
51
52
  # - This is not checked explicitly but a missing method exception will be thrown when we try to call one of them.
52
- # - The +x+ values must be distinct, as must the +y+ values. We raise a +Shared::DataError+ if this isn't the case.
53
+ # - The +x+ values must be distinct. We raise a +Shared::DataError+ if this isn't the case.
53
54
  # - This is a restriction that simplifies some of the algorithm code. It can be removed as the cost of some extra work. Issue
54
55
  # #9.
55
56
  #
@@ -60,9 +61,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
60
61
  @size = @data.size
61
62
 
62
63
  construct_pst
63
- return unless verify
64
64
 
65
- verify_properties
65
+ verify_properties if verify
66
66
  end
67
67
 
68
68
  ########################################
@@ -74,11 +74,11 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
74
74
  # structure. Define p* as
75
75
  #
76
76
  # - (infty, -infty) if Q \intersect P is empty and
77
- # - the highest (max-x) point in Q \intersect P otherwise.
77
+ # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
78
78
  #
79
79
  # This method returns p* in O(log n) time and O(1) extra space.
80
- def highest_ne(x0, y0)
81
- highest_in_quadrant(x0, y0, :ne)
80
+ def largest_y_in_ne(x0, y0)
81
+ largest_y_in_quadrant(x0, y0, :ne)
82
82
  end
83
83
 
84
84
  # Return the highest point in P to the "northwest" of (x0, y0).
@@ -87,17 +87,17 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
87
87
  # structure. Define p* as
88
88
  #
89
89
  # - (-infty, -infty) if Q \intersect P is empty and
90
- # - the highest (max-y) point in Q \intersect P otherwise.
90
+ # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
91
91
  #
92
92
  # This method returns p* in O(log n) time and O(1) extra space.
93
- def highest_nw(x0, y0)
94
- highest_in_quadrant(x0, y0, :nw)
93
+ def largest_y_in_nw(x0, y0)
94
+ largest_y_in_quadrant(x0, y0, :nw)
95
95
  end
96
96
 
97
- # The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both highest_ne and
98
- # highest_nw
97
+ # The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both largest_y_in_ne and
98
+ # largest_y_in_nw
99
99
  #
100
- # Note that highest_ne(x0, y0) = highest_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster than the
100
+ # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinity, y0) so we don't really need this. But it's a bit faster than the
101
101
  # general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
102
102
  #
103
103
  # From the paper:
@@ -108,7 +108,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
108
108
  # - If Q intersect P is empty then p* = best
109
109
  #
110
110
  # Here, P is the set of points in our data structure and T_p is the subtree rooted at p
111
- private def highest_in_quadrant(x0, y0, quadrant)
111
+ private def largest_y_in_quadrant(x0, y0, quadrant)
112
112
  quadrant.must_be_in [:ne, :nw]
113
113
 
114
114
  p = root
@@ -135,10 +135,10 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
135
135
  #
136
136
  # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
137
137
  #
138
- # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
138
+ # We break ties by preferring points with smaller x values
139
139
  update_highest = lambda do |node|
140
140
  t = @data[node]
141
- if in_q.call(t) && t.y > best.y
141
+ if in_q.call(t) && (t.y > best.y || (t.y == best.y && t.x < best.x))
142
142
  best = t
143
143
  end
144
144
  end
@@ -194,7 +194,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
194
194
  # - the leftmost (min-x) point in Q \intersect P otherwise.
195
195
  #
196
196
  # This method returns p* in O(log n) time and O(1) extra space.
197
- def leftmost_ne(x0, y0)
197
+ def smallest_x_in_ne(x0, y0)
198
198
  extremal_in_x_dimension(x0, y0, :ne)
199
199
  end
200
200
 
@@ -207,14 +207,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
207
207
  # - the rightmost (max-x) point in Q \intersect P otherwise.
208
208
  #
209
209
  # This method returns p* in O(log n) time and O(1) extra space.
210
- def rightmost_nw(x0, y0)
210
+ def largest_x_in_nw(x0, y0)
211
211
  extremal_in_x_dimension(x0, y0, :nw)
212
212
  end
213
213
 
214
- # A genericized version of the paper's leftmost_ne that can calculate either leftmost_ne or rightmost_nw as specifies via a
214
+ # A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
215
215
  # parameter.
216
216
  #
217
- # Quadrant is either :ne (which gives leftmost_ne) or :nw (which gives rightmost_nw).
217
+ # Quadrant is either :ne (which gives smallest_x_in_ne) or :nw (which gives largest_x_in_nw).
218
218
  #
219
219
  # From De et al:
220
220
  #
@@ -245,7 +245,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
245
245
  # takes as input a point t and does the following: if t \in Q and x(t) < x(best) then it assigns best = t
246
246
  #
247
247
  # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
248
- update_leftmost = lambda do |node|
248
+ update_best = lambda do |node|
249
249
  t = @data[node]
250
250
  if in_q.call(t) && sign * t.x < sign * best.x
251
251
  best = t
@@ -261,13 +261,13 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
261
261
  #
262
262
  # - If x0 <= x(c1) then all subtrees have large enough x values and we look for the leftmost node in c with a large enough y
263
263
  # value. Both p and q are sent into that subtree.
264
- # - If x0 >= x(ck) the the rightmost subtree is our only hope the rightmost subtree.
264
+ # - If x0 >= x(ck) then the rightmost subtree is our only hope.
265
265
  # - Otherwise, x(c1) < x0 < x(ck) and we let i be least so that x(ci) <= x0 < x(c(i+1)). Then q becomes the leftmost cj in c not
266
266
  # to the left of ci such that y(cj) >= y0, if any. p becomes ci if y(ci) >= y0 and q otherwise. If there is no such j, we put
267
267
  # q = p. This may leave both of p, q undefined which means there is no useful way forward and we return nils to signal this to
268
268
  # calling code.
269
269
  #
270
- # The same logic applies to rightmost_nw, though everything is "backwards"
270
+ # The same logic applies to largest_x_in_nw, though everything is "backwards"
271
271
  # - membership of Q depends on having a small-enough value of x, rather than a large-enough one
272
272
  # - among the ci, values towards the end of the array tend not to be in Q while values towards the start of the array tend to be
273
273
  # in Q
@@ -302,14 +302,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
302
302
  new_p ||= new_q # if nodes[i] is no good, send p along with q
303
303
  new_q ||= new_p # but if there is no worthwhile value for q we should send it along with p
304
304
 
305
- return [new_q, new_p] if quadrant == :nw # swap for the rightmost_nw case.
305
+ return [new_q, new_p] if quadrant == :nw # swap for the largest_x_in_nw case.
306
306
 
307
307
  [new_p, new_q]
308
308
  end
309
309
 
310
310
  until leaf?(p)
311
- update_leftmost.call(p)
312
- update_leftmost.call(q)
311
+ update_best.call(p)
312
+ update_best.call(q)
313
313
 
314
314
  if p == q
315
315
  if one_child?(p)
@@ -324,7 +324,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
324
324
  q = p # p itself is just one layer above the leaves, or is itself a leaf
325
325
  elsif one_child?(q)
326
326
  # This generic approach is not as fast as the bespoke checks described in the paper. But it is easier to maintain the code
327
- # this way and allows easy implementation of rightmost_nw
327
+ # this way and allows easy implementation of largest_x_in_nw
328
328
  p, q = determine_next_nodes.call(left(p), right(p), left(q))
329
329
  else
330
330
  p, q = determine_next_nodes.call(left(p), right(p), left(q), right(q))
@@ -332,8 +332,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
332
332
  break unless p # we've run out of useful nodes
333
333
  end
334
334
  end
335
- update_leftmost.call(p) if p
336
- update_leftmost.call(q) if q
335
+ update_best.call(p) if p
336
+ update_best.call(q) if q
337
337
  best
338
338
  end
339
339
 
@@ -346,10 +346,10 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
346
346
  # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
347
347
  #
348
348
  # - (infty, -infty) if Q \intersect P is empty and
349
- # - the highest (max-x) point in Q \intersect P otherwise.
349
+ # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
350
350
  #
351
351
  # This method returns p* in O(log n) time and O(1) extra space.
352
- def highest_3_sided(x0, x1, y0)
352
+ def largest_y_in_3_sided(x0, x1, y0)
353
353
  # From the paper:
354
354
  #
355
355
  # The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P̸ is not \empty,
@@ -389,7 +389,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
389
389
  # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
390
390
  update_highest = lambda do |node|
391
391
  t = @data[node]
392
- if in_q.call(t) && t.y > best.y
392
+ if in_q.call(t) && (t.y > best.y || (t.y == best.y && t.x < best.x))
393
393
  best = t
394
394
  end
395
395
  end
@@ -570,7 +570,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
570
570
  # My high-level understanding of the algorithm
571
571
  # --------------------------------------------
572
572
  #
573
- # We need to find all elements of Q \intersect P, so it isn't enough, as it was in highest_3_sided simply to keep track of p and
573
+ # We need to find all elements of Q \intersect P, so it isn't enough, as it was in largest_y_in_3_sided simply to keep track of p and
574
574
  # q. We need to track four nodes, p, p', q', and q which are (with a little handwaving) respectively
575
575
  #
576
576
  # - the rightmost node to the left of Q' = [x0, x1] X [-infinity, infinity],
@@ -692,8 +692,6 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
692
692
  # The four key helpers described in the paper
693
693
 
694
694
  # Handle the next step of the subtree at p
695
- #
696
- # I need to go through this with paper, pencil, and some diagrams.
697
695
  enumerate_left = lambda do
698
696
  if leaf?(p)
699
697
  left = false
@@ -999,13 +997,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
999
997
 
1000
998
  private def construct_pst
1001
999
  raise DataError, 'Duplicate x values are not supported' if contains_duplicates?(@data, by: :x)
1002
- raise DataError, 'Duplicate y values are not supported' if contains_duplicates?(@data, by: :y)
1003
1000
 
1004
- # We follow the algorithm in the paper by De, Maheshwari et al.
1001
+ # We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
1002
+ # defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
1003
+ # worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
1005
1004
 
1006
- # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This probably requires a malloc and
1007
- # data copy, which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
1008
- # construction. In fact, we are probably doing O(n^2) work because of all the calls to #index_with_largest_y_in.
1005
+ # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
1006
+ # and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
1007
+ # construction.
1009
1008
  @data.unshift nil
1010
1009
 
1011
1010
  h = Math.log2(@size).floor
@@ -1052,63 +1051,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1052
1051
  end
1053
1052
  end
1054
1053
 
1055
- ########################################
1056
- # Tree arithmetic
1057
-
1058
- # # First element and root of the tree structure
1059
- # private def root
1060
- # 1
1061
- # end
1062
-
1063
- # # Indexing is from 1
1064
- # private def parent(i)
1065
- # i >> 1
1066
- # end
1067
-
1068
- # private def left(i)
1069
- # i << 1
1070
- # end
1071
-
1072
- # private def right(i)
1073
- # 1 + (i << 1)
1074
- # end
1075
-
1076
- # private def level(i)
1077
- # l = 0
1078
- # while i > root
1079
- # i >>= 1
1080
- # l += 1
1081
- # end
1082
- # l
1083
- # end
1084
-
1085
- # # i has no children
1086
- # private def leaf?(i)
1087
- # i > @last_non_leaf
1088
- # end
1089
-
1090
- # # i has exactly one child (the left)
1091
- # private def one_child?(i)
1092
- # i == @parent_of_one_child
1093
- # end
1094
-
1095
- # # i has two children
1096
- # private def two_children?(i)
1097
- # i <= @last_parent_of_two_children
1098
- # end
1099
-
1100
- # # i is the left child of its parent.
1101
- # private def left_child?(i)
1102
- # (i & 1).zero?
1103
- # end
1104
-
1105
1054
  private def swap(index1, index2)
1106
1055
  return if index1 == index2
1107
1056
 
1108
1057
  @data[index1], @data[index2] = @data[index2], @data[index1]
1109
1058
  end
1110
1059
 
1111
- # The index in @data[l..r] having the largest value for y
1060
+ # The index in @data[l..r] having the largest value for y, breaking ties with the smaller x value. Since we are already sorted by
1061
+ # x we don't actually need to check this.
1112
1062
  private def index_with_largest_y_in(l, r)
1113
1063
  return nil if r < l
1114
1064
 
@@ -1134,7 +1084,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1134
1084
  private def verify_properties
1135
1085
  # It's a max-heap in y
1136
1086
  (2..@size).each do |node|
1137
- raise InternalLogicError, "Heap property violated at child #{node}" unless @data[node].y < @data[parent(node)].y
1087
+ byebug unless @data[node].y <= @data[parent(node)].y
1088
+ raise InternalLogicError, "Heap property violated at child #{node}" unless @data[node].y <= @data[parent(node)].y
1138
1089
  end
1139
1090
 
1140
1091
  # Left subtree has x values less than all of the right subtree
@@ -1,3 +1,5 @@
1
+ require 'forwardable'
2
+
1
3
  require_relative 'data_structures_rmolinari/shared'
2
4
 
3
5
  module DataStructuresRMolinari
@@ -10,14 +12,13 @@ require_relative 'data_structures_rmolinari/disjoint_union'
10
12
  require_relative 'data_structures_rmolinari/generic_segment_tree'
11
13
  require_relative 'data_structures_rmolinari/heap'
12
14
  require_relative 'data_structures_rmolinari/max_priority_search_tree'
13
- require_relative 'data_structures_rmolinari/minmax_priority_search_tree'
14
15
 
15
16
  # A namespace to hold the provided classes. We want to avoid polluting the global namespace with names like "Heap"
16
17
  module DataStructuresRMolinari
17
18
  ########################################
18
19
  # Concrete instances of Segment Tree
19
20
  #
20
- # @todo consider moving these into generic_segment_tree.rb
21
+ # @todo consider moving these into generic_segment_tree.rb and renaming that file
21
22
 
22
23
  # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
23
24
  # in O(log n) time.
@@ -30,7 +31,7 @@ module DataStructuresRMolinari
30
31
  # @param data an object that contains values at integer indices based at 0, via +data[i]+.
31
32
  # - This will usually be an Array, but it could also be a hash or a proc.
32
33
  def initialize(data)
33
- @structure = GenericSegmentTree.new(
34
+ @structure = SegmentTreeTemplate.new(
34
35
  combine: ->(a, b) { [a, b].max },
35
36
  single_cell_array_val: ->(i) { data[i] },
36
37
  size: data.size,
@@ -57,7 +58,7 @@ module DataStructuresRMolinari
57
58
 
58
59
  # @param (see MaxValSegmentTree#initialize)
59
60
  def initialize(data)
60
- @structure = GenericSegmentTree.new(
61
+ @structure = SegmentTreeTemplate.new(
61
62
  combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
62
63
  single_cell_array_val: ->(i) { [i, data[i]] },
63
64
  size: data.size,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-06 00:00:00.000000000 Z
11
+ date: 2023-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be
@@ -86,7 +86,6 @@ files:
86
86
  - lib/data_structures_rmolinari/generic_segment_tree.rb
87
87
  - lib/data_structures_rmolinari/heap.rb
88
88
  - lib/data_structures_rmolinari/max_priority_search_tree.rb
89
- - lib/data_structures_rmolinari/minmax_priority_search_tree.rb
90
89
  - lib/data_structures_rmolinari/shared.rb
91
90
  homepage: https://github.com/rmolinari/data_structures
92
91
  licenses:
@@ -1,668 +0,0 @@
1
- require 'must_be'
2
-
3
- require_relative 'shared'
4
-
5
- # THIS CLASS IS INCOMPLETE AND NOT USABLE
6
- #
7
- # A priority search tree (PST) stores points in two dimensions (x,y) and can efficiently answer certain questions about the set of
8
- # points.
9
- #
10
- # The structure was introduced by McCreight [1].
11
- #
12
- # See more: https://en.wikipedia.org/wiki/Priority_search_tree
13
- #
14
- # It is possible to build such a tree in place, given an array of pairs. See [2]. In a follow-up paper, [3], the authors show how to
15
- # construct a more flexible data structure,
16
- #
17
- # "[T]he Min-Max Priority Search tree for a set P of n points in R^2. It is a binary tree T with the following properties:
18
- #
19
- # * For each internal node u, all points in the left subtree of u have an x-coordinate which is less than the x-coordinate of any
20
- # point in the right subtree of u.
21
- # * The y-coordinate values of the nodes on even (resp. odd) levels are smaller (resp. greater) than the y-coordinate values of
22
- # their descendants (if any), where the root is at level zero.
23
- #
24
- # "The first property implies that T is a binary search tree on the x-coordinates of the points in P, except that there is no
25
- # relation between the x-coordinates of the points stored at u and any of its children. The second property implies that T is a
26
- # min-max heap on the y-coordinates of the points in P."
27
- #
28
- # I started implementing the in-place PST. Then, finding the follow-up paper [3], decided to do that one instead, as the paper says
29
- # it is more flexible. The point is to learn a new data structure and its associated algorithms.
30
- #
31
- # The algorithms are rather bewildering. Highest3SidedUp is complicated, and only two of the functions CheckLeft, CheckLeftIn,
32
- # CheckRight, CheckRightIn are given; the other two are "symmetric". But it's not really clear what the first are actually doing, so
33
- # it's hard to know what the others actually do.
34
- #
35
- # The implementation is incomplete. The pseudo-code in the paper is buggy (see the code below), which makes progress difficult.
36
- #
37
- # [1] E. McCreight, _Priority Search Trees_, SIAM J. Computing, v14, no 3, May 1985, pp 257-276.
38
- # [2] De, Maheshwari, Nandy, Smid, _An in-place priority search tree_, 23rd Annual Canadian Conference on Computational Geometry.
39
- # [3] De, Maheshwari, Nandy, Smid, _An in-place min-max priority search tree_, Computational Geometry, v46 (2013), pp 310-327.
40
- # [4] Atkinson, Sack, Santoro, Strothotte, _Min-max heaps and generalized priority queues_, Commun. ACM 29 (10) (1986), pp 996-1000.
41
- class DataStructuresRMolinari::MinmaxPrioritySearchTree
42
- include Shared
43
-
44
- # The array of pairs is turned into a minmax PST in-place without cloning. So clone before passing it in, if you care.
45
- #
46
- # Each element must respond to #x and #y. Use Point (above) if you like.
47
- def initialize(data, verify: false)
48
- @data = data
49
- @size = @data.size
50
-
51
- construct_pst
52
- return unless verify
53
-
54
- # puts "Validating tree structure..."
55
- verify_properties
56
- end
57
-
58
- # Let Q = [x0, infty) X [y0, infty) be the northeast "quadrant" defined by the point (x0, y0) and let P be the points in this data
59
- # structure. Define p* as
60
- #
61
- # - (infty, infty) if Q \intersect P is empty and
62
- # - the leftmost (i.e., min-x) point in Q \intersect P otherwise
63
- #
64
- # This method returns p*.
65
- #
66
- # From De et al:
67
- #
68
- # [t]he variables best, p, and q satisfy the following invariant:
69
- #
70
- # - if Q \intersect P is nonempty then p* \in {best} \union T(p) \union T(q)
71
- # - if Q \intersect P is empty then p* = best
72
- # - p and q are at the same level of T and x(p) <= x(q)
73
- #
74
- # Here T(x) is the subtree rooted at x
75
- def leftmost_ne(x0, y0)
76
- best = Point.new(INFINITY, INFINITY)
77
- p = q = root
78
-
79
- in_q = ->(pair) { pair.x >= x0 && pair.y >= y0 }
80
-
81
- # From the paper:
82
- #
83
- # takes as input a point t \in P and updates best as follows: if t \in Q and x(t) < x(best) then it assigns best = t
84
- #
85
- # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
86
- update_leftmost = lambda do |node|
87
- t = val_at(node)
88
- if in_q.call(t) && t.x < best.x
89
- best = t
90
- end
91
- end
92
-
93
- # Generalize the c1,...,c4 idea from the paper in line with the BUG 2 IN PAPER notes, below.
94
- #
95
- # Given: 0 or more nodes n1, ..., nk in the tree. All are at the same level, which is a "max level" in our MinmaxPST, such that
96
- # x(n1) <= x(n2) <= ... <= x(nk). (Note: it is expected that the nj are either children or grandchildren of p and q, though we
97
- # don't check that.)
98
- #
99
- # If k = 0 return nil. Otherwise...
100
- #
101
- # We return two values p_goal, q_goal (possibly equal) from among the nj such that
102
- #
103
- # - p_goal is not to the right of q_goal in the tree and so, in particular x(p_goal) <= x(q_goal)
104
- # - if and when the search reaches p = p_goal and q = q_goal the algorithm invariant will be satisfied.
105
- #
106
- # As a special case, we return nil if we detect that none of the subtrees T(nj) contain any points in Q. This is a sign to
107
- # terminate the algorithm.
108
- #
109
- # See the notes at "BUG 2 IN PAPER" below for more details about what is going on.
110
- determine_goal_nodes = lambda do |nodes|
111
- node_count = nodes.size
112
- return nil if node_count.zero?
113
-
114
- if val_at(nodes.last).x <= x0
115
- # Only the rightmost subtree can possibly have anything in Q, assuming that all the x-values are distinct.
116
- return [nodes.last, nodes.last]
117
- end
118
-
119
- if val_at(nodes.first).x > x0
120
- # All subtrees have x-values large enough to provide elements of Q. Since we are at a max-level the y-values help us work
121
- # out which subtree to focus on.
122
- leftmost = nodes.find { |node| val_at(node).y >= y0 }
123
-
124
- return nil unless leftmost # nothing left to find
125
-
126
- # Otherwise we explore the leftmost subtree. Its root is in Q and can't be beaten by anything to its right.
127
- return [leftmost, leftmost]
128
- end
129
-
130
- values = nodes.map { |n| val_at(n) }
131
-
132
- # Otherwise x(n1) <= x0 < x(nk). Thus i is well-defined.
133
- i = (0...node_count).select { |j| values[j].x <= x0 && x0 < values[j + 1].x }.min
134
-
135
- # these nodes all have large-enough x-values and so this finds the ones in the set Q.
136
- new_q = nodes[(i + 1)..].select { |node| val_at(node).y >= y0 }.min # could be nil
137
- new_p = nodes[i] if values[i].y >= y0 # The leftmost subtree is worth exploring if the y-value is big enough. Otherwise not
138
- new_p ||= new_q # if nodes[i] is no good we send p along with q
139
- new_q ||= new_p # but if there was no worthwhile value for q we should send it along with p
140
-
141
- return nil unless new_p
142
-
143
- [new_p, new_q]
144
- end
145
-
146
- until leaf?(p)
147
- level = Math.log2(p).floor # TODO: don't calculate log every time!
148
-
149
- update_leftmost.call(p)
150
- update_leftmost.call(q)
151
-
152
- if p == q
153
- if one_child?(p)
154
- p = q = left(p)
155
- else
156
- q = right(p)
157
- p = left(p)
158
- end
159
- else
160
- # p != q
161
- if leaf?(q)
162
- q = p # p itself is just one layer above the leaves, or is itself a leaf
163
- elsif one_child?(q)
164
- # Note that p has two children
165
- if val_at(left(q)).x < x0
166
- # x-values below p are too small
167
- p = q = left(q)
168
- elsif val_at(right(p)).x <= x0
169
- # x-values in T(right(p)) are too small. DISTINCT-X
170
- p = right(p)
171
- q = left(q)
172
- else
173
- # BUG 1 IN PAPER.
174
- #
175
- # So, x(q_l) >= x0 and x(p_r) > x0. But how can we be sure that the child of q isn't the winner?. Should we be trying
176
- # it in this case?
177
- #
178
- # Yes: otherwise it never gets checked.
179
-
180
- update_leftmost.call(left(q))
181
- q = right(p)
182
- p = left(p)
183
- end
184
- else
185
- # p and q both have two children
186
-
187
- # BUG 2 IN PAPER.
188
- #
189
- # Define c as the paper does:
190
- #
191
- # (c1, c2, c3, c4) = (left(p), right(p), left(q), right(q))
192
- #
193
- # Because of the PST property on x and the invariant x(p) <= x(q) we know that
194
- #
195
- # x(c1) <= x(c2) <= x(c3) <= x(c4)
196
- #
197
- # Similarly, the sets of values x(T(ci)) are pairwise ordered in the same sense.
198
- #
199
- # Suppose further that x(ci) <= x0 <= x(c(i+1)). Then we know several things
200
- #
201
- # - there might be a "winner" (point in Q) in T(ci), perhaps ci itself.
202
- # - there are not any winners in T(cj) for j < i, because the x-values there aren't big enough
203
- # - any winner in ck, for k >= i, will be to the left of and thus beat any winner in c(k+1), because of the ordering of
204
- # x-values
205
- #
206
- # If x(c4) <= x0 then the rightmost subtree T(c4) is the only one worth checking and we set p = q = c4.
207
- # If x(c1) > x0 then we take i = 0, ignore the logic on ci in what follows, and set p = q.
208
- #
209
- # Pretend for the moment that we are using a MaxPST instead of a MinmaxPST. Then we can look at y values to learn more.
210
- #
211
- # - if y(ci) >= y0 then we need to search T(ci), so we will update p = ci
212
- # - but if y(ci) < y0 then there are no winners in T(ci) because the y-values are too small.
213
- # - similarly, if y(c(i+1)) >= y0 then we need to search T(c(i+1)). Indeed c(i+1) itself is in Q and beats any winner in
214
- # subtrees further to the right
215
- # - so, let k > i be minimal such that y(ck) >= y0, if there is any. Note that ck is itself a winner. Then
216
- # - if y(ci) >= y0,
217
- # - set p = ci, and q = ck (or q = ci if there is no such k)
218
- # - otherwise (T(ci) has no winners because its y-values are too small)
219
- # - if k is defined set p = q = ck. Otherwise HALT (there are no more winners)
220
- #
221
- # But we are working with a MinmaxPST rather than a MaxPST, so we have to work harder. If c1, ..., c4 (the children of p
222
- # and q) are in a "max-level" of the tree - that is, an even level - then the logic above still applies. But if they are
223
- # at a min level things are trickier and we need to go another layer down.
224
- #
225
- # The paper knows that we need to look a further layer down, but the logic is too simplistic. It looks at cj for j > i and
226
- # checks if cj or either of its children are in Q. But that's not good enough. For the same reason that in a MaxPST we may
227
- # need to explore below T(ci) even if ci isn't in Q, we may need to descend through one of the grandchildren of p or q even
228
- # if that grandchild isn't in Q.
229
- #
230
- # Getting a bit handwavey especially over what happens near the leaves...
231
- #
232
- # Consider the children d1, d2, ..., dm, of ci, ..., c4 (and so grandchildren of p and q). They are at a max-level and so
233
- # the logic described applies to the dk. If ci happens to be a winner we can set p = ci and work out what to do with q by
234
- # looking at the children of c(i+1), ..., c4. Otherwise we look at all the dj values (up to 8 of them), apply the logic
235
- # above to work out that we want to head for, say, p = ds and q = dt, and in this cycle update p = parent(ds), q =
236
- # parent(dt). (We also need to submit the values c(i+1)..c4 to UpdateLeftmost.)
237
- #
238
- # In other words, we can use the MaxPST logic on d1,...,dm to decide where we need to go, and then step to the relevant
239
- # parents among the cj.
240
-
241
- c = [left(p), right(p), left(q), right(q)]
242
- if level.odd?
243
- # the elements of c are at an even level, and hence their y values are maxima for the subtrees. We can learn what we
244
- # need to know from them
245
- p, q = determine_goal_nodes.call(c)
246
- if p && !q
247
- # byebug
248
- # determine_goal_nodes.call(c)
249
- raise 'bad logic'
250
- end
251
- else
252
- # They are at an odd level and so aren't helpful in working out what to do next: we look at their children, which are in
253
- # a max-level. We need to check the elements of c against best since we are otherwise ignoring them.
254
- c.each { |n| update_leftmost.call(n) }
255
-
256
- d = c.map { [left(_1), right(_1)]}.flatten.select { |n| n <= @size }
257
-
258
- # Note that we are jumping down two levels here!
259
- p, q = determine_goal_nodes.call(d)
260
- if p && !q
261
- # byebug
262
- # determine_goal_nodes.call(c)
263
- raise 'bad logic'
264
- end
265
-
266
- p
267
- end
268
-
269
- return best unless p # nothing more to do
270
- end
271
- end
272
- end
273
- update_leftmost.call(p)
274
- update_leftmost.call(q)
275
- best
276
- end
277
-
278
- # Let Q be the "three-sided query range" [x0, x1] X [y0, infty) and let P_Q be P \intersect Q.
279
- #
280
- # If P_Q is empty then p* = (infty, -infty).
281
- # Otherwise, p* is the point in P_Q with maximal y value.
282
- #
283
- # This method returns p*
284
- # def highest_3_sided_up(x0, x1, y0)
285
- # best = Point.new(INFINITY, -INFINITY)
286
-
287
- # in_q = lambda do |pair|
288
- # pair.x >= x0 && pair.x <= x1 && pair.y >= y0
289
- # end
290
-
291
- # # From the paper:
292
- # #
293
- # # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
294
- # #
295
- # # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
296
- # #
297
- # # The algorithm is complicated. From the paper:
298
- # #
299
- # # Since Q is bounded by two vertical sides, we use four index variables p, p', q and q' to guide the search path. In addition,
300
- # # we use four bits L, L', R and R'; these correspond to the subtrees of T rooted at the nodes p, p', q, and q', respectively;
301
- # # if a bit is equal to one, then the corresponding node is referred to as an _active node_ (for example, if L = 1 then p is an
302
- # # active node), and the subtree rooted at that node may contain a candidate point for p*. So the search is required to be
303
- # # performed in the subtree rooted at all active nodes. More formally, at any instant of time the variables satisfy the following
304
- # # invariants:
305
- # #
306
- # # - If L = 1 then x(p) < x0.
307
- # # - If L' = 1 then x0 <= x(p') <= x1.
308
- # # - If R = 1 then x(q) > x1.
309
- # # - If R' = 1 then x0 <= x(q') <= x1.
310
- # # - If L' = 1 and R' = 1 then x(p') <= x(q').
311
- # # - If P_Q is non-empty then p* = best or p* is in the subtree rooted at any one of the active nodes.
312
- # #
313
- # # There are more details in the paper
314
- # update_highest = lambda do |node|
315
- # t = val_at(node)
316
- # if in_q.call(t) && t.y > best.y
317
- # best = t
318
- # end
319
- # end
320
-
321
- # ex_update_highest = lambda do |node|
322
- # update_highest.call(node)
323
- # update_highest.call(left(node)) unless leaf?(node)
324
- # update_highest.call(right(node)) unless one_child?(node)
325
- # end
326
-
327
- # if val_at(root).x < x0
328
- # p = root
329
- # l = true
330
- # l_prime = r = r_prime = false
331
- # elsif val_at(root).x < x1
332
- # p_prime = root
333
- # l_prime = true
334
- # l = r = r_prime = false
335
- # else
336
- # q = root
337
- # r = true
338
- # l = l_prime = r_prime = false
339
- # end
340
-
341
- # set_z = lambda do
342
- # r = []
343
- # r << p if l
344
- # r << p_prime if l_prime
345
- # r << q if r
346
- # r << q_prime if r_primg
347
- # r
348
- # end
349
-
350
- # check_left = lambda do
351
- # if leaf?(p)
352
- # l = false
353
- # elsif one_child?(p)
354
- # p_l_x = val_at(left(p))
355
- # if x0 <= p_l_x && p_l_x <= x1
356
- # update_highest.call(left(p))
357
- # if l_prime && r_prime
358
- # ex_update_highest.call(p_prime)
359
- # elsif l_prime
360
- # q_prime = p_prime
361
- # r_prime = true
362
- # end
363
- # p_prime = left(p)
364
- # l_prime = true
365
- # l = false
366
- # elsif p_l_x < x0
367
- # p = left(p)
368
- # else
369
- # q = left(p)
370
- # r = true
371
- # l = false
372
- # end
373
- # else
374
- # # p has two children
375
-
376
- # end
377
-
378
- # while l || l_prime || r || r_prime
379
- # z_star = set_z.call.min_by(4) { level(_1) }
380
- # if z_star.include? p_prime
381
- # check_left_in(p_prime)
382
- # elsif z_star.include? q_prime
383
- # check_right_in(q_prime)
384
- # elsif z_star.include? p
385
- # check_left(p)
386
- # else
387
- # check_right(q)
388
- # end
389
- # end
390
- # end
391
-
392
- # Find the "highest" (max-y) point that is "northeast" of (x, y).
393
- #
394
- # That is, the point p* in Q = [x, infty) X [y, infty) with the largest y value, or (infty, -infty) if there is no point in that
395
- # quadrant.
396
- #
397
- # Algorithm is from De et al. section 3.1
398
- def highest_ne(x0, y0)
399
- raise "Write me"
400
- # From the paper:
401
- #
402
- # The algorithm uses two variables best and p, which satisfy the following invariant
403
- #
404
- # - If Q intersect P is nonempty then p* in {best} union T_p
405
- # - If Q intersect P is empty then p* = best
406
- #
407
- # Here, P is the set of points in our data structure and T_p is the subtree rooted at p
408
- best = Point.new(INFINITY, -INFINITY)
409
- p = root # root of the whole tree AND the pair stored there
410
-
411
- in_q = lambda do |pair|
412
- pair.x >= x0 && pair.y >= y0
413
- end
414
-
415
- # From the paper:
416
- #
417
- # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
418
- #
419
- # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
420
- update_highest = lambda do |node|
421
- t = val_at(node)
422
- if in_q.call(t) && t.y > best.y
423
- best = t
424
- end
425
- end
426
-
427
- # We could make this code more efficient. But since we only have O(log n) steps we won't actually gain much so let's keep it
428
- # readable and close to the paper's pseudocode for now.
429
- until leaf?(p)
430
- p_val = val_at(p)
431
- if in_q.call(p_val)
432
- # p \in Q and nothing in its subtree can beat it because of the max-heap
433
- update_highest.call(p)
434
- return best
435
-
436
- # p = left(p) <- from paper
437
- elsif p_val.y < y0
438
- # p is too low for Q, so the entire subtree is too low as well
439
- return best
440
-
441
- # p = left(p)
442
- elsif one_child?(p)
443
- # With just one child we need to check it
444
- p = left(p)
445
- elsif val_at(right(p)).x <= x0
446
- # right(p) might be in Q, but nothing in the left subtree can be, by the PST property on x.
447
- p = right(p)
448
- elsif val_at(left(p)).x >= x0
449
- # Both children are in Q, so try the higher of them. Note that nothing in either subtree will beat this one.
450
- higher = left(p)
451
- if val_at(right(p)).y > val_at(left(p)).y
452
- higher = right(p)
453
- end
454
- p = higher
455
- elsif val_at(right(p)).y < y0
456
- # Nothing in the right subtree is in Q, but maybe we'll find something in the left
457
- p = left(p)
458
- else
459
- # At this point we know that right(p) \in Q so we need to check it. Nothing in its subtree can beat it so we don't need to
460
- # look there. But there might be something better in the left subtree.
461
- update_highest.call(right(p))
462
- p = left(p)
463
- end
464
- end
465
- update_highest.call(p) # try the leaf
466
- best
467
- end
468
-
469
- # O(n log^2 n)
470
- private def construct_pst
471
- # We follow the algorithm in [3]. Indexing is from 1 there and we follow that here. The algorithm is almost exactly the same as
472
- # for the (max) PST.
473
- h = Math.log2(@size).floor
474
- a = @size - (2**h - 1) # the paper calls it A
475
- sort_subarray(1, @size)
476
- level = 0 # TODO: isn't level always equal to i in the loop?
477
-
478
- (0...h).each do |i|
479
- sense = level.even? ? :max : :min
480
- pow_of_2 = 2**i
481
-
482
- k = a / (2**(h - i))
483
- k1 = 2**(h + 1 - i) - 1
484
- k2 = (1 - k) * 2**(h - i) - 1 + a
485
- k3 = 2**(h - i) - 1
486
- (1..k).each do |j|
487
- l = index_with_extremal_y_in(pow_of_2 + (j - 1) * k1, pow_of_2 + j * k1 - 1, sense:)
488
- swap(l, pow_of_2 + j - 1)
489
- end
490
-
491
- if k < pow_of_2
492
- l = index_with_extremal_y_in(pow_of_2 + k * k1, pow_of_2 + k * k1 + k2 - 1, sense:)
493
- swap(l, pow_of_2 + k)
494
-
495
- m = pow_of_2 + k * k1 + k2
496
- (1..(pow_of_2 - k - 1)).each do |j|
497
- l = index_with_extremal_y_in(m + (j - 1) * k3, m + j * k3 - 1, sense:)
498
- swap(l, pow_of_2 + k + j)
499
- end
500
- end
501
- sort_subarray(2 * pow_of_2, @size)
502
- level += 1
503
- end
504
- end
505
-
506
- ########################################
507
- # Indexing the data structure as though it were from 1, even though the underlying @data is indexed from zero.
508
-
509
- # First element and root of the tree structure
510
- private def root
511
- 1
512
- end
513
-
514
- private def val_at(idx)
515
- @data[idx - 1]
516
- end
517
-
518
- # Indexing is from 1
519
- private def parent(i)
520
- i >> 1
521
- end
522
-
523
- private def left(i)
524
- i << 1
525
- end
526
-
527
- private def right(i)
528
- 1 + (i << 1)
529
- end
530
-
531
- private def leaf?(i)
532
- left(i) > @size
533
- end
534
-
535
- private def one_child?(i)
536
- left(i) <= @size && right(i) > @size
537
- end
538
-
539
- private def swap(index1, index2)
540
- return if index1 == index2
541
-
542
- @data[index1 - 1], @data[index2 - 1] = @data[index2 - 1], @data[index1 - 1]
543
- end
544
-
545
- private def level(i)
546
- count = 0
547
- while i > root
548
- i >>= 1
549
- count += 1
550
- end
551
- count
552
- end
553
-
554
- # The index in @data[l..r] having the largest/smallest value for y
555
- # The sense argument should be :min or :max
556
- private def index_with_extremal_y_in(l, r, sense:)
557
- return nil if r < l
558
-
559
- case sense
560
- when :min
561
- (l..r).min_by { |idx| val_at(idx).y }
562
- when :max
563
- (l..r).max_by { |idx| val_at(idx).y }
564
- else
565
- raise "Bad comparison sense #{sense}"
566
- end
567
- end
568
-
569
- # Sort the subarray @data[l..r]. This is much faster than a Ruby-layer heapsort because it is mostly happening in C.
570
- private def sort_subarray(l, r)
571
- # heapsort_subarray(l, r)
572
- return if l == r # 1-array already sorted!
573
-
574
- l -= 1
575
- r -= 1
576
- @data[l..r] = @data[l..r].sort_by(&:x)
577
- end
578
-
579
- ########################################
580
- # Debugging support
581
- #
582
- # These methods are not written for speed
583
-
584
- # Check that our data satisfies the requirements of a Priority Search Tree:
585
- # - min-max heap in y
586
- # - all the x values in the left subtree are less than all the x values in the right subtree
587
- def verify_properties
588
- # It's a min-max heap in y
589
- (2..@size).each do |node|
590
- level = Math.log2(node).floor
591
- parent_level = level - 1
592
-
593
- _, _, min_y, max_y = minmax_in_subtree(node)
594
- parent_y = val_at(parent(node)).y
595
-
596
- it_is_fine = if parent_level.even?
597
- # max!
598
- parent_y > max_y
599
- else
600
- parent_y < min_y
601
- end
602
-
603
- raise "Heap property violated at child #{node}" unless it_is_fine
604
- end
605
-
606
- # Left subtree has x values less than all of the right subtree
607
- (1..@size).each do |node|
608
- next if right(node) >= @size
609
-
610
- left_max = max_x_in_subtree(left(node))
611
- right_min = min_x_in_subtree(right(node))
612
-
613
- raise "Left-right property of x-values violated at #{node}" unless left_max < right_min
614
- end
615
-
616
- nil
617
- end
618
-
619
- private def max_x_in_subtree(root)
620
- minmax_in_subtree(root)[1]
621
- end
622
-
623
- private def min_x_in_subtree(root)
624
- minmax_in_subtree(root)[0]
625
- end
626
-
627
- # Return min_x, max_x, min_y, max_y in subtree rooted at and including root
628
- private def minmax_in_subtree(root)
629
- @minmax_vals ||= []
630
- @minmax_vals[root] ||= calc_minmax_at(root).freeze
631
- end
632
-
633
- # No memoization
634
- private def calc_minmax_at(root)
635
- return [INFINITY, -INFINITY, INFINITY, -INFINITY] if root > @size
636
-
637
- pair = val_at(root)
638
-
639
- return [pair.x, pair.x, pair.y, pair.y] if leaf?(root)
640
-
641
- left = left(root)
642
- left_min_max = minmax_in_subtree(left)
643
- return left_min_max if one_child?(root)
644
-
645
- right = right(root)
646
- right_min_max = minmax_in_subtree(right)
647
-
648
- [
649
- [pair.x, left_min_max[0], right_min_max[0]].min,
650
- [pair.x, left_min_max[1], right_min_max[1]].max,
651
- [pair.y, left_min_max[2], right_min_max[2]].min,
652
- [pair.y, left_min_max[3], right_min_max[3]].max
653
- ]
654
- end
655
-
656
- private def output_quasi_dot
657
- (2..@size).to_a.reverse.map do |node|
658
- "#{val_at(parent(node)).fmt} -- #{val_at(node).fmt}"
659
- end.join("\n")
660
- end
661
-
662
- private def pair_to_s
663
- end
664
-
665
- ########################################
666
- # Dead code
667
-
668
- end