data_structures_rmolinari 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f006234ee3b216d5607e9b10bb1958a6107ccfa0cc8c359f98383dc7fde14ee
4
- data.tar.gz: f281ab0768e24e7c983cd046ba7b185dab8fd972fb3065fd73ff575782bf5486
3
+ metadata.gz: 87a44faaaa62f555546867230df704981671491f040f6be29eeed7db7eb22a0a
4
+ data.tar.gz: 0a0f1f6cf22bdde5d0510a818af9d8a6dbdbf11a6e69ce2e178bf6f336bb3d92
5
5
  SHA512:
6
- metadata.gz: e274a97f177fad44bad20ecf24ecca1385fee3c217e7e42aac076c24377970c6444dfdbadc6fd3e1e201555177429c9f8eddaee211e463dd60f6b36e74004eec
7
- data.tar.gz: 293fc0b2973a8d851c27f4e64177dbf7b9a25b2bb7eb9efb4b33abdb07c4e006f80f4450996ef99da7e8bb1516ca8aa89ab893258960d9127d101995906254ed
6
+ metadata.gz: 990fc38cbc64c20290317bf2858ff6f2813f832d0046f249faea32c7f88f389e8c8c2db892f8288a0747aa9446181864a3e62435e4846a230411b6afa4b75faf
7
+ data.tar.gz: f1e641b03d30c4726268c1c8da6d6364f635251152230f89aba2b551f0355d37ce843dba8e631c2fbd4a20e87ae94c78cf30b46dc3d472f1a1b55add258de32a
data/CHANGELOG.md CHANGED
@@ -2,6 +2,21 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ### Changed
6
+
7
+ - MaxPrioritySearchTree
8
+ - Duplicate y values are now allowed. Ties are broken with a preference for smaller values of x.
9
+ - Method names have changed
10
+ - Instead of "highest", "leftmost", "rightmost" we use "largest_y", "smallest_x", "largest_x"
11
+ - For example, +highest_ne+ is now +largest_y_in_ne+
12
+ - DisjointUnion
13
+ - the size argument to initializer is optional. The default value is 0.
14
+ - elements can be added to the "universe" of known values with +make_set+
15
+
16
+ ### Removed
17
+ - MinmaxPrioritySearchTree is no longer available
18
+ - it was only a partial implementation anyway
19
+
5
20
  ## [0.3.0] 2023-01-06
6
21
 
7
22
  ### Added
@@ -10,34 +10,48 @@
10
10
  # See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
11
11
  #
12
12
  # The code uses several ideas from Tarjan and van Leeuwen for efficiency. We use "union by rank" in +unite+ and path-halving in
13
- # +find+. Together, these make the amortized cost for each of n such operations effectively constant.
13
+ # +find+. Together, these make the amortized cost of each operation effectively constant.
14
14
  #
15
- # - Tarjan, Robert E., van Leeuwen, Jan (1984). "Worst-case analysis of set union algorithms". Journal of the ACM. 31 (2): 245–281.
15
+ # - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
16
16
  #
17
17
  # @todo
18
18
  # - allow caller to expand the size of the universe. This operation is called "make set".
19
19
  # - All we need to do is increase the size of @d, set the parent pointers, define the new ranks (zero), and update @size.
20
20
  class DataStructuresRMolinari::DisjointUnion
21
+ include Shared
22
+
21
23
  # The number of subsets in the partition.
22
24
  attr_reader :subset_count
23
25
 
24
- # @param size the size of the universe, which must be known at the time of construction. The elements 0, 1, ..., size - 1 start
25
- # out in disjoint singleton subsets.
26
- def initialize(size)
27
- @size = size
26
+ # @param initial_size the initial size of the universe. The elements 0, 1, ..., initial_size - 1 start out in disjoint singleton
27
+ # subsets.
28
+ def initialize(initial_size = 0)
28
29
  # Initialize to
29
- @d = (0...size).to_a
30
- @rank = [0] * size
30
+ @d = (0...initial_size).to_a
31
+ @rank = [0] * initial_size
32
+
33
+ @subset_count = initial_size
34
+ end
35
+
36
+ # Add a new subset to the universe containing the element +new_v+
37
+ # @param new_v the new element, starting in its own singleton subset
38
+ # - it must be a non-negative integer, not already part of the universe of elements.
39
+ def make_set(new_v)
40
+ raise DataError, "Element #{new_v} must be a non-negative integer" unless new_v.is_a?(Integer) && !new_v.negative?
41
+ raise DataError, "Element #{new_v} is already present" if @d[new_v]
31
42
 
32
- @subset_count = size
43
+ @d[new_v] = new_v
44
+ @rank[new_v] = 0
45
+ @subset_count += 1
33
46
  end
34
47
 
35
48
  # Declare that e and f are equivalent, i.e., in the same subset. If they are already in the same subset this is a no-op.
36
49
  #
37
- # Each argument must be one of 0, 1, ..., size-1.
50
+ # Each argument must be in the universe of elements
38
51
  def unite(e, f)
39
52
  check_value(e)
40
53
  check_value(f)
54
+
41
55
  raise 'Uniting an element with itself is meaningless' if e == f
42
56
 
43
57
  e_root = find(e)
@@ -50,9 +64,11 @@ class DataStructuresRMolinari::DisjointUnion
50
64
 
51
65
  # The canonical representative of the subset containing e. Two elements d and e are in the same subset exactly when find(d) ==
52
66
  # find(e).
53
- # @param e must be one of 0, 1, ..., size-1.
54
- # @return (Integer) one of 0, 1, ..., size-1.
67
+ # @param e must be in the universe of elements
68
+ # @return (Integer) one of the universe of elements
55
69
  def find(e)
70
+ check_value(e)
71
+
56
72
  # We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
57
73
  x = e
58
74
  x = @d[x] = @d[@d[x]] while @d[@d[x]] != @d[x]
@@ -60,7 +76,7 @@ class DataStructuresRMolinari::DisjointUnion
60
76
  end
61
77
 
62
78
  private def check_value(v)
63
- raise DataError, "Value must be given and be in (0..#{@size - 1})" unless v && v.between?(0, @size - 1)
79
+ raise Shared::DataError, "Value #{v} is not part of the univserse." unless @d[v]
64
80
  end
65
81
 
66
82
  private def link(e, f)
@@ -1,7 +1,7 @@
1
1
  require_relative 'shared'
2
2
 
3
- # A Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or max) on a
4
- # arbitrary subarray of a given array.
3
+ # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
4
+ # max) on an arbitrary subarray of a given array.
5
5
  #
6
6
  # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
7
7
  # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
@@ -16,7 +16,7 @@ require_relative 'shared'
16
16
  # initializer and the definitions of concrete realisations like MaxValSegmentTree.
17
17
  #
18
18
  # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
19
- class DataStructuresRMolinari::GenericSegmentTree
19
+ class DataStructuresRMolinari::SegmentTreeTemplate
20
20
  include Shared::BinaryTreeArithmetic
21
21
 
22
22
  # Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
@@ -1,3 +1,4 @@
1
+ require 'must_be'
1
2
  require 'set'
2
3
  require_relative 'shared'
3
4
 
@@ -9,18 +10,18 @@ require_relative 'shared'
9
10
  # operations. It is their approach that we have implemented.
10
11
  #
11
12
  # The PST structure is an implicit, balanced binary tree with the following properties:
12
- # * The tree is a _max-heap_ in the y coordinate. That is, the point at each node has a y-value less than its parent.
13
+ # * The tree is a _max-heap_ in the y coordinate. That is, the point at each node has a y-value no greater than its parent.
13
14
  # * For each node p, the x-values of all the nodes in the left subtree of p are less than the x-values of all the nodes in the right
14
15
  # subtree of p. Note that this says nothing about the x-value at the node p itself. The tree is thus _almost_ a binary search tree
15
16
  # in the x coordinate.
16
17
  #
17
18
  # Given a set of n points, we can answer the following questions quickly:
18
19
  #
19
- # - +leftmost_ne+: for x0 and y0, what is the leftmost point (x, y) in P satisfying x >= x0 and y >= y0?
20
- # - +rightmost_nw+: for x0 and y0, what is the rightmost point (x, y) in P satisfying x <= x0 and y >= y0?
21
- # - +highest_ne+: for x0 and y0, what is the highest point (x, y) in P satisfying x >= x0 and y >= y0?
22
- # - +highest_nw+: for x0 and y0, what is the highest point (x, y) in P satisfying x <= x0 and y >= y0?
23
- # - +highest_3_sided+: for x0, x1, and y0, what is the highest point (x, y) in P satisfying x >= x0, x <= x1 and y >= y0?
20
+ # - +smallest_x_in_ne+: for x0 and y0, what is the leftmost point (x, y) in P satisfying x >= x0 and y >= y0?
21
+ # - +largest_x_in_nw+: for x0 and y0, what is the rightmost point (x, y) in P satisfying x <= x0 and y >= y0?
22
+ # - +largest_y_in_ne+: for x0 and y0, what is the highest point (x, y) in P satisfying x >= x0 and y >= y0?
23
+ # - +largest_y_in_nw+: for x0 and y0, what is the highest point (x, y) in P satisfying x <= x0 and y >= y0?
24
+ # - +largest_y_in_3_sided+: for x0, x1, and y0, what is the highest point (x, y) in P satisfying x >= x0, x <= x1 and y >= y0?
24
25
  # - +enumerate_3_sided+: for x0, x1, and y0, enumerate all points in P satisfying x >= x0, x <= x1 and y >= y0.
25
26
  #
26
27
  # (Here, "leftmost/rightmost" means "minimal/maximal x", and "highest" means "maximal y".)
@@ -29,8 +30,8 @@ require_relative 'shared'
29
30
  #
30
31
  # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
31
32
  #
32
- # In the current implementation no two points can share an x-value and no two points can share a y-value. This (rather severe)
33
- # restriction can be relaxed with some more complicated code.
33
+ # In the current implementation no two points can share an x-value. This (rather severe) restriction can be relaxed with some more
34
+ # complicated code, but it hasn't been written yet. See issue #9.
34
35
  #
35
36
  #
36
37
  # There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
@@ -49,7 +50,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
49
50
  # @param data [Array] the set P of points presented as an array. The tree is built in the array in-place without cloning.
50
51
  # - Each element of the array must respond to +#x+ and +#y+.
51
52
  # - This is not checked explicitly but a missing method exception will be thrown when we try to call one of them.
52
- # - The +x+ values must be distinct, as must the +y+ values. We raise a +Shared::DataError+ if this isn't the case.
53
+ # - The +x+ values must be distinct. We raise a +Shared::DataError+ if this isn't the case.
53
54
  # - This is a restriction that simplifies some of the algorithm code. It can be removed at the cost of some extra work. Issue
54
55
  # #9.
55
56
  #
@@ -60,9 +61,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
60
61
  @size = @data.size
61
62
 
62
63
  construct_pst
63
- return unless verify
64
64
 
65
- verify_properties
65
+ verify_properties if verify
66
66
  end
67
67
 
68
68
  ########################################
@@ -74,11 +74,11 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
74
74
  # structure. Define p* as
75
75
  #
76
76
  # - (infty, -infty) if Q \intersect P is empty and
77
- # - the highest (max-x) point in Q \intersect P otherwise.
77
+ # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
78
78
  #
79
79
  # This method returns p* in O(log n) time and O(1) extra space.
80
- def highest_ne(x0, y0)
81
- highest_in_quadrant(x0, y0, :ne)
80
+ def largest_y_in_ne(x0, y0)
81
+ largest_y_in_quadrant(x0, y0, :ne)
82
82
  end
83
83
 
84
84
  # Return the highest point in P to the "northwest" of (x0, y0).
@@ -87,17 +87,17 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
87
87
  # structure. Define p* as
88
88
  #
89
89
  # - (-infty, -infty) if Q \intersect P is empty and
90
- # - the highest (max-y) point in Q \intersect P otherwise.
90
+ # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
91
91
  #
92
92
  # This method returns p* in O(log n) time and O(1) extra space.
93
- def highest_nw(x0, y0)
94
- highest_in_quadrant(x0, y0, :nw)
93
+ def largest_y_in_nw(x0, y0)
94
+ largest_y_in_quadrant(x0, y0, :nw)
95
95
  end
96
96
 
97
- # The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both highest_ne and
98
- # highest_nw
97
+ # The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both largest_y_in_ne and
98
+ # largest_y_in_nw
99
99
  #
100
- # Note that highest_ne(x0, y0) = highest_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster than the
100
+ # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinity, y0) so we don't really need this. But it's a bit faster than the
101
101
  # general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
102
102
  #
103
103
  # From the paper:
@@ -108,7 +108,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
108
108
  # - If Q intersect P is empty then p* = best
109
109
  #
110
110
  # Here, P is the set of points in our data structure and T_p is the subtree rooted at p
111
- private def highest_in_quadrant(x0, y0, quadrant)
111
+ private def largest_y_in_quadrant(x0, y0, quadrant)
112
112
  quadrant.must_be_in [:ne, :nw]
113
113
 
114
114
  p = root
@@ -135,10 +135,10 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
135
135
  #
136
136
  # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
137
137
  #
138
- # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
138
+ # We break ties by preferring points with smaller x values
139
139
  update_highest = lambda do |node|
140
140
  t = @data[node]
141
- if in_q.call(t) && t.y > best.y
141
+ if in_q.call(t) && (t.y > best.y || (t.y == best.y && t.x < best.x))
142
142
  best = t
143
143
  end
144
144
  end
@@ -194,7 +194,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
194
194
  # - the leftmost (min-x) point in Q \intersect P otherwise.
195
195
  #
196
196
  # This method returns p* in O(log n) time and O(1) extra space.
197
- def leftmost_ne(x0, y0)
197
+ def smallest_x_in_ne(x0, y0)
198
198
  extremal_in_x_dimension(x0, y0, :ne)
199
199
  end
200
200
 
@@ -207,14 +207,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
207
207
  # - the rightmost (max-x) point in Q \intersect P otherwise.
208
208
  #
209
209
  # This method returns p* in O(log n) time and O(1) extra space.
210
- def rightmost_nw(x0, y0)
210
+ def largest_x_in_nw(x0, y0)
211
211
  extremal_in_x_dimension(x0, y0, :nw)
212
212
  end
213
213
 
214
- # A genericized version of the paper's leftmost_ne that can calculate either leftmost_ne or rightmost_nw as specifies via a
214
+ # A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
215
215
  # parameter.
216
216
  #
217
- # Quadrant is either :ne (which gives leftmost_ne) or :nw (which gives rightmost_nw).
217
+ # Quadrant is either :ne (which gives smallest_x_in_ne) or :nw (which gives largest_x_in_nw).
218
218
  #
219
219
  # From De et al:
220
220
  #
@@ -245,7 +245,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
245
245
  # takes as input a point t and does the following: if t \in Q and x(t) < x(best) then it assigns best = t
246
246
  #
247
247
  # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
248
- update_leftmost = lambda do |node|
248
+ update_best = lambda do |node|
249
249
  t = @data[node]
250
250
  if in_q.call(t) && sign * t.x < sign * best.x
251
251
  best = t
@@ -261,13 +261,13 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
261
261
  #
262
262
  # - If x0 <= x(c1) then all subtrees have large enough x values and we look for the leftmost node in c with a large enough y
263
263
  # value. Both p and q are sent into that subtree.
264
- # - If x0 >= x(ck) the the rightmost subtree is our only hope the rightmost subtree.
264
+ # - If x0 >= x(ck) then the rightmost subtree is our only hope
265
265
  # - Otherwise, x(c1) < x0 < x(ck) and we let i be least so that x(ci) <= x0 < x(c(i+1)). Then q becomes the leftmost cj in c not
266
266
  # to the left of ci such that y(cj) >= y0, if any. p becomes ci if y(ci) >= y0 and q otherwise. If there is no such j, we put
267
267
  # q = p. This may leave both of p, q undefined which means there is no useful way forward and we return nils to signal this to
268
268
  # calling code.
269
269
  #
270
- # The same logic applies to rightmost_nw, though everything is "backwards"
270
+ # The same logic applies to largest_x_in_nw, though everything is "backwards"
271
271
  # - membership of Q depends on having a small-enough value of x, rather than a large-enough one
272
272
  # - among the ci, values towards the end of the array tend not to be in Q while values towards the start of the array tend to be
273
273
  # in Q
@@ -302,14 +302,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
302
302
  new_p ||= new_q # if nodes[i] is no good, send p along with q
303
303
  new_q ||= new_p # but if there is no worthwhile value for q we should send it along with p
304
304
 
305
- return [new_q, new_p] if quadrant == :nw # swap for the rightmost_nw case.
305
+ return [new_q, new_p] if quadrant == :nw # swap for the largest_x_in_nw case.
306
306
 
307
307
  [new_p, new_q]
308
308
  end
309
309
 
310
310
  until leaf?(p)
311
- update_leftmost.call(p)
312
- update_leftmost.call(q)
311
+ update_best.call(p)
312
+ update_best.call(q)
313
313
 
314
314
  if p == q
315
315
  if one_child?(p)
@@ -324,7 +324,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
324
324
  q = p # p itself is just one layer above the leaves, or is itself a leaf
325
325
  elsif one_child?(q)
326
326
  # This generic approach is not as fast as the bespoke checks described in the paper. But it is easier to maintain the code
327
- # this way and allows easy implementation of rightmost_nw
327
+ # this way and allows easy implementation of largest_x_in_nw
328
328
  p, q = determine_next_nodes.call(left(p), right(p), left(q))
329
329
  else
330
330
  p, q = determine_next_nodes.call(left(p), right(p), left(q), right(q))
@@ -332,8 +332,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
332
332
  break unless p # we've run out of useful nodes
333
333
  end
334
334
  end
335
- update_leftmost.call(p) if p
336
- update_leftmost.call(q) if q
335
+ update_best.call(p) if p
336
+ update_best.call(q) if q
337
337
  best
338
338
  end
339
339
 
@@ -346,10 +346,10 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
346
346
  # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
347
347
  #
348
348
  # - (infty, -infty) if Q \intersect P is empty and
349
- # - the highest (max-x) point in Q \intersect P otherwise.
349
+ # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
350
350
  #
351
351
  # This method returns p* in O(log n) time and O(1) extra space.
352
- def highest_3_sided(x0, x1, y0)
352
+ def largest_y_in_3_sided(x0, x1, y0)
353
353
  # From the paper:
354
354
  #
355
355
  # The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P̸ is not \empty,
@@ -389,7 +389,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
389
389
  # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
390
390
  update_highest = lambda do |node|
391
391
  t = @data[node]
392
- if in_q.call(t) && t.y > best.y
392
+ if in_q.call(t) && (t.y > best.y || (t.y == best.y && t.x < best.x))
393
393
  best = t
394
394
  end
395
395
  end
@@ -570,7 +570,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
570
570
  # My high-level understanding of the algorithm
571
571
  # --------------------------------------------
572
572
  #
573
- # We need to find all elements of Q \intersect P, so it isn't enough, as it was in highest_3_sided simply to keep track of p and
573
+ # We need to find all elements of Q \intersect P, so it isn't enough, as it was in largest_y_in_3_sided simply to keep track of p and
574
574
  # q. We need to track four nodes, p, p', q', and q which are (with a little handwaving) respectively
575
575
  #
576
576
  # - the rightmost node to the left of Q' = [x0, x1] X [-infinity, infinity],
@@ -692,8 +692,6 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
692
692
  # The four key helpers described in the paper
693
693
 
694
694
  # Handle the next step of the subtree at p
695
- #
696
- # I need to go through this with paper, pencil, and some diagrams.
697
695
  enumerate_left = lambda do
698
696
  if leaf?(p)
699
697
  left = false
@@ -999,13 +997,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
999
997
 
1000
998
  private def construct_pst
1001
999
  raise DataError, 'Duplicate x values are not supported' if contains_duplicates?(@data, by: :x)
1002
- raise DataError, 'Duplicate y values are not supported' if contains_duplicates?(@data, by: :y)
1003
1000
 
1004
- # We follow the algorithm in the paper by De, Maheshwari et al.
1001
+ # We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
1002
+ # defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
1003
+ # worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
1005
1004
 
1006
- # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This probably requires a malloc and
1007
- # data copy, which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
1008
- # construction. In fact, we are probably doing O(n^2) work because of all the calls to #index_with_largest_y_in.
1005
+ # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
1006
+ # and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
1007
+ # construction.
1009
1008
  @data.unshift nil
1010
1009
 
1011
1010
  h = Math.log2(@size).floor
@@ -1052,63 +1051,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1052
1051
  end
1053
1052
  end
1054
1053
 
1055
- ########################################
1056
- # Tree arithmetic
1057
-
1058
- # # First element and root of the tree structure
1059
- # private def root
1060
- # 1
1061
- # end
1062
-
1063
- # # Indexing is from 1
1064
- # private def parent(i)
1065
- # i >> 1
1066
- # end
1067
-
1068
- # private def left(i)
1069
- # i << 1
1070
- # end
1071
-
1072
- # private def right(i)
1073
- # 1 + (i << 1)
1074
- # end
1075
-
1076
- # private def level(i)
1077
- # l = 0
1078
- # while i > root
1079
- # i >>= 1
1080
- # l += 1
1081
- # end
1082
- # l
1083
- # end
1084
-
1085
- # # i has no children
1086
- # private def leaf?(i)
1087
- # i > @last_non_leaf
1088
- # end
1089
-
1090
- # # i has exactly one child (the left)
1091
- # private def one_child?(i)
1092
- # i == @parent_of_one_child
1093
- # end
1094
-
1095
- # # i has two children
1096
- # private def two_children?(i)
1097
- # i <= @last_parent_of_two_children
1098
- # end
1099
-
1100
- # # i is the left child of its parent.
1101
- # private def left_child?(i)
1102
- # (i & 1).zero?
1103
- # end
1104
-
1105
1054
  private def swap(index1, index2)
1106
1055
  return if index1 == index2
1107
1056
 
1108
1057
  @data[index1], @data[index2] = @data[index2], @data[index1]
1109
1058
  end
1110
1059
 
1111
- # The index in @data[l..r] having the largest value for y
1060
+ # The index in @data[l..r] having the largest value for y, breaking ties with the smaller x value. Since we are already sorted by
1061
+ # x we don't actually need to check this.
1112
1062
  private def index_with_largest_y_in(l, r)
1113
1063
  return nil if r < l
1114
1064
 
@@ -1134,7 +1084,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1134
1084
  private def verify_properties
1135
1085
  # It's a max-heap in y
1136
1086
  (2..@size).each do |node|
1137
- raise InternalLogicError, "Heap property violated at child #{node}" unless @data[node].y < @data[parent(node)].y
1087
+ byebug unless @data[node].y <= @data[parent(node)].y
1088
+ raise InternalLogicError, "Heap property violated at child #{node}" unless @data[node].y <= @data[parent(node)].y
1138
1089
  end
1139
1090
 
1140
1091
  # Left subtree has x values less than all of the right subtree
@@ -1,3 +1,5 @@
1
+ require 'forwardable'
2
+
1
3
  require_relative 'data_structures_rmolinari/shared'
2
4
 
3
5
  module DataStructuresRMolinari
@@ -10,14 +12,13 @@ require_relative 'data_structures_rmolinari/disjoint_union'
10
12
  require_relative 'data_structures_rmolinari/generic_segment_tree'
11
13
  require_relative 'data_structures_rmolinari/heap'
12
14
  require_relative 'data_structures_rmolinari/max_priority_search_tree'
13
- require_relative 'data_structures_rmolinari/minmax_priority_search_tree'
14
15
 
15
16
  # A namespace to hold the provided classes. We want to avoid polluting the global namespace with names like "Heap"
16
17
  module DataStructuresRMolinari
17
18
  ########################################
18
19
  # Concrete instances of Segment Tree
19
20
  #
20
- # @todo consider moving these into generic_segment_tree.rb
21
+ # @todo consider moving these into generic_segment_tree.rb and renaming that file
21
22
 
22
23
  # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
23
24
  # in O(log n) time.
@@ -30,7 +31,7 @@ module DataStructuresRMolinari
30
31
  # @param data an object that contains values at integer indices based at 0, via +data[i]+.
31
32
  # - This will usually be an Array, but it could also be a hash or a proc.
32
33
  def initialize(data)
33
- @structure = GenericSegmentTree.new(
34
+ @structure = SegmentTreeTemplate.new(
34
35
  combine: ->(a, b) { [a, b].max },
35
36
  single_cell_array_val: ->(i) { data[i] },
36
37
  size: data.size,
@@ -57,7 +58,7 @@ module DataStructuresRMolinari
57
58
 
58
59
  # @param (see MaxValSegmentTree#initialize)
59
60
  def initialize(data)
60
- @structure = GenericSegmentTree.new(
61
+ @structure = SegmentTreeTemplate.new(
61
62
  combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
62
63
  single_cell_array_val: ->(i) { [i, data[i]] },
63
64
  size: data.size,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-06 00:00:00.000000000 Z
11
+ date: 2023-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be
@@ -86,7 +86,6 @@ files:
86
86
  - lib/data_structures_rmolinari/generic_segment_tree.rb
87
87
  - lib/data_structures_rmolinari/heap.rb
88
88
  - lib/data_structures_rmolinari/max_priority_search_tree.rb
89
- - lib/data_structures_rmolinari/minmax_priority_search_tree.rb
90
89
  - lib/data_structures_rmolinari/shared.rb
91
90
  homepage: https://github.com/rmolinari/data_structures
92
91
  licenses:
@@ -1,668 +0,0 @@
1
- require 'must_be'
2
-
3
- require_relative 'shared'
4
-
5
- # THIS CLASS IS INCOMPLETE AND NOT USABLE
6
- #
7
- # A priority search tree (PST) stores points in two dimensions (x,y) and can efficiently answer certain questions about the set of
8
- # points.
9
- #
10
- # The structure was introduced by McCreight [1].
11
- #
12
- # See more: https://en.wikipedia.org/wiki/Priority_search_tree
13
- #
14
- # It is possible to build such a tree in place, given an array of pairs. See [2]. In a follow-up paper, [3], the authors show how to
15
- # construct a more flexible data structure,
16
- #
17
- # "[T]he Min-Max Priority Search tree for a set P of n points in R^2. It is a binary tree T with the following properties:
18
- #
19
- # * For each internal node u, all points in the left subtree of u have an x-coordinate which is less than the x-coordinate of any
20
- # point in the right subtree of u.
21
- # * The y-coordinate values of the nodes on even (resp. odd) levels are smaller (resp. greater) than the y-coordinate values of
22
- # their descendants (if any), where the root is at level zero.
23
- #
24
- # "The first property implies that T is a binary search tree on the x-coordinates of the points in P, except that there is no
25
- # relation between the x-coordinates of the points stored at u and any of its children. The second property implies that T is a
26
- # min-max heap on the y-coordinates of the points in P."
27
- #
28
- # I started implementing the in-place PST. Then, finding the follow-up paper [3], decided to do that one instead, as the paper says
29
- # it is more flexible. The point is to learn a new data structure and its associated algorithms.
30
- #
31
- # The algorithms are rather bewildering. Highest3SidedUp is complicated, and only two of the functions CheckLeft, CheckLeftIn,
32
- # CheckRight, CheckRightIn are given; the other two are "symmetric". But it's not really clear what the first are actually doing, so
33
- # it's hard to know what the others actually do.
34
- #
35
- # The implementation is incomplete. The pseudo-code in the paper is buggy (see the code below), which makes progress difficult.
36
- #
37
- # [1] E. McCreight, _Priority Search Trees_, SIAM J. Computing, v14, no 3, May 1985, pp 257-276.
38
- # [2] De, Maheshwari, Nandy, Smid, _An in-place priority search tree_, 23rd Annual Canadian Conference on Computational Geometry.
39
- # [3] De, Maheshwari, Nandy, Smid, _An in-place min-max priority search tree_, Computational Geometry, v46 (2013), pp 310-327.
40
- # [4] Atkinson, Sack, Santoro, Strothotte, _Min-max heaps and generalized priority queues_, Commun. ACM 29 (10) (1986), pp 996-1000.
41
- class DataStructuresRMolinari::MinmaxPrioritySearchTree
42
- include Shared
43
-
44
- # The array of pairs is turned into a minmax PST in-place without cloning. So clone before passing it in, if you care.
45
- #
46
- # Each element must respond to #x and #y. Use Point (above) if you like.
47
- def initialize(data, verify: false)
48
- @data = data
49
- @size = @data.size
50
-
51
- construct_pst
52
- return unless verify
53
-
54
- # puts "Validating tree structure..."
55
- verify_properties
56
- end
57
-
58
- # Let Q = [x0, infty) X [y0, infty) be the northeast "quadrant" defined by the point (x0, y0) and let P be the points in this data
59
- # structure. Define p* as
60
- #
61
- # - (infty, infty) if Q \intersect P is empty and
62
- # - the leftmost (i.e., min-x) point in Q \intersect P otherwise
63
- #
64
- # This method returns p*.
65
- #
66
- # From De et al:
67
- #
68
- # [t]he variables best, p, and q satisfy the following invariant:
69
- #
70
- # - if Q \intersect P is nonempty then p* \in {best} \union T(p) \union T(q)
71
- # - if Q \intersect P is empty then p* = best
72
- # - p and q are at the same level of T and x(p) <= x(q)
73
- #
74
- # Here T(x) is the subtree rooted at x
75
- def leftmost_ne(x0, y0)
76
- best = Point.new(INFINITY, INFINITY)
77
- p = q = root
78
-
79
- in_q = ->(pair) { pair.x >= x0 && pair.y >= y0 }
80
-
81
- # From the paper:
82
- #
83
- # takes as input a point t \in P and updates best as follows: if t \in Q and x(t) < x(best) then it assigns best = t
84
- #
85
- # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
86
- update_leftmost = lambda do |node|
87
- t = val_at(node)
88
- if in_q.call(t) && t.x < best.x
89
- best = t
90
- end
91
- end
92
-
93
- # Generalize the c1,...,c4 idea from the paper in line with the BUG 2 IN PAPER notes, below.
94
- #
95
- # Given: 0 or more nodes n1, ..., nk in the tree. All are at the same level, which is a "max level" in our MinmaxPST, such that
96
- # x(n1) <= x(n2) <= ... <= x(nk). (Note: it is expected that the nj are either children or grandchildren of p and q, though we
97
- # don't check that.)
98
- #
99
- # If k = 0 return nil. Otherwise...
100
- #
101
- # We return two values p_goal, q_goal (possibly equal) from among the nj such that
102
- #
103
- # - p_goal is not to the right of q_goal in the tree and so, in particular x(p_goal) <= x(q_goal)
104
- # - if and when the auction reaches p = p_goal and q = q_goal the algorithm invariant will be satisfied.
105
- #
106
- # As a special case, we return nil if we detect that none of the subtrees T(nj) contain any points in Q. This is a sign to
107
- # terminate the algorithm.
108
- #
109
- # See the notes at "BUG 2 IN PAPER" below for more details about what is going on.
110
- determine_goal_nodes = lambda do |nodes|
111
- node_count = nodes.size
112
- return nil if node_count.zero?
113
-
114
- if val_at(nodes.last).x <= x0
115
- # Only the rightmost subtree can possibly have anything in Q, assuming that all the x-values are distinct.
116
- return [nodes.last, nodes.last]
117
- end
118
-
119
- if val_at(nodes.first).x > x0
120
- # All subtrees have x-values large enough to provide elements of Q. Since we are at a max-level the y-values help us work
121
- # out which subtree to focus on.
122
- leftmost = nodes.find { |node| val_at(node).y >= y0 }
123
-
124
- return nil unless leftmost # nothing left to find
125
-
126
- # Otherwise we explore the leftmost subtree. Its root is in Q and can't be beaten by anything to its right.
127
- return [leftmost, leftmost]
128
- end
129
-
130
- values = nodes.map { |n| val_at(n) }
131
-
132
- # Otherwise x(n1) <= x0 < x(nk). Thus i is well-defined.
133
- i = (0...node_count).select { |j| values[j].x <= x0 && x0 < values[j + 1].x }.min
134
-
135
- # these nodes all have large-enough x-values and so this finds the ones in the set Q.
136
- new_q = nodes[(i + 1)..].select { |node| val_at(node).y >= y0 }.min # could be nil
137
- new_p = nodes[i] if values[i].y >= y0 # The leftmost subtree is worth exploring if the y-value is big enough. Otherwise not
138
- new_p ||= new_q # if nodes[i] is no good we send p along with q
139
- new_q ||= new_p # but if there was no worthwhile value for q we should send it along with p
140
-
141
- return nil unless new_p
142
-
143
- [new_p, new_q]
144
- end
145
-
146
- until leaf?(p)
147
- level = Math.log2(p).floor # TODO: don't calculate log every time!
148
-
149
- update_leftmost.call(p)
150
- update_leftmost.call(q)
151
-
152
- if p == q
153
- if one_child?(p)
154
- p = q = left(p)
155
- else
156
- q = right(p)
157
- p = left(p)
158
- end
159
- else
160
- # p != q
161
- if leaf?(q)
162
- q = p # p itself is just one layer above the leaves, or is itself a leaf
163
- elsif one_child?(q)
164
- # Note that p has two children
165
- if val_at(left(q)).x < x0
166
- # x-values below p are too small
167
- p = q = left(q)
168
- elsif val_at(right(p)).x <= x0
169
- # x-values in T(right(p)) are too small. DISTINCT-X
170
- p = right(p)
171
- q = left(q)
172
- else
173
- # BUG 1 IN PAPER.
174
- #
175
- # So, x(q_l) >= x0 and x(p_r) > x0. But how can we be sure that the child of q isn't the winner?. Should we be trying
176
- # it in this case?
177
- #
178
- # Yes: otherwise it never gets checked.
179
-
180
- update_leftmost.call(left(q))
181
- q = right(p)
182
- p = left(p)
183
- end
184
- else
185
- # p and q both have two children
186
-
187
- # BUG 2 IN PAPER.
188
- #
189
- # Define c as the paper does:
190
- #
191
- # (c1, c2, c3, c4) = (left(p), right(p), left(q), right(q))
192
- #
193
- # Because of the PST property on x and the invariant x(p) <= x(q) we know that
194
- #
195
- # x(c1) <= x(c2) <= x(c3) <= x(c4)
196
- #
197
- # Similarly, the sets of values x(T(ci)) are pairwise ordered in the same sense.
198
- #
199
- # Suppose further that x(ci) <= x0 <= x(c(i+1)). Then we know several things
200
- #
201
- # - there might be a "winner" (point in Q) in T(ci), perhaps ci itself.
202
- # - there are not any winners in T(cj) for j < i, because the x-values there aren't big enough
203
- # - any winner in ck, for k >= i, will be to the left of and thus beat any winner in c(k+1), because of the ordering of
204
- # x-values
205
- #
206
- # If x(c4) <= x0 then the rightmost subtree T(c4) is the only one worth checking and we set p = q = c4.
207
- # If x(c1) > x0 then we take i = 0 and ignore the logic on ci in what follows and setting p = q.
208
- #
209
- # Pretend for the moment that we are using a MaxPST instead of a MinmaxPST. Then we can look at y values to learn more.
210
- #
211
- # - if y(ci) >= y0 then we need to search T(ci), so we will update p = ci
212
- # - but if y(ci) < y0 then there are no winners in T(ci) because the y-values are too small.
213
- # - similarly, if y(c(i+1)) >= y0 then we need to search T(c(i+1)). Indeed c(i+1) itself is in Q and beats any winner in
214
- # subtrees further to the right
215
- # - so, let k > i be minimal such that y(ck) >= y0, if there is any. Note that ck is itself a winner. Then
216
- # - if y(ci) >= y0,
217
- # - set p = ci, and q = ck (or q = ci if there is no such k)
218
- # - otherwise (T(ci) has no winners because its y-values are too small)
219
- # - if k is defined set p = q = ck. Otherwise HALT (there are no more winners)
220
- #
221
- # But we are working with a MinmaxPST rather than a MaxPST, so we have to work harder. If c1, ..., c4 (the children of p
222
- # and q) are in a "max-level" of the tree - that is, an even level - then the logic above still applies. But if they are
223
- # at a min level things are trickier and we need to go another layer down.
224
- #
225
- # The paper knows that we need to look a further layer down, but the logic is too simplistic. It looks at cj for j > i and
226
- # checks if cj or either of its children are in Q. But that's not good enough. For the same reason that in a MaxPST we may
227
- # need to explore below T(ci) even if ci isn't in Q, we may need to descend through one of the grandchildren of p or q even
228
- # if that grandchild isn't in Q.
229
- #
230
- # Getting a bit handwavey especially over what happens near the leaves...
231
- #
232
- # Consider the children d1, d2, ..., dm, of ci, ..., c4 (and so grandchildren of p and q). They are at a max-level and so
233
- # the logic described applies to the dk. If ci happens to be a winner we can set p = ci and work out what to do with q by
234
- # looking at the children of c(i+1), ..., c4. Otherwise we look at all the dj values (up to 8 of them), apply the logic
235
- # above to work out that we want to head for, say, p = ds and q = dt, and in this cycle update p = parent(ds), q =
236
- # parent(dt). (We also need to submit the values c(i+1)..c4 to UpdateLeftmost.)
237
- #
238
- # In other words, we can use the MaxPST logic on d1,...,dm to decide where we need to go, and then step to the relevant
239
- # parents among the cj.
240
-
241
- c = [left(p), right(p), left(q), right(q)]
242
- if level.odd?
243
- # the elements of c are at an even level, and hence their y values are maxima for the subtrees. We can learn what we
244
- # need to know from them
245
- p, q = determine_goal_nodes.call(c)
246
- if p && !q
247
- # byebug
248
- # determine_goal_nodes.call(c)
249
- raise 'bad logic'
250
- end
251
- else
252
- # They are at an odd level and so aren't helpful in working out what to do next: we look at their children, which are in
253
- # a max-level. We need to check the elements of c against best since we are otherwise ignoring them.
254
- c.each { |n| update_leftmost.call(n) }
255
-
256
- d = c.map { [left(_1), right(_1)]}.flatten.select { |n| n <= @size }
257
-
258
- # Note that we are jumping down two levels here!
259
- p, q = determine_goal_nodes.call(d)
260
- if p && !q
261
- # byebug
262
- # determine_goal_nodes.call(c)
263
- raise 'bad logic'
264
- end
265
-
266
- p
267
- end
268
-
269
- return best unless p # nothing more to do
270
- end
271
- end
272
- end
273
- update_leftmost.call(p)
274
- update_leftmost.call(q)
275
- best
276
- end
277
-
278
- # Let Q be the "three-sided query range" [x0, x1] X [y0, infty) and let P_Q be P \intersect Q.
279
- #
280
- # If P_Q is empty then p* = (infty, -infty).
281
- # Otherwise, p* is the point in P_Q with maximal y value.
282
- #
283
- # This method returns p*
284
- # def highest_3_sided_up(x0, x1, y0)
285
- # best = Point.new(INFINITY, -INFINITY)
286
-
287
- # in_q = lambda do |pair|
288
- # pair.x >= x0 && pair.x <= x1 && pair.y >= y0
289
- # end
290
-
291
- # # From the paper:
292
- # #
293
- # # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
294
- # #
295
- # # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
296
- # #
297
- # # The algorithm is complicated. From the paper:
298
- # #
299
- # # Since Q is bounded by two vertical sides, we use four index variables p, p', q and q' to guide the search path. In addition,
300
- # # we use four bits L, L', R and R'; these correspond to the subtrees of T rooted at the nodes p, p', q, and q', respectively;
301
- # # if a bit is equal to one, then the corresponding node is referred to as an _active node_ (for example, if L = 1 then p is an
302
- # # active node), and the subtree rooted at that node may contain a candidate point for p*. So the search is required to be
303
- # # performed in the subtree rooted at all active nodes. More formally, at any instant of time the variables satisfy the following
304
- # # invariants:
305
- # #
306
- # # - If L = 1 then x(p) < x0.
307
- # # - If L' = 1 then x0 <= x(p') <= x1.
308
- # # - If R = 1 then x(q) > x1.
309
- # # - If R' = 1 then x0 <= x(q') <= x1.
310
- # # - If L' = 1 and R' = 1 then x(p') <= x(q').
311
- # # - If P_Q is non-empty then p* = best or p* is in the subtree rooted at any one of the active nodes.
312
- # #
313
- # # There are more details in the paper
314
- # update_highest = lambda do |node|
315
- # t = val_at(node)
316
- # if in_q.call(t) && t.y > best.y
317
- # best = t
318
- # end
319
- # end
320
-
321
- # ex_update_highest = lambda do |node|
322
- # update_highest.call(node)
323
- # update_highest.call(left(node)) unless leaf?(node)
324
- # update_highest.call(right(node)) unless one_child?(node)
325
- # end
326
-
327
- # if val_at(root).x < x0
328
- # p = root
329
- # l = true
330
- # l_prime = r = r_prime = false
331
- # elsif val_at(root).x < x1
332
- # p_prime = root
333
- # l_prime = true
334
- # l = r = r_prime = false
335
- # else
336
- # q = root
337
- # r = true
338
- # l = l_prime = r_prime = false
339
- # end
340
-
341
- # set_z = lambda do
342
- # r = []
343
- # r << p if l
344
- # r << p_prime if l_prime
345
- # r << q if r
346
- # r << q_prime if r_primg
347
- # r
348
- # end
349
-
350
- # check_left = lambda do
351
- # if leaf?(p)
352
- # l = false
353
- # elsif one_child?(p)
354
- # p_l_x = val_at(left(p))
355
- # if x0 <= p_l_x && p_l_x <= x1
356
- # update_highest.call(left(p))
357
- # if l_prime && r_prime
358
- # ex_update_highest.call(p_prime)
359
- # elsif l_prime
360
- # q_prime = p_prime
361
- # r_prime = true
362
- # end
363
- # p_prime = left(p)
364
- # l_prime = true
365
- # l = false
366
- # elsif p_l_x < x0
367
- # p = left(p)
368
- # else
369
- # q = left(p)
370
- # r = true
371
- # l = false
372
- # end
373
- # else
374
- # # p has two children
375
-
376
- # end
377
-
378
- # while l || l_prime || r || r_prime
379
- # z_star = set_z.call.min_by(4) { level(_1) }
380
- # if z_star.include? p_prime
381
- # check_left_in(p_prime)
382
- # elsif z_star.include? q_prime
383
- # check_right_in(q_prime)
384
- # elsif z_star.include? p
385
- # check_left(p)
386
- # else
387
- # check_right(q)
388
- # end
389
- # end
390
- # end
391
-
392
- # Find the "highest" (max-y) point that is "northeast" of (x, y).
393
- #
394
- # That is, the point p* in Q = [x, infty) X [y, infty) with the largest y value, or (infty, -infty) if there is no point in that
395
- # quadrant.
396
- #
397
- # Algorithm is from De et al. section 3.1
398
- def highest_ne(x0, y0)
399
- raise "Write me"
400
- # From the paper:
401
- #
402
- # The algorithm uses two variables best and p, which satisfy the following invariant
403
- #
404
- # - If Q intersect P is nonempty then p* in {best} union T_p
405
- # - If Q intersect P is empty then p* = best
406
- #
407
- # Here, P is the set of points in our data structure and T_p is the subtree rooted at p
408
- best = Point.new(INFINITY, -INFINITY)
409
- p = root # root of the whole tree AND the pair stored there
410
-
411
- in_q = lambda do |pair|
412
- pair.x >= x0 && pair.y >= y0
413
- end
414
-
415
- # From the paper:
416
- #
417
- # takes as input a point t and does the following: if t \in Q and y(t) > y(best) then it assigns best = t
418
- #
419
- # Note that the paper identifies a node in the tree with its value. We need to grab the correct node.
420
- update_highest = lambda do |node|
421
- t = val_at(node)
422
- if in_q.call(t) && t.y > best.y
423
- best = t
424
- end
425
- end
426
-
427
- # We could make this code more efficient. But since we only have O(log n) steps we won't actually gain much so let's keep it
428
- # readable and close to the paper's pseudocode for now.
429
- until leaf?(p)
430
- p_val = val_at(p)
431
- if in_q.call(p_val)
432
- # p \in Q and nothing in its subtree can beat it because of the max-heap
433
- update_highest.call(p)
434
- return best
435
-
436
- # p = left(p) <- from paper
437
- elsif p_val.y < y0
438
- # p is too low for Q, so the entire subtree is too low as well
439
- return best
440
-
441
- # p = left(p)
442
- elsif one_child?(p)
443
- # With just one child we need to check it
444
- p = left(p)
445
- elsif val_at(right(p)).x <= x0
446
- # right(p) might be in Q, but nothing in the left subtree can be, by the PST property on x.
447
- p = right(p)
448
- elsif val_at(left(p)).x >= x0
449
- # Both children are in Q, so try the higher of them. Note that nothing in either subtree will beat this one.
450
- higher = left(p)
451
- if val_at(right(p)).y > val_at(left(p)).y
452
- higher = right(p)
453
- end
454
- p = higher
455
- elsif val_at(right(p)).y < y0
456
- # Nothing in the right subtree is in Q, but maybe we'll find something in the left
457
- p = left(p)
458
- else
459
- # At this point we know that right(p) \in Q so we need to check it. Nothing in its subtree can beat it so we don't need to
460
- # look there. But there might be something better in the left subtree.
461
- update_highest.call(right(p))
462
- p = left(p)
463
- end
464
- end
465
- update_highest.call(p) # try the leaf
466
- best
467
- end
468
-
469
- # O(n log^2 n)
470
- private def construct_pst
471
- # We follow the algorithm in [3]. Indexing is from 1 there and we follow that here. The algorithm is almost exactly the same as
472
- # for the (max) PST.
473
- h = Math.log2(@size).floor
474
- a = @size - (2**h - 1) # the paper calls it A
475
- sort_subarray(1, @size)
476
- level = 0 # TODO: isn't level always equal to i in the loop?
477
-
478
- (0...h).each do |i|
479
- sense = level.even? ? :max : :min
480
- pow_of_2 = 2**i
481
-
482
- k = a / (2**(h - i))
483
- k1 = 2**(h + 1 - i) - 1
484
- k2 = (1 - k) * 2**(h - i) - 1 + a
485
- k3 = 2**(h - i) - 1
486
- (1..k).each do |j|
487
- l = index_with_extremal_y_in(pow_of_2 + (j - 1) * k1, pow_of_2 + j * k1 - 1, sense:)
488
- swap(l, pow_of_2 + j - 1)
489
- end
490
-
491
- if k < pow_of_2
492
- l = index_with_extremal_y_in(pow_of_2 + k * k1, pow_of_2 + k * k1 + k2 - 1, sense:)
493
- swap(l, pow_of_2 + k)
494
-
495
- m = pow_of_2 + k * k1 + k2
496
- (1..(pow_of_2 - k - 1)).each do |j|
497
- l = index_with_extremal_y_in(m + (j - 1) * k3, m + j * k3 - 1, sense:)
498
- swap(l, pow_of_2 + k + j)
499
- end
500
- end
501
- sort_subarray(2 * pow_of_2, @size)
502
- level += 1
503
- end
504
- end
505
-
506
- ########################################
507
- # Indexing the data structure as though it were from 1, even though the underlying @data is indexed from zero.
508
-
509
- # First element and root of the tree structure
510
- private def root
511
- 1
512
- end
513
-
514
- private def val_at(idx)
515
- @data[idx - 1]
516
- end
517
-
518
- # Indexing is from 1
519
- private def parent(i)
520
- i >> 1
521
- end
522
-
523
- private def left(i)
524
- i << 1
525
- end
526
-
527
- private def right(i)
528
- 1 + (i << 1)
529
- end
530
-
531
- private def leaf?(i)
532
- left(i) > @size
533
- end
534
-
535
- private def one_child?(i)
536
- left(i) <= @size && right(i) > @size
537
- end
538
-
539
- private def swap(index1, index2)
540
- return if index1 == index2
541
-
542
- @data[index1 - 1], @data[index2 - 1] = @data[index2 - 1], @data[index1 - 1]
543
- end
544
-
545
- private def level(i)
546
- count = 0
547
- while i > root
548
- i >>= 1
549
- count += 1
550
- end
551
- count
552
- end
553
-
554
- # The index in @data[l..r] having the largest/smallest value for y
555
- # The sense argument should be :min or :max
556
- private def index_with_extremal_y_in(l, r, sense:)
557
- return nil if r < l
558
-
559
- case sense
560
- when :min
561
- (l..r).min_by { |idx| val_at(idx).y }
562
- when :max
563
- (l..r).max_by { |idx| val_at(idx).y }
564
- else
565
- raise "Bad comparison sense #{sense}"
566
- end
567
- end
568
-
569
- # Sort the subarray @data[l..r]. This is much faster than a Ruby-layer heapsort because it is mostly happening in C.
570
- private def sort_subarray(l, r)
571
- # heapsort_subarray(l, r)
572
- return if l == r # 1-array already sorted!
573
-
574
- l -= 1
575
- r -= 1
576
- @data[l..r] = @data[l..r].sort_by(&:x)
577
- end
578
-
579
- ########################################
580
- # Debugging support
581
- #
582
- # These methods are not written for speed
583
-
584
- # Check that our data satisfies the requirements of a Priority Search Tree:
585
- # - min-max heap in y
586
- # - all the x values in the left subtree are less than all the x values in the right subtree
587
- def verify_properties
588
- # It's a min-max heap in y
589
- (2..@size).each do |node|
590
- level = Math.log2(node).floor
591
- parent_level = level - 1
592
-
593
- _, _, min_y, max_y = minmax_in_subtree(node)
594
- parent_y = val_at(parent(node)).y
595
-
596
- it_is_fine = if parent_level.even?
597
- # max!
598
- parent_y > max_y
599
- else
600
- parent_y < min_y
601
- end
602
-
603
- raise "Heap property violated at child #{node}" unless it_is_fine
604
- end
605
-
606
- # Left subtree has x values less than all of the right subtree
607
- (1..@size).each do |node|
608
- next if right(node) >= @size
609
-
610
- left_max = max_x_in_subtree(left(node))
611
- right_min = min_x_in_subtree(right(node))
612
-
613
- raise "Left-right property of x-values violated at #{node}" unless left_max < right_min
614
- end
615
-
616
- nil
617
- end
618
-
619
- private def max_x_in_subtree(root)
620
- minmax_in_subtree(root)[1]
621
- end
622
-
623
- private def min_x_in_subtree(root)
624
- minmax_in_subtree(root)[0]
625
- end
626
-
627
- # Return min_x, max_x, min_y, max_y in subtree rooted at and including root
628
- private def minmax_in_subtree(root)
629
- @minmax_vals ||= []
630
- @minmax_vals[root] ||= calc_minmax_at(root).freeze
631
- end
632
-
633
- # No memoization
634
- private def calc_minmax_at(root)
635
- return [INFINITY, -INFINITY, INFINITY, -INFINITY] if root > @size
636
-
637
- pair = val_at(root)
638
-
639
- return [pair.x, pair.x, pair.y, pair.y] if leaf?(root)
640
-
641
- left = left(root)
642
- left_min_max = minmax_in_subtree(left)
643
- return left_min_max if one_child?(root)
644
-
645
- right = right(root)
646
- right_min_max = minmax_in_subtree(right)
647
-
648
- [
649
- [pair.x, left_min_max[0], right_min_max[0]].min,
650
- [pair.x, left_min_max[1], right_min_max[1]].max,
651
- [pair.y, left_min_max[2], right_min_max[2]].min,
652
- [pair.y, left_min_max[3], right_min_max[3]].max
653
- ]
654
- end
655
-
656
- private def output_quasi_dot
657
- (2..@size).to_a.reverse.map do |node|
658
- "#{val_at(parent(node)).fmt} -- #{val_at(node).fmt}"
659
- end.join("\n")
660
- end
661
-
662
- private def pair_to_s
663
- end
664
-
665
- ########################################
666
- # Dead code
667
-
668
- end