data_structures_rmolinari 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78d2f04135a4e241d64b8d6c71fbe05d84c1995ee2ce58362df313a9c75f527b
4
- data.tar.gz: 6def12eef0b4aeb5ef12eff37f89bb49b695c60369e02c407969255cf03c5f57
3
+ metadata.gz: b194edc412e6bce073e73f874c4a392dd821f333e4ba9b07908af397819cdffa
4
+ data.tar.gz: 5dabb2d689cb0b0ebacf2b804e6c27ece1e3e7f7db604f5722d1657915bf0bda
5
5
  SHA512:
6
- metadata.gz: '08a7b5abf025e232d5863310860f5b0e2bfea014688750b34e48028cc2a57f4bc1ba4d0638974e64342fe377d729dd7feab0b985db684035f6e8905f8f7b708c'
7
- data.tar.gz: 923ac00b27e35c41d392d465aacdd73dd8313d46ac66a741eb008355df2abbb6c10ee62bfe8bce9e5a8b0623be06dea98a298f5ef07efb10cb5cfc0ec447bf66
6
+ metadata.gz: cf36912f4242d7a91e8227464993ac634441bdcff30d7b6ec5a10149cfdc0a14a132fa5c319f2edafdc9d9ab6b11ee0af2354fdd45f2638d0099ccdb84eff436
7
+ data.tar.gz: a1ce15decbc869b9f26902d391f27967778032f54d2543b914d93bbe15ab1ceb7aae5c576cc93f99c80aa8f62d8f23c747591a6beb520d7c756facb125a5a72d
data/CHANGELOG.md CHANGED
@@ -2,7 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
- ## [0.5.0] 2023-02.03
5
+ ## [0.5.4] 2023-12-12
6
+
7
+ (Unfortunately this note was added long after the changes were made and my memory of the changes is poor.)
8
+
9
+ - SegmentTree
10
+ - Sum version is provided
11
+ - PrioritySearchTree
12
+ - Open regions
13
+
14
+ - Some bug fixes
15
+ - Some refactoring of test cases
16
+
17
+ ## [0.5.1] - [0.5.3]
18
+
19
+ - Releases to fix some bad gemspec data.
20
+
21
+ ## [0.5.0] 2023-02-03
6
22
 
7
23
  - SegmentTree
8
24
  - Reorganize the code into a SegmentTree submodule.
data/README.md CHANGED
@@ -106,6 +106,9 @@ There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0,
106
106
  (These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
107
107
  [[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
108
108
 
109
+ Each method also has a named parameter `open:` that makes the search region an open set. For example, if we call `smallest_x_in_ne`
110
+ with `open: true` then we consider points satisfying x > x0 and y > y0. The default value for this parameter is always `false`.
111
+
109
112
  The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
110
113
  the number of points actually enumerated.
111
114
 
@@ -32,16 +32,10 @@ module DataStructuresRMolinari::Algorithms
32
32
  x_max = sorted_points.last.x
33
33
  y_min, y_max = sorted_points.map(&:y).minmax
34
34
 
35
- # Half of the smallest non-zero gap between x values. This is needed below
36
- epsilon = INFINITY
37
-
38
35
  # Enumerate type 1
39
36
  sorted_points.each_cons(2) do |pt1, pt2|
40
37
  next if pt1.x == pt2.x
41
38
 
42
- d = (pt2.x.to_f - pt1.x) / 2
43
- epsilon = d if d < epsilon
44
-
45
39
  yield [pt1.x, pt2.x, y_min, y_max]
46
40
  end
47
41
 
@@ -54,9 +48,9 @@ module DataStructuresRMolinari::Algorithms
54
48
  next if pt.y == y_max # 0 area
55
49
  next if pt.y == y_min # type 1
56
50
 
57
- # Epsilon means we don't just get pt back again. The De et al. paper is rather vague.
58
- left_bound = max_pst.largest_x_in_nw( pt.x - epsilon, pt.y)
59
- right_bound = max_pst.smallest_x_in_ne(pt.x + epsilon, pt.y)
51
+ # Open region means we don't just get pt back again. The De et al. paper is rather vague.
52
+ left_bound = max_pst.largest_x_in_nw(pt.x, pt.y, open: true)
53
+ right_bound = max_pst.smallest_x_in_ne(pt.x, pt.y, open: true)
60
54
 
61
55
  left = left_bound.x.infinite? ? x_min : left_bound.x
62
56
  right = right_bound.x.infinite? ? x_max : right_bound.x
@@ -74,7 +68,7 @@ module DataStructuresRMolinari::Algorithms
74
68
  #
75
69
  # largest_y_in_3_sided(l, r, y_min)
76
70
  #
77
- # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use l + epsilon and r - epsilon.
71
+ # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use an open search region instead.
78
72
  until max_pst.empty?
79
73
  top_pt = max_pst.delete_top!
80
74
  top = top_pt.y
@@ -85,7 +79,7 @@ module DataStructuresRMolinari::Algorithms
85
79
  r = x_max
86
80
 
87
81
  loop do
88
- next_pt = max_pst.largest_y_in_3_sided(l + epsilon, r - epsilon, y_min)
82
+ next_pt = max_pst.largest_y_in_3_sided(l, r, y_min, open: true)
89
83
 
90
84
  bottom = next_pt.y.infinite? ? y_min : next_pt.y
91
85
  yield [l, r, bottom, top]
@@ -78,8 +78,9 @@ class DataStructuresRMolinari::Heap
78
78
  # Insert a new element into the heap with the given priority.
79
79
  # @param value the item to be inserted.
80
80
  # - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
81
- # @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
82
- def insert(value, priority)
81
+ # @param priority (optional) the priority to use for the new item. The values used as priorities must be totally ordered via +<=>+.
82
+ # If omitted we use the inserted value as its own priority.
83
+ def insert(value, priority = value)
83
84
  raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
84
85
 
85
86
  @size += 1
@@ -103,7 +104,7 @@ class DataStructuresRMolinari::Heap
103
104
  # Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
104
105
  # @return (see #top)
105
106
  def pop
106
- result = top
107
+ result = top # raises if empty
107
108
  assign(@data[@size], root)
108
109
 
109
110
  @data[@size] = nil
@@ -30,6 +30,10 @@ require_relative 'shared'
30
30
  #
31
31
  # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
32
32
  #
33
+ # Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
34
+ # +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y > y0. The default value for this parameter
35
+ # is always +false+. See below for limitations in this functionality.
36
+ #
33
37
  # If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
34
38
  #
35
39
  # - +delete_top!+: remove the top (max-y) element of the tree and return it.
@@ -39,14 +43,123 @@ require_relative 'shared'
39
43
  # In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
40
44
  # but it hasn't been written yet. See issue #9.
41
45
  #
42
- # There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
46
+ # There is a related data structure called a Min-max priority search tree so we have called this a "Max priority search tree", or
43
47
  # MaxPST.
44
48
  #
49
+ # ## Open regions: limitations
50
+ #
51
+ # Calls involving open regions - using the +open:+ argument - are implemented internally using closed regions in which the
52
+ # boundaries have been "nudged" by a tiny amount so as to exclude points on the boundary. Since there are only finitely many points
53
+ # in the PST there are no limit points, and the open region given by x > x0 and y > y0 contains the same PST members as a closed
54
+ # region x >= x0 + e and y >= y0 + e for small enough values of e.
55
+ #
56
+ # But it is hard to determine e robustly. Indeed, assume for the moment that all x- and y-values are floating-point, i.e., IEEE754
57
+ # double-precision. We can easily determine a value for e: the smallest difference between any two distinct x-values or any two
58
+ # distinct y-values. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive
59
+ # x-values
60
+ #
61
+ # 0, 5e-324, 1, and 2.
62
+ #
63
+ # (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
64
+ # values are represented, 1 + e = 1.0. Any query on an open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
65
+ # and we may get the wrong result.
66
+ #
67
+ # The solution here is to replace (x0, y0) with (x0.next_float, y0.next_float) rather than (x0 + e, y0 + e). +Float::next_float+ is
68
+ # an implementation of the IEEE754 operation +nextafter+ which gives, for a floating point value z, the smallest floating point
69
+ # value larger than z. If our PST contains only points with finite, floating point coordinates, then this approach implements open
70
+ # search regions correctly. This is what the implementation currently does.
71
+ #
72
+ # However, when coordinates are not all Floats there are cases when this approach will fail. Consider the case in which we have the
73
+ # following consecutive x-values:
74
+ #
75
+ # 0, 1e-324, 2e-324, 1.
76
+ #
77
+ # (Here 1e-324 and 2e-324 are the Ruby values +Rational(1, 10**324)+ and +Rational(2, 10**324)+.) Then, given an argument x0 =
78
+ # 1e-324, +x0.to_f == 0.0+ and so +x0.to_f.next_float == 5e-324+ and the region we use internally for our query incorrectly excludes
79
+ # the point with x value 2e-324. This is a bug in the code.
80
+ #
81
+ # There are also issues with numeric values (Integer or Rational) that are larger than the maximum floating point value,
82
+ # approximately 1.8e308. For such values z, +z.to_f == Float::INFINITY+, and we will incorrectly exclude any larger-but-finite
83
+ # coordinates from the search region.
84
+ #
85
+ # Yet more issues arise when the coordinates of points in the PST aren't numeric at all, but are some other sort of comparable
86
+ # objects, such as arrays.
87
+ #
88
+ # So, we may say that queries on open regions will work as expected if either
89
+ # - all coordinates of the points in the PST are finite Ruby Floats, or
90
+ # - all coordinates of the points are finite Numeric values and there is no pair of distinct x-values s < t (or pair of
91
+ # distinct y-values) for which +s.to_f.next_float > t+.
92
+ #
93
+ # Otherwise, use this functionality at your own risk, and not at all with coordinates that do not respond reasonably to +to_f+.
94
+ #
45
95
  # References:
46
96
  # * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985.
47
- # * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
48
- # Geometry, 2011
97
+ # * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational Geometry, 2011
49
98
  class DataStructuresRMolinari::MaxPrioritySearchTree
99
+ # IMPLEMENTATION NOTES
100
+ #
101
+ # Open regions
102
+ #
103
+ # The search methods each have an argument +open:+ that changes the search region from closed (x >= x0) to open (x > x0). I had
104
+ # initially intended to implement this by varying the internals of the search code. But this turned out to be error-prone because
105
+ # the code is written for closed regions. When deciding which children to take in the next level of the tree, say, we assume the
106
+ # search space is closed, sometimes in a way that means we won't find the optimal point when the search region is open. Changing
107
+ # the logic turned out to be finicky and buggy.
108
+ #
109
+ # It is much easier and safer to replace a search request on, say (x0, y0, open) with (x0 + e, y0 + e, closed) where e[psilon] is
110
+ # small enough that we don't exclude any points other than those on the boundary of the closed region. Then we can just call the
111
+ # existing search code as-is. This is what the code did at first. We calculated e as the smallest difference between any two
112
+ # distinct x-values or distinct y-values.
113
+ #
114
+ # But this approach is not robust. Assume for the moment that all x- and y-values are floating-point. We can easily determine the
115
+ # value e. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive x-values
116
+ #
117
+ # 0, 5e-324, 1, and 2.
118
+ #
119
+ # (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
120
+ # values are represented, 1 + e = 1.0. Any query on an open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
121
+ # and we may get the wrong result.
122
+ #
123
+ # I see the following possible approaches.
124
+ #
125
+ # 1. Rewrite the code to do open regions "properly"
126
+ # Pro:
127
+ # - we don't need to worry about numerical issues.
128
+ # Con:
129
+ # - too complicated and error-prone.
130
+ #
131
+ # 2. Instead of calculating e like this, replace each bounding value x with x.next_float or x.prev_float as required.
132
+ # Note that #next_float gives the next-largest value representable as a floating point value.
133
+ # Pro:
134
+ # - we don't need to worry about the scaling issues in type Float
135
+ # - simple and supported by the Ruby libraries (and by the C standard library if we decide to implement as a C extension)
136
+ # Con:
137
+ # - [minor] will fail with Float::INFINITY.
138
+ # - this is an unlikely edge case that could be handled directly or simply documented away.
139
+ # - won't work if the x0 value is a Numeric outside of Float range, roughly [-1.798e308, 1.798e308]
140
+ # - For example, (10**400).to_f.next_float == Infinity
141
+ # - We could warn about this case in documentation
142
+ # - For numeric values x in the Float range we would need to check that x.to_f.next_float > x, but I suspect that this is
143
+ # guaranteed.
144
+ # - won't work with comparable but non-numeric values, like arrays, or some sort of user-defined type
145
+ # - We would simply have to document that this case is not supported (or just throw an exception on +#to_f+)
146
+ #
147
+ # 3. Handle numeric values on a case-by-case basis. So for numeric values x in the float range we use x.to_f.next_float while for
148
+ # other values - like BigDecimal - do something different that depends on the next value in the data set with an x-value
149
+ # greater than x.
150
+ # Pro:
151
+ # - more cases are handled
152
+ # Con:
153
+ # - complicated and perhaps non-performant in the general case
154
+ # - doesn't handle non-numeric cases (just like idea 2)
155
+ # - possibly error-prone in corner cases.
156
+ #
157
+ # Idea: maintain @x_chain and @y_chain hashes mapping each distinct x/y value to the next largest and smallest such
158
+ # value. Then just use that. Lookup is fast. Downside: O(n) extra memory.
159
+ #
160
+ # For now approach 2 looks best. It doesn't cover all cases, but covers cases most likely in practice - the Float range is large -
161
+ # and other cases can be documented away in a clean way.
162
+
50
163
  include Shared
51
164
  include BinaryTreeArithmetic
52
165
 
@@ -83,35 +196,54 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
83
196
 
84
197
  # Return the highest point in P to the "northeast" of (x0, y0).
85
198
  #
86
- # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
87
- # structure. Define p* as
199
+ # Let Q be the northeast quadrant defined by the point (x0, y0):
200
+ # - \[x0, infty) X [y0, infty) if +open+ is false and
201
+ # - (x0, infty) X (y0, infty) if +open+ is true.
202
+ #
203
+ # Let P be the points in this data structure.
204
+ #
205
+ # Define p* as
88
206
  #
89
207
  # - (infty, -infty) if Q \intersect P is empty and
90
208
  # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
91
209
  #
92
210
  # This method returns p* in O(log n) time and O(1) extra space.
93
- def largest_y_in_ne(x0, y0)
94
- largest_y_in_quadrant(x0, y0, :ne)
211
+ def largest_y_in_ne(x0, y0, open: false)
212
+ if open
213
+ largest_y_in_quadrant(slightly_bigger(x0), slightly_bigger(y0), :ne)
214
+ else
215
+ largest_y_in_quadrant(x0, y0, :ne)
216
+ end
95
217
  end
96
218
 
97
219
  # Return the highest point in P to the "northwest" of (x0, y0).
98
220
  #
99
- # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
100
- # structure. Define p* as
221
+ # Let Q be the northwest quadrant defined by the point (x0, y0):
222
+ # - (-infty, x0] X [y0, infty) if +open+ is false and
223
+ # - (-infty, x0) X (y0, infty) if +open+ is true.
224
+ #
225
+ # Let P be the points in this data structure.
226
+ #
227
+ # Define p* as
101
228
  #
102
229
  # - (-infty, -infty) if Q \intersect P is empty and
103
230
  # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
104
231
  #
105
232
  # This method returns p* in O(log n) time and O(1) extra space.
106
- def largest_y_in_nw(x0, y0)
107
- largest_y_in_quadrant(x0, y0, :nw)
233
+ def largest_y_in_nw(x0, y0, open: false)
234
+ if open
235
+ largest_y_in_quadrant(slightly_smaller(x0), slightly_bigger(y0), :nw)
236
+ else
237
+ largest_y_in_quadrant(x0, y0, :nw)
238
+ end
108
239
  end
109
240
 
110
- # The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both largest_y_in_ne and
111
- # largest_y_in_nw
241
+ # The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both
242
+ # largest_y_in_ne and largest_y_in_nw
112
243
  #
113
- # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster than the
114
- # general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
244
+ # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinity, y0) so we don't really need this. But it's a bit faster
245
+ # than the general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
246
+ # structure.
115
247
  #
116
248
  # From the paper:
117
249
  #
@@ -137,6 +269,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
137
269
  sufficient_x = ->(x) { x <= x0 }
138
270
  end
139
271
 
272
+ return best if empty?
273
+
140
274
  # x == x0 or is not sufficient. This test sometimes excludes the other child of a node from consideration.
141
275
  exclusionary_x = ->(x) { x == x0 || !sufficient_x.call(x) }
142
276
 
@@ -200,28 +334,46 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
200
334
 
201
335
  # Return the leftmost (min-x) point in P to the northeast of (x0, y0).
202
336
  #
203
- # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
204
- # structure. Define p* as
337
+ # Let Q be the northeast quadrant defined by the point (x0, y0):
338
+ # - \[x0, infty) X [y0, infty) if +open+ is false and
339
+ # - (x0, infty) X (y0, infty) if +open+ is true.
340
+ #
341
+ # Let P be the points in this data structure.
342
+ #
343
+ # Define p* as
205
344
  #
206
345
  # - (infty, infty) if Q \intersect P is empty and
207
346
  # - the leftmost (min-x) point in Q \intersect P otherwise.
208
347
  #
209
348
  # This method returns p* in O(log n) time and O(1) extra space.
210
- def smallest_x_in_ne(x0, y0)
211
- extremal_in_x_dimension(x0, y0, :ne)
349
+ def smallest_x_in_ne(x0, y0, open: false)
350
+ if open
351
+ extremal_in_x_dimension(slightly_bigger(x0), slightly_bigger(y0), :ne)
352
+ else
353
+ extremal_in_x_dimension(x0, y0, :ne)
354
+ end
212
355
  end
213
356
 
214
357
  # Return the rightmost (max-x) point in P to the northwest of (x0, y0).
215
358
  #
216
- # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
217
- # structure. Define p* as
359
+ # Let Q be the northwest quadrant defined by the point (x0, y0):
360
+ # - (-infty, x0] X [y0, infty) if +open+ is false and
361
+ # - (-infty, x0) X (y0, infty) if +open+ is true.
362
+ #
363
+ # Let P be the points in this data structure.
364
+ #
365
+ # Define p* as
218
366
  #
219
367
  # - (-infty, infty) if Q \intersect P is empty and
220
368
  # - the rightmost (max-x) point in Q \intersect P otherwise.
221
369
  #
222
370
  # This method returns p* in O(log n) time and O(1) extra space.
223
- def largest_x_in_nw(x0, y0)
224
- extremal_in_x_dimension(x0, y0, :nw)
371
+ def largest_x_in_nw(x0, y0, open: false)
372
+ if open
373
+ extremal_in_x_dimension(slightly_smaller(x0), slightly_bigger(y0), :nw)
374
+ else
375
+ extremal_in_x_dimension(x0, y0, :nw)
376
+ end
225
377
  end
226
378
 
227
379
  # A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
@@ -369,14 +521,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
369
521
 
370
522
  # Return the highest point of P in the box bounded by x0, x1, and y0.
371
523
  #
372
- # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
373
- # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
524
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
525
+ # - \[x0, x1] X [y0, infty) if +open+ is false and
526
+ # - (x0, x1) X (y0, infty) if +open+ is true.
527
+ #
528
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
529
+ #
530
+ # Let P be the set of points in the MaxPST.
531
+ #
532
+ # Define p* as
374
533
  #
375
534
  # - (infty, -infty) if Q \intersect P is empty and
376
535
  # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
377
536
  #
378
537
  # This method returns p* in O(log n) time and O(1) extra space.
379
- def largest_y_in_3_sided(x0, x1, y0)
538
+ def largest_y_in_3_sided(x0, x1, y0, open: false)
539
+ if open
540
+ x0 = slightly_bigger(x0)
541
+ x1 = slightly_smaller(x1)
542
+ y0 = slightly_bigger(y0)
543
+ end
380
544
  # From the paper:
381
545
  #
382
546
  # The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P̸ is not \empty,
@@ -572,14 +736,27 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
572
736
 
573
737
  # Enumerate the points of P in the box bounded by x0, x1, and y0.
574
738
  #
575
- # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
576
- # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
739
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
740
+ # - \[x0, x1] X [y0, infty) if +open+ is false and
741
+ # - (x0, x1) X (y0, infty) if +open+ is true.
742
+ #
743
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
744
+ #
745
+ # Let P be the set of points in the MaxPST.
746
+ #
747
+ # We find and enumerate all the points in Q \intersect P.
577
748
  #
578
749
  # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
579
750
  # the intersection.
580
751
  #
581
752
  # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
582
- def enumerate_3_sided(x0, x1, y0)
753
+ def enumerate_3_sided(x0, x1, y0, open: false)
754
+ if open
755
+ x0 = slightly_bigger(x0)
756
+ x1 = slightly_smaller(x1)
757
+ y0 = slightly_bigger(y0)
758
+ end
759
+
583
760
  # From the paper
584
761
  #
585
762
  # Given three real numbers x0, x1, and y0 define the three sided range Q = [x0, x1] X [y0, infty). Algorithm
@@ -1143,7 +1320,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1143
1320
 
1144
1321
  # We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
1145
1322
  # defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
1146
- # worth the trouble of both a bespoke heapsort the special sorting algorithm of Katajainen and Pasanen.
1323
+ # worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
1147
1324
 
1148
1325
  # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
1149
1326
  # and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
@@ -1216,6 +1393,22 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1216
1393
  @data[l..r] = @data[l..r].sort_by(&:x)
1217
1394
  end
1218
1395
 
1396
+ # The smallest floating point number larger than x
1397
+ private def slightly_bigger(x)
1398
+ x_f = x.to_f
1399
+ raise "#{x} out of Float range" if x_f.infinite?
1400
+
1401
+ x_f.next_float
1402
+ end
1403
+
1404
+ # The largest floating point number smaller than x
1405
+ private def slightly_smaller(x)
1406
+ x_f = x.to_f
1407
+ raise "#{x} out of Float range" if x_f.infinite?
1408
+
1409
+ x_f.prev_float
1410
+ end
1411
+
1219
1412
  ########################################
1220
1413
  # Debugging support
1221
1414
  #
@@ -22,8 +22,7 @@ require_relative 'shared'
22
22
  # et al. But we don't do that, as we create a separate array of Points.
23
23
  # - Whereas the implementation of MaxPST means that client code gets the same (x, y) objects back in results as it passed into the
24
24
  # constructor, that's not the case here.
25
- # - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return
26
- # (different) instances of +Point+ in response to queries.
25
+ # - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return (different) instances of +Point+ in response to queries.
27
26
  # - client code is unlikely to care, but be aware of this, just in case.
28
27
  #
29
28
  # Given a set of n points, we can answer the following questions quickly:
@@ -37,6 +36,10 @@ require_relative 'shared'
37
36
  #
38
37
  # (Here, "leftmost/rightmost" means "minimal/maximal x", and "lowest" means "minimal y".)
39
38
  #
39
+ # Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
40
+ # +smallest_x_in_se+ with +open: true+ then we consider points satisfying x > x0 and y < y0. The default value for this parameter
41
+ # is always +false+. See the documentation of MaxPrioritySearchTree for limitations of this support.
42
+ #
40
43
  # The first 5 operations take O(log n) time and O(1) extra space.
41
44
  #
42
45
  # The final operation (enumerate) takes O(m + log n) time and O(1) extra space, where m is the number of points that are enumerated.
@@ -77,28 +80,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
77
80
 
78
81
  # Return the "lowest" point in P to the "southeast" of (x0, y0).
79
82
  #
80
- # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
81
- # structure. Define p* as
83
+ # Let Q be the southeast quadrant defined by the point (x0, y0):
84
+ # - \[x0, infty) X (-infty, y0] if +open+ is false and
85
+ # - (x0, infty) X (-infty, y0) if +open+ is true.
86
+ #
87
+ # Let P be the points in this data structure.
88
+ #
89
+ # Define p* as
82
90
  #
83
91
  # - (infty, infty) if Q \intersect P is empty and
84
92
  # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
85
93
  #
86
94
  # This method returns p* in O(log n) time and O(1) extra space.
87
- def smallest_y_in_se(x0, y0)
88
- flip @max_pst.largest_y_in_ne(x0, -y0)
95
+ def smallest_y_in_se(x0, y0, open: false)
96
+ flip @max_pst.largest_y_in_ne(x0, -y0, open:)
89
97
  end
90
98
 
91
99
  # Return the "lowest" point in P to the "southwest" of (x0, y0).
92
100
  #
93
- # Let Q = (-infty, x0] X (-infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
94
- # structure. Define p* as
101
+ # Let Q be the southwest quadrant defined by the point (x0, y0):
102
+ # - (-infty, x0] X (-infty, y0] if +open+ is false and
103
+ # - (-infty, x0) X (-infty, y0) if +open+ is true.
104
+ #
105
+ # Let P be the points in this data structure.
106
+ #
107
+ # Define p* as
95
108
  #
96
109
  # - (-infty, infty) if Q \intersect P is empty and
97
110
  # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
98
111
  #
99
112
  # This method returns p* in O(log n) time and O(1) extra space.
100
- def smallest_y_in_sw(x0, y0)
101
- flip @max_pst.largest_y_in_nw(x0, -y0)
113
+ def smallest_y_in_sw(x0, y0, open: false)
114
+ flip @max_pst.largest_y_in_nw(x0, -y0, open:)
102
115
  end
103
116
 
104
117
  ########################################
@@ -106,28 +119,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
106
119
 
107
120
  # Return the leftmost (min-x) point in P to the southeast of (x0, y0).
108
121
  #
109
- # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
110
- # structure. Define p* as
122
+ # Let Q be the southeast quadrant defined by the point (x0, y0):
123
+ # - \[x0, infty) X (-infty, y0] if +open+ is false and
124
+ # - (x0, infty) X (-infty, y0) if +open+ is true.
125
+ #
126
+ # Let P be the points in this data structure.
127
+ #
128
+ # Define p* as
111
129
  #
112
130
  # - (infty, -infty) if Q \intersect P is empty and
113
131
  # - the leftmost (min-x) point in Q \intersect P otherwise.
114
132
  #
115
133
  # This method returns p* in O(log n) time and O(1) extra space.
116
- def smallest_x_in_se(x0, y0)
117
- flip @max_pst.smallest_x_in_ne(x0, -y0)
134
+ def smallest_x_in_se(x0, y0, open: false)
135
+ flip @max_pst.smallest_x_in_ne(x0, -y0, open:)
118
136
  end
119
137
 
120
138
  # Return the rightmost (max-x) point in P to the southwest of (x0, y0).
121
139
  #
122
- # Let Q = (-infty, x0] X (infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
123
- # structure. Define p* as
140
+ # Let Q be the southwest quadrant defined by the point (x0, y0):
141
+ # - (-infty, x0] X (-infty, y0] if +open+ is false and
142
+ # - (-infty, x0) X (-infty, y0) if +open+ is true.
143
+ #
144
+ # Let P be the points in this data structure.
145
+ #
146
+ # Define p* as
124
147
  #
125
148
  # - (-infty, -infty) if Q \intersect P is empty and
126
149
  # - the rightmost (max-x) point in Q \intersect P otherwise.
127
150
  #
128
151
  # This method returns p* in O(log n) time and O(1) extra space.
129
- def largest_x_in_sw(x0, y0)
130
- flip @max_pst.largest_x_in_nw(x0, -y0)
152
+ def largest_x_in_sw(x0, y0, open: false)
153
+ flip @max_pst.largest_x_in_nw(x0, -y0, open:)
131
154
  end
132
155
 
133
156
  ########################################
@@ -135,15 +158,22 @@ class DataStructuresRMolinari::MinPrioritySearchTree
135
158
 
136
159
  # Return the lowest point of P in the box bounded by x0, x1, and y0.
137
160
  #
138
- # Let Q = [x0, x1] X (infty, y0] be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
139
- # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
161
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
162
+ # - \[x0, x1] X (-infty, y0] if +open+ is false and
163
+ # - (x0, x1) X (-infty, y0) if +open+ is true.
164
+ #
165
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
166
+ #
167
+ # Let P be the set of points in the MinPST.
168
+ #
169
+ # Define p* as
140
170
  #
141
171
  # - (infty, infty) if Q \intersect P is empty and
142
172
  # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
143
173
  #
144
174
  # This method returns p* in O(log n) time and O(1) extra space.
145
- def smallest_y_in_3_sided(x0, x1, y0)
146
- flip @max_pst.largest_y_in_3_sided(x0, x1, -y0)
175
+ def smallest_y_in_3_sided(x0, x1, y0, open: false)
176
+ flip @max_pst.largest_y_in_3_sided(x0, x1, -y0, open:)
147
177
  end
148
178
 
149
179
  ########################################
@@ -151,18 +181,25 @@ class DataStructuresRMolinari::MinPrioritySearchTree
151
181
 
152
182
  # Enumerate the points of P in the box bounded by x0, x1, and y0.
153
183
  #
154
- # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
155
- # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
184
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
185
+ # - \[x0, x1] X (-infty, y0] if +open+ is false and
186
+ # - (x0, x1) X (-infty, y0) if +open+ is true.
187
+ #
188
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
189
+ #
190
+ # Let P be the set of points in the MinPST.
191
+ #
192
+ # We find and enumerate all the points in Q \intersect P.
156
193
  #
157
194
  # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
158
195
  # the intersection.
159
196
  #
160
197
  # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
161
- def enumerate_3_sided(x0, x1, y0)
198
+ def enumerate_3_sided(x0, x1, y0, open: false)
162
199
  if block_given?
163
- @max_pst.enumerate_3_sided(x0, x1, -y0) { |point| yield(flip point) }
200
+ @max_pst.enumerate_3_sided(x0, x1, -y0, open:) { |point| yield(flip point) }
164
201
  else
165
- Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0).map { |pt| flip pt })
202
+ Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0, open:).map { |pt| flip pt })
166
203
  end
167
204
  end
168
205
 
@@ -36,10 +36,15 @@ module DataStructuresRMolinari
36
36
  # - +:c+ or +:ruby+
37
37
  # - the C version will run faster but for now may be buggier and harder to debug
38
38
  module_function def construct(data, operation, lang)
39
- operation.must_be_in [:max, :index_of_max]
39
+ operation.must_be_in [:max, :index_of_max, :sum]
40
40
  lang.must_be_in [:ruby, :c]
41
41
 
42
- klass = operation == :max ? MaxValSegmentTree : IndexOfMaxValSegmentTree
42
+ klass = case operation
43
+ when :max then MaxValSegmentTree
44
+ when :index_of_max then IndexOfMaxValSegmentTree
45
+ when :sum then SumSegmentTree
46
+ else raise ArgumentError, "Unknown operation #{operation}"
47
+ end
43
48
  template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
44
49
 
45
50
  klass.new(template, data)
@@ -107,6 +112,33 @@ module DataStructuresRMolinari
107
112
  end
108
113
  end
109
114
 
115
+ class SumSegmentTree
116
+ extend Forwardable
117
+
118
+ # Tell the tree that the value at idx has changed
119
+ def_delegator :@structure, :update_at
120
+
121
+ # @param (see MaxValSegmentTree#initialize)
122
+ def initialize(template_klass, data)
123
+ data.must_be_a Enumerable
124
+
125
+ @structure = template_klass.new(
126
+ combine: ->(a, b) { a + b },
127
+ single_cell_array_val: ->(i) { data[i] },
128
+ size: data.size,
129
+ identity: 0
130
+ )
131
+ end
132
+
133
+ # The sum of the values in A(i..j)
134
+ #
135
+ # The arguments must be integers in 0...(A.size)
136
+ # @return the sum of the values in A(i..j) or 0 if i > j.
137
+ def sum_on(i, j)
138
+ @structure.query_on(i, j)
139
+ end
140
+ end
141
+
110
142
  # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
111
143
  #
112
144
  # See SegmentTreeTemplate for more information.
@@ -71,6 +71,7 @@ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
71
71
  # Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
72
72
  # construction.
73
73
  def update_at(idx)
74
+ raise DataError, "Bad update index #{idx} (size = #{@size})" unless (0...@size).cover?(idx)
74
75
 
75
76
  update_val_at(idx, root, 0, @size - 1)
76
77
  end
@@ -69,12 +69,9 @@ module Shared
69
69
  # duplication. When nil we don't call anything and just use the elements themselves.
70
70
  def contains_duplicates?(enum, by: nil)
71
71
  seen = Set.new
72
- enum.each do |v|
72
+ enum.any? do |v|
73
73
  v = v.send(by) if by
74
- return true if seen.include? v
75
-
76
- seen << v
74
+ !seen.add?(v)
77
75
  end
78
- false
79
76
  end
80
77
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-15 00:00:00.000000000 Z
11
+ date: 2023-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be