data_structures_rmolinari 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d10fb46bf10f119b95239cf8e3ed06585804d37ccb9b7b1da0c7139dfcb31b9
4
- data.tar.gz: dc393fb3e3f597df278832b3c4faae08866ef93ad747ee0a30fca19345fa6c8a
3
+ metadata.gz: b194edc412e6bce073e73f874c4a392dd821f333e4ba9b07908af397819cdffa
4
+ data.tar.gz: 5dabb2d689cb0b0ebacf2b804e6c27ece1e3e7f7db604f5722d1657915bf0bda
5
5
  SHA512:
6
- metadata.gz: 6ec16b3eb2a1f4deccf8a8c45cb20c9558a0267049236389de53444124feeefe282751f964f2cf1875e1fccd5fccc6ad5fea4edd0098fed7b358bc6051666b4e
7
- data.tar.gz: 0435cf6031c7e40bf9706a7c8d7b493e7e31f7667131be682de59ad5dcd26a5151add0d03cee83b23da6fa6f0c132c964ef551341dc94ff5d17cffddc2b48f2a
6
+ metadata.gz: cf36912f4242d7a91e8227464993ac634441bdcff30d7b6ec5a10149cfdc0a14a132fa5c319f2edafdc9d9ab6b11ee0af2354fdd45f2638d0099ccdb84eff436
7
+ data.tar.gz: a1ce15decbc869b9f26902d391f27967778032f54d2543b914d93bbe15ab1ceb7aae5c576cc93f99c80aa8f62d8f23c747591a6beb520d7c756facb125a5a72d
data/CHANGELOG.md CHANGED
@@ -2,7 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
- ## [0.5.0] 2023-02.03
5
+ ## [0.5.4] 2023-12-12
6
+
7
+ (Unfortunately this note was added long after the changes were made and my memory of the changes is poor.)
8
+
9
+ - SegmentTree
10
+ - Sum version is provided
11
+ - PrioritySearchTree
12
+ - Open regions
13
+
14
+ - Some bug fixes
15
+ - Some refactoring of test cases
16
+
17
+ ## [0.5.1] - [0.5.3]
18
+
19
+ - Releases to fix some bad gemspec data.
20
+
21
+ ## [0.5.0] 2023-02-03
6
22
 
7
23
  - SegmentTree
8
24
  - Reorganize the code into a SegmentTree submodule.
data/README.md CHANGED
@@ -106,6 +106,9 @@ There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0,
106
106
  (These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
107
107
  [[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
108
108
 
109
+ Each method also has a named parameter `open:` that makes the search region an open set. For example, if we call `smallest_x_in_ne`
110
+ with `open: true` then we consider points satisfying x > x0 and y > y0. The default value for this parameter is always `false`.
111
+
109
112
  The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
110
113
  the number of points actually enumerated.
111
114
 
@@ -0,0 +1,19 @@
1
+ def generate_makefile(name)
2
+ extension_name = "c_#{name}"
3
+ source_name = "#{name}.c"
4
+
5
+ abort 'missing malloc()' unless have_func "malloc"
6
+ abort 'missing realloc()' unless have_func "realloc"
7
+
8
+ if try_cflags('-O3')
9
+ append_cflags('-O3')
10
+ end
11
+
12
+ dir_config(extension_name)
13
+
14
+ $srcs = [source_name, "../shared.c"]
15
+ $INCFLAGS << " -I$(srcdir)/.."
16
+ $VPATH << "$(srcdir)/.."
17
+
18
+ create_makefile("data_structures_rmolinari/#{extension_name}")
19
+ end
@@ -32,16 +32,10 @@ module DataStructuresRMolinari::Algorithms
32
32
  x_max = sorted_points.last.x
33
33
  y_min, y_max = sorted_points.map(&:y).minmax
34
34
 
35
- # Half of the smallest non-zero gap between x values. This is needed below
36
- epsilon = INFINITY
37
-
38
35
  # Enumerate type 1
39
36
  sorted_points.each_cons(2) do |pt1, pt2|
40
37
  next if pt1.x == pt2.x
41
38
 
42
- d = (pt2.x.to_f - pt1.x) / 2
43
- epsilon = d if d < epsilon
44
-
45
39
  yield [pt1.x, pt2.x, y_min, y_max]
46
40
  end
47
41
 
@@ -54,9 +48,9 @@ module DataStructuresRMolinari::Algorithms
54
48
  next if pt.y == y_max # 0 area
55
49
  next if pt.y == y_min # type 1
56
50
 
57
- # Epsilon means we don't just get pt back again. The De et al. paper is rather vague.
58
- left_bound = max_pst.largest_x_in_nw( pt.x - epsilon, pt.y)
59
- right_bound = max_pst.smallest_x_in_ne(pt.x + epsilon, pt.y)
51
+ # Open region means we don't just get pt back again. The De et al. paper is rather vague.
52
+ left_bound = max_pst.largest_x_in_nw(pt.x, pt.y, open: true)
53
+ right_bound = max_pst.smallest_x_in_ne(pt.x, pt.y, open: true)
60
54
 
61
55
  left = left_bound.x.infinite? ? x_min : left_bound.x
62
56
  right = right_bound.x.infinite? ? x_max : right_bound.x
@@ -74,7 +68,7 @@ module DataStructuresRMolinari::Algorithms
74
68
  #
75
69
  # largest_y_in_3_sided(l, r, y_min)
76
70
  #
77
- # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use l + epsilon and r - epsilon.
71
+ # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use an open search region instead.
78
72
  until max_pst.empty?
79
73
  top_pt = max_pst.delete_top!
80
74
  top = top_pt.y
@@ -85,7 +79,7 @@ module DataStructuresRMolinari::Algorithms
85
79
  r = x_max
86
80
 
87
81
  loop do
88
- next_pt = max_pst.largest_y_in_3_sided(l + epsilon, r - epsilon, y_min)
82
+ next_pt = max_pst.largest_y_in_3_sided(l, r, y_min, open: true)
89
83
 
90
84
  bottom = next_pt.y.infinite? ? y_min : next_pt.y
91
85
  yield [l, r, bottom, top]
@@ -78,8 +78,9 @@ class DataStructuresRMolinari::Heap
78
78
  # Insert a new element into the heap with the given priority.
79
79
  # @param value the item to be inserted.
80
80
  # - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
81
- # @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
82
- def insert(value, priority)
81
+ # @param priority (optional) the priority to use for the new item. The values used as priorities must be totally ordered via +<=>+.
82
+ # If omitted we use the inserted value as its own priority.
83
+ def insert(value, priority = value)
83
84
  raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
84
85
 
85
86
  @size += 1
@@ -103,7 +104,7 @@ class DataStructuresRMolinari::Heap
103
104
  # Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
104
105
  # @return (see #top)
105
106
  def pop
106
- result = top
107
+ result = top # raises if empty
107
108
  assign(@data[@size], root)
108
109
 
109
110
  @data[@size] = nil
@@ -30,6 +30,10 @@ require_relative 'shared'
30
30
  #
31
31
  # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
32
32
  #
33
+ # Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
34
+ # +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y > y0. The default value for this parameter
35
+ # is always +false+. See below for limitations in this functionality.
36
+ #
33
37
  # If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
34
38
  #
35
39
  # - +delete_top!+: remove the top (max-y) element of the tree and return it.
@@ -39,14 +43,123 @@ require_relative 'shared'
39
43
  # In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
40
44
  # but it hasn't been written yet. See issue #9.
41
45
  #
42
- # There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
46
+ # There is a related data structure called a Min-max priority search tree so we have called this a "Max priority search tree", or
43
47
  # MaxPST.
44
48
  #
49
+ # ## Open regions: limitations
50
+ #
51
+ # Calls involving open regions - using the +open:+ argument - are implemented internally using closed regions in which the
52
+ # boundaries have been "nudged" by a tiny amount so as to exclude points on the boundary. Since there are only finitely many points
53
+ # in the PST there are no limit points, and the open region given by x > x0 and y > y0 contains the same PST members as a closed
54
+ # region x >= x0 + e and y >= y0 + e for small enough values of e.
55
+ #
56
+ # But it is hard to determine e robustly. Indeed, assume for the moment that all x- and y-values are floating-point, i.e., IEEE754
57
+ # double-precision. We can easily determine a value for e: the smallest difference between any two distinct x-values or any two
58
+ # distinct y-values. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive
59
+ # x-values
60
+ #
61
+ # 0, 5e-324, 1, and 2.
62
+ #
63
+ # (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
64
+ # values are represented, 1 + e = 1.0. Any query on open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
65
+ # and we may get the wrong result.
66
+ #
67
+ # The solution here is to replace (x0, y0) with (x0.next_float, y0.next_float) rather than (x0 + e, y0 + e). +Float::next_float+ is
68
+ # an implementation of the IEEE754 operation +nextafter+ which gives, for a floating point value z, the smallest floating point
69
+ # value larger than z. If our PST contains only points with finite, floating point coordinates, then this approach implements open
70
+ # search regions correctly. This is what the implementation currently does.
71
+ #
72
+ # However, when coordinates are not all Floats there are cases when this approach will fail. Consider the case in which we have the
73
+ # following consecutive x-values:
74
+ #
75
+ # 0, 1e-324, 2e-324, 1.
76
+ #
77
+ # (Here 1e-324 and 2e-324 are the Ruby values +Rational(1, 10**324)+ and +Rational(2, 10**324)+.) Then, given an argument x0 =
78
+ # 1e-324, +x0.to_f == 0.0+ and so +x0.to_f.next_float == 5e-324+ and the region we use internally for our query incorrectly excludes
79
+ # the point with x value 2e-324. This is a bug in the code.
80
+ #
81
+ # There are also issues with numeric values (Integer or Rational) that are larger than the maximum floating point value,
82
+ # approximately 1.8e308. For such values z, +z.to_f == Float::INFINITY+, and we will incorrectly exclude any larger-but-finite
83
+ # coordinates from the search region.
84
+ #
85
+ # Yet more issues arise when the coordinates of points in the PST aren't numeric at all, but are some other sort of comparable
86
+ # objects, such as arrays.
87
+ #
88
+ # So, we may say that queries on open regions will work as expected if either
89
+ # - all coordinates of the points in the PST are finite Ruby Floats, or
90
+ # - all coordinates of the points are finite Numeric values and for no such pair of x-values s, t (or pair of y-values) is it such
91
+ # that +s.to_f.next_float > t+.
92
+ #
93
+ # Otherwise, use this functionality at your own risk, and not at all with coordinates that do not respond reasonably to +to_f+.
94
+ #
45
95
  # References:
46
96
  # * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985.
47
- # * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
48
- # Geometry, 2011
97
+ # * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational Geometry, 2011
49
98
  class DataStructuresRMolinari::MaxPrioritySearchTree
99
+ # IMPLEMENTATION NOTES
100
+ #
101
+ # Open regions
102
+ #
103
+ # The search methods each have an argument +open:+ that changes the search region from closed (x >= x0) to open (x > x0). I had
104
+ # initially intended to implement this by varying the internals of the search code. But this turned out to be error-prone because
105
+ # the code is written for closed regions. When deciding which children to take in the next level of the tree, say, we assume the
106
+ # search space is closed, sometimes in a way that means we won't find the optimal point when the search region is open. Changing
107
+ # the logic turned out to be finicky and buggy.
108
+ #
109
+ # It is much easier and safer to replace a search request on, say (x0, y0, open) with (x0 + e, y0 + e, closed) where e[psilon] is
110
+ # small enough that we don't exclude any points other than those on the boundary of the closed region. Then we can just call the
111
+ # existing search code as-is. This is what the code did at first. We calculated e as the smallest difference between any two
112
+ # distinct x-values or distinct y-values.
113
+ #
114
+ # But this approach is not robust. Assume for the moment that all x- and y-values are floating-point. We can easily determine the
115
+ # value e. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive x-values
116
+ #
117
+ # 0, 5e-324, 1, and 2.
118
+ #
119
+ # (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
120
+ # values are represented, 1 + e = 1.0. Any query on open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
121
+ # and we may get the wrong result.
122
+ #
123
+ # I see the following possible approaches.
124
+ #
125
+ # 1. Rewrite the code to do open regions "properly"
126
+ # Pro:
127
+ # - we don't need to worry about numerical issues.
128
+ # Con:
129
+ # - too complicated and error-prone.
130
+ #
131
+ # 2. Instead of calculating e like this, replace each bounding value x with x.next_float or x.prev_float as required.
132
+ # Note that #next_float gives the next-largest value representable as a floating point value.
133
+ # Pro:
134
+ # - we don't need to worry about the scaling issues in type Float
135
+ # - simple and supported by the Ruby libraries (and by the C standard library if we decide to implement as a C extension)
136
+ # Con:
137
+ # - [minor] will fail with Float::INFINITY.
138
+ # - this is an unlikely edge case that could be handled directly or simply documented away.
139
+ # - won't work if the x0 value is a Numeric outside of Float range, roughly [-1.798e308, 1.798e308]
140
+ # - For example, (10**400).to_f.next_float == Infinity
141
+ # - We could warn about this case in documentation
142
+ # - For numeric values x in the Float range we would need to check that x.to_f.next_float > x, but I suspect that this is
143
+ # guaranteed.
144
+ # - won't work with comparable but non-numeric values, like arrays, or some sort of user-defined type
145
+ # - We would simply have to document that this case is not supported (or just throw an exception on +#to_f+)
146
+ #
147
+ # 3. Handle numeric values on a case-by-case basis. So for numeric values x in the float range we use x.to_f.next_float while for
148
+ # other values - like BigDecimal - do something different that depends on the next value in the data set with an x-value
149
+ # greater than x.
150
+ # Pro:
151
+ # - more cases are handled
152
+ # Con:
153
+ # - complicated and perhaps non-performant in the general case
154
+ # - doesn't handle non-numeric cases (just like idea 2)
155
+ # - possibly error-prone in corner cases.
156
+ #
157
+ # Idea: maintain @x_chain and @y_chain hashes mapping each distinct x/y values to the next largest and smallest such
158
+ # value. Then just use that. Lookup is fast. Downside: O(n) extra memory.
159
+ #
160
+ # For now approach 2 looks best. It doesn't cover all cases, but covers cases most likely in practice - the Float range is large -
161
+ # and other cases can be documented away in a clean way.
162
+
50
163
  include Shared
51
164
  include BinaryTreeArithmetic
52
165
 
@@ -83,35 +196,54 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
83
196
 
84
197
  # Return the highest point in P to the "northeast" of (x0, y0).
85
198
  #
86
- # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
87
- # structure. Define p* as
199
+ # Let Q be the northeast quadrant defined by the point (x0, y0):
200
+ # - \[x0, infty) X [y0, infty) if +open+ is false and
201
+ # - (x0, infty) X (y0, infty) if +open+ is true.
202
+ #
203
+ # Let P be the points in this data structure.
204
+ #
205
+ # Define p* as
88
206
  #
89
207
  # - (infty, -infty) if Q \intersect P is empty and
90
208
  # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
91
209
  #
92
210
  # This method returns p* in O(log n) time and O(1) extra space.
93
- def largest_y_in_ne(x0, y0)
94
- largest_y_in_quadrant(x0, y0, :ne)
211
+ def largest_y_in_ne(x0, y0, open: false)
212
+ if open
213
+ largest_y_in_quadrant(slightly_bigger(x0), slightly_bigger(y0), :ne)
214
+ else
215
+ largest_y_in_quadrant(x0, y0, :ne)
216
+ end
95
217
  end
96
218
 
97
219
  # Return the highest point in P to the "northwest" of (x0, y0).
98
220
  #
99
- # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
100
- # structure. Define p* as
221
+ # Let Q be the northwest quadrant defined by the point (x0, y0):
222
+ # - (-infty, x0] X [y0, infty) if +open+ is false and
223
+ # - (-infty, x0) X (y0, infty) if +open+ is true.
224
+ #
225
+ # Let P be the points in this data structure.
226
+ #
227
+ # Define p* as
101
228
  #
102
229
  # - (-infty, -infty) if Q \intersect P is empty and
103
230
  # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
104
231
  #
105
232
  # This method returns p* in O(log n) time and O(1) extra space.
106
- def largest_y_in_nw(x0, y0)
107
- largest_y_in_quadrant(x0, y0, :nw)
233
+ def largest_y_in_nw(x0, y0, open: false)
234
+ if open
235
+ largest_y_in_quadrant(slightly_smaller(x0), slightly_bigger(y0), :nw)
236
+ else
237
+ largest_y_in_quadrant(x0, y0, :nw)
238
+ end
108
239
  end
109
240
 
110
- # The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both largest_y_in_ne and
111
- # largest_y_in_nw
241
+ # The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both
242
+ # largest_y_in_ne and largest_y_in_nw
112
243
  #
113
- # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster than the
114
- # general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
244
+ # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinity, y0) so we don't really need this. But it's a bit faster
245
+ # than the general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
246
+ # structure.
115
247
  #
116
248
  # From the paper:
117
249
  #
@@ -137,6 +269,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
137
269
  sufficient_x = ->(x) { x <= x0 }
138
270
  end
139
271
 
272
+ return best if empty?
273
+
140
274
  # x == x0 or is not sufficient. This test sometimes excludes the other child of a node from consideration.
141
275
  exclusionary_x = ->(x) { x == x0 || !sufficient_x.call(x) }
142
276
 
@@ -200,28 +334,46 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
200
334
 
201
335
  # Return the leftmost (min-x) point in P to the northeast of (x0, y0).
202
336
  #
203
- # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
204
- # structure. Define p* as
337
+ # Let Q be the northeast quadrant defined by the point (x0, y0):
338
+ # - [x0, infty) X [y0, infty) if +open+ is false and
339
+ # - (x0, infty) X (y0, infty) if +open+ is true.
340
+ #
341
+ # Let P be the points in this data structure.
342
+ #
343
+ # Define p* as
205
344
  #
206
345
  # - (infty, infty) if Q \intersect P is empty and
207
346
  # - the leftmost (min-x) point in Q \intersect P otherwise.
208
347
  #
209
348
  # This method returns p* in O(log n) time and O(1) extra space.
210
- def smallest_x_in_ne(x0, y0)
211
- extremal_in_x_dimension(x0, y0, :ne)
349
+ def smallest_x_in_ne(x0, y0, open: false)
350
+ if open
351
+ extremal_in_x_dimension(slightly_bigger(x0), slightly_bigger(y0), :ne)
352
+ else
353
+ extremal_in_x_dimension(x0, y0, :ne)
354
+ end
212
355
  end
213
356
 
214
357
  # Return the rightmost (max-x) point in P to the northwest of (x0, y0).
215
358
  #
216
- # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
217
- # structure. Define p* as
359
+ # Let Q be the northwest quadrant defined by the point (x0, y0):
360
+ # - (-infty, x0] X [y0, infty) if +open+ is false and
361
+ # - (-infty, x0) X (y0, infty) if +open+ is true.
362
+ #
363
+ # Let P be the points in this data structure.
364
+ #
365
+ # Define p* as
218
366
  #
219
367
  # - (-infty, infty) if Q \intersect P is empty and
220
368
  # - the leftmost (min-x) point in Q \intersect P otherwise.
221
369
  #
222
370
  # This method returns p* in O(log n) time and O(1) extra space.
223
- def largest_x_in_nw(x0, y0)
224
- extremal_in_x_dimension(x0, y0, :nw)
371
+ def largest_x_in_nw(x0, y0, open: false)
372
+ if open
373
+ extremal_in_x_dimension(slightly_smaller(x0), slightly_bigger(y0), :nw)
374
+ else
375
+ extremal_in_x_dimension(x0, y0, :nw)
376
+ end
225
377
  end
226
378
 
227
379
  # A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specifies via a
@@ -369,14 +521,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
369
521
 
370
522
  # Return the highest point of P in the box bounded by x0, x1, and y0.
371
523
  #
372
- # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
373
- # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
524
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
525
+ # - \[x0, x1] X [y0, infty) if +open+ is false and
526
+ # - (x0, x1) X (y0, infty) if +open+ is true.
527
+ #
528
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
529
+ #
530
+ # Let P be the set of points in the MaxPST.
531
+ #
532
+ # Define p* as
374
533
  #
375
534
  # - (infty, -infty) if Q \intersect P is empty and
376
535
  # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
377
536
  #
378
537
  # This method returns p* in O(log n) time and O(1) extra space.
379
- def largest_y_in_3_sided(x0, x1, y0)
538
+ def largest_y_in_3_sided(x0, x1, y0, open: false)
539
+ if open
540
+ x0 = slightly_bigger(x0)
541
+ x1 = slightly_smaller(x1)
542
+ y0 = slightly_bigger(y0)
543
+ end
380
544
  # From the paper:
381
545
  #
382
546
  # The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P̸ is not \empty,
@@ -572,14 +736,27 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
572
736
 
573
737
  # Enumerate the points of P in the box bounded by x0, x1, and y0.
574
738
  #
575
- # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
576
- # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
739
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
740
+ # - \[x0, x1] X [y0, infty) if +open+ is false and
741
+ # - (x0, x1) X (y0, infty) if +open+ is true.
742
+ #
743
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
744
+ #
745
+ # Let P be the set of points in the MaxPST.
746
+ #
747
+ # We find and enumerate all the points in Q \intersect P.
577
748
  #
578
749
  # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
579
750
  # the intersection.
580
751
  #
581
752
  # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
582
- def enumerate_3_sided(x0, x1, y0)
753
+ def enumerate_3_sided(x0, x1, y0, open: false)
754
+ if open
755
+ x0 = slightly_bigger(x0)
756
+ x1 = slightly_smaller(x1)
757
+ y0 = slightly_bigger(y0)
758
+ end
759
+
583
760
  # From the paper
584
761
  #
585
762
  # Given three real numbers x0, x1, and y0 define the three sided range Q = [x0, x1] X [y0, infty). Algorithm
@@ -1143,7 +1320,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1143
1320
 
1144
1321
  # We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
1145
1322
  # defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
1146
- # worth the trouble of both a bespoke heapsort the special sorting algorithm of Katajainen and Pasanen.
1323
+ # worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
1147
1324
 
1148
1325
  # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
1149
1326
  # and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
@@ -1216,6 +1393,22 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1216
1393
  @data[l..r] = @data[l..r].sort_by(&:x)
1217
1394
  end
1218
1395
 
1396
+ # The smallest floating point number larger than x
1397
+ private def slightly_bigger(x)
1398
+ x_f = x.to_f
1399
+ raise "#{x} out of Float range" if x_f.infinite?
1400
+
1401
+ x_f.next_float
1402
+ end
1403
+
1404
+ # The largest floating point number smaller than x
1405
+ private def slightly_smaller(x)
1406
+ x_f = x.to_f
1407
+ raise "#{x} out of Float range" if x_f.infinite?
1408
+
1409
+ x_f.prev_float
1410
+ end
1411
+
1219
1412
  ########################################
1220
1413
  # Debugging support
1221
1414
  #
@@ -22,8 +22,7 @@ require_relative 'shared'
22
22
  # et al. But we don't do that, as we create a separate array of Points.
23
23
  # - Whereas the implementation of MaxPST means that client code gets the same (x, y) objects back in results as it passed into the
24
24
  # constructor, that's not the case here.
25
- # - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return
26
- # (different) instances of +Point+ in response to queries.
25
+ # - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return (different) instances of +Point+ in response to queries.
27
26
  # - client code is unlikely to care, but be aware of this, just in case.
28
27
  #
29
28
  # Given a set of n points, we can answer the following questions quickly:
@@ -37,6 +36,10 @@ require_relative 'shared'
37
36
  #
38
37
  # (Here, "leftmost/rightmost" means "minimal/maximal x", and "lowest" means "minimal y".)
39
38
  #
39
+ # Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
40
+ # +smallest_x_in_se+ with +open: true+ then we consider points satisfying x > x0 and y < y0. The default value for this parameter
41
+ # is always +false+. See the documentation of MaxPrioritySearchTree for limitations of this support.
42
+ #
40
43
  # The first 5 operations take O(log n) time and O(1) extra space.
41
44
  #
42
45
  # The final operation (enumerate) takes O(m + log n) time and O(1) extra space, where m is the number of points that are enumerated.
@@ -77,28 +80,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
77
80
 
78
81
  # Return the "lowest" point in P to the "southeast" of (x0, y0).
79
82
  #
80
- # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
81
- # structure. Define p* as
83
+ # Let Q be the southeast quadrant defined by the point (x0, y0):
84
+ # - \[x0, infty) X (-infty, y0] if +open+ is false and
85
+ # - (x0, infty) X (-infty, y0) if +open+ is true.
86
+ #
87
+ # Let P be the points in this data structure.
88
+ #
89
+ # Define p* as
82
90
  #
83
91
  # - (infty, infty) if Q \intersect P is empty and
84
92
  # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
85
93
  #
86
94
  # This method returns p* in O(log n) time and O(1) extra space.
87
- def smallest_y_in_se(x0, y0)
88
- flip @max_pst.largest_y_in_ne(x0, -y0)
95
+ def smallest_y_in_se(x0, y0, open: false)
96
+ flip @max_pst.largest_y_in_ne(x0, -y0, open:)
89
97
  end
90
98
 
91
99
  # Return the "lowest" point in P to the "southwest" of (x0, y0).
92
100
  #
93
- # Let Q = (-infty, x0] X (-infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
94
- # structure. Define p* as
101
+ # Let Q be the southwest quadrant defined by the point (x0, y0):
102
+ # - (-infty, x0] X (-infty, y0] if +open+ is false and
103
+ # - (-infty, x0) X (-infty, y0) if +open+ is true.
104
+ #
105
+ # Let P be the points in this data structure.
106
+ #
107
+ # Define p* as
95
108
  #
96
109
  # - (-infty, infty) if Q \intersect P is empty and
97
110
  # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
98
111
  #
99
112
  # This method returns p* in O(log n) time and O(1) extra space.
100
- def smallest_y_in_sw(x0, y0)
101
- flip @max_pst.largest_y_in_nw(x0, -y0)
113
+ def smallest_y_in_sw(x0, y0, open: false)
114
+ flip @max_pst.largest_y_in_nw(x0, -y0, open:)
102
115
  end
103
116
 
104
117
  ########################################
@@ -106,28 +119,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
106
119
 
107
120
  # Return the leftmost (min-x) point in P to the southeast of (x0, y0).
108
121
  #
109
- # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
110
- # structure. Define p* as
122
+ # Let Q be the southeast quadrant defined by the point (x0, y0):
123
+ # - \[x0, infty) X (-infty, y0] if +open+ is false and
124
+ # - (x0, infty) X (-infty, y0) if +open+ is true.
125
+ #
126
+ # Let P be the points in this data structure.
127
+ #
128
+ # Define p* as
111
129
  #
112
130
  # - (infty, -infty) if Q \intersect P is empty and
113
131
  # - the leftmost (min-x) point in Q \intersect P otherwise.
114
132
  #
115
133
  # This method returns p* in O(log n) time and O(1) extra space.
116
- def smallest_x_in_se(x0, y0)
117
- flip @max_pst.smallest_x_in_ne(x0, -y0)
134
+ def smallest_x_in_se(x0, y0, open: false)
135
+ flip @max_pst.smallest_x_in_ne(x0, -y0, open:)
118
136
  end
119
137
 
120
138
  # Return the rightmost (max-x) point in P to the southwest of (x0, y0).
121
139
  #
122
- # Let Q = (-infty, x0] X (infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
123
- # structure. Define p* as
140
+ # Let Q be the southwest quadrant defined by the point (x0, y0):
141
+ # - (-infty, x0] X (-infty, y0] if +open+ is false and
142
+ # - (-infty, x0) X (-infty, y0) if +open+ is true.
143
+ #
144
+ # Let P be the points in this data structure.
145
+ #
146
+ # Define p* as
124
147
  #
125
148
  # - (-infty, -infty) if Q \intersect P is empty and
126
149
  # - the rightmost (max-x) point in Q \intersect P otherwise.
127
150
  #
128
151
  # This method returns p* in O(log n) time and O(1) extra space.
129
- def largest_x_in_sw(x0, y0)
130
- flip @max_pst.largest_x_in_nw(x0, -y0)
152
+ def largest_x_in_sw(x0, y0, open: false)
153
+ flip @max_pst.largest_x_in_nw(x0, -y0, open:)
131
154
  end
132
155
 
133
156
  ########################################
@@ -135,15 +158,22 @@ class DataStructuresRMolinari::MinPrioritySearchTree
135
158
 
136
159
  # Return the lowest point of P in the box bounded by x0, x1, and y0.
137
160
  #
138
- # Let Q = [x0, x1] X (infty, y0] be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
139
- # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
161
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
162
+ # - \[x0, x1] X (-infty, y0] if +open+ is false and
163
+ # - (x0, x1) X (-infty, y0) if +open+ is true.
164
+ #
165
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
166
+ #
167
+ # Let P be the set of points in the MinPST.
168
+ #
169
+ # Define p* as
140
170
  #
141
171
  # - (infty, infty) if Q \intersect P is empty and
142
172
  # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
143
173
  #
144
174
  # This method returns p* in O(log n) time and O(1) extra space.
145
- def smallest_y_in_3_sided(x0, x1, y0)
146
- flip @max_pst.largest_y_in_3_sided(x0, x1, -y0)
175
+ def smallest_y_in_3_sided(x0, x1, y0, open: false)
176
+ flip @max_pst.largest_y_in_3_sided(x0, x1, -y0, open:)
147
177
  end
148
178
 
149
179
  ########################################
@@ -151,18 +181,25 @@ class DataStructuresRMolinari::MinPrioritySearchTree
151
181
 
152
182
  # Enumerate the points of P in the box bounded by x0, x1, and y0.
153
183
  #
154
- # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
155
- # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
184
+ # Let Q be the "three-sided" box bounded by x0, x1, and y0:
185
+ # - \[x0, x1] X (-infty, y0] if +open+ is false and
186
+ # - (x0, x1) X (-infty, y0) if +open+ is true.
187
+ #
188
+ # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
189
+ #
190
+ # Let P be the set of points in the MinPST.
191
+ #
192
+ # We find and enumerate all the points in Q \intersect P.
156
193
  #
157
194
  # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
158
195
  # the intersection.
159
196
  #
160
197
  # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
161
- def enumerate_3_sided(x0, x1, y0)
198
+ def enumerate_3_sided(x0, x1, y0, open: false)
162
199
  if block_given?
163
- @max_pst.enumerate_3_sided(x0, x1, -y0) { |point| yield(flip point) }
200
+ @max_pst.enumerate_3_sided(x0, x1, -y0, open:) { |point| yield(flip point) }
164
201
  else
165
- Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0).map { |pt| flip pt })
202
+ Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0, open:).map { |pt| flip pt })
166
203
  end
167
204
  end
168
205
 
@@ -36,10 +36,15 @@ module DataStructuresRMolinari
36
36
  # - +:c+ or +:ruby+
37
37
  # - the C version will run faster but for now may be buggier and harder to debug
38
38
  module_function def construct(data, operation, lang)
39
- operation.must_be_in [:max, :index_of_max]
39
+ operation.must_be_in [:max, :index_of_max, :sum]
40
40
  lang.must_be_in [:ruby, :c]
41
41
 
42
- klass = operation == :max ? MaxValSegmentTree : IndexOfMaxValSegmentTree
42
+ klass = case operation
43
+ when :max then MaxValSegmentTree
44
+ when :index_of_max then IndexOfMaxValSegmentTree
45
+ when :sum then SumSegmentTree
46
+ else raise ArgumentError, "Unknown operation #{operation}"
47
+ end
43
48
  template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
44
49
 
45
50
  klass.new(template, data)
@@ -107,6 +112,33 @@ module DataStructuresRMolinari
107
112
  end
108
113
  end
109
114
 
115
+ class SumSegmentTree
116
+ extend Forwardable
117
+
118
+ # Tell the tree that the value at idx has changed
119
+ def_delegator :@structure, :update_at
120
+
121
+ # @param (see MaxValSegmentTree#initialize)
122
+ def initialize(template_klass, data)
123
+ data.must_be_a Enumerable
124
+
125
+ @structure = template_klass.new(
126
+ combine: ->(a, b) { a + b },
127
+ single_cell_array_val: ->(i) { data[i] },
128
+ size: data.size,
129
+ identity: 0
130
+ )
131
+ end
132
+
133
+ # The sum of the values in A(i..j)
134
+ #
135
+ # The arguments must be integers in 0...(A.size)
136
+ # @return the sum of the values in A(i..j) or 0 if i > j.
137
+ def sum_on(i, j)
138
+ @structure.query_on(i, j)
139
+ end
140
+ end
141
+
110
142
  # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
111
143
  #
112
144
  # See SegmentTreeTemplate for more information.
@@ -71,6 +71,7 @@ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
71
71
  # Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
72
72
  # construction.
73
73
  def update_at(idx)
74
+ raise DataError, "Bad update index #{idx} (size = #{@size})" unless (0...@size).cover?(idx)
74
75
 
75
76
  update_val_at(idx, root, 0, @size - 1)
76
77
  end
@@ -69,12 +69,9 @@ module Shared
69
69
  # duplication. When nil we don't call anything and just use the elements themselves.
70
70
  def contains_duplicates?(enum, by: nil)
71
71
  seen = Set.new
72
- enum.each do |v|
72
+ enum.any? do |v|
73
73
  v = v.send(by) if by
74
- return true if seen.include? v
75
-
76
- seen << v
74
+ !seen.add?(v)
77
75
  end
78
- false
79
76
  end
80
77
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-15 00:00:00.000000000 Z
11
+ date: 2023-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be
@@ -90,6 +90,7 @@ files:
90
90
  - ext/c_segment_tree_template/extconf.rb
91
91
  - ext/c_segment_tree_template/segment_tree_template.c
92
92
  - ext/cc.h
93
+ - ext/extconf_shared.rb
93
94
  - ext/shared.c
94
95
  - ext/shared.h
95
96
  - lib/data_structures_rmolinari.rb