data_structures_rmolinari 0.5.3 → 0.5.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -1
- data/README.md +3 -0
- data/lib/data_structures_rmolinari/algorithms.rb +5 -11
- data/lib/data_structures_rmolinari/heap.rb +4 -3
- data/lib/data_structures_rmolinari/max_priority_search_tree.rb +223 -30
- data/lib/data_structures_rmolinari/min_priority_search_tree.rb +64 -27
- data/lib/data_structures_rmolinari/segment_tree.rb +34 -2
- data/lib/data_structures_rmolinari/segment_tree_template.rb +1 -0
- data/lib/data_structures_rmolinari/shared.rb +2 -5
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ec5653a5bec033196042ad1af1e850877696eb02a1c37a6efdf1c89ce8e726c
|
4
|
+
data.tar.gz: 88893649d4329549fd2d2994f9b643614b46eb1d2d5b541418d6ad183f8f9f02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c2a7ba2ca4630ae925358130d4b85297ea081d37ce032fe0db5e2b13d9c84cbef5be58f9d82027feb63cc476b59200c993d608f48a9db8871594415544aab32
|
7
|
+
data.tar.gz: fc5ad5901038397b7d42eb44ee2b199654612f043c2eeceeb05b380da54c9e4db858612daf3dff7eb8ae580c0af0905ddc237867580f44057a0d9df6e063df6c
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,27 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
-
## [0.5.
|
5
|
+
## [0.5.5] 2023-12-19
|
6
|
+
|
7
|
+
Support Ruby v3.2.
|
8
|
+
|
9
|
+
## [0.5.4] 2023-12-12
|
10
|
+
|
11
|
+
(Unfortunately this note was added long after the changes were made and my memory of the changes is poor.)
|
12
|
+
|
13
|
+
- SegmentTree
|
14
|
+
- Sum version is provided
|
15
|
+
- PrioritySearchTree
|
16
|
+
- Open regions
|
17
|
+
|
18
|
+
- Some bug fixes
|
19
|
+
- Some refactoring of test cases
|
20
|
+
|
21
|
+
## [0.5.1] - [0.5.3]
|
22
|
+
|
23
|
+
- Releases to fix some bad gemspec data.
|
24
|
+
|
25
|
+
## [0.5.0] 2023-02-03
|
6
26
|
|
7
27
|
- SegmentTree
|
8
28
|
- Reorganize the code into a SegmentTree submodule.
|
data/README.md
CHANGED
@@ -106,6 +106,9 @@ There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0,
|
|
106
106
|
(These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
|
107
107
|
[[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
|
108
108
|
|
109
|
+
Each method also has a named parameter `open:` that makes the search region an open set. For example, if we call `smallest_x_in_ne`
|
110
|
+
with `open: true` then we consider points satisfying x > x0 and y > y0. The default value for this parameter is always `false`.
|
111
|
+
|
109
112
|
The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
|
110
113
|
the number of points actually enumerated.
|
111
114
|
|
@@ -32,16 +32,10 @@ module DataStructuresRMolinari::Algorithms
|
|
32
32
|
x_max = sorted_points.last.x
|
33
33
|
y_min, y_max = sorted_points.map(&:y).minmax
|
34
34
|
|
35
|
-
# Half of the smallest non-zero gap between x values. This is needed below
|
36
|
-
epsilon = INFINITY
|
37
|
-
|
38
35
|
# Enumerate type 1
|
39
36
|
sorted_points.each_cons(2) do |pt1, pt2|
|
40
37
|
next if pt1.x == pt2.x
|
41
38
|
|
42
|
-
d = (pt2.x.to_f - pt1.x) / 2
|
43
|
-
epsilon = d if d < epsilon
|
44
|
-
|
45
39
|
yield [pt1.x, pt2.x, y_min, y_max]
|
46
40
|
end
|
47
41
|
|
@@ -54,9 +48,9 @@ module DataStructuresRMolinari::Algorithms
|
|
54
48
|
next if pt.y == y_max # 0 area
|
55
49
|
next if pt.y == y_min # type 1
|
56
50
|
|
57
|
-
#
|
58
|
-
left_bound = max_pst.largest_x_in_nw(
|
59
|
-
right_bound = max_pst.smallest_x_in_ne(pt.x
|
51
|
+
# Open region means we don't just get pt back again. The De et al. paper is rather vague.
|
52
|
+
left_bound = max_pst.largest_x_in_nw(pt.x, pt.y, open: true)
|
53
|
+
right_bound = max_pst.smallest_x_in_ne(pt.x, pt.y, open: true)
|
60
54
|
|
61
55
|
left = left_bound.x.infinite? ? x_min : left_bound.x
|
62
56
|
right = right_bound.x.infinite? ? x_max : right_bound.x
|
@@ -74,7 +68,7 @@ module DataStructuresRMolinari::Algorithms
|
|
74
68
|
#
|
75
69
|
# largest_y_in_3_sided(l, r, y_min)
|
76
70
|
#
|
77
|
-
# That call considers the points in the closed region l <= x <= r and y >= y_min, so we use
|
71
|
+
# That call considers the points in the closed region l <= x <= r and y >= y_min, so we use an open search region instead.
|
78
72
|
until max_pst.empty?
|
79
73
|
top_pt = max_pst.delete_top!
|
80
74
|
top = top_pt.y
|
@@ -85,7 +79,7 @@ module DataStructuresRMolinari::Algorithms
|
|
85
79
|
r = x_max
|
86
80
|
|
87
81
|
loop do
|
88
|
-
next_pt = max_pst.largest_y_in_3_sided(l
|
82
|
+
next_pt = max_pst.largest_y_in_3_sided(l, r, y_min, open: true)
|
89
83
|
|
90
84
|
bottom = next_pt.y.infinite? ? y_min : next_pt.y
|
91
85
|
yield [l, r, bottom, top]
|
@@ -78,8 +78,9 @@ class DataStructuresRMolinari::Heap
|
|
78
78
|
# Insert a new element into the heap with the given priority.
|
79
79
|
# @param value the item to be inserted.
|
80
80
|
# - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
|
81
|
-
# @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
|
82
|
-
|
81
|
+
# @param priority (optional) the priority to use for the new item. The values used as priorities must be totally ordered via +<=>+.
|
82
|
+
# If omitted we use the inserted value as its own priority.
|
83
|
+
def insert(value, priority = value)
|
83
84
|
raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
|
84
85
|
|
85
86
|
@size += 1
|
@@ -103,7 +104,7 @@ class DataStructuresRMolinari::Heap
|
|
103
104
|
# Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
|
104
105
|
# @return (see #top)
|
105
106
|
def pop
|
106
|
-
result = top
|
107
|
+
result = top # raises if empty
|
107
108
|
assign(@data[@size], root)
|
108
109
|
|
109
110
|
@data[@size] = nil
|
@@ -30,6 +30,10 @@ require_relative 'shared'
|
|
30
30
|
#
|
31
31
|
# The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
|
32
32
|
#
|
33
|
+
# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
|
34
|
+
# +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y > y0. The default value for this parameter
|
35
|
+
# is always +false+. See below for limitations in this functionality.
|
36
|
+
#
|
33
37
|
# If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
|
34
38
|
#
|
35
39
|
# - +delete_top!+: remove the top (max-y) element of the tree and return it.
|
@@ -39,14 +43,123 @@ require_relative 'shared'
|
|
39
43
|
# In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
|
40
44
|
# but it hasn't been written yet. See issue #9.
|
41
45
|
#
|
42
|
-
# There is a related data structure called
|
46
|
+
# There is a related data structure called a Min-max priority search tree so we have called this a "Max priority search tree", or
|
43
47
|
# MaxPST.
|
44
48
|
#
|
49
|
+
# ## Open regions: limitations
|
50
|
+
#
|
51
|
+
# Calls involving open regions - using the +open:+ argument - are implemented internally using closed regions in which the
|
52
|
+
# boundaries have been "nudged" by a tiny amount so as to exclude points on the boundary. Since there are only finitely many points
|
53
|
+
# in the PST there are no limit points, and the open region given by x > x0 and y > y0 contains the same PST members as a closed
|
54
|
+
# region x >= x0 + e and y >= y0 + e for small enough values of e.
|
55
|
+
#
|
56
|
+
# But it is hard to determine e robustly. Indeed, assume for the moment that all x- and y-values are floating-point, i.e., IEEE754
|
57
|
+
# double-precision. We can easily determine a value for e: the smallest difference between any two distinct x-values or any two
|
58
|
+
# distinct y-values. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive
|
59
|
+
# x-values
|
60
|
+
#
|
61
|
+
# 0, 5e-324, 1, and 2.
|
62
|
+
#
|
63
|
+
# (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
|
64
|
+
# values are represented, 1 + e = 1.0. Any query on open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
|
65
|
+
# and we may get the wrong result.
|
66
|
+
#
|
67
|
+
# The solution here is to replace (x0, y0) with (x0.next_float, y0.next_float) rather than (x0 + e, y0 + e). +Float::next_float+ is
|
68
|
+
# an implementation of the IEEE754 operation +nextafter+ which gives, for a floating point value z, the smallest floating point
|
69
|
+
# value larger than z. If our PST contains only points with finite, floating point coordinates, then this approach implements open
|
70
|
+
# search regions correctly. This is what the implementation currently does.
|
71
|
+
#
|
72
|
+
# However, when coordinates are not all Floats there are cases when this approach will fail. Consider the case in which we have the
|
73
|
+
# following consecutive x-values:
|
74
|
+
#
|
75
|
+
# 0, 1e-324, 2e-324, 1.
|
76
|
+
#
|
77
|
+
# (Here 1e-324 and 2e-324 are the Ruby values +Rational(1, 10**324)+ and +Rational(2, 10**324)+.) Then, given an argument x0 =
|
78
|
+
# 1e-324, +x0.to_f == 0.0+ and so +x0.to_f.next_float == 5e-324+ and the region we use internally for our query incorrectly excludes
|
79
|
+
# the point with x value 2e-324. This is a bug in the code.
|
80
|
+
#
|
81
|
+
# There are also issues with numeric values (Integer or Rational) that are larger than the maximum floating point value,
|
82
|
+
# approximately 1.8e308. For such values z, +z.to_f == Float::INFINITY+, and we will incorrectly exclude any larger-but-finite
|
83
|
+
# coordinates from the search region.
|
84
|
+
#
|
85
|
+
# Yet more issues arise when the coordinates of points in the PST aren't numeric at all, but are some other sort of comparable
|
86
|
+
# objects, such as arrays.
|
87
|
+
#
|
88
|
+
# So, we may say that queries on open regions will work as expected if either
|
89
|
+
# - all coordinates of the points in the PST are finite Ruby Floats, or
|
90
|
+
# - all coordinates of the points are finite Numeric values and for no such pair of x-values s, t (or pair of y-values) is it such
|
91
|
+
# that +s.to_f.next_float > t+.
|
92
|
+
#
|
93
|
+
# Otherwise, use this functionality at your own risk, and not at all with coordinates that do not respond reasonably to +to_f+.
|
94
|
+
#
|
45
95
|
# References:
|
46
96
|
# * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985.
|
47
|
-
# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
|
48
|
-
# Geometry, 2011
|
97
|
+
# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational Geometry, 2011
|
49
98
|
class DataStructuresRMolinari::MaxPrioritySearchTree
|
99
|
+
# IMPLEMENTATION NOTES
|
100
|
+
#
|
101
|
+
# Open regions
|
102
|
+
#
|
103
|
+
# The search methods each have an argument +open:+ that changes the search region from closed (x >= x0) to open (x > x0). I had
|
104
|
+
# initially intended to implement this by varying the internals of the search code. But this turned out to be error-prone because
|
105
|
+
# the code is written for closed regions. When deciding which children to take in the next level of the tree, say, we assume the
|
106
|
+
# search space is closed, sometimes in a way that means we won't find the optimal point when the search region is open. Changing
|
107
|
+
# the logic turned out to be finicky and buggy.
|
108
|
+
#
|
109
|
+
# It is much easier and safer to replace a search request on, say (x0, y0, open) with (x0 + e, y0 + e, closed) where e[psilon] is
|
110
|
+
# small enough that we don't exclude any points other than those on the boundary of the closed region. Then we can just call the
|
111
|
+
# existing search code as-is. This is what the code did at first. We calculated e as the smallest difference between any two
|
112
|
+
# distinct x-values or distinct y-values.
|
113
|
+
#
|
114
|
+
# But this approach is not robust. Assume for the moment that all x- and y-values are floating-point. We can easily determine the
|
115
|
+
# value e. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive x-values
|
116
|
+
#
|
117
|
+
# 0, 5e-324, 1, and 2.
|
118
|
+
#
|
119
|
+
# (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
|
120
|
+
# values are represented, 1 + e = 1.0. Any query on open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
|
121
|
+
# and we may get the wrong result.
|
122
|
+
#
|
123
|
+
# I see the following possible approaches.
|
124
|
+
#
|
125
|
+
# 1. Rewrite the code to do open regions "properly"
|
126
|
+
# Pro:
|
127
|
+
# - we don't need to worry about numerical issues.
|
128
|
+
# Con:
|
129
|
+
# - too complicated and error-prone.
|
130
|
+
#
|
131
|
+
# 2. Instead of calculating e like this, replace each bounding value x with x.next_float or x.prev_float as required.
|
132
|
+
# Note that #next_float gives the next-largest value representable as a floating point value.
|
133
|
+
# Pro:
|
134
|
+
# - we don't need to worry about the scaling issues in type Float
|
135
|
+
# - simple and supported by the Ruby libraries (and by the C standard library if we decide to implement as a C extension)
|
136
|
+
# Con:
|
137
|
+
# - [minor] will fail with Float::INFINITY.
|
138
|
+
# - this is an unlikely edge case that could be handled directly or simply documented away.
|
139
|
+
# - won't work if the x0 value is a Numeric outside of Float range, roughly [-1.798e308, 1.798e308]
|
140
|
+
# - For example, (10**400).to_f.next_float == Infinity
|
141
|
+
# - We could warn about this case in documentation
|
142
|
+
# - For numeric values x in the Float range we would need to check that x.to_f.next_float > x, but I suspect that this is
|
143
|
+
# guaranteed.
|
144
|
+
# - won't work with comparable but non-numeric values, like arrays, or some sort of user-defined type
|
145
|
+
# - We would simply have to document that this case is not supported (or just throw an exception on +#to_f+)
|
146
|
+
#
|
147
|
+
# 3. Handle numeric values on a case-by-case basis. So for numeric values x in the float range we use x.to_f.next_float while for
|
148
|
+
# other values - like BigDecimal - do something different that depends on the next value in the data set with an x-value
|
149
|
+
# greater than x.
|
150
|
+
# Pro:
|
151
|
+
# - more cases are handled
|
152
|
+
# Con:
|
153
|
+
# - complicated and perhaps non-performant in the general case
|
154
|
+
# - doesn't handle non-numeric cases (just like idea 2)
|
155
|
+
# - possibly error-prone in corner cases.
|
156
|
+
#
|
157
|
+
# Idea: maintain @x_chain and @y_chain hashes mapping each distinct x/y values to the next largest and smallest such
|
158
|
+
# value. Then just use that. Lookup is fast. Downside: O(n) extra memory.
|
159
|
+
#
|
160
|
+
# For now approach 2 looks best. It doesn't cover all cases, but covers cases most likely in practice - the Float range is large -
|
161
|
+
# and other cases can be documented away in a clean way.
|
162
|
+
|
50
163
|
include Shared
|
51
164
|
include BinaryTreeArithmetic
|
52
165
|
|
@@ -83,35 +196,54 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
83
196
|
|
84
197
|
# Return the highest point in P to the "northeast" of (x0, y0).
|
85
198
|
#
|
86
|
-
# Let Q =
|
87
|
-
#
|
199
|
+
# Let Q be the northeast quadrant defined by the point (x0, y0):
|
200
|
+
# - \[x0, infty) X [y0, infty) if +open+ is false and
|
201
|
+
# - (x0, infty) X (y0, infty) if +open+ is true.
|
202
|
+
#
|
203
|
+
# Let P be the points in this data structure.
|
204
|
+
#
|
205
|
+
# Define p* as
|
88
206
|
#
|
89
207
|
# - (infty, -infty) if Q \intersect P is empty and
|
90
208
|
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
91
209
|
#
|
92
210
|
# This method returns p* in O(log n) time and O(1) extra space.
|
93
|
-
def largest_y_in_ne(x0, y0)
|
94
|
-
|
211
|
+
def largest_y_in_ne(x0, y0, open: false)
|
212
|
+
if open
|
213
|
+
largest_y_in_quadrant(slightly_bigger(x0), slightly_bigger(y0), :ne)
|
214
|
+
else
|
215
|
+
largest_y_in_quadrant(x0, y0, :ne)
|
216
|
+
end
|
95
217
|
end
|
96
218
|
|
97
219
|
# Return the highest point in P to the "northwest" of (x0, y0).
|
98
220
|
#
|
99
|
-
# Let Q =
|
100
|
-
#
|
221
|
+
# Let Q be the northwest quadrant defined by the point (x0, y0):
|
222
|
+
# - (-infty, x0] X [y0, infty) if +open+ is false and
|
223
|
+
# - (-infty, x0) X (y0, infty) if +open+ is true.
|
224
|
+
#
|
225
|
+
# Let P be the points in this data structure.
|
226
|
+
#
|
227
|
+
# Define p* as
|
101
228
|
#
|
102
229
|
# - (-infty, -infty) if Q \intersect P is empty and
|
103
230
|
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
104
231
|
#
|
105
232
|
# This method returns p* in O(log n) time and O(1) extra space.
|
106
|
-
def largest_y_in_nw(x0, y0)
|
107
|
-
|
233
|
+
def largest_y_in_nw(x0, y0, open: false)
|
234
|
+
if open
|
235
|
+
largest_y_in_quadrant(slightly_smaller(x0), slightly_bigger(y0), :nw)
|
236
|
+
else
|
237
|
+
largest_y_in_quadrant(x0, y0, :nw)
|
238
|
+
end
|
108
239
|
end
|
109
240
|
|
110
|
-
# The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both
|
111
|
-
# largest_y_in_nw
|
241
|
+
# The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both
|
242
|
+
# largest_y_in_ne and largest_y_in_nw
|
112
243
|
#
|
113
|
-
# Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster
|
114
|
-
# general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
|
244
|
+
# Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinity, y0) so we don't really need this. But it's a bit faster
|
245
|
+
# than the general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
|
246
|
+
# structure.
|
115
247
|
#
|
116
248
|
# From the paper:
|
117
249
|
#
|
@@ -137,6 +269,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
137
269
|
sufficient_x = ->(x) { x <= x0 }
|
138
270
|
end
|
139
271
|
|
272
|
+
return best if empty?
|
273
|
+
|
140
274
|
# x == x0 or is not sufficient. This test sometimes excludes the other child of a node from consideration.
|
141
275
|
exclusionary_x = ->(x) { x == x0 || !sufficient_x.call(x) }
|
142
276
|
|
@@ -200,28 +334,46 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
200
334
|
|
201
335
|
# Return the leftmost (min-x) point in P to the northeast of (x0, y0).
|
202
336
|
#
|
203
|
-
# Let Q =
|
204
|
-
#
|
337
|
+
# Let Q be the northeast quadrant defined by the point (x0, y0):
|
338
|
+
# - [x0, infty) X [y0, infty) if +open+ is false and
|
339
|
+
# - (x0, infty) X (y0, infty) if +open+ is true.
|
340
|
+
#
|
341
|
+
# Let P be the points in this data structure.
|
342
|
+
#
|
343
|
+
# Define p* as
|
205
344
|
#
|
206
345
|
# - (infty, infty) if Q \intersect P is empty and
|
207
346
|
# - the leftmost (min-x) point in Q \intersect P otherwise.
|
208
347
|
#
|
209
348
|
# This method returns p* in O(log n) time and O(1) extra space.
|
210
|
-
def smallest_x_in_ne(x0, y0)
|
211
|
-
|
349
|
+
def smallest_x_in_ne(x0, y0, open: false)
|
350
|
+
if open
|
351
|
+
extremal_in_x_dimension(slightly_bigger(x0), slightly_bigger(y0), :ne)
|
352
|
+
else
|
353
|
+
extremal_in_x_dimension(x0, y0, :ne)
|
354
|
+
end
|
212
355
|
end
|
213
356
|
|
214
357
|
# Return the rightmost (max-x) point in P to the northwest of (x0, y0).
|
215
358
|
#
|
216
|
-
# Let Q =
|
217
|
-
#
|
359
|
+
# Let Q be the northwest quadrant defined by the point (x0, y0):
|
360
|
+
# - (-infty, x0] X [y0, infty) if +open+ is false and
|
361
|
+
# - (-infty, x0) X (y0, infty) if +open+ is true.
|
362
|
+
#
|
363
|
+
# Let P be the points in this data structure.
|
364
|
+
#
|
365
|
+
# Define p* as
|
218
366
|
#
|
219
367
|
# - (-infty, infty) if Q \intersect P is empty and
|
220
368
|
# - the rightmost (max-x) point in Q \intersect P otherwise.
|
221
369
|
#
|
222
370
|
# This method returns p* in O(log n) time and O(1) extra space.
|
223
|
-
def largest_x_in_nw(x0, y0)
|
224
|
-
|
371
|
+
def largest_x_in_nw(x0, y0, open: false)
|
372
|
+
if open
|
373
|
+
extremal_in_x_dimension(slightly_smaller(x0), slightly_bigger(y0), :nw)
|
374
|
+
else
|
375
|
+
extremal_in_x_dimension(x0, y0, :nw)
|
376
|
+
end
|
225
377
|
end
|
226
378
|
|
227
379
|
# A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specifies via a
|
@@ -369,14 +521,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
369
521
|
|
370
522
|
# Return the highest point of P in the box bounded by x0, x1, and y0.
|
371
523
|
#
|
372
|
-
# Let Q
|
373
|
-
#
|
524
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
525
|
+
# - \[x0, x1] X [y0, infty) if +open+ is false and
|
526
|
+
# - (x0, x1) X (y0, infty) if +open+ is true.
|
527
|
+
#
|
528
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
529
|
+
#
|
530
|
+
# Let P be the set of points in the MaxPST.
|
531
|
+
#
|
532
|
+
# Define p* as
|
374
533
|
#
|
375
534
|
# - (infty, -infty) if Q \intersect P is empty and
|
376
535
|
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
|
377
536
|
#
|
378
537
|
# This method returns p* in O(log n) time and O(1) extra space.
|
379
|
-
def largest_y_in_3_sided(x0, x1, y0)
|
538
|
+
def largest_y_in_3_sided(x0, x1, y0, open: false)
|
539
|
+
if open
|
540
|
+
x0 = slightly_bigger(x0)
|
541
|
+
x1 = slightly_smaller(x1)
|
542
|
+
y0 = slightly_bigger(y0)
|
543
|
+
end
|
380
544
|
# From the paper:
|
381
545
|
#
|
382
546
|
# The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P̸ is not \empty,
|
@@ -572,14 +736,27 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
572
736
|
|
573
737
|
# Enumerate the points of P in the box bounded by x0, x1, and y0.
|
574
738
|
#
|
575
|
-
# Let Q
|
576
|
-
#
|
739
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
740
|
+
# - \[x0, x1] X [y0, infty) if +open+ is false and
|
741
|
+
# - (x0, x1) X (y0, infty) if +open+ is true.
|
742
|
+
#
|
743
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
744
|
+
#
|
745
|
+
# Let P be the set of points in the MaxPST.
|
746
|
+
#
|
747
|
+
# We find and enumerate all the points in Q \intersect P.
|
577
748
|
#
|
578
749
|
# If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
|
579
750
|
# the intersection.
|
580
751
|
#
|
581
752
|
# This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
|
582
|
-
def enumerate_3_sided(x0, x1, y0)
|
753
|
+
def enumerate_3_sided(x0, x1, y0, open: false)
|
754
|
+
if open
|
755
|
+
x0 = slightly_bigger(x0)
|
756
|
+
x1 = slightly_smaller(x1)
|
757
|
+
y0 = slightly_bigger(y0)
|
758
|
+
end
|
759
|
+
|
583
760
|
# From the paper
|
584
761
|
#
|
585
762
|
# Given three real numbers x0, x1, and y0 define the three sided range Q = [x0, x1] X [y0, infty). Algorithm
|
@@ -1143,7 +1320,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
1143
1320
|
|
1144
1321
|
# We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
|
1145
1322
|
# defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
|
1146
|
-
# worth the trouble of both a bespoke heapsort the special sorting algorithm of Katajainen and Pasanen.
|
1323
|
+
# worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
|
1147
1324
|
|
1148
1325
|
# Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
|
1149
1326
|
# and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
|
@@ -1216,6 +1393,22 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
1216
1393
|
@data[l..r] = @data[l..r].sort_by(&:x)
|
1217
1394
|
end
|
1218
1395
|
|
1396
|
+
# The smallest floating point number larger than x
|
1397
|
+
private def slightly_bigger(x)
|
1398
|
+
x_f = x.to_f
|
1399
|
+
raise "#{x} out of Float range" if x_f.infinite?
|
1400
|
+
|
1401
|
+
x_f.next_float
|
1402
|
+
end
|
1403
|
+
|
1404
|
+
# The largest floating point number smaller than x
|
1405
|
+
private def slightly_smaller(x)
|
1406
|
+
x_f = x.to_f
|
1407
|
+
raise "#{x} out of Float range" if x_f.infinite?
|
1408
|
+
|
1409
|
+
x_f.prev_float
|
1410
|
+
end
|
1411
|
+
|
1219
1412
|
########################################
|
1220
1413
|
# Debugging support
|
1221
1414
|
#
|
@@ -22,8 +22,7 @@ require_relative 'shared'
|
|
22
22
|
# et al. But we don't do that, as we create a separate array of Points.
|
23
23
|
# - Whereas the implementation of MaxPST means that client code gets the same (x, y) objects back in results as it passed into the
|
24
24
|
# constructor, that's not the case here.
|
25
|
-
# - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return
|
26
|
-
# (different) instances of +Point+ in response to queries.
|
25
|
+
# - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return (different) instances of +Point+ in response to queries.
|
27
26
|
# - client code is unlikely to care, but be aware of this, just in case.
|
28
27
|
#
|
29
28
|
# Given a set of n points, we can answer the following questions quickly:
|
@@ -37,6 +36,10 @@ require_relative 'shared'
|
|
37
36
|
#
|
38
37
|
# (Here, "leftmost/rightmost" means "minimal/maximal x", and "lowest" means "minimal y".)
|
39
38
|
#
|
39
|
+
# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
|
40
|
+
# +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y < y0. The default value for this parameter
|
41
|
+
# is always +false+. See the documentation of MaxPrioritySearchTree for limitations of this support.
|
42
|
+
#
|
40
43
|
# The first 5 operations take O(log n) time and O(1) extra space.
|
41
44
|
#
|
42
45
|
# The final operation (enumerate) takes O(m + log n) time and O(1) extra space, where m is the number of points that are enumerated.
|
@@ -77,28 +80,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
77
80
|
|
78
81
|
# Return the "lowest" point in P to the "southeast" of (x0, y0).
|
79
82
|
#
|
80
|
-
# Let Q =
|
81
|
-
#
|
83
|
+
# Let Q be the southeast quadrant defined by the point (x0, y0):
|
84
|
+
# - \[x0, infty) X (-infty, y0] if +open+ is false and
|
85
|
+
# - (x0, infty) X (-infty, y0) if +open+ is true.
|
86
|
+
#
|
87
|
+
# Let P be the points in this data structure.
|
88
|
+
#
|
89
|
+
# Define p* as
|
82
90
|
#
|
83
91
|
# - (infty, infty) if Q \intersect P is empty and
|
84
92
|
# - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
85
93
|
#
|
86
94
|
# This method returns p* in O(log n) time and O(1) extra space.
|
87
|
-
def smallest_y_in_se(x0, y0)
|
88
|
-
flip @max_pst.largest_y_in_ne(x0, -y0)
|
95
|
+
def smallest_y_in_se(x0, y0, open: false)
|
96
|
+
flip @max_pst.largest_y_in_ne(x0, -y0, open:)
|
89
97
|
end
|
90
98
|
|
91
99
|
# Return the "lowest" point in P to the "southwest" of (x0, y0).
|
92
100
|
#
|
93
|
-
# Let Q =
|
94
|
-
#
|
101
|
+
# Let Q be the southwest quadrant defined by the point (x0, y0):
|
102
|
+
# - (-infty, x0] X (-infty, y0] if +open+ is false and
|
103
|
+
# - (-infty, x0) X (-infty, y0) if +open+ is true.
|
104
|
+
#
|
105
|
+
# Let P be the points in this data structure.
|
106
|
+
#
|
107
|
+
# Define p* as
|
95
108
|
#
|
96
109
|
# - (-infty, infty) if Q \intersect P is empty and
|
97
110
|
# - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
98
111
|
#
|
99
112
|
# This method returns p* in O(log n) time and O(1) extra space.
|
100
|
-
def smallest_y_in_sw(x0, y0)
|
101
|
-
flip @max_pst.largest_y_in_nw(x0, -y0)
|
113
|
+
def smallest_y_in_sw(x0, y0, open: false)
|
114
|
+
flip @max_pst.largest_y_in_nw(x0, -y0, open:)
|
102
115
|
end
|
103
116
|
|
104
117
|
########################################
|
@@ -106,28 +119,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
106
119
|
|
107
120
|
# Return the leftmost (min-x) point in P to the southeast of (x0, y0).
|
108
121
|
#
|
109
|
-
# Let Q =
|
110
|
-
#
|
122
|
+
# Let Q be the southeast quadrant defined by the point (x0, y0):
|
123
|
+
# - \[x0, infty) X (-infty, y0] if +open+ is false and
|
124
|
+
# - (x0, infty) X (-infty, y0) if +open+ is true.
|
125
|
+
#
|
126
|
+
# Let P be the points in this data structure.
|
127
|
+
#
|
128
|
+
# Define p* as
|
111
129
|
#
|
112
130
|
# - (infty, -infty) if Q \intersect P is empty and
|
113
131
|
# - the leftmost (min-x) point in Q \intersect P otherwise.
|
114
132
|
#
|
115
133
|
# This method returns p* in O(log n) time and O(1) extra space.
|
116
|
-
def smallest_x_in_se(x0, y0)
|
117
|
-
flip @max_pst.smallest_x_in_ne(x0, -y0)
|
134
|
+
def smallest_x_in_se(x0, y0, open: false)
|
135
|
+
flip @max_pst.smallest_x_in_ne(x0, -y0, open:)
|
118
136
|
end
|
119
137
|
|
120
138
|
# Return the rightmost (max-x) point in P to the southwest of (x0, y0).
|
121
139
|
#
|
122
|
-
# Let Q =
|
123
|
-
#
|
140
|
+
# Let Q be the southwest quadrant defined by the point (x0, y0):
|
141
|
+
# - (-infty, x0] X (-infty, y0] if +open+ is false and
|
142
|
+
# - (-infty, x0) X (-infty, y0) if +open+ is true.
|
143
|
+
#
|
144
|
+
# Let P be the points in this data structure.
|
145
|
+
#
|
146
|
+
# Define p* as
|
124
147
|
#
|
125
148
|
# - (-infty, -infty) if Q \intersect P is empty and
|
126
149
|
# - the rightmost (max-x) point in Q \intersect P otherwise.
|
127
150
|
#
|
128
151
|
# This method returns p* in O(log n) time and O(1) extra space.
|
129
|
-
def largest_x_in_sw(x0, y0)
|
130
|
-
flip @max_pst.largest_x_in_nw(x0, -y0)
|
152
|
+
def largest_x_in_sw(x0, y0, open: false)
|
153
|
+
flip @max_pst.largest_x_in_nw(x0, -y0, open:)
|
131
154
|
end
|
132
155
|
|
133
156
|
########################################
|
@@ -135,15 +158,22 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
135
158
|
|
136
159
|
# Return the lowest point of P in the box bounded by x0, x1, and y0.
|
137
160
|
#
|
138
|
-
# Let Q
|
139
|
-
#
|
161
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
162
|
+
# - \[x0, x1] X (-infty, y0] if +open+ is false and
|
163
|
+
# - (x0, x1) X (-infty, y0) if +open+ is true.
|
164
|
+
#
|
165
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
166
|
+
#
|
167
|
+
# Let P be the set of points in the MinPST.
|
168
|
+
#
|
169
|
+
# Define p* as
|
140
170
|
#
|
141
171
|
# - (infty, infty) if Q \intersect P is empty and
|
142
172
|
# - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
|
143
173
|
#
|
144
174
|
# This method returns p* in O(log n) time and O(1) extra space.
|
145
|
-
def smallest_y_in_3_sided(x0, x1, y0)
|
146
|
-
flip @max_pst.largest_y_in_3_sided(x0, x1, -y0)
|
175
|
+
def smallest_y_in_3_sided(x0, x1, y0, open: false)
|
176
|
+
flip @max_pst.largest_y_in_3_sided(x0, x1, -y0, open:)
|
147
177
|
end
|
148
178
|
|
149
179
|
########################################
|
@@ -151,18 +181,25 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
151
181
|
|
152
182
|
# Enumerate the points of P in the box bounded by x0, x1, and y0.
|
153
183
|
#
|
154
|
-
# Let Q
|
155
|
-
#
|
184
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
185
|
+
# - \[x0, x1] X (-infty, y0] if +open+ is false and
|
186
|
+
# - (x0, x1) X (-infty, y0) if +open+ is true.
|
187
|
+
#
|
188
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
189
|
+
#
|
190
|
+
# Let P be the set of points in the MinPST.
|
191
|
+
#
|
192
|
+
# We find and enumerate all the points in Q \intersect P.
|
156
193
|
#
|
157
194
|
# If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
|
158
195
|
# the intersection.
|
159
196
|
#
|
160
197
|
# This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
|
161
|
-
def enumerate_3_sided(x0, x1, y0)
|
198
|
+
def enumerate_3_sided(x0, x1, y0, open: false)
|
162
199
|
if block_given?
|
163
|
-
@max_pst.enumerate_3_sided(x0, x1, -y0) { |point| yield(flip point) }
|
200
|
+
@max_pst.enumerate_3_sided(x0, x1, -y0, open:) { |point| yield(flip point) }
|
164
201
|
else
|
165
|
-
Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0).map { |pt| flip pt })
|
202
|
+
Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0, open:).map { |pt| flip pt })
|
166
203
|
end
|
167
204
|
end
|
168
205
|
|
@@ -36,10 +36,15 @@ module DataStructuresRMolinari
|
|
36
36
|
# - +:c+ or +:ruby+
|
37
37
|
# - the C version will run faster but for now may be buggier and harder to debug
|
38
38
|
module_function def construct(data, operation, lang)
|
39
|
-
operation.must_be_in [:max, :index_of_max]
|
39
|
+
operation.must_be_in [:max, :index_of_max, :sum]
|
40
40
|
lang.must_be_in [:ruby, :c]
|
41
41
|
|
42
|
-
klass = operation
|
42
|
+
klass = case operation
|
43
|
+
when :max then MaxValSegmentTree
|
44
|
+
when :index_of_max then IndexOfMaxValSegmentTree
|
45
|
+
when :sum then SumSegmentTree
|
46
|
+
else raise ArgumentError, "Unknown operation #{operation}"
|
47
|
+
end
|
43
48
|
template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
|
44
49
|
|
45
50
|
klass.new(template, data)
|
@@ -107,6 +112,33 @@ module DataStructuresRMolinari
|
|
107
112
|
end
|
108
113
|
end
|
109
114
|
|
115
|
+
class SumSegmentTree
|
116
|
+
extend Forwardable
|
117
|
+
|
118
|
+
# Tell the tree that the value at idx has changed
|
119
|
+
def_delegator :@structure, :update_at
|
120
|
+
|
121
|
+
# @param (see MaxValSegmentTree#initialize)
|
122
|
+
def initialize(template_klass, data)
|
123
|
+
data.must_be_a Enumerable
|
124
|
+
|
125
|
+
@structure = template_klass.new(
|
126
|
+
combine: ->(a, b) { a + b },
|
127
|
+
single_cell_array_val: ->(i) { data[i] },
|
128
|
+
size: data.size,
|
129
|
+
identity: 0
|
130
|
+
)
|
131
|
+
end
|
132
|
+
|
133
|
+
# The sum of the values in A(i..j)
|
134
|
+
#
|
135
|
+
# The arguments must be integers in 0...(A.size)
|
136
|
+
# @return the sum of the values in A(i..j) or 0 if i > j.
|
137
|
+
def sum_on(i, j)
|
138
|
+
@structure.query_on(i, j)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
110
142
|
# The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
|
111
143
|
#
|
112
144
|
# See SegmentTreeTemplate for more information.
|
@@ -71,6 +71,7 @@ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
|
|
71
71
|
# Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
|
72
72
|
# construction.
|
73
73
|
def update_at(idx)
|
74
|
+
raise DataError, "Bad update index #{idx} (size = #{@size})" unless (0...@size).cover?(idx)
|
74
75
|
|
75
76
|
update_val_at(idx, root, 0, @size - 1)
|
76
77
|
end
|
@@ -69,12 +69,9 @@ module Shared
|
|
69
69
|
# duplication. When nil we don't call anything and just use the elements themselves.
|
70
70
|
def contains_duplicates?(enum, by: nil)
|
71
71
|
seen = Set.new
|
72
|
-
enum.
|
72
|
+
enum.any? do |v|
|
73
73
|
v = v.send(by) if by
|
74
|
-
|
75
|
-
|
76
|
-
seen << v
|
74
|
+
!seen.add?(v)
|
77
75
|
end
|
78
|
-
false
|
79
76
|
end
|
80
77
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_structures_rmolinari
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rory Molinari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: must_be
|
@@ -113,7 +113,7 @@ require_paths:
|
|
113
113
|
- lib
|
114
114
|
required_ruby_version: !ruby/object:Gem::Requirement
|
115
115
|
requirements:
|
116
|
-
- - "
|
116
|
+
- - ">="
|
117
117
|
- !ruby/object:Gem::Version
|
118
118
|
version: 3.1.3
|
119
119
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
124
|
requirements: []
|
125
|
-
rubygems_version: 3.4.
|
125
|
+
rubygems_version: 3.4.10
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: Several miscellaneous data structures I have implemented to learn about them.
|