data_structures_rmolinari 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -1
- data/README.md +3 -0
- data/ext/extconf_shared.rb +19 -0
- data/lib/data_structures_rmolinari/algorithms.rb +5 -11
- data/lib/data_structures_rmolinari/heap.rb +4 -3
- data/lib/data_structures_rmolinari/max_priority_search_tree.rb +223 -30
- data/lib/data_structures_rmolinari/min_priority_search_tree.rb +64 -27
- data/lib/data_structures_rmolinari/segment_tree.rb +34 -2
- data/lib/data_structures_rmolinari/segment_tree_template.rb +1 -0
- data/lib/data_structures_rmolinari/shared.rb +2 -5
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b194edc412e6bce073e73f874c4a392dd821f333e4ba9b07908af397819cdffa
|
4
|
+
data.tar.gz: 5dabb2d689cb0b0ebacf2b804e6c27ece1e3e7f7db604f5722d1657915bf0bda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf36912f4242d7a91e8227464993ac634441bdcff30d7b6ec5a10149cfdc0a14a132fa5c319f2edafdc9d9ab6b11ee0af2354fdd45f2638d0099ccdb84eff436
|
7
|
+
data.tar.gz: a1ce15decbc869b9f26902d391f27967778032f54d2543b914d93bbe15ab1ceb7aae5c576cc93f99c80aa8f62d8f23c747591a6beb520d7c756facb125a5a72d
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,23 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
-
## [0.5.
|
5
|
+
## [0.5.4] 2023-12-12
|
6
|
+
|
7
|
+
(Unfortunately this note was added long after the changes were made and my memory of the changes is poor.)
|
8
|
+
|
9
|
+
- SegmentTree
|
10
|
+
- Sum version is provided
|
11
|
+
- PrioritySearchTree
|
12
|
+
- Open regions
|
13
|
+
|
14
|
+
- Some bug fixes
|
15
|
+
- Some refactoring of test cases
|
16
|
+
|
17
|
+
## [0.5.1] - [0.5.3]
|
18
|
+
|
19
|
+
- Releases to fix some bad gemspec data.
|
20
|
+
|
21
|
+
## [0.5.0] 2023-02-03
|
6
22
|
|
7
23
|
- SegmentTree
|
8
24
|
- Reorganize the code into a SegmentTree submodule.
|
data/README.md
CHANGED
@@ -106,6 +106,9 @@ There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0,
|
|
106
106
|
(These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
|
107
107
|
[[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
|
108
108
|
|
109
|
+
Each method also has a named parameter `open:` that makes the search region an open set. For example, if we call `smallest_x_in_ne`
|
110
|
+
with `open: true` then we consider points satisfying x > x0 and y > y0. The default value for this parameter is always `false`.
|
111
|
+
|
109
112
|
The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
|
110
113
|
the number of points actually enumerated.
|
111
114
|
|
@@ -0,0 +1,19 @@
|
|
1
|
+
def generate_makefile(name)
|
2
|
+
extension_name = "c_#{name}"
|
3
|
+
source_name = "#{name}.c"
|
4
|
+
|
5
|
+
abort 'missing malloc()' unless have_func "malloc"
|
6
|
+
abort 'missing realloc()' unless have_func "realloc"
|
7
|
+
|
8
|
+
if try_cflags('-O3')
|
9
|
+
append_cflags('-O3')
|
10
|
+
end
|
11
|
+
|
12
|
+
dir_config(extension_name)
|
13
|
+
|
14
|
+
$srcs = [source_name, "../shared.c"]
|
15
|
+
$INCFLAGS << " -I$(srcdir)/.."
|
16
|
+
$VPATH << "$(srcdir)/.."
|
17
|
+
|
18
|
+
create_makefile("data_structures_rmolinari/#{extension_name}")
|
19
|
+
end
|
@@ -32,16 +32,10 @@ module DataStructuresRMolinari::Algorithms
|
|
32
32
|
x_max = sorted_points.last.x
|
33
33
|
y_min, y_max = sorted_points.map(&:y).minmax
|
34
34
|
|
35
|
-
# Half of the smallest non-zero gap between x values. This is needed below
|
36
|
-
epsilon = INFINITY
|
37
|
-
|
38
35
|
# Enumerate type 1
|
39
36
|
sorted_points.each_cons(2) do |pt1, pt2|
|
40
37
|
next if pt1.x == pt2.x
|
41
38
|
|
42
|
-
d = (pt2.x.to_f - pt1.x) / 2
|
43
|
-
epsilon = d if d < epsilon
|
44
|
-
|
45
39
|
yield [pt1.x, pt2.x, y_min, y_max]
|
46
40
|
end
|
47
41
|
|
@@ -54,9 +48,9 @@ module DataStructuresRMolinari::Algorithms
|
|
54
48
|
next if pt.y == y_max # 0 area
|
55
49
|
next if pt.y == y_min # type 1
|
56
50
|
|
57
|
-
#
|
58
|
-
left_bound = max_pst.largest_x_in_nw(
|
59
|
-
right_bound = max_pst.smallest_x_in_ne(pt.x
|
51
|
+
# Open region means we don't just get pt back again. The De et al. paper is rather vague.
|
52
|
+
left_bound = max_pst.largest_x_in_nw(pt.x, pt.y, open: true)
|
53
|
+
right_bound = max_pst.smallest_x_in_ne(pt.x, pt.y, open: true)
|
60
54
|
|
61
55
|
left = left_bound.x.infinite? ? x_min : left_bound.x
|
62
56
|
right = right_bound.x.infinite? ? x_max : right_bound.x
|
@@ -74,7 +68,7 @@ module DataStructuresRMolinari::Algorithms
|
|
74
68
|
#
|
75
69
|
# largest_y_in_3_sided(l, r, y_min)
|
76
70
|
#
|
77
|
-
# That call considers the points in the closed region l <= x <= r and y >= y_min, so we use
|
71
|
+
# That call considers the points in the closed region l <= x <= r and y >= y_min, so we use an open search region instead.
|
78
72
|
until max_pst.empty?
|
79
73
|
top_pt = max_pst.delete_top!
|
80
74
|
top = top_pt.y
|
@@ -85,7 +79,7 @@ module DataStructuresRMolinari::Algorithms
|
|
85
79
|
r = x_max
|
86
80
|
|
87
81
|
loop do
|
88
|
-
next_pt = max_pst.largest_y_in_3_sided(l
|
82
|
+
next_pt = max_pst.largest_y_in_3_sided(l, r, y_min, open: true)
|
89
83
|
|
90
84
|
bottom = next_pt.y.infinite? ? y_min : next_pt.y
|
91
85
|
yield [l, r, bottom, top]
|
@@ -78,8 +78,9 @@ class DataStructuresRMolinari::Heap
|
|
78
78
|
# Insert a new element into the heap with the given priority.
|
79
79
|
# @param value the item to be inserted.
|
80
80
|
# - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
|
81
|
-
# @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
|
82
|
-
|
81
|
+
# @param priority (optional) the priority to use for the new item. The values used as priorities must be totally ordered via +<=>+.
|
82
|
+
# If omitted we use the inserted value as its own priority.
|
83
|
+
def insert(value, priority = value)
|
83
84
|
raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
|
84
85
|
|
85
86
|
@size += 1
|
@@ -103,7 +104,7 @@ class DataStructuresRMolinari::Heap
|
|
103
104
|
# Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
|
104
105
|
# @return (see #top)
|
105
106
|
def pop
|
106
|
-
result = top
|
107
|
+
result = top # raises if empty
|
107
108
|
assign(@data[@size], root)
|
108
109
|
|
109
110
|
@data[@size] = nil
|
@@ -30,6 +30,10 @@ require_relative 'shared'
|
|
30
30
|
#
|
31
31
|
# The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
|
32
32
|
#
|
33
|
+
# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
|
34
|
+
# +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y > y0. The default value for this parameter
|
35
|
+
# is always +false+. See below for limitations in this functionality.
|
36
|
+
#
|
33
37
|
# If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
|
34
38
|
#
|
35
39
|
# - +delete_top!+: remove the top (max-y) element of the tree and return it.
|
@@ -39,14 +43,123 @@ require_relative 'shared'
|
|
39
43
|
# In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
|
40
44
|
# but it hasn't been written yet. See issue #9.
|
41
45
|
#
|
42
|
-
# There is a related data structure called
|
46
|
+
# There is a related data structure called a Min-max priority search tree so we have called this a "Max priority search tree", or
|
43
47
|
# MaxPST.
|
44
48
|
#
|
49
|
+
# ## Open regions: limitations
|
50
|
+
#
|
51
|
+
# Calls involving open regions - using the +open:+ argument - are implemented internally using closed regions in which the
|
52
|
+
# boundaries have been "nudged" by a tiny amount so as to exclude points on the boundary. Since there are only finitely many points
|
53
|
+
# in the PST there are no limit points, and the open region given by x > x0 and y > y0 contains the same PST members as a closed
|
54
|
+
# region x >= x0 + e and y >= y0 + e for small enough values of e.
|
55
|
+
#
|
56
|
+
# But it is hard to determine e robustly. Indeed, assume for the moment that all x- and y-values are floating-point, i.e., IEEE754
|
57
|
+
# double-precision. We can easily determine a value for e: the smallest difference between any two distinct x-values or any two
|
58
|
+
# distinct y-values. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive
|
59
|
+
# x-values
|
60
|
+
#
|
61
|
+
# 0, 5e-324, 1, and 2.
|
62
|
+
#
|
63
|
+
# (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
|
64
|
+
# values are represented, 1 + e = 1.0. Any query on an open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
|
65
|
+
# and we may get the wrong result.
|
66
|
+
#
|
67
|
+
# The solution here is to replace (x0, y0) with (x0.next_float, y0.next_float) rather than (x0 + e, y0 + e). +Float::next_float+ is
|
68
|
+
# an implementation of the IEEE754 operation +nextafter+ which gives, for a floating point value z, the smallest floating point
|
69
|
+
# value larger than z. If our PST contains only points with finite, floating point coordinates, then this approach implements open
|
70
|
+
# search regions correctly. This is what the implementation currently does.
|
71
|
+
#
|
72
|
+
# However, when coordinates are not all Floats there are cases when this approach will fail. Consider the case in which we have the
|
73
|
+
# following consecutive x-values:
|
74
|
+
#
|
75
|
+
# 0, 1e-324, 2e-324, 1.
|
76
|
+
#
|
77
|
+
# (Here 1e-324 and 2e-324 are the Ruby values +Rational(1, 10**324)+ and +Rational(2, 10**324)+.) Then, given an argument x0 =
|
78
|
+
# 1e-324, +x0.to_f == 0.0+ and so +x0.to_f.next_float == 5e-324+ and the region we use internally for our query incorrectly excludes
|
79
|
+
# the point with x value 2e-324. This is a bug in the code.
|
80
|
+
#
|
81
|
+
# There are also issues with numeric values (Integer or Rational) that are larger than the maximum floating point value,
|
82
|
+
# approximately 1.8e308. For such values z, +z.to_f == Float::INFINITY+, and we will incorrectly exclude any larger-but-finite
|
83
|
+
# coordinates from the search region.
|
84
|
+
#
|
85
|
+
# Yet more issues arise when the coordinates of points in the PST aren't numeric at all, but are some other sort of comparable
|
86
|
+
# objects, such as arrays.
|
87
|
+
#
|
88
|
+
# So, we may say that queries on open regions will work as expected if either
|
89
|
+
# - all coordinates of the points in the PST are finite Ruby Floats, or
|
90
|
+
# - all coordinates of the points are finite Numeric values and there is no pair of x-values s < t (or pair of y-values) such
|
91
|
+
# that +s.to_f.next_float > t+.
|
92
|
+
#
|
93
|
+
# Otherwise, use this functionality at your own risk, and not at all with coordinates that do not respond reasonably to +to_f+.
|
94
|
+
#
|
45
95
|
# References:
|
46
96
|
# * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985.
|
47
|
-
# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
|
48
|
-
# Geometry, 2011
|
97
|
+
# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational Geometry, 2011
|
49
98
|
class DataStructuresRMolinari::MaxPrioritySearchTree
|
99
|
+
# IMPLEMENTATION NOTES
|
100
|
+
#
|
101
|
+
# Open regions
|
102
|
+
#
|
103
|
+
# The search methods each have an argument +open:+ that changes the search region from closed (x >= x0) to open (x > x0). I had
|
104
|
+
# initially intended to implement this by varying the internals of the search code. But this turned out to be error-prone because
|
105
|
+
# the code is written for closed regions. When deciding which children to take in the next level of the tree, say, we assume the
|
106
|
+
# search space is closed, sometimes in a way that means we won't find the optimal point when the search region is open. Changing
|
107
|
+
# the logic turned out to be finicky and buggy.
|
108
|
+
#
|
109
|
+
# It is much easier and safer to replace a search request on, say (x0, y0, open) with (x0 + e, y0 + e, closed) where e[psilon] is
|
110
|
+
# small enough that we don't exclude any points other than those on the boundary of the closed region. Then we can just call the
|
111
|
+
# existing search code as-is. This is what the code did at first. We calculated e as the smallest difference between any two
|
112
|
+
# distinct x-values or distinct y-values.
|
113
|
+
#
|
114
|
+
# But this approach is not robust. Assume for the moment that all x- and y-values are floating-point. We can easily determine the
|
115
|
+
# value e. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive x-values
|
116
|
+
#
|
117
|
+
# 0, 5e-324, 1, and 2.
|
118
|
+
#
|
119
|
+
# (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
|
120
|
+
# values are represented, 1 + e = 1.0. Any query on an open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
|
121
|
+
# and we may get the wrong result.
|
122
|
+
#
|
123
|
+
# I see the following possible approaches.
|
124
|
+
#
|
125
|
+
# 1. Rewrite the code to do open regions "properly"
|
126
|
+
# Pro:
|
127
|
+
# - we don't need to worry about numerical issues.
|
128
|
+
# Con:
|
129
|
+
# - too complicated and error-prone.
|
130
|
+
#
|
131
|
+
# 2. Instead of calculating e like this, replace each bounding value x with x.next_float or x.prev_float as required.
|
132
|
+
# Note that #next_float gives the next-largest value representable as a floating point value.
|
133
|
+
# Pro:
|
134
|
+
# - we don't need to worry about the scaling issues in type Float
|
135
|
+
# - simple and supported by the Ruby libraries (and by the C standard library if we decide to implement as a C extension)
|
136
|
+
# Con:
|
137
|
+
# - [minor] will fail with Float::INFINITY.
|
138
|
+
# - this is an unlikely edge case that could be handled directly or simply documented away.
|
139
|
+
# - won't work if the x0 value is a Numeric outside of Float range, roughly [-1.798e308, 1.798e308]
|
140
|
+
# - For example, (10**400).to_f.next_float == Infinity
|
141
|
+
# - We could warn about this case in documentation
|
142
|
+
# - For numeric values x in the Float range we would need to check that x.to_f.next_float > x, but I suspect that this is
|
143
|
+
# guaranteed.
|
144
|
+
# - won't work with comparable but non-numeric values, like arrays, or some sort of user-defined type
|
145
|
+
# - We would simply have to document that this case is not supported (or just throw an exception on +#to_f+)
|
146
|
+
#
|
147
|
+
# 3. Handle numeric values on a case-by-case basis. So for numeric values x in the float range we use x.to_f.next_float while for
|
148
|
+
# other values - like BigDecimal - do something different that depends on the next value in the data set with an x-value
|
149
|
+
# greater than x.
|
150
|
+
# Pro:
|
151
|
+
# - more cases are handled
|
152
|
+
# Con:
|
153
|
+
# - complicated and perhaps non-performant in the general case
|
154
|
+
# - doesn't handle non-numeric cases (just like idea 2)
|
155
|
+
# - possibly error-prone in corner cases.
|
156
|
+
#
|
157
|
+
# Idea: maintain @x_chain and @y_chain hashes mapping each distinct x/y value to the next-largest and next-smallest such
|
158
|
+
# value. Then just use that. Lookup is fast. Downside: O(n) extra memory.
|
159
|
+
#
|
160
|
+
# For now approach 2 looks best. It doesn't cover all cases, but covers cases most likely in practice - the Float range is large -
|
161
|
+
# and other cases can be documented away in a clean way.
|
162
|
+
|
50
163
|
include Shared
|
51
164
|
include BinaryTreeArithmetic
|
52
165
|
|
@@ -83,35 +196,54 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
83
196
|
|
84
197
|
# Return the highest point in P to the "northeast" of (x0, y0).
|
85
198
|
#
|
86
|
-
# Let Q =
|
87
|
-
#
|
199
|
+
# Let Q be the northeast quadrant defined by the point (x0, y0):
|
200
|
+
# - \[x0, infty) X [y0, infty) if +open+ is false and
|
201
|
+
# - (x0, infty) X (y0, infty) if +open+ is true.
|
202
|
+
#
|
203
|
+
# Let P be the points in this data structure.
|
204
|
+
#
|
205
|
+
# Define p* as
|
88
206
|
#
|
89
207
|
# - (infty, -infty) if Q \intersect P is empty and
|
90
208
|
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
91
209
|
#
|
92
210
|
# This method returns p* in O(log n) time and O(1) extra space.
|
93
|
-
def largest_y_in_ne(x0, y0)
|
94
|
-
|
211
|
+
def largest_y_in_ne(x0, y0, open: false)
|
212
|
+
if open
|
213
|
+
largest_y_in_quadrant(slightly_bigger(x0), slightly_bigger(y0), :ne)
|
214
|
+
else
|
215
|
+
largest_y_in_quadrant(x0, y0, :ne)
|
216
|
+
end
|
95
217
|
end
|
96
218
|
|
97
219
|
# Return the highest point in P to the "northwest" of (x0, y0).
|
98
220
|
#
|
99
|
-
# Let Q =
|
100
|
-
#
|
221
|
+
# Let Q be the northwest quadrant defined by the point (x0, y0):
|
222
|
+
# - (-infty, x0] X [y0, infty) if +open+ is false and
|
223
|
+
# - (-infty, x0) X (y0, infty) if +open+ is true.
|
224
|
+
#
|
225
|
+
# Let P be the points in this data structure.
|
226
|
+
#
|
227
|
+
# Define p* as
|
101
228
|
#
|
102
229
|
# - (-infty, -infty) if Q \intersect P is empty and
|
103
230
|
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
104
231
|
#
|
105
232
|
# This method returns p* in O(log n) time and O(1) extra space.
|
106
|
-
def largest_y_in_nw(x0, y0)
|
107
|
-
|
233
|
+
def largest_y_in_nw(x0, y0, open: false)
|
234
|
+
if open
|
235
|
+
largest_y_in_quadrant(slightly_smaller(x0), slightly_bigger(y0), :nw)
|
236
|
+
else
|
237
|
+
largest_y_in_quadrant(x0, y0, :nw)
|
238
|
+
end
|
108
239
|
end
|
109
240
|
|
110
|
-
# The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both
|
111
|
-
# largest_y_in_nw
|
241
|
+
# The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both
|
242
|
+
# largest_y_in_ne and largest_y_in_nw
|
112
243
|
#
|
113
|
-
# Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster
|
114
|
-
# general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
|
244
|
+
# Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infty, y0) so we don't really need this. But it's a bit faster
|
245
|
+
# than the general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
|
246
|
+
# structure.
|
115
247
|
#
|
116
248
|
# From the paper:
|
117
249
|
#
|
@@ -137,6 +269,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
137
269
|
sufficient_x = ->(x) { x <= x0 }
|
138
270
|
end
|
139
271
|
|
272
|
+
return best if empty?
|
273
|
+
|
140
274
|
# x == x0 or is not sufficient. This test sometimes excludes the other child of a node from consideration.
|
141
275
|
exclusionary_x = ->(x) { x == x0 || !sufficient_x.call(x) }
|
142
276
|
|
@@ -200,28 +334,46 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
200
334
|
|
201
335
|
# Return the leftmost (min-x) point in P to the northeast of (x0, y0).
|
202
336
|
#
|
203
|
-
# Let Q =
|
204
|
-
#
|
337
|
+
# Let Q be the northeast quadrant defined by the point (x0, y0):
|
338
|
+
# - [x0, infty) X [y0, infty) if +open+ is false and
|
339
|
+
# - (x0, infty) X (y0, infty) if +open+ is true.
|
340
|
+
#
|
341
|
+
# Let P be the points in this data structure.
|
342
|
+
#
|
343
|
+
# Define p* as
|
205
344
|
#
|
206
345
|
# - (infty, infty) if Q \intersect P is empty and
|
207
346
|
# - the leftmost (min-x) point in Q \intersect P otherwise.
|
208
347
|
#
|
209
348
|
# This method returns p* in O(log n) time and O(1) extra space.
|
210
|
-
def smallest_x_in_ne(x0, y0)
|
211
|
-
|
349
|
+
def smallest_x_in_ne(x0, y0, open: false)
|
350
|
+
if open
|
351
|
+
extremal_in_x_dimension(slightly_bigger(x0), slightly_bigger(y0), :ne)
|
352
|
+
else
|
353
|
+
extremal_in_x_dimension(x0, y0, :ne)
|
354
|
+
end
|
212
355
|
end
|
213
356
|
|
214
357
|
# Return the rightmost (max-x) point in P to the northwest of (x0, y0).
|
215
358
|
#
|
216
|
-
# Let Q =
|
217
|
-
#
|
359
|
+
# Let Q be the northwest quadrant defined by the point (x0, y0):
|
360
|
+
# - (-infty, x0] X [y0, infty) if +open+ is false and
|
361
|
+
# - (-infty, x0) X (y0, infty) if +open+ is true.
|
362
|
+
#
|
363
|
+
# Let P be the points in this data structure.
|
364
|
+
#
|
365
|
+
# Define p* as
|
218
366
|
#
|
219
367
|
# - (-infty, infty) if Q \intersect P is empty and
|
220
368
|
# - the rightmost (max-x) point in Q \intersect P otherwise.
|
221
369
|
#
|
222
370
|
# This method returns p* in O(log n) time and O(1) extra space.
|
223
|
-
def largest_x_in_nw(x0, y0)
|
224
|
-
|
371
|
+
def largest_x_in_nw(x0, y0, open: false)
|
372
|
+
if open
|
373
|
+
extremal_in_x_dimension(slightly_smaller(x0), slightly_bigger(y0), :nw)
|
374
|
+
else
|
375
|
+
extremal_in_x_dimension(x0, y0, :nw)
|
376
|
+
end
|
225
377
|
end
|
226
378
|
|
227
379
|
# A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
|
@@ -369,14 +521,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
369
521
|
|
370
522
|
# Return the highest point of P in the box bounded by x0, x1, and y0.
|
371
523
|
#
|
372
|
-
# Let Q
|
373
|
-
#
|
524
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
525
|
+
# - \[x0, x1] X [y0, infty) if +open+ is false and
|
526
|
+
# - (x0, x1) X (y0, infty) if +open+ is true.
|
527
|
+
#
|
528
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
529
|
+
#
|
530
|
+
# Let P be the set of points in the MaxPST.
|
531
|
+
#
|
532
|
+
# Define p* as
|
374
533
|
#
|
375
534
|
# - (infty, -infty) if Q \intersect P is empty and
|
376
535
|
# - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
|
377
536
|
#
|
378
537
|
# This method returns p* in O(log n) time and O(1) extra space.
|
379
|
-
def largest_y_in_3_sided(x0, x1, y0)
|
538
|
+
def largest_y_in_3_sided(x0, x1, y0, open: false)
|
539
|
+
if open
|
540
|
+
x0 = slightly_bigger(x0)
|
541
|
+
x1 = slightly_smaller(x1)
|
542
|
+
y0 = slightly_bigger(y0)
|
543
|
+
end
|
380
544
|
# From the paper:
|
381
545
|
#
|
382
546
|
# The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P is not \empty,
|
@@ -572,14 +736,27 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
572
736
|
|
573
737
|
# Enumerate the points of P in the box bounded by x0, x1, and y0.
|
574
738
|
#
|
575
|
-
# Let Q
|
576
|
-
#
|
739
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
740
|
+
# - \[x0, x1] X [y0, infty) if +open+ is false and
|
741
|
+
# - (x0, x1) X (y0, infty) if +open+ is true.
|
742
|
+
#
|
743
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
744
|
+
#
|
745
|
+
# Let P be the set of points in the MaxPST.
|
746
|
+
#
|
747
|
+
# We find and enumerate all the points in Q \intersect P.
|
577
748
|
#
|
578
749
|
# If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
|
579
750
|
# the intersection.
|
580
751
|
#
|
581
752
|
# This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
|
582
|
-
def enumerate_3_sided(x0, x1, y0)
|
753
|
+
def enumerate_3_sided(x0, x1, y0, open: false)
|
754
|
+
if open
|
755
|
+
x0 = slightly_bigger(x0)
|
756
|
+
x1 = slightly_smaller(x1)
|
757
|
+
y0 = slightly_bigger(y0)
|
758
|
+
end
|
759
|
+
|
583
760
|
# From the paper
|
584
761
|
#
|
585
762
|
# Given three real numbers x0, x1, and y0 define the three sided range Q = [x0, x1] X [y0, infty). Algorithm
|
@@ -1143,7 +1320,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
1143
1320
|
|
1144
1321
|
# We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
|
1145
1322
|
# defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
|
1146
|
-
# worth the trouble of both a bespoke heapsort the special sorting algorithm of Katajainen and Pasanen.
|
1323
|
+
# worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
|
1147
1324
|
|
1148
1325
|
# Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
|
1149
1326
|
# and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
|
@@ -1216,6 +1393,22 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
|
|
1216
1393
|
@data[l..r] = @data[l..r].sort_by(&:x)
|
1217
1394
|
end
|
1218
1395
|
|
1396
|
+
# The smallest floating point number larger than x
|
1397
|
+
private def slightly_bigger(x)
|
1398
|
+
x_f = x.to_f
|
1399
|
+
raise "#{x} out of Float range" if x_f.infinite?
|
1400
|
+
|
1401
|
+
x_f.next_float
|
1402
|
+
end
|
1403
|
+
|
1404
|
+
# The largest floating point number smaller than x
|
1405
|
+
private def slightly_smaller(x)
|
1406
|
+
x_f = x.to_f
|
1407
|
+
raise "#{x} out of Float range" if x_f.infinite?
|
1408
|
+
|
1409
|
+
x_f.prev_float
|
1410
|
+
end
|
1411
|
+
|
1219
1412
|
########################################
|
1220
1413
|
# Debugging support
|
1221
1414
|
#
|
@@ -22,8 +22,7 @@ require_relative 'shared'
|
|
22
22
|
# et al. But we don't do that, as we create a separate array of Points.
|
23
23
|
# - Whereas the implementation of MaxPST means that client code gets the same (x, y) objects back in results as it passed into the
|
24
24
|
# constructor, that's not the case here.
|
25
|
-
# - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return
|
26
|
-
# (different) instances of +Point+ in response to queries.
|
25
|
+
# - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return (different) instances of +Point+ in response to queries.
|
27
26
|
# - client code is unlikely to care, but be aware of this, just in case.
|
28
27
|
#
|
29
28
|
# Given a set of n points, we can answer the following questions quickly:
|
@@ -37,6 +36,10 @@ require_relative 'shared'
|
|
37
36
|
#
|
38
37
|
# (Here, "leftmost/rightmost" means "minimal/maximal x", and "lowest" means "minimal y".)
|
39
38
|
#
|
39
|
+
# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
|
40
|
+
# +smallest_x_in_se+ with +open: true+ then we consider points satisfying x > x0 and y < y0. The default value for this parameter
|
41
|
+
# is always +false+. See the documentation of MaxPrioritySearchTree for limitations of this support.
|
42
|
+
#
|
40
43
|
# The first 5 operations take O(log n) time and O(1) extra space.
|
41
44
|
#
|
42
45
|
# The final operation (enumerate) takes O(m + log n) time and O(1) extra space, where m is the number of points that are enumerated.
|
@@ -77,28 +80,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
77
80
|
|
78
81
|
# Return the "lowest" point in P to the "southeast" of (x0, y0).
|
79
82
|
#
|
80
|
-
# Let Q =
|
81
|
-
#
|
83
|
+
# Let Q be the southeast quadrant defined by the point (x0, y0):
|
84
|
+
# - \[x0, infty) X (-infty, y0] if +open+ is false and
|
85
|
+
# - (x0, infty) X (-infty, y0) if +open+ is true.
|
86
|
+
#
|
87
|
+
# Let P be the points in this data structure.
|
88
|
+
#
|
89
|
+
# Define p* as
|
82
90
|
#
|
83
91
|
# - (infty, infty) if Q \intersect P is empty and
|
84
92
|
# - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
85
93
|
#
|
86
94
|
# This method returns p* in O(log n) time and O(1) extra space.
|
87
|
-
def smallest_y_in_se(x0, y0)
|
88
|
-
flip @max_pst.largest_y_in_ne(x0, -y0)
|
95
|
+
def smallest_y_in_se(x0, y0, open: false)
|
96
|
+
flip @max_pst.largest_y_in_ne(x0, -y0, open:)
|
89
97
|
end
|
90
98
|
|
91
99
|
# Return the "lowest" point in P to the "southwest" of (x0, y0).
|
92
100
|
#
|
93
|
-
# Let Q =
|
94
|
-
#
|
101
|
+
# Let Q be the southwest quadrant defined by the point (x0, y0):
|
102
|
+
# - (-infty, x0] X (-infty, y0] if +open+ is false and
|
103
|
+
# - (-infty, x0) X (-infty, y0) if +open+ is true.
|
104
|
+
#
|
105
|
+
# Let P be the points in this data structure.
|
106
|
+
#
|
107
|
+
# Define p* as
|
95
108
|
#
|
96
109
|
# - (-infty, infty) if Q \intersect P is empty and
|
97
110
|
# - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
|
98
111
|
#
|
99
112
|
# This method returns p* in O(log n) time and O(1) extra space.
|
100
|
-
def smallest_y_in_sw(x0, y0)
|
101
|
-
flip @max_pst.largest_y_in_nw(x0, -y0)
|
113
|
+
def smallest_y_in_sw(x0, y0, open: false)
|
114
|
+
flip @max_pst.largest_y_in_nw(x0, -y0, open:)
|
102
115
|
end
|
103
116
|
|
104
117
|
########################################
|
@@ -106,28 +119,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
106
119
|
|
107
120
|
# Return the leftmost (min-x) point in P to the southeast of (x0, y0).
|
108
121
|
#
|
109
|
-
# Let Q =
|
110
|
-
#
|
122
|
+
# Let Q be the southeast quadrant defined by the point (x0, y0):
|
123
|
+
# - \[x0, infty) X (-infty, y0] if +open+ is false and
|
124
|
+
# - (x0, infty) X (-infty, y0) if +open+ is true.
|
125
|
+
#
|
126
|
+
# Let P be the points in this data structure.
|
127
|
+
#
|
128
|
+
# Define p* as
|
111
129
|
#
|
112
130
|
# - (infty, -infty) if Q \intersect P is empty and
|
113
131
|
# - the leftmost (min-x) point in Q \intersect P otherwise.
|
114
132
|
#
|
115
133
|
# This method returns p* in O(log n) time and O(1) extra space.
|
116
|
-
def smallest_x_in_se(x0, y0)
|
117
|
-
flip @max_pst.smallest_x_in_ne(x0, -y0)
|
134
|
+
def smallest_x_in_se(x0, y0, open: false)
|
135
|
+
flip @max_pst.smallest_x_in_ne(x0, -y0, open:)
|
118
136
|
end
|
119
137
|
|
120
138
|
# Return the rightmost (max-x) point in P to the southwest of (x0, y0).
|
121
139
|
#
|
122
|
-
# Let Q =
|
123
|
-
#
|
140
|
+
# Let Q be the southwest quadrant defined by the point (x0, y0):
|
141
|
+
# - (-infty, x0] X (-infty, y0] if +open+ is false and
|
142
|
+
# - (-infty, x0) X (-infty, y0) if +open+ is true.
|
143
|
+
#
|
144
|
+
# Let P be the points in this data structure.
|
145
|
+
#
|
146
|
+
# Define p* as
|
124
147
|
#
|
125
148
|
# - (-infty, -infty) if Q \intersect P is empty and
|
126
149
|
# - the rightmost (max-x) point in Q \intersect P otherwise.
|
127
150
|
#
|
128
151
|
# This method returns p* in O(log n) time and O(1) extra space.
|
129
|
-
def largest_x_in_sw(x0, y0)
|
130
|
-
flip @max_pst.largest_x_in_nw(x0, -y0)
|
152
|
+
def largest_x_in_sw(x0, y0, open: false)
|
153
|
+
flip @max_pst.largest_x_in_nw(x0, -y0, open:)
|
131
154
|
end
|
132
155
|
|
133
156
|
########################################
|
@@ -135,15 +158,22 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
135
158
|
|
136
159
|
# Return the lowest point of P in the box bounded by x0, x1, and y0.
|
137
160
|
#
|
138
|
-
# Let Q
|
139
|
-
#
|
161
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
162
|
+
# - \[x0, x1] X (-infty, y0] if +open+ is false and
|
163
|
+
# - (x0, x1) X (-infty, y0) if +open+ is true.
|
164
|
+
#
|
165
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
166
|
+
#
|
167
|
+
# Let P be the set of points in the MinPST.
|
168
|
+
#
|
169
|
+
# Define p* as
|
140
170
|
#
|
141
171
|
# - (infty, infty) if Q \intersect P is empty and
|
142
172
|
# - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
|
143
173
|
#
|
144
174
|
# This method returns p* in O(log n) time and O(1) extra space.
|
145
|
-
def smallest_y_in_3_sided(x0, x1, y0)
|
146
|
-
flip @max_pst.largest_y_in_3_sided(x0, x1, -y0)
|
175
|
+
def smallest_y_in_3_sided(x0, x1, y0, open: false)
|
176
|
+
flip @max_pst.largest_y_in_3_sided(x0, x1, -y0, open:)
|
147
177
|
end
|
148
178
|
|
149
179
|
########################################
|
@@ -151,18 +181,25 @@ class DataStructuresRMolinari::MinPrioritySearchTree
|
|
151
181
|
|
152
182
|
# Enumerate the points of P in the box bounded by x0, x1, and y0.
|
153
183
|
#
|
154
|
-
# Let Q
|
155
|
-
#
|
184
|
+
# Let Q be the "three-sided" box bounded by x0, x1, and y0:
|
185
|
+
# - \[x0, x1] X (-infty, y0] if +open+ is false and
|
186
|
+
# - (x0, x1) X (-infty, y0) if +open+ is true.
|
187
|
+
#
|
188
|
+
# Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
|
189
|
+
#
|
190
|
+
# Let P be the set of points in the MinPST.
|
191
|
+
#
|
192
|
+
# We find and enumerate all the points in Q \intersect P.
|
156
193
|
#
|
157
194
|
# If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
|
158
195
|
# the intersection.
|
159
196
|
#
|
160
197
|
# This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
|
161
|
-
def enumerate_3_sided(x0, x1, y0)
|
198
|
+
def enumerate_3_sided(x0, x1, y0, open: false)
|
162
199
|
if block_given?
|
163
|
-
@max_pst.enumerate_3_sided(x0, x1, -y0) { |point| yield(flip point) }
|
200
|
+
@max_pst.enumerate_3_sided(x0, x1, -y0, open:) { |point| yield(flip point) }
|
164
201
|
else
|
165
|
-
Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0).map { |pt| flip pt })
|
202
|
+
Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0, open:).map { |pt| flip pt })
|
166
203
|
end
|
167
204
|
end
|
168
205
|
|
@@ -36,10 +36,15 @@ module DataStructuresRMolinari
|
|
36
36
|
# - +:c+ or +:ruby+
|
37
37
|
# - the C version will run faster but for now may be buggier and harder to debug
|
38
38
|
module_function def construct(data, operation, lang)
|
39
|
-
operation.must_be_in [:max, :index_of_max]
|
39
|
+
operation.must_be_in [:max, :index_of_max, :sum]
|
40
40
|
lang.must_be_in [:ruby, :c]
|
41
41
|
|
42
|
-
klass = operation
|
42
|
+
klass = case operation
|
43
|
+
when :max then MaxValSegmentTree
|
44
|
+
when :index_of_max then IndexOfMaxValSegmentTree
|
45
|
+
when :sum then SumSegmentTree
|
46
|
+
else raise ArgumentError, "Unknown operation #{operation}"
|
47
|
+
end
|
43
48
|
template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
|
44
49
|
|
45
50
|
klass.new(template, data)
|
@@ -107,6 +112,33 @@ module DataStructuresRMolinari
|
|
107
112
|
end
|
108
113
|
end
|
109
114
|
|
115
|
+
class SumSegmentTree
|
116
|
+
extend Forwardable
|
117
|
+
|
118
|
+
# Tell the tree that the value at idx has changed
|
119
|
+
def_delegator :@structure, :update_at
|
120
|
+
|
121
|
+
# @param (see MaxValSegmentTree#initialize)
|
122
|
+
def initialize(template_klass, data)
|
123
|
+
data.must_be_a Enumerable
|
124
|
+
|
125
|
+
@structure = template_klass.new(
|
126
|
+
combine: ->(a, b) { a + b },
|
127
|
+
single_cell_array_val: ->(i) { data[i] },
|
128
|
+
size: data.size,
|
129
|
+
identity: 0
|
130
|
+
)
|
131
|
+
end
|
132
|
+
|
133
|
+
# The sum of the values in A(i..j)
|
134
|
+
#
|
135
|
+
# The arguments must be integers in 0...(A.size)
|
136
|
+
# @return the sum of the values in A(i..j) or 0 if i > j.
|
137
|
+
def sum_on(i, j)
|
138
|
+
@structure.query_on(i, j)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
110
142
|
# The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
|
111
143
|
#
|
112
144
|
# See SegmentTreeTemplate for more information.
|
@@ -71,6 +71,7 @@ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
|
|
71
71
|
# Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
|
72
72
|
# construction.
|
73
73
|
def update_at(idx)
|
74
|
+
raise DataError, "Bad update index #{idx} (size = #{@size})" unless (0...@size).cover?(idx)
|
74
75
|
|
75
76
|
update_val_at(idx, root, 0, @size - 1)
|
76
77
|
end
|
@@ -69,12 +69,9 @@ module Shared
|
|
69
69
|
# duplication. When nil we don't call anything and just use the elements themselves.
|
70
70
|
def contains_duplicates?(enum, by: nil)
|
71
71
|
seen = Set.new
|
72
|
-
enum.
|
72
|
+
enum.any? do |v|
|
73
73
|
v = v.send(by) if by
|
74
|
-
|
75
|
-
|
76
|
-
seen << v
|
74
|
+
!seen.add?(v)
|
77
75
|
end
|
78
|
-
false
|
79
76
|
end
|
80
77
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_structures_rmolinari
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rory Molinari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: must_be
|
@@ -90,6 +90,7 @@ files:
|
|
90
90
|
- ext/c_segment_tree_template/extconf.rb
|
91
91
|
- ext/c_segment_tree_template/segment_tree_template.c
|
92
92
|
- ext/cc.h
|
93
|
+
- ext/extconf_shared.rb
|
93
94
|
- ext/shared.c
|
94
95
|
- ext/shared.h
|
95
96
|
- lib/data_structures_rmolinari.rb
|