data_structures_rmolinari 0.2.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/lib/data_structures_rmolinari/disjoint_union.rb +30 -14
- data/lib/data_structures_rmolinari/generic_segment_tree.rb +8 -8
- data/lib/data_structures_rmolinari/heap.rb +64 -42
- data/lib/data_structures_rmolinari/max_priority_search_tree.rb +70 -119
- data/lib/data_structures_rmolinari/shared.rb +9 -1
- data/lib/data_structures_rmolinari.rb +17 -14
- metadata +3 -3
- data/lib/data_structures_rmolinari/minmax_priority_search_tree.rb +0 -670
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87a44faaaa62f555546867230df704981671491f040f6be29eeed7db7eb22a0a
|
4
|
+
data.tar.gz: 0a0f1f6cf22bdde5d0510a818af9d8a6dbdbf11a6e69ce2e178bf6f336bb3d92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 990fc38cbc64c20290317bf2858ff6f2813f832d0046f249faea32c7f88f389e8c8c2db892f8288a0747aa9446181864a3e62435e4846a230411b6afa4b75faf
|
7
|
+
data.tar.gz: f1e641b03d30c4726268c1c8da6d6364f635251152230f89aba2b551f0355d37ce843dba8e631c2fbd4a20e87ae94c78cf30b46dc3d472f1a1b55add258de32a
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## [Unreleased]
|
4
|
+
|
5
|
+
### Changed
|
6
|
+
|
7
|
+
- MaxPrioritySearchTree
|
8
|
+
- Duplicate y values are now allowed. Ties are broken with a preference for smaller values of x.
|
9
|
+
- Method names have changed
|
10
|
+
- Instead of "highest", "leftmost", "rightmost" we use "largest_y", "smallest_x", "largest_x"
|
11
|
+
- For example, +highest_ne+ is now +largest_y_in_ne+
|
12
|
+
- DisjointUnion
|
13
|
+
- the size argument to initializer is optional. The default value is 0.
|
14
|
+
- elements can be added to the "universe" of known values with +make_set+
|
15
|
+
|
16
|
+
### Removed
|
17
|
+
- MinmaxPrioritySearchTree is no longer available
|
18
|
+
- it was only a partial implementation anyway
|
19
|
+
|
20
|
+
## [0.3.0] 2023-01-06
|
21
|
+
|
22
|
+
### Added
|
23
|
+
|
24
|
+
- Start this file
|
25
|
+
- `Heap` can be constructed as "non-addressable"
|
26
|
+
- `update` is not possible but duplicates can be inserted and overall performance is a little better.
|
27
|
+
|
28
|
+
### Changed
|
29
|
+
|
30
|
+
- `LogicError` gets a subclassed `InternalLogicError` for issues inside the library.
|
31
|
+
- `Shared::Pair` becomes `Shared::Point`
|
32
|
+
- this doesn't change the API of `MaxPrioritySearchTree` because of ducktyping. But client code (of which there is none) might be
|
33
|
+
using the `Pair` name.
|
@@ -4,40 +4,54 @@
|
|
4
4
|
# The data structure provides efficient actions to merge two disjoint subsets, i.e., replace them by their union, and determine if
|
5
5
|
# two elements are in the same subset.
|
6
6
|
#
|
7
|
-
# The elements of the set
|
7
|
+
# The elements of the set are 0, 1, ..., n-1, where n is the size of the universe. Client code can map its data to these
|
8
8
|
# representatives.
|
9
9
|
#
|
10
10
|
# See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
|
11
11
|
#
|
12
12
|
# The code uses several ideas from Tarjan and van Leeuwen for efficiency. We use "union by rank" in +unite+ and path-halving in
|
13
|
-
# +find+. Together, these make the amortized cost
|
13
|
+
# +find+. Together, these make the amortized cost of each operation effectively constant.
|
14
14
|
#
|
15
|
-
# - Tarjan, Robert E., van Leeuwen, Jan (1984).
|
15
|
+
# - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
|
16
16
|
#
|
17
17
|
# @todo
|
18
18
|
# - allow caller to expand the size of the universe. This operation is called "make set".
|
19
19
|
# - All we need to do is increase the size of @d, set the parent pointers, define the new ranks (zero), and update @size.
|
20
20
|
class DataStructuresRMolinari::DisjointUnion
|
21
|
+
include Shared
|
22
|
+
|
21
23
|
# The number of subsets in the partition.
|
22
24
|
attr_reader :subset_count
|
23
25
|
|
24
|
-
# @param
|
25
|
-
#
|
26
|
-
def initialize(
|
27
|
-
@size = size
|
26
|
+
# @param initial_size the initial size of the universe. The elements 0, 1, ..., initial_size - 1 start out in disjoint singleton
|
27
|
+
# subsets.
|
28
|
+
def initialize(initial_size = 0)
|
28
29
|
# Initialize to
|
29
|
-
@d = (0...
|
30
|
-
@rank = [0] *
|
30
|
+
@d = (0...initial_size).to_a
|
31
|
+
@rank = [0] * initial_size
|
32
|
+
|
33
|
+
@subset_count = initial_size
|
34
|
+
end
|
35
|
+
|
36
|
+
# Add a new subset to the universe containing the element +new_v+
|
37
|
+
# @param new_v the new element, starting in its own singleton subset
|
38
|
+
# - it must be a non-negative integer, not already part of the universe of elements.
|
39
|
+
def make_set(new_v)
|
40
|
+
raise DataError, "Element #{new_v} must be a non-negative integer" unless new_v.is_a?(Integer) && !new_v.negative?
|
41
|
+
raise DataError, "Element #{new_v} is already present" if @d[new_v]
|
31
42
|
|
32
|
-
@
|
43
|
+
@d[new_v] = new_v
|
44
|
+
@rank[new_v] = 0
|
45
|
+
@subset_count += 1
|
33
46
|
end
|
34
47
|
|
35
48
|
# Declare that e and f are equivalent, i.e., in the same subset. If they are already in the same subset this is a no-op.
|
36
49
|
#
|
37
|
-
# Each argument must be
|
50
|
+
# Each argument must be in the universe of elements
|
38
51
|
def unite(e, f)
|
39
52
|
check_value(e)
|
40
53
|
check_value(f)
|
54
|
+
|
41
55
|
raise 'Uniting an element with itself is meaningless' if e == f
|
42
56
|
|
43
57
|
e_root = find(e)
|
@@ -50,9 +64,11 @@ class DataStructuresRMolinari::DisjointUnion
|
|
50
64
|
|
51
65
|
# The canonical representative of the subset containing e. Two elements d and e are in the same subset exactly when find(d) ==
|
52
66
|
# find(e).
|
53
|
-
# @param e must be
|
54
|
-
# @return (Integer) one of
|
67
|
+
# @param e must be in the universe of elements
|
68
|
+
# @return (Integer) one of the universe of elements
|
55
69
|
def find(e)
|
70
|
+
check_value(e)
|
71
|
+
|
56
72
|
# We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
|
57
73
|
x = e
|
58
74
|
x = @d[x] = @d[@d[x]] while @d[@d[x]] != @d[x]
|
@@ -60,7 +76,7 @@ class DataStructuresRMolinari::DisjointUnion
|
|
60
76
|
end
|
61
77
|
|
62
78
|
private def check_value(v)
|
63
|
-
raise "Value
|
79
|
+
raise Shared::DataError, "Value #{v} is not part of the univserse." unless @d[v]
|
64
80
|
end
|
65
81
|
|
66
82
|
private def link(e, f)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative 'shared'
|
2
2
|
|
3
|
-
#
|
4
|
-
# arbitrary subarray of a given array.
|
3
|
+
# The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
|
4
|
+
# max) on an arbitrary subarray of a given array.
|
5
5
|
#
|
6
6
|
# There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
|
7
7
|
# Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
|
@@ -13,10 +13,10 @@ require_relative 'shared'
|
|
13
13
|
# Ruby.
|
14
14
|
#
|
15
15
|
# This is a generic implementation, intended to allow easy configuration for concrete instances. See the parameters to the
|
16
|
-
# initializer and the
|
16
|
+
# initializer and the definitions of concrete realisations like MaxValSegmentTree.
|
17
17
|
#
|
18
18
|
# We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
|
19
|
-
class DataStructuresRMolinari::
|
19
|
+
class DataStructuresRMolinari::SegmentTreeTemplate
|
20
20
|
include Shared::BinaryTreeArithmetic
|
21
21
|
|
22
22
|
# Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
|
@@ -24,14 +24,14 @@ class DataStructuresRMolinari::GenericSegmentTree
|
|
24
24
|
# - For example, if we are calculating sums over subintervals, combine.call(a, b) = a + b, while if we are doing maxima we will
|
25
25
|
# return max(a, b).
|
26
26
|
# - Things get more complicated when we are calculating, say, the _index_ of the maximal value in a subinterval. Now it is not
|
27
|
-
# enough
|
27
|
+
# enough simply to store that index at each tree node, because to combine the indices from two child nodes we need to know
|
28
28
|
# both the index of the maximal element in each child node's interval, but also the maximal values themselves, so we know
|
29
29
|
# which one "wins" for the parent node. This affects the sort of work we need to do when combining and the value provided by
|
30
30
|
# the +single_cell_array_val+ lambda.
|
31
31
|
# @param single_cell_array_val a lambda that takes an index i and returns the value we need to store in the #build
|
32
32
|
# operation for the subinterval i..i.
|
33
|
-
# - This
|
34
|
-
# calculating the index of the maximal value on each subinterval we
|
33
|
+
# - This will often simply be the value data[i], but in some cases it will be something else. For example, when we are
|
34
|
+
# calculating the index of the maximal value on each subinterval we need [i, data[i]] here.
|
35
35
|
# - If +update_at+ is called later, this lambda must close over the underlying data in a way that captures the updated value.
|
36
36
|
# @param size the size of the underlying data array, used in certain internal arithmetic.
|
37
37
|
# @param identity the value to return when we are querying on an empty interval
|
@@ -96,7 +96,7 @@ class DataStructuresRMolinari::GenericSegmentTree
|
|
96
96
|
private def update_val_at(idx, tree_idx, tree_l, tree_r)
|
97
97
|
if tree_l == tree_r
|
98
98
|
# We have found the spot!
|
99
|
-
raise
|
99
|
+
raise InternalLogicError, 'tree_l == tree_r, but they do not agree with the idx holding the updated value' unless tree_l == idx
|
100
100
|
|
101
101
|
@tree[tree_idx] = @single_cell_array_val.call(tree_l)
|
102
102
|
else
|
@@ -13,8 +13,8 @@ require_relative 'shared'
|
|
13
13
|
# - +empty?+
|
14
14
|
# - is the heap empty?
|
15
15
|
# - O(1)
|
16
|
-
# - +insert+
|
17
|
-
# - add a new
|
16
|
+
# - +insert(item, priority)+
|
17
|
+
# - add a new item to the heap with an associated priority
|
18
18
|
# - O(log N)
|
19
19
|
# - +top+
|
20
20
|
# - return the lowest-priority element, which is the element at the root of the tree. In a max-heap this is the highest-priority
|
@@ -23,12 +23,18 @@ require_relative 'shared'
|
|
23
23
|
# - +pop+
|
24
24
|
# - removes and returns the item that would be returned by +top+
|
25
25
|
# - O(log N)
|
26
|
-
# - +update+
|
26
|
+
# - +update(item, priority)+
|
27
27
|
# - tell the heap that the priority of a particular item has changed
|
28
28
|
# - O(log N)
|
29
29
|
#
|
30
30
|
# Here N is the number of elements in the heap.
|
31
31
|
#
|
32
|
+
# The internal requirements needed to implement +update+ have several consequences.
|
33
|
+
# - Items added to the heap must be distinct. Otherwise we would not know which occurrence to update
|
34
|
+
# - There is some bookkeeping overhead.
|
35
|
+
# If client code doesn't need to call +update+ then we can create a "non-addressable" heap that allows for the insertion of
|
36
|
+
# duplicate items and has slightly faster runtime overall. See the arguments to the initializer.
|
37
|
+
#
|
32
38
|
# References:
|
33
39
|
#
|
34
40
|
# - https://en.wikipedia.org/wiki/Binary_heap
|
@@ -36,31 +42,31 @@ require_relative 'shared'
|
|
36
42
|
# DOI 10.1007/s00224-017-9760-2
|
37
43
|
#
|
38
44
|
# @todo
|
39
|
-
# -
|
40
|
-
# - this requires different handling for max-heaps, as we can't just negate the priorities and use min-heap logic
|
41
|
-
# - relax the requirement that priorities must be comparable vai +<+ and respond to negation. Instead, allow comparison via +<=>+
|
42
|
-
# and handle max-heaps differently.
|
43
|
-
# - this will allow priorities to be arrays for tie-breakers and similar.
|
44
|
-
# - offer a non-addressable version that doesn't support +update+
|
45
|
-
# - configure through the initializer
|
46
|
-
# - other operations will be a little quicker, and we can add the same item more than once. The paper by Chen et al. referenced
|
47
|
-
# in the Wikipedia article for Pairing Heaps suggests that using such a priority queue for Dijkstra's algorithm and inserting
|
48
|
-
# multiple copies of a key rather than updating its priority is faster in practice than other approaches that have better
|
49
|
-
# theoretical performance.
|
45
|
+
# - let caller see the priority of the top element. Maybe this is useful sometimes.
|
50
46
|
class DataStructuresRMolinari::Heap
|
47
|
+
include Shared
|
51
48
|
include Shared::BinaryTreeArithmetic
|
52
49
|
|
50
|
+
# The number of items currently in the heap
|
53
51
|
attr_reader :size
|
54
52
|
|
55
|
-
|
53
|
+
# An (item, priority) pair
|
54
|
+
InternalPair = Struct.new(:item, :priority)
|
55
|
+
private_constant :InternalPair
|
56
56
|
|
57
57
|
# @param max_heap when truthy, make a max-heap rather than a min-heap
|
58
|
-
# @param
|
59
|
-
|
58
|
+
# @param addressable when truthy, the heap is _addressable_. This means that
|
59
|
+
# - item priorities are updatable with +update(item, p)+, and
|
60
|
+
# - items added to the heap must be distinct.
|
61
|
+
# When falsy, priorities are not updatable but items may be inserted multiple times. Operations are slightly faster because
|
62
|
+
# there is less internal bookkeeping.
|
63
|
+
# @param debug when truthy, verify the heap property after each change that might violate it. This makes operations much slower.
|
64
|
+
def initialize(max_heap: false, addressable: true, debug: false)
|
60
65
|
@data = []
|
61
66
|
@size = 0
|
62
67
|
@max_heap = max_heap
|
63
|
-
@
|
68
|
+
@addressable = addressable
|
69
|
+
@index_of = {} # used in addressable heaps
|
64
70
|
@debug = debug
|
65
71
|
end
|
66
72
|
|
@@ -70,26 +76,24 @@ class DataStructuresRMolinari::Heap
|
|
70
76
|
end
|
71
77
|
|
72
78
|
# Insert a new element into the heap with the given priority.
|
73
|
-
# @param value the item to be inserted.
|
74
|
-
#
|
75
|
-
# @param priority the priority to use for new item. The values used as priorities
|
76
|
-
# a max-heap, must respond to negation +@-+ in the natural order-respecting way.
|
77
|
-
# @todo
|
78
|
-
# - check for duplicate
|
79
|
+
# @param value the item to be inserted.
|
80
|
+
# - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
|
81
|
+
# @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
|
79
82
|
def insert(value, priority)
|
80
|
-
|
83
|
+
raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
|
81
84
|
|
82
85
|
@size += 1
|
83
86
|
|
84
|
-
d =
|
87
|
+
d = InternalPair.new(value, priority)
|
85
88
|
assign(d, @size)
|
86
89
|
|
87
90
|
sift_up(@size)
|
88
91
|
end
|
89
92
|
|
90
93
|
# Return the top of the heap without removing it
|
91
|
-
# @return
|
92
|
-
# binary tree; this element has minimal priority, but there may be other elements with the same priority
|
94
|
+
# @return a value with minimal priority (maximal for max-heaps). Strictly speaking, it returns the item at the root of the
|
95
|
+
# binary tree; this element has minimal priority, but there may be other elements with the same priority and they do not appear
|
96
|
+
# at the top of the heap in any guaranteed order.
|
93
97
|
def top
|
94
98
|
raise 'Heap is empty!' unless @size.positive?
|
95
99
|
|
@@ -100,12 +104,11 @@ class DataStructuresRMolinari::Heap
|
|
100
104
|
# @return (see #top)
|
101
105
|
def pop
|
102
106
|
result = top
|
103
|
-
@index_of.delete(result)
|
104
|
-
|
105
107
|
assign(@data[@size], root)
|
106
108
|
|
107
109
|
@data[@size] = nil
|
108
110
|
@size -= 1
|
111
|
+
@index_of.delete(result) if @addressable
|
109
112
|
|
110
113
|
sift_down(root) if @size.positive?
|
111
114
|
|
@@ -113,21 +116,20 @@ class DataStructuresRMolinari::Heap
|
|
113
116
|
end
|
114
117
|
|
115
118
|
# Update the priority of the given element and maintain the necessary heap properties.
|
119
|
+
#
|
116
120
|
# @param element the item whose priority we are updating. It is an error to update the priority of an element not already in the
|
117
121
|
# heap
|
118
122
|
# @param priority the new priority
|
119
|
-
#
|
120
|
-
# @todo
|
121
|
-
# - check that the element is in the heap
|
122
123
|
def update(element, priority)
|
123
|
-
|
124
|
+
raise LogicError, 'Cannot update priorities in a non-addressable heap' unless @addressable
|
125
|
+
raise DataError, "Cannot update priority for value #{element} not already in the heap" unless contains?(element)
|
124
126
|
|
125
127
|
idx = @index_of[element]
|
126
128
|
old = @data[idx].priority
|
127
129
|
@data[idx].priority = priority
|
128
|
-
if priority
|
130
|
+
if less_than_priority?(old, priority)
|
129
131
|
sift_down(idx)
|
130
|
-
elsif priority
|
132
|
+
elsif less_than_priority?(priority, old)
|
131
133
|
sift_up(idx)
|
132
134
|
end
|
133
135
|
|
@@ -141,7 +143,7 @@ class DataStructuresRMolinari::Heap
|
|
141
143
|
x = @data[idx]
|
142
144
|
while idx != root
|
143
145
|
i = parent(idx)
|
144
|
-
break unless x
|
146
|
+
break unless less_than?(x, @data[i])
|
145
147
|
|
146
148
|
assign(@data[i], idx)
|
147
149
|
idx = i
|
@@ -156,9 +158,9 @@ class DataStructuresRMolinari::Heap
|
|
156
158
|
x = @data[idx]
|
157
159
|
|
158
160
|
while (j = left(idx)) <= @size
|
159
|
-
j += 1 if j + 1 <= @size && @data[j + 1]
|
161
|
+
j += 1 if j + 1 <= @size && less_than?(@data[j + 1], @data[j])
|
160
162
|
|
161
|
-
break unless @data[j]
|
163
|
+
break unless less_than?(@data[j], x)
|
162
164
|
|
163
165
|
assign(@data[j], idx)
|
164
166
|
idx = j
|
@@ -171,7 +173,27 @@ class DataStructuresRMolinari::Heap
|
|
171
173
|
# Put the pair in the given heap location
|
172
174
|
private def assign(pair, idx)
|
173
175
|
@data[idx] = pair
|
174
|
-
@index_of[pair.item] = idx
|
176
|
+
@index_of[pair.item] = idx if @addressable
|
177
|
+
end
|
178
|
+
|
179
|
+
# Compare the priorities of two items with <=> and return truthy exactly when the result is -1.
|
180
|
+
#
|
181
|
+
# If this is a max-heap return truthy exactly when the result of <=> is 1.
|
182
|
+
#
|
183
|
+
# The arguments must respond to +priority+. To compare raw priority values directly, use +less_than_priority?+.
|
184
|
+
private def less_than?(p1, p2)
|
185
|
+
less_than_priority?(p1.priority, p2.priority)
|
186
|
+
end
|
187
|
+
|
188
|
+
# Direct comparison of priorities
|
189
|
+
private def less_than_priority?(priority1, priority2)
|
190
|
+
return (priority1 <=> priority2) == 1 if @max_heap
|
191
|
+
|
192
|
+
(priority1 <=> priority2) == -1
|
193
|
+
end
|
194
|
+
|
195
|
+
private def contains?(item)
|
196
|
+
!!@index_of[item]
|
175
197
|
end
|
176
198
|
|
177
199
|
# For debugging
|
@@ -180,8 +202,8 @@ class DataStructuresRMolinari::Heap
|
|
180
202
|
left = left(idx)
|
181
203
|
right = right(idx)
|
182
204
|
|
183
|
-
raise "Heap property violated by left child of index #{idx}" if left <= @size && @data[
|
184
|
-
raise "Heap property violated by right child of index #{idx}" if right <= @size && @data[
|
205
|
+
raise InternalLogicError, "Heap property violated by left child of index #{idx}" if left <= @size && less_than?(@data[left], @data[idx])
|
206
|
+
raise InternalLogicError, "Heap property violated by right child of index #{idx}" if right <= @size && less_than?(@data[right], @data[idx])
|
185
207
|
end
|
186
208
|
end
|
187
209
|
end
|