data_structures_rmolinari 0.2.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3765b8df91fcc62eb885e32ff5ad4b0b4678bba6f322cb5c8282657052aed8c6
4
- data.tar.gz: 845bea3649dc51dab697927132c0fc2f62dcacf5c25e1c717b99e57819b52286
3
+ metadata.gz: 87a44faaaa62f555546867230df704981671491f040f6be29eeed7db7eb22a0a
4
+ data.tar.gz: 0a0f1f6cf22bdde5d0510a818af9d8a6dbdbf11a6e69ce2e178bf6f336bb3d92
5
5
  SHA512:
6
- metadata.gz: 23687561ec6ddb12369ca5e75db33ffd710295097cd0c91b72fb278fea3b11b23152867bb4aea6a4fd17b2f95184fb5433c9ff009db92a5c4bab78686ae472de
7
- data.tar.gz: d930a674f85aa0a57030ed59f2b39979c7dbb8d8f74e0a9718d44112c1efe05657e0d55773b623c3724f040054e2fedf453de30b72d5631404642081455406f9
6
+ metadata.gz: 990fc38cbc64c20290317bf2858ff6f2813f832d0046f249faea32c7f88f389e8c8c2db892f8288a0747aa9446181864a3e62435e4846a230411b6afa4b75faf
7
+ data.tar.gz: f1e641b03d30c4726268c1c8da6d6364f635251152230f89aba2b551f0355d37ce843dba8e631c2fbd4a20e87ae94c78cf30b46dc3d472f1a1b55add258de32a
data/CHANGELOG.md ADDED
@@ -0,0 +1,33 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ### Changed
6
+
7
+ - MaxPrioritySearchTree
8
+ - Duplicate y values are now allowed. Ties are broken with a preference for smaller values of x.
9
+ - Method names have changed
10
+ - Instead of "highest", "leftmost", "rightmost" we use "largest_y", "smallest_x", "largest_x"
11
+ - For example, +highest_ne+ is now +largest_y_in_ne+
12
+ - DisjointUnion
13
+ - the size argument to initializer is optional. The default value is 0.
14
+ - elements can be added to the "universe" of known values with +make_set+
15
+
16
+ ### Removed
17
+ - MinmaxPrioritySearchTree is no longer available
18
+ - it was only a partial implementation anyway
19
+
20
+ ## [0.3.0] 2023-01-06
21
+
22
+ ### Added
23
+
24
+ - Start this file
25
+ - `Heap` can be constructed as "non-addressable"
26
+ - `update` is not possible but duplicates can be inserted and overall performance is a little better.
27
+
28
+ ### Changed
29
+
30
+ - `LogicError` gets a subclassed `InternalLogicError` for issues inside the library.
31
+ - `Shared::Pair` becomes `Shared::Point`
32
+ - this doesn't change the API of `MaxPrioritySearchTree` because of ducktyping. But client code (of which there is none) might be
33
+ using the `Pair` name.
@@ -4,40 +4,54 @@
4
4
  # The data structure provides efficient actions to merge two disjoint subsets, i.e., replace them by their union, and determine if
5
5
  # two elements are in the same subset.
6
6
  #
7
- # The elements of the set must be 0, 1, ..., n-1, where n is the size of the universe. Client code can map its data to these
7
+ # The elements of the set are 0, 1, ..., n-1, where n is the size of the universe. Client code can map its data to these
8
8
  # representatives.
9
9
  #
10
10
  # See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
11
11
  #
12
12
  # The code uses several ideas from Tarjan and van Leeuwen for efficiency. We use "union by rank" in +unite+ and path-halving in
13
- # +find+. Together, these make the amortized cost for each of n such operations effectively constant.
13
+ # +find+. Together, these make the amortized cost of each operation effectively constant.
14
14
  #
15
- # - Tarjan, Robert E., van Leeuwen, Jan (1984). "Worst-case analysis of set union algorithms". Journal of the ACM. 31 (2): 245–281.
15
+ # - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
16
16
  #
17
17
  # @todo
18
18
  # - allow caller to expand the size of the universe. This operation is called "make set".
19
19
  # - All we need to do is increase the size of @d, set the parent pointers, define the new ranks (zero), and update @size.
20
20
  class DataStructuresRMolinari::DisjointUnion
21
+ include Shared
22
+
21
23
  # The number of subsets in the partition.
22
24
  attr_reader :subset_count
23
25
 
24
- # @param size the size of the universe, which must be known at the time of construction. The elements 0, 1, ..., size - 1 start
25
- # out in disjoint singleton subsets.
26
- def initialize(size)
27
- @size = size
26
+ # @param initial_size the initial size of the universe. The elements 0, 1, ..., initial_size - 1 start out in disjoint singleton
27
+ # subsets.
28
+ def initialize(initial_size = 0)
28
29
  # Initialize to
29
- @d = (0...size).to_a
30
- @rank = [0] * size
30
+ @d = (0...initial_size).to_a
31
+ @rank = [0] * initial_size
32
+
33
+ @subset_count = initial_size
34
+ end
35
+
36
+ # Add a new subset to the universe containing the element +new_v+
37
+ # @param new_v the new element, starting in its own singleton subset
38
+ # - it must be a non-negative integer, not already part of the universe of elements.
39
+ def make_set(new_v)
40
+ raise DataError, "Element #{new_v} must be a non-negative integer" unless new_v.is_a?(Integer) && !new_v.negative?
41
+ raise DataError, "Element #{new_v} is already present" if @d[new_v]
31
42
 
32
- @subset_count = size
43
+ @d[new_v] = new_v
44
+ @rank[new_v] = 0
45
+ @subset_count += 1
33
46
  end
34
47
 
35
48
  # Declare that e and f are equivalent, i.e., in the same subset. If they are already in the same subset this is a no-op.
36
49
  #
37
- # Each argument must be one of 0, 1, ..., size-1.
50
+ # Each argument must be in the universe of elements
38
51
  def unite(e, f)
39
52
  check_value(e)
40
53
  check_value(f)
54
+
41
55
  raise 'Uniting an element with itself is meaningless' if e == f
42
56
 
43
57
  e_root = find(e)
@@ -50,9 +64,11 @@ class DataStructuresRMolinari::DisjointUnion
50
64
 
51
65
  # The canonical representative of the subset containing e. Two elements d and e are in the same subset exactly when find(d) ==
52
66
  # find(e).
53
- # @param e must be one of 0, 1, ..., size-1.
54
- # @return (Integer) one of 0, 1, ..., size-1.
67
+ # @param e must be in the universe of elements
68
+ # @return (Integer) one of the universe of elements
55
69
  def find(e)
70
+ check_value(e)
71
+
56
72
  # We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
57
73
  x = e
58
74
  x = @d[x] = @d[@d[x]] while @d[@d[x]] != @d[x]
@@ -60,7 +76,7 @@ class DataStructuresRMolinari::DisjointUnion
60
76
  end
61
77
 
62
78
  private def check_value(v)
63
- raise "Value must be given and be in (0..#{@size - 1})" unless v && v.between?(0, @size - 1)
79
+ raise Shared::DataError, "Value #{v} is not part of the univserse." unless @d[v]
64
80
  end
65
81
 
66
82
  private def link(e, f)
@@ -1,7 +1,7 @@
1
1
  require_relative 'shared'
2
2
 
3
- # A Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or max) on a
4
- # arbitrary subarray of a given array.
3
+ # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
4
+ # max) on an arbitrary subarray of a given array.
5
5
  #
6
6
  # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
7
7
  # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
@@ -13,10 +13,10 @@ require_relative 'shared'
13
13
  # Ruby.
14
14
  #
15
15
  # This is a generic implementation, intended to allow easy configuration for concrete instances. See the parameters to the
16
- # initializer and the defintiaons concrete realisations like MaxValSegmentTree.
16
+ # initializer and the definitions of concrete realisations like MaxValSegmentTree.
17
17
  #
18
18
  # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
19
- class DataStructuresRMolinari::GenericSegmentTree
19
+ class DataStructuresRMolinari::SegmentTreeTemplate
20
20
  include Shared::BinaryTreeArithmetic
21
21
 
22
22
  # Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
@@ -24,14 +24,14 @@ class DataStructuresRMolinari::GenericSegmentTree
24
24
  # - For example, if we are calculating sums over subintervals, combine.call(a, b) = a + b, while if we are doing maxima we will
25
25
  # return max(a, b).
26
26
  # - Things get more complicated when we are calculating, say, the _index_ of the maximal value in a subinterval. Now it is not
27
- # enough simple to store that index at each tree node, because to combine the indices from two child nodes we need to know
27
+ # enough simply to store that index at each tree node, because to combine the indices from two child nodes we need to know
28
28
  # both the index of the maximal element in each child node's interval, but also the maximal values themselves, so we know
29
29
  # which one "wins" for the parent node. This affects the sort of work we need to do when combining and the value provided by
30
30
  # the +single_cell_array_val+ lambda.
31
31
  # @param single_cell_array_val a lambda that takes an index i and returns the value we need to store in the #build
32
32
  # operation for the subinterval i..i.
33
- # - This is often simply be the value data[i], but in some cases it will be something else. For example, when we are
34
- # calculating the index of the maximal value on each subinterval we will retern the pair [i, data[i]] here.
33
+ # - This will often simply be the value data[i], but in some cases it will be something else. For example, when we are
34
+ # calculating the index of the maximal value on each subinterval we need [i, data[i]] here.
35
35
  # - If +update_at+ is called later, this lambda must close over the underlying data in a way that captures the updated value.
36
36
  # @param size the size of the underlying data array, used in certain internal arithmetic.
37
37
  # @param identity the value to return when we are querying on an empty interval
@@ -96,7 +96,7 @@ class DataStructuresRMolinari::GenericSegmentTree
96
96
  private def update_val_at(idx, tree_idx, tree_l, tree_r)
97
97
  if tree_l == tree_r
98
98
  # We have found the spot!
99
- raise LogicError, 'tree_l == tree_r, but they do not agree with the idx holding the updated value' unless tree_l == idx
99
+ raise InternalLogicError, 'tree_l == tree_r, but they do not agree with the idx holding the updated value' unless tree_l == idx
100
100
 
101
101
  @tree[tree_idx] = @single_cell_array_val.call(tree_l)
102
102
  else
@@ -13,8 +13,8 @@ require_relative 'shared'
13
13
  # - +empty?+
14
14
  # - is the heap empty?
15
15
  # - O(1)
16
- # - +insert+
17
- # - add a new element to the heap with an associated priority
16
+ # - +insert(item, priority)+
17
+ # - add a new item to the heap with an associated priority
18
18
  # - O(log N)
19
19
  # - +top+
20
20
  # - return the lowest-priority element, which is the element at the root of the tree. In a max-heap this is the highest-priority
@@ -23,12 +23,18 @@ require_relative 'shared'
23
23
  # - +pop+
24
24
  # - removes and returns the item that would be returned by +top+
25
25
  # - O(log N)
26
- # - +update+
26
+ # - +update(item, priority)+
27
27
  # - tell the heap that the priority of a particular item has changed
28
28
  # - O(log N)
29
29
  #
30
30
  # Here N is the number of elements in the heap.
31
31
  #
32
+ # The internal requirements needed to implement +update+ have several consequences.
33
+ # - Items added to the heap must be distinct. Otherwise we would not know which occurrence to update
34
+ # - There is some bookkeeping overhead.
35
+ # If client code doesn't need to call +update+ then we can create a "non-addressable" heap that allows for the insertion of
36
+ # duplicate items and has slightly faster runtime overall. See the arguments to the initializer.
37
+ #
32
38
  # References:
33
39
  #
34
40
  # - https://en.wikipedia.org/wiki/Binary_heap
@@ -36,31 +42,31 @@ require_relative 'shared'
36
42
  # DOI 10.1007/s00224-017-9760-2
37
43
  #
38
44
  # @todo
39
- # - allow for priorities comparable only via +<=>+, like arrays
40
- # - this requires different handling for max-heaps, as we can't just negate the priorities and use min-heap logic
41
- # - relax the requirement that priorities must be comparable vai +<+ and respond to negation. Instead, allow comparison via +<=>+
42
- # and handle max-heaps differently.
43
- # - this will allow priorities to be arrays for tie-breakers and similar.
44
- # - offer a non-addressable version that doesn't support +update+
45
- # - configure through the initializer
46
- # - other operations will be a little quicker, and we can add the same item more than once. The paper by Chen et al. referenced
47
- # in the Wikipedia article for Pairing Heaps suggests that using such a priority queue for Dijkstra's algorithm and inserting
48
- # multiple copies of a key rather than updating its priority is faster in practice than other approaches that have better
49
- # theoretical performance.
45
+ # - let caller see the priority of the top element. Maybe this is useful sometimes.
50
46
  class DataStructuresRMolinari::Heap
47
+ include Shared
51
48
  include Shared::BinaryTreeArithmetic
52
49
 
50
+ # The number of items currently in the heap
53
51
  attr_reader :size
54
52
 
55
- Pair = Struct.new(:priority, :item)
53
+ # An (item, priority) pair
54
+ InternalPair = Struct.new(:item, :priority)
55
+ private_constant :InternalPair
56
56
 
57
57
  # @param max_heap when truthy, make a max-heap rather than a min-heap
58
- # @param debug when truthy, verify the heap property after each update than might violate it. This makes operations much slower.
59
- def initialize(max_heap: false, debug: false)
58
+ # @param addressable when truthy, the heap is _addressable_. This means that
59
+ # - item priorities are updatable with +update(item, p)+, and
60
+ # - items added to the heap must be distinct.
61
+ # When falsy, priorities are not updateable but items may be inserted multiple times. Operations are slightly faster because
62
+ # there is less internal bookkeeping.
63
+ # @param debug when truthy, verify the heap property after each change that might violate it. This makes operations much slower.
64
+ def initialize(max_heap: false, addressable: true, debug: false)
60
65
  @data = []
61
66
  @size = 0
62
67
  @max_heap = max_heap
63
- @index_of = {}
68
+ @addressable = addressable
69
+ @index_of = {} # used in addressable heaps
64
70
  @debug = debug
65
71
  end
66
72
 
@@ -70,26 +76,24 @@ class DataStructuresRMolinari::Heap
70
76
  end
71
77
 
72
78
  # Insert a new element into the heap with the given priority.
73
- # @param value the item to be inserted. It is an error to insert an item that is already present in the heap, though we don't
74
- # check for this.
75
- # @param priority the priority to use for new item. The values used as priorities ust be totally ordered via +<+ and, if +self+ is
76
- # a max-heap, must respond to negation +@-+ in the natural order-respecting way.
77
- # @todo
78
- # - check for duplicate
79
+ # @param value the item to be inserted.
80
+ # - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
81
+ # @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
79
82
  def insert(value, priority)
80
- priority *= -1 if @max_heap
83
+ raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
81
84
 
82
85
  @size += 1
83
86
 
84
- d = Pair.new(priority, value)
87
+ d = InternalPair.new(value, priority)
85
88
  assign(d, @size)
86
89
 
87
90
  sift_up(@size)
88
91
  end
89
92
 
90
93
  # Return the top of the heap without removing it
91
- # @return the value with minimal (maximal for max-heaps) priority. Strictly speaking, it returns the item at the root of the
92
- # binary tree; this element has minimal priority, but there may be other elements with the same priority.
94
+ # @return a value with minimal priority (maximal for max-heaps). Strictly speaking, it returns the item at the root of the
95
+ # binary tree; this element has minimal priority, but there may be other elements with the same priority and they do not appear
96
+ # at the top of the heap in any guaranteed order.
93
97
  def top
94
98
  raise 'Heap is empty!' unless @size.positive?
95
99
 
@@ -100,12 +104,11 @@ class DataStructuresRMolinari::Heap
100
104
  # @return (see #top)
101
105
  def pop
102
106
  result = top
103
- @index_of.delete(result)
104
-
105
107
  assign(@data[@size], root)
106
108
 
107
109
  @data[@size] = nil
108
110
  @size -= 1
111
+ @index_of.delete(result) if @addressable
109
112
 
110
113
  sift_down(root) if @size.positive?
111
114
 
@@ -113,21 +116,20 @@ class DataStructuresRMolinari::Heap
113
116
  end
114
117
 
115
118
  # Update the priority of the given element and maintain the necessary heap properties.
119
+ #
116
120
  # @param element the item whose priority we are updating. It is an error to update the priority of an element not already in the
117
121
  # heap
118
122
  # @param priority the new priority
119
- #
120
- # @todo
121
- # - check that the element is in the heap
122
123
  def update(element, priority)
123
- priority *= -1 if @max_heap
124
+ raise LogicError, 'Cannot update priorities in a non-addressable heap' unless @addressable
125
+ raise DataError, "Cannot update priority for value #{element} not already in the heap" unless contains?(element)
124
126
 
125
127
  idx = @index_of[element]
126
128
  old = @data[idx].priority
127
129
  @data[idx].priority = priority
128
- if priority > old
130
+ if less_than_priority?(old, priority)
129
131
  sift_down(idx)
130
- elsif priority < old
132
+ elsif less_than_priority?(priority, old)
131
133
  sift_up(idx)
132
134
  end
133
135
 
@@ -141,7 +143,7 @@ class DataStructuresRMolinari::Heap
141
143
  x = @data[idx]
142
144
  while idx != root
143
145
  i = parent(idx)
144
- break unless x.priority < @data[i].priority
146
+ break unless less_than?(x, @data[i])
145
147
 
146
148
  assign(@data[i], idx)
147
149
  idx = i
@@ -156,9 +158,9 @@ class DataStructuresRMolinari::Heap
156
158
  x = @data[idx]
157
159
 
158
160
  while (j = left(idx)) <= @size
159
- j += 1 if j + 1 <= @size && @data[j + 1].priority < @data[j].priority
161
+ j += 1 if j + 1 <= @size && less_than?(@data[j + 1], @data[j])
160
162
 
161
- break unless @data[j].priority < x.priority
163
+ break unless less_than?(@data[j], x)
162
164
 
163
165
  assign(@data[j], idx)
164
166
  idx = j
@@ -171,7 +173,27 @@ class DataStructuresRMolinari::Heap
171
173
  # Put the pair in the given heap location
172
174
  private def assign(pair, idx)
173
175
  @data[idx] = pair
174
- @index_of[pair.item] = idx
176
+ @index_of[pair.item] = idx if @addressable
177
+ end
178
+
179
+ # Compare the priorities of two items with <=> and return truthy exactly when the result is -1.
180
+ #
181
+ # If this is a max-heap return truthy exactly when the result of <=> is 1.
182
+ #
183
+ # The arguments can also be the priorities themselves.
184
+ private def less_than?(p1, p2)
185
+ less_than_priority?(p1.priority, p2.priority)
186
+ end
187
+
188
+ # Direct comparison of priorities
189
+ private def less_than_priority?(priority1, priority2)
190
+ return (priority1 <=> priority2) == 1 if @max_heap
191
+
192
+ (priority1 <=> priority2) == -1
193
+ end
194
+
195
+ private def contains?(item)
196
+ !!@index_of[item]
175
197
  end
176
198
 
177
199
  # For debugging
@@ -180,8 +202,8 @@ class DataStructuresRMolinari::Heap
180
202
  left = left(idx)
181
203
  right = right(idx)
182
204
 
183
- raise "Heap property violated by left child of index #{idx}" if left <= @size && @data[idx].priority >= @data[left].priority
184
- raise "Heap property violated by right child of index #{idx}" if right <= @size && @data[idx].priority >= @data[right].priority
205
+ raise InternalLogicError, "Heap property violated by left child of index #{idx}" if left <= @size && less_than?(@data[left], @data[idx])
206
+ raise InternalLogicError, "Heap property violated by right child of index #{idx}" if right <= @size && less_than?(@data[right], @data[idx])
185
207
  end
186
208
  end
187
209
  end