data_structures_rmolinari 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_structures_rmolinari/disjoint_union.rb +77 -0
- data/lib/data_structures_rmolinari/{generic_segment_tree_internal.rb → generic_segment_tree.rb} +52 -31
- data/lib/data_structures_rmolinari/{heap_internal.rb → heap.rb} +10 -2
- data/lib/data_structures_rmolinari/{max_priority_search_tree_internal.rb → max_priority_search_tree.rb} +19 -12
- data/lib/data_structures_rmolinari/{minmax_priority_search_tree_internal.rb → minmax_priority_search_tree.rb} +1 -1
- data/lib/data_structures_rmolinari/shared.rb +19 -0
- data/lib/data_structures_rmolinari.rb +53 -23
- metadata +13 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3765b8df91fcc62eb885e32ff5ad4b0b4678bba6f322cb5c8282657052aed8c6
|
4
|
+
data.tar.gz: 845bea3649dc51dab697927132c0fc2f62dcacf5c25e1c717b99e57819b52286
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23687561ec6ddb12369ca5e75db33ffd710295097cd0c91b72fb278fea3b11b23152867bb4aea6a4fd17b2f95184fb5433c9ff009db92a5c4bab78686ae472de
|
7
|
+
data.tar.gz: d930a674f85aa0a57030ed59f2b39979c7dbb8d8f74e0a9718d44112c1efe05657e0d55773b623c3724f040054e2fedf453de30b72d5631404642081455406f9
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# A "disjoint set union" that represents a set of elements belonging to _disjoint_ subsets. Alternatively, this expresses a
|
2
|
+
# partition of a fixed set.
|
3
|
+
#
|
4
|
+
# The data structure provides efficient actions to merge two disjoint subsets, i.e., replace them by their union, and determine if
|
5
|
+
# two elements are in the same subset.
|
6
|
+
#
|
7
|
+
# The elements of the set must be 0, 1, ..., n-1, where n is the size of the universe. Client code can map its data to these
|
8
|
+
# representatives.
|
9
|
+
#
|
10
|
+
# See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
|
11
|
+
#
|
12
|
+
# The code uses several ideas from Tarjan and van Leeuwen for efficiency. We use "union by rank" in +unite+ and path-halving in
|
13
|
+
# +find+. Together, these make the amortized cost for each of n such operations effectively constant.
|
14
|
+
#
|
15
|
+
# - Tarjan, Robert E., van Leeuwen, Jan (1984). "Worst-case analysis of set union algorithms". Journal of the ACM. 31 (2): 245–281.
|
16
|
+
#
|
17
|
+
# @todo
|
18
|
+
# - allow caller to expand the size of the universe. This operation is called "make set".
|
19
|
+
# - All we need to do is increase the size of @d, set the parent pointers, define the new ranks (zero), and update @size.
|
20
|
+
class DataStructuresRMolinari::DisjointUnion
|
21
|
+
# The number of subsets in the partition.
|
22
|
+
attr_reader :subset_count
|
23
|
+
|
24
|
+
# @param size the size of the universe, which must be known at the time of construction. The elements 0, 1, ..., size - 1 start
|
25
|
+
# out in disjoint singleton subsets.
|
26
|
+
def initialize(size)
|
27
|
+
@size = size
|
28
|
+
# Initialize to singleton subsets: each element starts as its own parent in @d, with rank zero.
|
29
|
+
@d = (0...size).to_a
|
30
|
+
@rank = [0] * size
|
31
|
+
|
32
|
+
@subset_count = size
|
33
|
+
end
|
34
|
+
|
35
|
+
# Declare that e and f are equivalent, i.e., in the same subset. If they are already in the same subset this is a no-op.
|
36
|
+
#
|
37
|
+
# Each argument must be one of 0, 1, ..., size-1.
|
38
|
+
def unite(e, f)
|
39
|
+
check_value(e)
|
40
|
+
check_value(f)
|
41
|
+
raise 'Uniting an element with itself is meaningless' if e == f
|
42
|
+
|
43
|
+
e_root = find(e)
|
44
|
+
f_root = find(f)
|
45
|
+
return if e_root == f_root
|
46
|
+
|
47
|
+
@subset_count -= 1
|
48
|
+
link(e_root, f_root)
|
49
|
+
end
|
50
|
+
|
51
|
+
# The canonical representative of the subset containing e. Two elements d and e are in the same subset exactly when find(d) ==
|
52
|
+
# find(e).
|
53
|
+
# @param e must be one of 0, 1, ..., size-1.
|
54
|
+
# @return (Integer) one of 0, 1, ..., size-1.
|
55
|
+
def find(e)
|
56
|
+
# We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
|
57
|
+
x = e
|
58
|
+
x = @d[x] = @d[@d[x]] while @d[@d[x]] != @d[x]
|
59
|
+
@d[x]
|
60
|
+
end
|
61
|
+
|
62
|
+
private def check_value(v)
|
63
|
+
raise "Value must be given and be in (0..#{@size - 1})" unless v && v.between?(0, @size - 1)
|
64
|
+
end
|
65
|
+
|
66
|
+
private def link(e, f)
|
67
|
+
# Choose which way around to do the linking using the element "ranks". See Tarjan and van Leeuwen, p 250.
|
68
|
+
if @rank[e] > @rank[f]
|
69
|
+
@d[f] = e
|
70
|
+
elsif @rank[e] == @rank[f]
|
71
|
+
@d[f] = e
|
72
|
+
@rank[e] += 1
|
73
|
+
else
|
74
|
+
@d[e] = f
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/data_structures_rmolinari/{generic_segment_tree_internal.rb → generic_segment_tree.rb}
RENAMED
@@ -8,45 +8,33 @@ require_relative 'shared'
|
|
8
8
|
# called an "interval tree."
|
9
9
|
#
|
10
10
|
# For more details (and some close-to-metal analysis of run time, especially for large datasets) see
|
11
|
-
# https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up
|
12
|
-
#
|
11
|
+
# https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
|
12
|
+
# which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
|
13
|
+
# Ruby.
|
13
14
|
#
|
14
|
-
# This is a generic implementation.
|
15
|
+
# This is a generic implementation, intended to allow easy configuration for concrete instances. See the parameters to the
|
16
|
+
# initializer and the definitions of concrete realisations like MaxValSegmentTree.
|
15
17
|
#
|
16
18
|
# We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
|
17
|
-
|
18
|
-
# @todo
|
19
|
-
# - provide a data-update operation like update_val_at(idx, val)
|
20
|
-
# - this is O(log n)
|
21
|
-
# - note that this may need some rework. Consider something like IndexOfMaxVal: @merge needs to know about the underlying data
|
22
|
-
# in that case. Hmmm. Maybe the lambda can close over the data in a way that makes it possible to change the data "from the
|
23
|
-
# outside". Yes:
|
24
|
-
# a = [1,2,3]
|
25
|
-
# foo = ->() { a.max }
|
26
|
-
# foo.call # 3
|
27
|
-
# a = [1,2,4]
|
28
|
-
# foo.call # 4
|
29
|
-
# - Offer an optional parameter base_case_value_extractor (<-- need better name) to be used in #determine_val in the case that
|
30
|
-
# left == tree_l && right == tree_r instead of simply returning @tree[tree_idx]
|
31
|
-
# - Use case: https://cp-algorithms.com/data_structures/segment_tree.html#saving-the-entire-subarrays-in-each-vertex, such as
|
32
|
-
# finding the least element in a subarray l..r no smaller than a given value x. In this case we store a sorted version the
|
33
|
-
# entire subarray at each node and use a binary search on it.
|
34
|
-
# - the default value would simply be the identity function.
|
35
|
-
# - NOTE that in this case, we have different "combine" functions in #determine_val and #build. In #build we would combine
|
36
|
-
# sorted lists into a larger sorted list. In #determine_val we combine results via #min.
|
37
|
-
# - Think about the interface before doing this.
|
38
|
-
class GenericSegmentTreeInternal
|
19
|
+
class DataStructuresRMolinari::GenericSegmentTree
|
39
20
|
include Shared::BinaryTreeArithmetic
|
40
21
|
|
41
22
|
# Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
|
42
23
|
# @param combine a lambda that takes two values and munges them into a combined value.
|
43
24
|
# - For example, if we are calculating sums over subintervals, combine.call(a, b) = a + b, while if we are doing maxima we will
|
44
|
-
# return max(a, b)
|
25
|
+
# return max(a, b).
|
26
|
+
# - Things get more complicated when we are calculating, say, the _index_ of the maximal value in a subinterval. Now it is not
|
27
|
+
# enough simply to store that index at each tree node, because to combine the indices from two child nodes we need to know
|
28
|
+
# both the index of the maximal element in each child node's interval, but also the maximal values themselves, so we know
|
29
|
+
# which one "wins" for the parent node. This affects the sort of work we need to do when combining and the value provided by
|
30
|
+
# the +single_cell_array_val+ lambda.
|
45
31
|
# @param single_cell_array_val a lambda that takes an index i and returns the value we need to store in the #build
|
46
|
-
# operation for the subinterval i..i.
|
47
|
-
# it will be something else.
|
32
|
+
# operation for the subinterval i..i.
|
33
|
+
# - This will often simply be the value data[i], but in some cases it will be something else. For example, when we are
|
34
|
+
# calculating the index of the maximal value on each subinterval we will return the pair [i, data[i]] here.
|
35
|
+
# - If +update_at+ is called later, this lambda must close over the underlying data in a way that captures the updated value.
|
48
36
|
# @param size the size of the underlying data array, used in certain internal arithmetic.
|
49
|
-
# @param identity
|
37
|
+
# @param identity the value to return when we are querying on an empty interval
|
50
38
|
# - for sums, this will be zero; for maxima, this will be -Infinity, etc
|
51
39
|
def initialize(combine:, single_cell_array_val:, size:, identity:)
|
52
40
|
@combine = combine
|
@@ -62,15 +50,28 @@ class GenericSegmentTreeInternal
|
|
62
50
|
# @param left the left end of the subinterval.
|
63
51
|
# @param right the right end (inclusive) of the subinterval.
|
64
52
|
#
|
65
|
-
# The type of the return value depends on the concrete instance of the segment tree.
|
53
|
+
# The type of the return value depends on the concrete instance of the segment tree. We return the _identity_ element provided at
|
54
|
+
# construction time if the interval is empty.
|
66
55
|
def query_on(left, right)
|
67
|
-
raise "Bad query interval #{left}..#{right}" if left.negative? || right >= @size
|
56
|
+
raise DataError, "Bad query interval #{left}..#{right}" if left.negative? || right >= @size
|
68
57
|
|
69
58
|
return @identity if left > right # empty interval
|
70
59
|
|
71
60
|
determine_val(root, left, right, 0, @size - 1)
|
72
61
|
end
|
73
62
|
|
63
|
+
# Update the value in the underlying array at the given idx
|
64
|
+
#
|
65
|
+
# @param idx an index in the underlying data array.
|
66
|
+
#
|
67
|
+
# Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
|
68
|
+
# construction.
|
69
|
+
def update_at(idx)
|
70
|
+
raise DataError, 'Cannot update an index outside the initial range of the underlying data' unless (0...@size).cover?(idx)
|
71
|
+
|
72
|
+
update_val_at(idx, root, 0, @size - 1)
|
73
|
+
end
|
74
|
+
|
74
75
|
private def determine_val(tree_idx, left, right, tree_l, tree_r)
|
75
76
|
# Does the current tree node exactly serve up the interval we're interested in?
|
76
77
|
return @tree[tree_idx] if left == tree_l && right == tree_r
|
@@ -92,6 +93,26 @@ class GenericSegmentTreeInternal
|
|
92
93
|
end
|
93
94
|
end
|
94
95
|
|
96
|
+
private def update_val_at(idx, tree_idx, tree_l, tree_r)
|
97
|
+
if tree_l == tree_r
|
98
|
+
# We have found the spot!
|
99
|
+
raise LogicError, 'tree_l == tree_r, but they do not agree with the idx holding the updated value' unless tree_l == idx
|
100
|
+
|
101
|
+
@tree[tree_idx] = @single_cell_array_val.call(tree_l)
|
102
|
+
else
|
103
|
+
# Recursively update the appropriate subtree
|
104
|
+
mid = midpoint(tree_l, tree_r)
|
105
|
+
left = left(tree_idx)
|
106
|
+
right = right(tree_idx)
|
107
|
+
if mid >= idx
|
108
|
+
update_val_at(idx, left(tree_idx), tree_l, mid)
|
109
|
+
else
|
110
|
+
update_val_at(idx, right(tree_idx), mid + 1, tree_r)
|
111
|
+
end
|
112
|
+
@tree[tree_idx] = @combine.call(@tree[left], @tree[right])
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
95
116
|
# Build the internal data structure.
|
96
117
|
#
|
97
118
|
# - tree_idx is the index into @tree
|
@@ -36,10 +36,18 @@ require_relative 'shared'
|
|
36
36
|
# DOI 10.1007/s00224-017-9760-2
|
37
37
|
#
|
38
38
|
# @todo
|
39
|
+
# - allow for priorities comparable only via +<=>+, like arrays
|
40
|
+
# - this requires different handling for max-heaps, as we can't just negate the priorities and use min-heap logic
|
39
41
|
# - relax the requirement that priorities must be comparable via +<+ and respond to negation. Instead, allow comparison via +<=>+
|
40
42
|
# and handle max-heaps differently.
|
41
43
|
# - this will allow priorities to be arrays for tie-breakers and similar.
|
42
|
-
|
44
|
+
# - offer a non-addressable version that doesn't support +update+
|
45
|
+
# - configure through the initializer
|
46
|
+
# - other operations will be a little quicker, and we can add the same item more than once. The paper by Chen et al. referenced
|
47
|
+
# in the Wikipedia article for Pairing Heaps suggests that using such a priority queue for Dijkstra's algorithm and inserting
|
48
|
+
# multiple copies of a key rather than updating its priority is faster in practice than other approaches that have better
|
49
|
+
# theoretical performance.
|
50
|
+
class DataStructuresRMolinari::Heap
|
43
51
|
include Shared::BinaryTreeArithmetic
|
44
52
|
|
45
53
|
attr_reader :size
|
@@ -61,7 +69,7 @@ class HeapInternal
|
|
61
69
|
@size.zero?
|
62
70
|
end
|
63
71
|
|
64
|
-
# Insert a new element into the heap with the given
|
72
|
+
# Insert a new element into the heap with the given priority.
|
65
73
|
# @param value the item to be inserted. It is an error to insert an item that is already present in the heap, though we don't
|
66
74
|
# check for this.
|
67
75
|
# @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<+ and, if +self+ is
|
@@ -40,14 +40,18 @@ require_relative 'shared'
|
|
40
40
|
# * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985. Later, De,
|
41
41
|
# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
|
42
42
|
# Geometry, 2011
|
43
|
-
class
|
43
|
+
class DataStructuresRMolinari::MaxPrioritySearchTree
|
44
44
|
include Shared
|
45
45
|
include BinaryTreeArithmetic
|
46
46
|
|
47
47
|
# Construct a MaxPST from the collection of points in +data+.
|
48
48
|
#
|
49
|
-
# @param data [Array] the set P of points presented as an array. The tree is built in the array in-place without cloning.
|
50
|
-
#
|
49
|
+
# @param data [Array] the set P of points presented as an array. The tree is built in the array in-place without cloning.
|
50
|
+
# - Each element of the array must respond to +#x+ and +#y+.
|
51
|
+
# - This is not checked explicitly but a missing method exception will be thrown when we try to call one of them.
|
52
|
+
# - The +x+ values must be distinct, as must the +y+ values. We raise a +Shared::DataError+ if this isn't the case.
|
53
|
+
# - This is a restriction that simplifies some of the algorithm code. It can be removed at the cost of some extra work. Issue
|
54
|
+
# #9.
|
51
55
|
#
|
52
56
|
# @param verify [Boolean] when truthy, check that the properties of a PST are satisfied after construction, raising an exception
|
53
57
|
# if not.
|
@@ -69,7 +73,7 @@ class MaxPrioritySearchTreeInternal
|
|
69
73
|
# Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
|
70
74
|
# structure. Define p* as
|
71
75
|
#
|
72
|
-
# - (infty, -infty)
|
76
|
+
# - (infty, -infty) if Q \intersect P is empty and
|
73
77
|
# - the highest (max-y) point in Q \intersect P otherwise.
|
74
78
|
#
|
75
79
|
# This method returns p* in O(log n) time and O(1) extra space.
|
@@ -82,7 +86,7 @@ class MaxPrioritySearchTreeInternal
|
|
82
86
|
# Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
|
83
87
|
# structure. Define p* as
|
84
88
|
#
|
85
|
-
# - (-infty, -infty)
|
89
|
+
# - (-infty, -infty) if Q \intersect P is empty and
|
86
90
|
# - the highest (max-y) point in Q \intersect P otherwise.
|
87
91
|
#
|
88
92
|
# This method returns p* in O(log n) time and O(1) extra space.
|
@@ -186,7 +190,7 @@ class MaxPrioritySearchTreeInternal
|
|
186
190
|
# Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
|
187
191
|
# structure. Define p* as
|
188
192
|
#
|
189
|
-
# - (infty, infty)
|
193
|
+
# - (infty, infty) if Q \intersect P is empty and
|
190
194
|
# - the leftmost (min-x) point in Q \intersect P otherwise.
|
191
195
|
#
|
192
196
|
# This method returns p* in O(log n) time and O(1) extra space.
|
@@ -994,9 +998,14 @@ class MaxPrioritySearchTreeInternal
|
|
994
998
|
# Build the initial structure
|
995
999
|
|
996
1000
|
private def construct_pst
|
997
|
-
|
998
|
-
|
1001
|
+
raise DataError, 'Duplicate x values are not supported' if contains_duplicates?(@data, by: :x)
|
1002
|
+
raise DataError, 'Duplicate y values are not supported' if contains_duplicates?(@data, by: :y)
|
1003
|
+
|
1004
|
+
# We follow the algorithm in the paper by De, Maheshwari et al.
|
999
1005
|
|
1006
|
+
# Since we are building an implicit binary tree, things are simpler if the array is 1-based. This probably requires a malloc and
|
1007
|
+
# data copy, which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
|
1008
|
+
# construction. In fact, we are probably doing O(n^2) work because of all the calls to #index_with_largest_y_in.
|
1000
1009
|
@data.unshift nil
|
1001
1010
|
|
1002
1011
|
h = Math.log2(@size).floor
|
@@ -1106,13 +1115,11 @@ class MaxPrioritySearchTreeInternal
|
|
1106
1115
|
(l..r).max_by { |idx| @data[idx].y }
|
1107
1116
|
end
|
1108
1117
|
|
1109
|
-
# Sort the subarray @data[l..r].
|
1118
|
+
# Sort the subarray @data[l..r].
|
1110
1119
|
private def sort_subarray(l, r)
|
1111
|
-
# heapsort_subarray(l, r)
|
1112
1120
|
return if l == r # 1-array already sorted!
|
1113
1121
|
|
1114
|
-
#
|
1115
|
-
#r -= 1
|
1122
|
+
# This slice-replacement is much faster than a Ruby-layer heapsort because it is mostly happening in C.
|
1116
1123
|
@data[l..r] = @data[l..r].sort_by(&:x)
|
1117
1124
|
end
|
1118
1125
|
|
@@ -40,7 +40,7 @@ require_relative 'shared'
|
|
40
40
|
# [2] De, Maheshwari, Nandy, Smid, _An in-place priority search tree_, 23rd Annual Canadian Conference on Computational Geometry.
|
41
41
|
# [3] De, Maheshwari, Nandy, Smid, _An in-place min-max priority search tree_, Computational Geometry, v46 (2013), pp 310-327.
|
42
42
|
# [4] Atkinson, Sack, Santoro, Strothotte, _Min-max heaps and generalized priority queues_, Commun. ACM 29 (10) (1986), pp 996-1000.
|
43
|
-
class
|
43
|
+
class DataStructuresRMolinari::MinmaxPrioritySearchTree
|
44
44
|
include Shared
|
45
45
|
|
46
46
|
# The array of pairs is turned into a minmax PST in-place without cloning. So clone before passing it in, if you care.
|
@@ -6,6 +6,7 @@ module Shared
|
|
6
6
|
|
7
7
|
# @private
|
8
8
|
class LogicError < StandardError; end
|
9
|
+
class DataError < StandardError; end
|
9
10
|
|
10
11
|
# @private
|
11
12
|
#
|
@@ -61,4 +62,22 @@ module Shared
|
|
61
62
|
(i & 1).zero?
|
62
63
|
end
|
63
64
|
end
|
65
|
+
|
66
|
+
# Simple O(n) check for duplicates in an enumerable.
|
67
|
+
#
|
68
|
+
# It may be worse than O(n), depending on how close to constant set insertion is.
|
69
|
+
#
|
70
|
+
# @param enum the enumerable to check for duplicates
|
71
|
+
# @param by a method to call on each element of enum before checking. The results of these methods are checked for
|
72
|
+
# duplication. When nil we don't call anything and just use the elements themselves.
|
73
|
+
def contains_duplicates?(enum, by: nil)
|
74
|
+
seen = Set.new
|
75
|
+
enum.each do |v|
|
76
|
+
v = v.send(by) if by
|
77
|
+
return true if seen.include? v
|
78
|
+
|
79
|
+
seen << v
|
80
|
+
end
|
81
|
+
false
|
82
|
+
end
|
64
83
|
end
|
@@ -1,35 +1,31 @@
|
|
1
1
|
require_relative 'data_structures_rmolinari/shared'
|
2
|
-
require_relative 'data_structures_rmolinari/generic_segment_tree_internal'
|
3
|
-
require_relative 'data_structures_rmolinari/heap_internal'
|
4
|
-
require_relative 'data_structures_rmolinari/max_priority_search_tree_internal'
|
5
|
-
require_relative 'data_structures_rmolinari/minmax_priority_search_tree_internal'
|
6
2
|
|
7
3
|
module DataStructuresRMolinari
|
8
4
|
Pair = Shared::Pair
|
5
|
+
end
|
9
6
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
7
|
+
# These define classes inside module DataStructuresRMolinari
|
8
|
+
require_relative 'data_structures_rmolinari/disjoint_union'
|
9
|
+
require_relative 'data_structures_rmolinari/generic_segment_tree'
|
10
|
+
require_relative 'data_structures_rmolinari/heap'
|
11
|
+
require_relative 'data_structures_rmolinari/max_priority_search_tree'
|
12
|
+
require_relative 'data_structures_rmolinari/minmax_priority_search_tree'
|
16
13
|
|
14
|
+
module DataStructuresRMolinari
|
17
15
|
########################################
|
18
|
-
# Segment
|
19
|
-
|
20
|
-
GenericSegmentTree = GenericSegmentTreeInternal
|
21
|
-
|
22
|
-
# Takes an array A[0...n] and tells us what the maximum value is on a subinterval i..j in O(log n) time.
|
16
|
+
# Concrete instances of Segment Tree
|
23
17
|
#
|
24
|
-
#
|
25
|
-
|
26
|
-
#
|
27
|
-
# - call it ExtremalValSegment tree or something similar
|
18
|
+
# @todo consider moving these into generic_segment_tree.rb
|
19
|
+
|
20
|
+
# Takes an array A(0...n) and tells us what the maximum value is on a subinterval A(i..j) in O(log n) time.
|
28
21
|
class MaxValSegmentTree
|
29
22
|
extend Forwardable
|
30
23
|
|
31
|
-
|
24
|
+
# Tell the tree that the value at idx has changed
|
25
|
+
def_delegator :@structure, :update_at
|
32
26
|
|
27
|
+
# @param data an object that contains values at integer indices based at 0, via +data[i]+.
|
28
|
+
# - The usual use case will be an Array, but it could also be a hash or a proc of some sort.
|
33
29
|
def initialize(data)
|
34
30
|
@structure = GenericSegmentTree.new(
|
35
31
|
combine: ->(a, b) { [a, b].max },
|
@@ -38,9 +34,43 @@ module DataStructuresRMolinari
|
|
38
34
|
identity: -Float::INFINITY
|
39
35
|
)
|
40
36
|
end
|
37
|
+
|
38
|
+
# The maximum value in A(i..j)
|
39
|
+
#
|
40
|
+
# The arguments must be integers in 0...(A.size)
|
41
|
+
# @return the largest value in A(i..j).
|
42
|
+
# - Return -Float::INFINITY (the identity element for max) if i > j
|
43
|
+
def max_on(i, j)
|
44
|
+
@structure.query_on(i, j)
|
45
|
+
end
|
41
46
|
end
|
42
47
|
|
43
|
-
|
44
|
-
#
|
45
|
-
|
48
|
+
# A segment tree that for an array A(0...n) efficiently answers questions of the form "what is the index of the maximal value in
|
49
|
+
# a subinterval A(i..j)?" in O(log n) time.
|
50
|
+
class IndexOfMaxValSegmentTree
|
51
|
+
extend Forwardable
|
52
|
+
|
53
|
+
# Tell the tree that the value at idx has changed
|
54
|
+
def_delegator :@structure, :update_at
|
55
|
+
|
56
|
+
# @param (see MaxValSegmentTree#initialize)
|
57
|
+
def initialize(data)
|
58
|
+
@structure = GenericSegmentTree.new(
|
59
|
+
combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
|
60
|
+
single_cell_array_val: ->(i) { [i, data[i]] },
|
61
|
+
size: data.size,
|
62
|
+
identity: nil
|
63
|
+
)
|
64
|
+
end
|
65
|
+
|
66
|
+
# The index of the maximum value in A(i..j)
|
67
|
+
#
|
68
|
+
# The arguments must be integers in 0...(A.size)
|
69
|
+
# @return (Integer, nil) the index of the largest value in A(i..j).
|
70
|
+
# - If there is more than one entry with that value, return one of the indices. There is no guarantee as to which one.
|
71
|
+
# - Return +nil+ if i > j
|
72
|
+
def index_of_max_val_on(i, j)
|
73
|
+
@structure.query_on(i, j)&.first # discard the value part of the pair
|
74
|
+
end
|
75
|
+
end
|
46
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_structures_rmolinari
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rory Molinari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: must_be
|
@@ -67,21 +67,25 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.22.0
|
69
69
|
description: |
|
70
|
-
This small gem contains several data structures that I have implemented to learn how they work.
|
70
|
+
This small gem contains several data structures that I have implemented in Ruby to learn how they work.
|
71
71
|
|
72
72
|
Sometimes it is not enough to read the description of a data structure and accompanying pseudo-code.
|
73
|
-
Actually implementing
|
73
|
+
Actually implementing it is often helpful in understanding what is going on. It is also
|
74
74
|
usually fun.
|
75
|
-
|
75
|
+
|
76
|
+
The gem contains basic implementations of Disjoint Union, Heap, Priority Search Tree, and Segment Tree.
|
77
|
+
See the homepage for more details.
|
78
|
+
email: rorymolinari@gmail.com
|
76
79
|
executables: []
|
77
80
|
extensions: []
|
78
81
|
extra_rdoc_files: []
|
79
82
|
files:
|
80
83
|
- lib/data_structures_rmolinari.rb
|
81
|
-
- lib/data_structures_rmolinari/
|
82
|
-
- lib/data_structures_rmolinari/
|
83
|
-
- lib/data_structures_rmolinari/
|
84
|
-
- lib/data_structures_rmolinari/
|
84
|
+
- lib/data_structures_rmolinari/disjoint_union.rb
|
85
|
+
- lib/data_structures_rmolinari/generic_segment_tree.rb
|
86
|
+
- lib/data_structures_rmolinari/heap.rb
|
87
|
+
- lib/data_structures_rmolinari/max_priority_search_tree.rb
|
88
|
+
- lib/data_structures_rmolinari/minmax_priority_search_tree.rb
|
85
89
|
- lib/data_structures_rmolinari/shared.rb
|
86
90
|
homepage: https://github.com/rmolinari/data_structures
|
87
91
|
licenses:
|