data_structures_rmolinari 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c7f82ec16e728941823e07e7ca20259b04e764c11db71708a1a614b3181fa2c
4
- data.tar.gz: e0084c9fe2a93457a2a04279939cf12ec5fb050c65ce04af4449f3e618bfd22c
3
+ metadata.gz: ea5f3d2fed60234fefe18577985f1e8ae967ecec92ef0812ccaeff0fb81ca392
4
+ data.tar.gz: e3e59746e8d7e881209a2c3a0e78ea186aa0562d138d7e2b6ed261943f6cb45f
5
5
  SHA512:
6
- metadata.gz: 8a8dfa180b3992318bbf23b844bb91d3d35c7883fcb9e4ae332ab23d83ad5cb1cf6ddbcd582ae4f0367b6bee0619fa00489dd0c964bd40c94374ec59761f3ecc
7
- data.tar.gz: d3d9d6a1f2554f535919ba9328670d97e98b82702d66ab430454c3c503d81e83d71e82325f253a4828399f61cb3cf88c40c391e85ee6116b3ae3a20aeffb09ca
6
+ metadata.gz: e480ccee258f97479f1768b8f94b65a1c13f3b1e8f15e843c4240adeac555574383ef7de3928a057fd0b6707823cb4eeddee2128a49175c2f671e1526f887c2e
7
+ data.tar.gz: 30ac4bf22fe37bdfbf4eeccdcdfc2feba23278703957eebf5f9be6c30bb3e68024ccd49ef63c892d78c1fd58b54336a762c86cb107f9f27937222e9baf252ef4
@@ -0,0 +1,119 @@
1
+ require_relative 'shared'
2
+
3
+ # A Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or max) on an
4
+ # arbitrary subarray of a given array.
5
+ #
6
+ # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
7
+ # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
8
+ # called an "interval tree."
9
+ #
10
+ # For more details (and some close-to-metal analysis of run time, especially for large datasets) see
11
+ # https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up
12
+ # implementation, which is faster, at least for large datasets and cache-relevant compiled code.
13
+ #
14
+ # This is a generic implementation.
15
+ #
16
+ # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
17
+ #
18
+ # @todo
19
+ # - provide a data-update operation like update_val_at(idx, val)
20
+ # - this is O(log n)
21
+ # - note that this may need some rework. Consider something like IndexOfMaxVal: @merge needs to know about the underlying data
22
+ # in that case. Hmmm. Maybe the lambda can close over the data in a way that makes it possible to change the data "from the
23
+ # outside". Yes:
24
+ # a = [1,2,3]
25
+ # foo = ->() { a.max }
26
+ # foo.call # 3
27
+ # a = [1,2,4]
28
+ # foo.call # 4
29
+ # - Offer an optional parameter base_case_value_extractor (<-- need better name) to be used in #determine_val in the case that
30
+ # left == tree_l && right == tree_r instead of simply returning @tree[tree_idx]
31
+ # - Use case: https://cp-algorithms.com/data_structures/segment_tree.html#saving-the-entire-subarrays-in-each-vertex, such as
32
+ # finding the least element in a subarray l..r no smaller than a given value x. In this case we store a sorted version of the
33
+ # entire subarray at each node and use a binary search on it.
34
+ # - the default value would simply be the identity function.
35
+ # - NOTE that in this case, we have different "combine" functions in #determine_val and #build. In #build we would combine
36
+ # sorted lists into a larger sorted list. In #determine_val we combine results via #min.
37
+ # - Think about the interface before doing this.
38
+ class GenericSegmentTreeInternal
39
+ include Shared::BinaryTreeArithmetic
40
+
41
+ # Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
42
+ # @param combine a lambda that takes two values and munges them into a combined value.
43
+ # - For example, if we are calculating sums over subintervals, combine.call(a, b) = a + b, while if we are doing maxima we will
44
+ # return max(a, b)
45
+ # @param single_cell_array_val a lambda that takes an index i and returns the value we need to store in the #build
46
+ # operation for the subinterval i..i. This is often simply the value data[i], but in some cases - like "index of max val" -
47
+ # it will be something else.
48
+ # @param size the size of the underlying data array, used in certain internal arithmetic.
49
+ # @param identity is the value to return when we are querying on an empty interval
50
+ # - for sums, this will be zero; for maxima, this will be -Infinity, etc
51
+ def initialize(combine:, single_cell_array_val:, size:, identity:)
52
+ @combine = combine
53
+ @single_cell_array_val = single_cell_array_val
54
+ @size = size
55
+ @identity = identity
56
+
57
+ @tree = []
58
+ build(root, 0, @size - 1)
59
+ end
60
+
61
+ # The desired value (max, sum, etc.) on the subinterval left..right.
62
+ # @param left the left end of the subinterval.
63
+ # @param right the right end (inclusive) of the subinterval.
64
+ #
65
+ # The type of the return value depends on the concrete instance of the segment tree.
66
+ def query_on(left, right)
67
+ raise "Bad query interval #{left}..#{right}" if left.negative? || right >= @size
68
+
69
+ return @identity if left > right # empty interval
70
+
71
+ determine_val(root, left, right, 0, @size - 1)
72
+ end
73
+
74
+ private def determine_val(tree_idx, left, right, tree_l, tree_r)
75
+ # Does the current tree node exactly serve up the interval we're interested in?
76
+ return @tree[tree_idx] if left == tree_l && right == tree_r
77
+
78
+ # We need to go further down the tree
79
+ mid = midpoint(tree_l, tree_r)
80
+ if mid >= right
81
+ # Our interval is contained by the left child's interval
82
+ determine_val(left(tree_idx), left, right, tree_l, mid)
83
+ elsif mid + 1 <= left
84
+ # Our interval is contained by the right child's interval
85
+ determine_val(right(tree_idx), left, right, mid + 1, tree_r)
86
+ else
87
+ # Our interval is split between the two, so we need to combine the results from the children.
88
+ @combine.call(
89
+ determine_val(left(tree_idx), left, mid, tree_l, mid),
90
+ determine_val(right(tree_idx), mid + 1, right, mid + 1, tree_r)
91
+ )
92
+ end
93
+ end
94
+
95
+ # Build the internal data structure.
96
+ #
97
+ # - tree_idx is the index into @tree
98
+ # - tree_l..tree_r is the subinterval of the underlying data that node tree_idx corresponds to
99
+ private def build(tree_idx, tree_l, tree_r)
100
+ if tree_l == tree_r
101
+ @tree[tree_idx] = @single_cell_array_val.call(tree_l) # single-cell interval
102
+ else
103
+ # divide and conquer
104
+ mid = midpoint(tree_l, tree_r)
105
+ left = left(tree_idx)
106
+ right = right(tree_idx)
107
+
108
+ build(left, tree_l, mid)
109
+ build(right, mid + 1, tree_r)
110
+
111
+ @tree[tree_idx] = @combine.call(@tree[left], @tree[right])
112
+ end
113
+ end
114
+
115
+ # Do it in one place so we don't accidentally round up here and down there, which would lead to chaos
116
+ private def midpoint(left, right)
117
+ (left + right) / 2
118
+ end
119
+ end
@@ -0,0 +1,179 @@
1
+ require_relative 'shared'
2
+
3
+ # A heap is a balanced binary tree in which each entry has an associated priority. For each node p of the tree that isn't the root,
4
+ # the priority of the element at p is not less than the priority of the element at the parent of p.
5
+ #
6
+ # Thus the priority at each node p - root or not - is no greater than the priorities of the elements in the subtree rooted at p. It
7
+ # is a "min-heap".
8
+ #
9
+ # We can make it a max-heap, in which each node's priority is no greater than the priority of its parent, via a parameter to the
10
+ # initializer.
11
+ #
12
+ # We provide the following operations
13
+ # - +empty?+
14
+ # - is the heap empty?
15
+ # - O(1)
16
+ # - +insert+
17
+ # - add a new element to the heap with an associated priority
18
+ # - O(log N)
19
+ # - +top+
20
+ # - return the lowest-priority element, which is the element at the root of the tree. In a max-heap this is the highest-priority
21
+ # element.
22
+ # - O(1)
23
+ # - +pop+
24
+ # - removes and returns the item that would be returned by +top+
25
+ # - O(log N)
26
+ # - +update+
27
+ # - tell the heap that the priority of a particular item has changed
28
+ # - O(log N)
29
+ #
30
+ # Here N is the number of elements in the heap.
31
+ #
32
+ # References:
33
+ #
34
+ # - https://en.wikipedia.org/wiki/Binary_heap
35
+ # - Edelkamp, S., Elmasry, A., Katajainen, J., _Optimizing Binary Heaps_, Theory Comput Syst (2017), vol 61, pp 606-636,
36
+ # DOI 10.1007/s00224-017-9760-2
37
+ #
38
+ # @todo
39
+ # - relax the requirement that priorities must be comparable via +<+ and respond to negation. Instead, allow comparison via +<=>+
40
+ # and handle max-heaps differently.
41
+ # - this will allow priorities to be arrays for tie-breakers and similar.
42
+ class HeapInternal
43
+ include Shared::BinaryTreeArithmetic
44
+
45
+ attr_reader :size
46
+
47
+ Pair = Struct.new(:priority, :item)
48
+
49
+ # @param max_heap when truthy, make a max-heap rather than a min-heap
50
+ # @param debug when truthy, verify the heap property after each update that might violate it. This makes operations much slower.
51
+ def initialize(max_heap: false, debug: false)
52
+ @data = []
53
+ @size = 0
54
+ @max_heap = max_heap
55
+ @index_of = {}
56
+ @debug = debug
57
+ end
58
+
59
+ # Is the heap empty?
60
+ def empty?
61
+ @size.zero?
62
+ end
63
+
64
+ # Insert a new element into the heap with the given priority.
65
+ # @param value the item to be inserted. It is an error to insert an item that is already present in the heap, though we don't
66
+ # check for this.
67
+ # @param priority the priority to use for the new item. The values used as priorities must be totally ordered via +<+ and, if +self+ is
68
+ # a max-heap, must respond to negation +@-+ in the natural order-respecting way.
69
+ # @todo
70
+ # - check for duplicate
71
+ def insert(value, priority)
72
+ priority *= -1 if @max_heap
73
+
74
+ @size += 1
75
+
76
+ d = Pair.new(priority, value)
77
+ assign(d, @size)
78
+
79
+ sift_up(@size)
80
+ end
81
+
82
+ # Return the top of the heap without removing it
83
+ # @return the value with minimal (maximal for max-heaps) priority. Strictly speaking, it returns the item at the root of the
84
+ # binary tree; this element has minimal priority, but there may be other elements with the same priority.
85
+ def top
86
+ raise 'Heap is empty!' unless @size.positive?
87
+
88
+ @data[root].item
89
+ end
90
+
91
+ # Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
92
+ # @return (see #top)
93
+ def pop
94
+ result = top
95
+ @index_of.delete(result)
96
+
97
+ assign(@data[@size], root)
98
+
99
+ @data[@size] = nil
100
+ @size -= 1
101
+
102
+ sift_down(root) if @size.positive?
103
+
104
+ result
105
+ end
106
+
107
+ # Update the priority of the given element and maintain the necessary heap properties.
108
+ # @param element the item whose priority we are updating. It is an error to update the priority of an element not already in the
109
+ # heap
110
+ # @param priority the new priority
111
+ #
112
+ # @todo
113
+ # - check that the element is in the heap
114
+ def update(element, priority)
115
+ priority *= -1 if @max_heap
116
+
117
+ idx = @index_of[element]
118
+ old = @data[idx].priority
119
+ @data[idx].priority = priority
120
+ if priority > old
121
+ sift_down(idx)
122
+ elsif priority < old
123
+ sift_up(idx)
124
+ end
125
+
126
+ check_heap_property if @debug
127
+ end
128
+
129
+ # Filter the value at index up to its correct location. Algorithm from Edelkamp et al.
130
+ private def sift_up(idx)
131
+ return if idx == root
132
+
133
+ x = @data[idx]
134
+ while idx != root
135
+ i = parent(idx)
136
+ break unless x.priority < @data[i].priority
137
+
138
+ assign(@data[i], idx)
139
+ idx = i
140
+ end
141
+ assign(x, idx)
142
+
143
+ check_heap_property if @debug
144
+ end
145
+
146
+ # Filter the value at index down to its correct location. Algorithm from Edelkamp et al.
147
+ private def sift_down(idx)
148
+ x = @data[idx]
149
+
150
+ while (j = left(idx)) <= @size
151
+ j += 1 if j + 1 <= @size && @data[j + 1].priority < @data[j].priority
152
+
153
+ break unless @data[j].priority < x.priority
154
+
155
+ assign(@data[j], idx)
156
+ idx = j
157
+ end
158
+ assign(x, idx)
159
+
160
+ check_heap_property if @debug
161
+ end
162
+
163
+ # Put the pair in the given heap location
164
+ private def assign(pair, idx)
165
+ @data[idx] = pair
166
+ @index_of[pair.item] = idx
167
+ end
168
+
169
+ # For debugging
170
+ private def check_heap_property
171
+ (root..@size).each do |idx|
172
+ left = left(idx)
173
+ right = right(idx)
174
+
175
+ raise "Heap property violated by left child of index #{idx}" if left <= @size && @data[idx].priority >= @data[left].priority
176
+ raise "Heap property violated by right child of index #{idx}" if right <= @size && @data[idx].priority >= @data[right].priority
177
+ end
178
+ end
179
+ end
@@ -1,8 +1,6 @@
1
1
  require 'set'
2
-
3
2
  require_relative 'shared'
4
3
 
5
- class LogicError < StandardError; end
6
4
 
7
5
  # A priority search tree (PST) stores a set, P, of two-dimensional points (x,y) in a way that allows efficient answers to certain
8
6
  # questions about P.
@@ -44,6 +42,7 @@ class LogicError < StandardError; end
44
42
  # Geometry, 2011
45
43
  class MaxPrioritySearchTreeInternal
46
44
  include Shared
45
+ include BinaryTreeArithmetic
47
46
 
48
47
  # Construct a MaxPST from the collection of points in +data+.
49
48
  #
@@ -1047,52 +1046,52 @@ class MaxPrioritySearchTreeInternal
1047
1046
  ########################################
1048
1047
  # Tree arithmetic
1049
1048
 
1050
- # First element and root of the tree structure
1051
- private def root
1052
- 1
1053
- end
1054
-
1055
- # Indexing is from 1
1056
- private def parent(i)
1057
- i >> 1
1058
- end
1059
-
1060
- private def left(i)
1061
- i << 1
1062
- end
1063
-
1064
- private def right(i)
1065
- 1 + (i << 1)
1066
- end
1067
-
1068
- private def level(i)
1069
- l = 0
1070
- while i > root
1071
- i >>= 1
1072
- l += 1
1073
- end
1074
- l
1075
- end
1076
-
1077
- # i has no children
1078
- private def leaf?(i)
1079
- i > @last_non_leaf
1080
- end
1081
-
1082
- # i has exactly one child (the left)
1083
- private def one_child?(i)
1084
- i == @parent_of_one_child
1085
- end
1086
-
1087
- # i has two children
1088
- private def two_children?(i)
1089
- i <= @last_parent_of_two_children
1090
- end
1091
-
1092
- # i is the left child of its parent.
1093
- private def left_child?(i)
1094
- (i & 1).zero?
1095
- end
1049
+ # # First element and root of the tree structure
1050
+ # private def root
1051
+ # 1
1052
+ # end
1053
+
1054
+ # # Indexing is from 1
1055
+ # private def parent(i)
1056
+ # i >> 1
1057
+ # end
1058
+
1059
+ # private def left(i)
1060
+ # i << 1
1061
+ # end
1062
+
1063
+ # private def right(i)
1064
+ # 1 + (i << 1)
1065
+ # end
1066
+
1067
+ # private def level(i)
1068
+ # l = 0
1069
+ # while i > root
1070
+ # i >>= 1
1071
+ # l += 1
1072
+ # end
1073
+ # l
1074
+ # end
1075
+
1076
+ # # i has no children
1077
+ # private def leaf?(i)
1078
+ # i > @last_non_leaf
1079
+ # end
1080
+
1081
+ # # i has exactly one child (the left)
1082
+ # private def one_child?(i)
1083
+ # i == @parent_of_one_child
1084
+ # end
1085
+
1086
+ # # i has two children
1087
+ # private def two_children?(i)
1088
+ # i <= @last_parent_of_two_children
1089
+ # end
1090
+
1091
+ # # i is the left child of its parent.
1092
+ # private def left_child?(i)
1093
+ # (i & 1).zero?
1094
+ # end
1096
1095
 
1097
1096
  private def swap(index1, index2)
1098
1097
  return if index1 == index2
@@ -1,3 +1,7 @@
1
+ require 'must_be'
2
+
3
+ require_relative 'shared'
4
+
1
5
  # A priority search tree (PST) stores points in two dimensions (x,y) and can efficiently answer certain questions about the set of
2
6
  # points.
3
7
  #
@@ -26,28 +30,18 @@
26
30
  # I started implementing the in-place PST. Then, finding the follow-up paper [3], decided to do that one instead, as the paper says
27
31
  # it is more flexible. The point is to learn a new data structure and its associated algorithms.
28
32
  #
29
- # Hmmm. The algorithms are rather bewildering. Highest3SidedUp is complicated, and only two of the functions CheckLeft, CheckLeftIn,
33
+ # The algorithms are rather bewildering. Highest3SidedUp is complicated, and only two of the functions CheckLeft, CheckLeftIn,
30
34
  # CheckRight, CheckRightIn are given; the other two are "symmetric". But it's not really clear what the first are actually doing, so
31
35
  # it's hard to know what the others actually do.
32
36
  #
33
- # I either need to go back to MaxPST until I understand things better, or spend quite a lot of time going through the algorithms
34
- # here on paper.
35
-
37
+ # The implementation is incomplete. The pseudo-code in the paper is buggy (see the code below), which makes progress difficult.
38
+ #
36
39
  # [1] E. McCreight, _Priority Search Trees_, SIAM J. Computing, v14, no 3, May 1985, pp 257-276.
37
40
  # [2] De, Maheshwari, Nandy, Smid, _An in-place priority search tree_, 23rd Annual Canadian Conference on Computational Geometry.
38
41
  # [3] De, Maheshwari, Nandy, Smid, _An in-place min-max priority search tree_, Computational Geometry, v46 (2013), pp 310-327.
39
42
  # [4] Atkinson, Sack, Santoro, Strothotte, _Min-max heaps and generalized priority queues_, Commun. ACM 29 (10) (1986), pp 996-1000.
40
-
41
- require 'must_be'
42
-
43
- Pair = Struct.new(:x, :y) do
44
- def fmt
45
- "(#{x},#{y})"
46
- end
47
- end
48
-
49
43
  class MinmaxPrioritySearchTreeInternal
50
- INFINITY = Float::INFINITY
44
+ include Shared
51
45
 
52
46
  # The array of pairs is turned into a minmax PST in-place without cloning. So clone before passing it in, if you care.
53
47
  #
@@ -3,4 +3,62 @@ module Shared
3
3
  INFINITY = Float::INFINITY
4
4
 
5
5
  Pair = Struct.new(:x, :y)
6
+
7
+ # @private
8
+ class LogicError < StandardError; end
9
+
10
+ # @private
11
+ #
12
+ # Provide simple arithmetic for an implied binary tree stored in an array, with the root at 1
13
+ module BinaryTreeArithmetic
14
+ # First element and root of the tree structure
15
+ private def root
16
+ 1
17
+ end
18
+
19
+ # The parent of node i
20
+ private def parent(i)
21
+ i >> 1
22
+ end
23
+
24
+ # The left child of node i
25
+ private def left(i)
26
+ i << 1
27
+ end
28
+
29
+ # The right child of node i
30
+ private def right(i)
31
+ 1 + (i << 1)
32
+ end
33
+
34
+ # The level in the tree of node i. The root is at level 0.
35
+ private def level(i)
36
+ l = 0
37
+ while i > root
38
+ i >>= 1
39
+ l += 1
40
+ end
41
+ l
42
+ end
43
+
44
+ # i has no children
45
+ private def leaf?(i)
46
+ i > @last_non_leaf
47
+ end
48
+
49
+ # i has exactly one child (the left)
50
+ private def one_child?(i)
51
+ i == @parent_of_one_child
52
+ end
53
+
54
+ # i has two children
55
+ private def two_children?(i)
56
+ i <= @last_parent_of_two_children
57
+ end
58
+
59
+ # i is the left child of its parent.
60
+ private def left_child?(i)
61
+ (i & 1).zero?
62
+ end
63
+ end
6
64
  end
@@ -1,7 +1,46 @@
1
+ require_relative 'data_structures_rmolinari/shared'
2
+ require_relative 'data_structures_rmolinari/generic_segment_tree_internal'
3
+ require_relative 'data_structures_rmolinari/heap_internal'
1
4
  require_relative 'data_structures_rmolinari/max_priority_search_tree_internal'
2
5
  require_relative 'data_structures_rmolinari/minmax_priority_search_tree_internal'
3
6
 
4
7
  module DataStructuresRMolinari
8
+ Pair = Shared::Pair
9
+
10
+ ########################################
11
+ # Priority Search Trees
12
+ #
13
+ # Note that MinmaxPrioritySearchTree is only a fragment of what we need
5
14
  MaxPrioritySearchTree = MaxPrioritySearchTreeInternal
6
15
  MinmaxPrioritySearchTree = MinmaxPrioritySearchTreeInternal
16
+
17
+ ########################################
18
+ # Segment Trees
19
+
20
+ GenericSegmentTree = GenericSegmentTreeInternal
21
+
22
+ # Takes an array A[0...n] and tells us what the maximum value is on a subinterval i..j in O(log n) time.
23
+ #
24
+ # TODO:
25
+ # - allow min val too
26
+ # - add a flag to the initializer
27
+ # - call it ExtremalValSegment tree or something similar
28
+ class MaxValSegmentTree
29
+ extend Forwardable
30
+
31
+ def_delegator :@structure, :query_on, :max_on
32
+
33
+ def initialize(data)
34
+ @structure = GenericSegmentTree.new(
35
+ combine: ->(a, b) { [a, b].max },
36
+ single_cell_array_val: ->(i) { data[i] },
37
+ size: data.size,
38
+ identity: -Float::INFINITY
39
+ )
40
+ end
41
+ end
42
+
43
+ ########################################
44
+ # Heap
45
+ Heap = HeapInternal
7
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-04 00:00:00.000000000 Z
11
+ date: 2023-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be
@@ -78,6 +78,8 @@ extensions: []
78
78
  extra_rdoc_files: []
79
79
  files:
80
80
  - lib/data_structures_rmolinari.rb
81
+ - lib/data_structures_rmolinari/generic_segment_tree_internal.rb
82
+ - lib/data_structures_rmolinari/heap_internal.rb
81
83
  - lib/data_structures_rmolinari/max_priority_search_tree_internal.rb
82
84
  - lib/data_structures_rmolinari/minmax_priority_search_tree_internal.rb
83
85
  - lib/data_structures_rmolinari/shared.rb
@@ -91,7 +93,7 @@ require_paths:
91
93
  - lib
92
94
  required_ruby_version: !ruby/object:Gem::Requirement
93
95
  requirements:
94
- - - '='
96
+ - - "~>"
95
97
  - !ruby/object:Gem::Version
96
98
  version: 3.1.3
97
99
  required_rubygems_version: !ruby/object:Gem::Requirement