data_structures_rmolinari 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +119 -33
- data/Rakefile +6 -4
- data/ext/c_disjoint_union/disjoint_union.c +75 -129
- data/ext/c_disjoint_union/extconf.rb +7 -2
- data/ext/c_segment_tree_template/extconf.rb +17 -0
- data/ext/c_segment_tree_template/segment_tree_template.c +363 -0
- data/ext/shared.c +32 -0
- data/lib/data_structures_rmolinari/algorithms.rb +5 -5
- data/lib/data_structures_rmolinari/c_segment_tree_template_impl.rb +15 -0
- data/lib/data_structures_rmolinari/disjoint_union.rb +2 -0
- data/lib/data_structures_rmolinari/segment_tree.rb +126 -0
- data/lib/data_structures_rmolinari/segment_tree_template.rb +11 -8
- data/lib/data_structures_rmolinari.rb +5 -62
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7682f6d3b0779f347ce0797f55f33b9d7dcc7bd9c2039fc2fd6f865eb72e085a
|
4
|
+
data.tar.gz: d717e5e36f79ddc4ecb605a59b475b7114359dea7476445590deb300f7915bd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3ffd9a4f67f55b7a2df1c949cf2288c06fcae416d5ff03a10307a1b79c3dae1daa74e2576d5e190c989adeea47b046426fad8c3c64199aadf22ba500b317f36
|
7
|
+
data.tar.gz: 8380d6117f2955da9362395f8315f5121b4f7afba2f69aabb1981a01b675cbbed81d07c10b5745409080c2588c92df3d676ec36efa128571234a74dceef0e20d
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [Unreleased]
|
4
|
+
|
5
|
+
## [0.5.0] 2023-02-03
|
6
|
+
|
7
|
+
- SegmentTree
|
8
|
+
- Reorganize the code into a SegmentTree submodule.
|
9
|
+
- Provide a convenience method for getting concrete instances.
|
10
|
+
|
11
|
+
- README.md
|
12
|
+
- Add some simple example code for the data types.
|
13
|
+
|
14
|
+
## [0.4.4] 2023-02-02
|
15
|
+
|
16
|
+
- Disjoint Union
|
17
|
+
- C extension: use Convenient Containers rather than my janky Dynamic Array attempt.
|
18
|
+
|
19
|
+
- Segment Tree
|
20
|
+
- Add a C implementation as CSegmentTreeTemplate.
|
21
|
+
|
3
22
|
## [0.4.3] 2023-01-27
|
4
23
|
|
5
24
|
- Fix bad directive in Rakefile for DisjointUnion C extension
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ This is a small collection of Ruby data structures that I have implemented for m
|
|
4
4
|
structure is almost always more educational than simply reading about it and is usually fun. I wrote some of them while
|
5
5
|
participating in the Advent of Code (https://adventofcode.com/).
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
The implementations are based on the expository descriptions and pseudo-code I found as I read about each structure and so are not
|
8
|
+
as fast as possible.
|
9
9
|
|
10
10
|
The code is available as a gem: https://rubygems.org/gems/data_structures_rmolinari.
|
11
11
|
|
@@ -14,18 +14,6 @@ The code is available as a gem: https://rubygems.org/gems/data_structures_rmolin
|
|
14
14
|
The right way to organize the code is not obvious to me. For now the data structures are all defined in the module
|
15
15
|
`DataStructuresRMolinari` to avoid polluting the global namespace.
|
16
16
|
|
17
|
-
Example usage after the gem is installed:
|
18
|
-
```
|
19
|
-
require 'data_structures_rmolinari`
|
20
|
-
|
21
|
-
# Pull what we need out of the namespace
|
22
|
-
MaxPrioritySearchTree = DataStructuresRMolinari::MaxPrioritySearchTree
|
23
|
-
Point = DataStructuresRMolinari::Point # anything responding to :x and :y is fine
|
24
|
-
|
25
|
-
pst = MaxPrioritySearchTree.new([Point.new(1, 1)])
|
26
|
-
puts pst.largest_y_in_ne(0, 0) # "Point(1,1)"
|
27
|
-
```
|
28
|
-
|
29
17
|
# Implementations
|
30
18
|
|
31
19
|
## Disjoint Union
|
@@ -42,8 +30,22 @@ It also provides
|
|
42
30
|
For more details see https://en.wikipedia.org/wiki/Disjoint-set_data_structure and the paper [[TvL1984]](#references) by Tarjan and
|
43
31
|
van Leeuwen.
|
44
32
|
|
45
|
-
|
46
|
-
|
33
|
+
``` ruby
|
34
|
+
require 'data_structures_rmolinari'
|
35
|
+
DisjointUnion = DataStructuresRMolinari::DisjointUnion
|
36
|
+
|
37
|
+
# Create an instance over the "universe" 0, 1, ..., 9.
|
38
|
+
du = DisjointUnion.new(10)
|
39
|
+
du.subset_count # => 10; each element starts out in its own subset
|
40
|
+
|
41
|
+
du.unite(2, 3) # say that 2 and 3 are actually in the same subset
|
42
|
+
du.subset_count # => 9
|
43
|
+
du.find(2) == du.find(3) # => true
|
44
|
+
|
45
|
+
du.unite(4, 5)
|
46
|
+
du.unite(3, 4) # now 2, 3, 4, and 5 are all in the same subset
|
47
|
+
du.subset_count # => 7
|
48
|
+
```
|
47
49
|
|
48
50
|
## Heap
|
49
51
|
|
@@ -66,6 +68,24 @@ allows the insertion of duplicate items (which is sometimes useful) and slightly
|
|
66
68
|
|
67
69
|
See https://en.wikipedia.org/wiki/Binary_heap and the paper by Edelkamp, Elmasry, and Katajainen [[EEK2017]](#references).
|
68
70
|
|
71
|
+
``` ruby
|
72
|
+
require 'data_structures_rmolinari'
|
73
|
+
Heap = DataStructuresRMolinari::Heap
|
74
|
+
|
75
|
+
data = [4, 3, 2, 1]
|
76
|
+
|
77
|
+
heap = Heap.new
|
78
|
+
|
79
|
+
# Insert the elements of data, each with itself as priority.
|
80
|
+
data.each { |v| heap.insert(v, v) }
|
81
|
+
|
82
|
+
heap.top # => 1, since we have a min-heap.
|
83
|
+
heap.pop # => 1
|
84
|
+
heap.top # => 2; with 1 gone, this is the element with least priority
|
85
|
+
heap.update(3, -3)
|
86
|
+
heap.top # => 3; now 3 is the element with least priority
|
87
|
+
```
|
88
|
+
|
69
89
|
## Priority Search Tree
|
70
90
|
|
71
91
|
A PST stores a set P of two-dimensional points in a way that allows certain queries about P to be answered efficiently. The data
|
@@ -84,41 +104,81 @@ pointing north.
|
|
84
104
|
|
85
105
|
There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0, y0)`.
|
86
106
|
|
107
|
+
(These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
|
108
|
+
[[McC1985]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
|
109
|
+
|
87
110
|
The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
|
88
111
|
the number of points actually enumerated.
|
89
112
|
|
90
113
|
The implementation is in `MaxPrioritySearchTree` (MaxPST for short), so called because internally the structure is, among other
|
91
114
|
things, a max-heap on the y-coordinates.
|
92
115
|
|
93
|
-
These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
|
94
|
-
[[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.
|
95
|
-
|
96
116
|
We also provide a `MinPrioritySearchTree`, which answers analogous queries in the southward-infinite quadrants and 3-sided
|
97
117
|
regions.
|
98
118
|
|
99
119
|
By default these data structures are immutable: once constructed they cannot be changed. But there is a constructor option that
|
100
120
|
makes the instance "dynamic". This allows us to delete the element at the root of the tree - the one with largest y value (smallest
|
101
121
|
for MinPST) - with the `delete_top!` method. This operation is important in certain algorithms, such as enumerating all maximal
|
102
|
-
empty rectangles (see the second paper by De et al
|
122
|
+
empty rectangles (see the second paper by De et al. [[DMNS2013]](#references)). Note that points still cannot be added to the PST in
|
103
123
|
any case, and choosing the dynamic option makes certain internal bookkeeping operations slower.
|
104
124
|
|
105
125
|
In [[DMNS2013]](#references) De et al. generalize the in-place structure to a _Min-max Priority Search Tree_ (MinmaxPST) that can
|
106
126
|
answer queries in all four quadrants and both "kinds" of 3-sided boxes. Having one of these would save the trouble of constructing
|
107
127
|
both a MaxPST and MinPST. But the presentation is hard to follow in places and the paper's pseudocode is buggy.[^minmaxpst]
|
108
128
|
|
129
|
+
``` ruby
|
130
|
+
require 'data_structures_rmolinari'
|
131
|
+
MaxPST = DataStructuresRMolinari::MaxPrioritySearchTree
|
132
|
+
Point = Shared::Point # simple (x, y) struct. Anything responding to #x and #y will work
|
133
|
+
|
134
|
+
data = [Point.new(0, 0), Point.new(1, 2), Point.new(2, 1)]
|
135
|
+
pst = MaxPST.new(data)
|
136
|
+
|
137
|
+
pst.largest_y_in_ne(0, 0) # => #<struct Shared::Point x=1, y=2>
|
138
|
+
pst.largest_y_in_ne(1, 1) # => #<struct Shared::Point x=1, y=2>
|
139
|
+
pst.largest_y_in_ne(1.5, 1) # => #<struct Shared::Point x=2, y=1>
|
140
|
+
pst.largest_y_in_3_sided(-0.5, 0.5, 0) # => #<struct Shared::Point x=0, y=0>
|
141
|
+
```
|
142
|
+
|
109
143
|
## Segment Tree
|
110
144
|
|
111
|
-
|
112
|
-
elements in an arbitrary subinterval A
|
113
|
-
of A in such a way that the values we store in the nodes can be combined efficiently to determine the desired result for
|
114
|
-
subarrays.
|
145
|
+
A segment tree stores information related to subintervals of a certain array. For example, a segment tree can be used to find the
|
146
|
+
sum of the elements in an arbitrary subinterval A(i..j) of an array A(0..n) in O(log n) time. Each node in the tree corresponds to a
|
147
|
+
subarray of A in such a way that the values we store in the nodes can be combined efficiently to determine the desired result for
|
148
|
+
arbitrary subarrays.
|
115
149
|
|
116
150
|
An excellent description of the idea is found at https://cp-algorithms.com/data_structures/segment_tree.html.
|
117
151
|
|
118
|
-
Generic code is provided in `SegmentTreeTemplate
|
119
|
-
|
120
|
-
|
121
|
-
|
152
|
+
Generic code is provided in `SegmentTree::SegmentTreeTemplate` and its equivalent (and faster) C-based sibling,
|
153
|
+
`SegmentTree::CSegmentTreeTemplate` (see [below](#c-extensions)).
|
154
|
+
|
155
|
+
Writing a concrete segment tree class just means providing some simple lambdas and constants to the template class's
|
156
|
+
initializer. Figuring out the details requires some knowledge of the internal mechanisms of a segment tree, for which the link at
|
157
|
+
cp-algorithms.com is very helpful. See the implementations of the concrete classes `MaxValSegmentTree` and
|
158
|
+
`IndexOfMaxValSegmentTree` for examples.
|
159
|
+
|
160
|
+
Since there are several concrete "types" and two underlying generic implementations there is a convenience method on the `SegmentTree`
|
161
|
+
module to get instances.
|
162
|
+
|
163
|
+
``` ruby
|
164
|
+
require 'data_structures_rmolinari'
|
165
|
+
SegmentTree = DataStructuresRMolinari::SegmentTree # namespace module
|
166
|
+
|
167
|
+
data = [1, -3, 2, 1, 5, -9]
|
168
|
+
|
169
|
+
# Get a segment tree instance that will answer "max over this subinterval" questions about data.
|
170
|
+
# Here we get one using the ruby implementation of the generic functionality.
|
171
|
+
#
|
172
|
+
# We offer :index_of_max as an alternative to :max. This will construct an instance that answers
|
173
|
+
# questions of the form "an index of the maximum value over this subinterval".
|
174
|
+
#
|
175
|
+
# To use the version written in C, put :c instead of :ruby.
|
176
|
+
seg_tree = SegmentTree.construct(data, :max, :ruby)
|
177
|
+
|
178
|
+
seg_tree.max_on(0, 2) # => 2
|
179
|
+
seg_tree.max_on(1, 4) # => 5
|
180
|
+
# ..etc..
|
181
|
+
```
|
122
182
|
|
123
183
|
## Algorithms
|
124
184
|
|
@@ -131,11 +191,37 @@ The Algorithms submodule contains some algorithms using the data structures.
|
|
131
191
|
[left, right, bottom, top].
|
132
192
|
- The algorithm is due to [[DMNS2013]](#references).
|
133
193
|
|
194
|
+
# C Extensions
|
195
|
+
|
196
|
+
As another learning process I have implemented several of these data structures as C extensions. The APIs are the same.
|
197
|
+
|
198
|
+
## Disjoint Union
|
199
|
+
|
200
|
+
The C version is called `CDisjointUnion`. A benchmark suggests that a long sequence of `unite` operations is about 3 times as fast
|
201
|
+
with `CDisjointUnion` as with `DisjointUnion`.
|
202
|
+
|
203
|
+
The implementation uses the remarkable Convenient Containers library from Jackson Allan [[Allan]](#references).
|
204
|
+
|
205
|
+
## Segment Tree
|
206
|
+
|
207
|
+
`CSegmentTreeTemplate` is the C implementation of the generic class. Concrete classes are built on top of this in Ruby, just as with
|
208
|
+
the pure Ruby `SegmentTreeTemplate` class.
|
209
|
+
|
210
|
+
A benchmark suggests that a long sequence of `max_on` operations against a max-val Segment Tree is about 4 times as fast with C as
|
211
|
+
with Ruby. I'm a bit surprised the improvement isn't larger, but remember that the C code must still interact with the Ruby objects in
|
212
|
+
the underlying data array, and must combine them, etc., via Ruby lambdas.
|
213
|
+
|
134
214
|
# References
|
135
|
-
- [
|
136
|
-
- [
|
137
|
-
|
138
|
-
- [
|
139
|
-
-
|
215
|
+
- [Allan] Allan, J., _CC: Convenient Containers_, https://github.com/JacksonAllan/CC, (retrieved 2023-02-01).
|
216
|
+
- [TvL1984] Tarjan, Robert E., van Leeuwen, J., _Worst-case Analysis of Set Union Algorithms_, Journal of the ACM, v31:2 (1984), pp
|
217
|
+
245–281, https://dl.acm.org/doi/10.1145/62.2160 (retrieved 2022-02-01).
|
218
|
+
- [EEK2017] Edelkamp, S., Elmasry, A., Katajainen, J., _Optimizing Binary Heaps_, Theory Comput Syst (2017), vol 61, pp 606-636, DOI
|
219
|
+
10.1007/s00224-017-9760-2, https://kclpure.kcl.ac.uk/portal/files/87388857/TheoryComputingSzstems.pdf (retrieved 2022-02-02).
|
220
|
+
- [McC1985] McCreight, E. M., _Priority Search Trees_, SIAM J. Comput., 14(2):257-276, 1985,
|
221
|
+
http://www.cs.duke.edu/courses/fall08/cps234/handouts/SMJ000257.pdf (retrieved 2023-02-02).
|
222
|
+
- [DMNS2011] De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Priority Search Tree_, 23rd Canadian Conference on
|
223
|
+
Computational Geometry, 2011, http://www.cs.carleton.ca/~michiel/inplace_pst.pdf (retrieved 2023-02-02).
|
224
|
+
- [DMNS2013] De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46
|
225
|
+
(2013), pp 310-327, https://people.scs.carleton.ca/~michiel/MinMaxPST.pdf (retrieved 2023-02-02).
|
140
226
|
|
141
227
|
[^minmaxpst]: See the comments in the fragmentary class `MinMaxPrioritySearchTree` for further details.
|
data/Rakefile
CHANGED
@@ -2,10 +2,12 @@ require 'rubygems'
|
|
2
2
|
require 'rake/testtask'
|
3
3
|
require 'rake/extensiontask'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
['c_disjoint_union', 'c_segment_tree_template'].each do |extension_name|
|
6
|
+
Rake::ExtensionTask.new("data_structures_rmolinari/#{extension_name}") do |ext|
|
7
|
+
ext.name = extension_name
|
8
|
+
ext.ext_dir = "ext/#{extension_name}"
|
9
|
+
ext.lib_dir = 'lib/data_structures_rmolinari/'
|
10
|
+
end
|
9
11
|
end
|
10
12
|
|
11
13
|
Rake::TestTask.new do |t|
|
@@ -16,118 +16,69 @@
|
|
16
16
|
*/
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
-
|
20
|
-
|
21
|
-
#define mShared rb_define_module("Shared")
|
22
|
-
#define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
|
19
|
+
#include "cc.h" // Convenient Containers
|
20
|
+
#include "shared.h"
|
23
21
|
|
24
22
|
/**
|
25
|
-
*
|
26
|
-
*
|
27
|
-
* Dynamic array of longs, with an initial value for otherwise uninitialized elements.
|
28
|
-
* Based on https://stackoverflow.com/questions/3536153/c-dynamically-growing-array
|
29
|
-
*/
|
30
|
-
typedef struct {
|
31
|
-
long *array;
|
32
|
-
size_t size;
|
33
|
-
long default_val;
|
34
|
-
} DynamicArray;
|
35
|
-
|
36
|
-
/*
|
37
|
-
* Initialize a DynamicArray struct with the given initial size and with all values set to the default value.
|
38
|
-
*
|
39
|
-
* The default value is stored and used to initialize new array sections if and when the array needs to be expanded.
|
40
|
-
*/
|
41
|
-
void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
|
42
|
-
a->array = malloc(initial_size * sizeof(long));
|
43
|
-
a->size = initial_size;
|
44
|
-
a->default_val = default_val;
|
45
|
-
|
46
|
-
for (size_t i = 0; i < initial_size; i++) {
|
47
|
-
a->array[i] = default_val;
|
48
|
-
}
|
49
|
-
}
|
50
|
-
|
51
|
-
/*
|
52
|
-
* Assign +value+ to the the +index+-th element of the array, expanding the available space if necessary.
|
23
|
+
* Data type for the (parent, rank) pair, and some accessor helpers for the vec() container we are going to be using.
|
53
24
|
*/
|
54
|
-
void assignInDynamicArray(DynamicArray *a, unsigned long index, long value) {
|
55
|
-
if (a->size <= index) {
|
56
|
-
size_t new_size = a->size;
|
57
|
-
while (new_size <= index) {
|
58
|
-
new_size = 8 * new_size / 5 + 8; // 8/5 gives "Fibonnacci-like" growth; adding 8 to avoid small arrays having to reallocate
|
59
|
-
// too often as they grow. Who knows if it's worth being "clever".
|
60
|
-
}
|
61
25
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
26
|
+
typedef struct data_pair {
|
27
|
+
long parent;
|
28
|
+
unsigned long rank;
|
29
|
+
} data_pair;
|
66
30
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
}
|
71
|
-
|
72
|
-
a->size = new_size;
|
73
|
-
}
|
31
|
+
#define DEFAULT_PARENT -1
|
32
|
+
#define DEFAULT_RANK 0
|
33
|
+
static data_pair default_pair = { .parent = DEFAULT_PARENT, .rank = DEFAULT_RANK };
|
74
34
|
|
75
|
-
|
35
|
+
static data_pair make_data_pair(long parent, unsigned long rank) {
|
36
|
+
data_pair pair = { .parent = parent, .rank = rank };
|
37
|
+
return pair;
|
76
38
|
}
|
77
39
|
|
78
|
-
|
79
|
-
|
80
|
-
a->array = NULL;
|
81
|
-
a->size = 0;
|
82
|
-
}
|
40
|
+
/* The vector generic from Convenient Containers */
|
41
|
+
typedef vec(data_pair) pair_vector;
|
83
42
|
|
84
|
-
|
85
|
-
|
86
|
-
}
|
43
|
+
/* Lvalue accessor for the parent of element idx; uses the macro's own pointer argument rather than a caller-scope variable. */
#define parent(disjoint_union_ptr, idx) (get((disjoint_union_ptr)->pairs, (idx))->parent)
|
44
|
+
/* Lvalue accessor for the rank of element idx; uses the macro's own pointer argument rather than a caller-scope variable. */
#define rank(disjoint_union_ptr, idx) (get((disjoint_union_ptr)->pairs, (idx))->rank)
|
87
45
|
|
88
46
|
/**
|
89
47
|
* The C implementation of a Disjoint Union
|
90
48
|
*
|
91
|
-
* See
|
49
|
+
* See the paper for optimizations we use to get almost constant time for find() and unite().
|
50
|
+
*
|
51
|
+
* Tarjan, Robert E., van Leeuwen, J., _Worst-case Analysis of Set Union Algorithms_, Journal of the ACM, v31:2 (1984), pp 245–281.
|
92
52
|
*/
|
93
53
|
|
94
54
|
/*
|
95
55
|
* The Disjoint Union struct.
|
96
|
-
* -
|
97
|
-
* -
|
98
|
-
*
|
56
|
+
* - pairs: a vector (dynamic array) of pairs, the i-th of which contains
|
57
|
+
* - the "parent" of element i in its membership tree
|
58
|
+
* - An element e is the root of its tree just when it is its own parent
|
59
|
+
* - Two elements are in the same subset just when they are in the same tree in the forest.
|
99
60
|
* - So the key idea is that we can check this by navigating via parents from each element to their roots. Clever optimizations
|
100
61
|
* keep the trees flat and so most nodes are close to their roots.
|
101
|
-
*
|
102
|
-
*
|
103
|
-
* Leeuwen
|
62
|
+
* - the "rank" of element i
|
63
|
+
* - this value is used to guide the "linking" of trees when subsets are being merged to keep the trees flat.
|
104
64
|
* - subset_count: the number of (disjoint) subsets.
|
105
65
|
* - it isn't needed internally but may be useful to client code.
|
106
66
|
*/
|
107
67
|
typedef struct du_data {
|
108
|
-
|
109
|
-
DynamicArray *rank; // the "ranks" of the elements, used when uniting subsets
|
68
|
+
pair_vector *pairs; // The generic vector container from the amazing Convenient Containers library
|
110
69
|
size_t subset_count;
|
111
70
|
} disjoint_union_data;
|
112
71
|
|
113
72
|
/*
|
114
73
|
* Create one (on the heap).
|
115
|
-
*
|
116
|
-
* The dynamic arrays are initialized with a size of 100 because I didn't have a better idea. This will end up getting called from
|
117
|
-
* the Ruby #allocate method, which happens before #initialize. Thus we don't know the calling code's desired initial size.
|
118
74
|
*/
|
119
|
-
#define INITIAL_SIZE 100
|
120
75
|
static disjoint_union_data *create_disjoint_union() {
|
121
76
|
disjoint_union_data *disjoint_union = (disjoint_union_data *)malloc(sizeof(disjoint_union_data));
|
122
77
|
|
123
78
|
// Allocate the structures
|
124
|
-
|
125
|
-
|
126
|
-
initDynamicArray(forest, INITIAL_SIZE, -1);
|
127
|
-
initDynamicArray(rank, INITIAL_SIZE, 0);
|
79
|
+
disjoint_union->pairs = malloc(sizeof(pair_vector));
|
80
|
+
init(disjoint_union->pairs);
|
128
81
|
|
129
|
-
disjoint_union->forest = forest;
|
130
|
-
disjoint_union->rank = rank;
|
131
82
|
disjoint_union->subset_count = 0;
|
132
83
|
|
133
84
|
return disjoint_union;
|
@@ -141,15 +92,7 @@ static disjoint_union_data *create_disjoint_union() {
|
|
141
92
|
static void disjoint_union_free(void *ptr) {
|
142
93
|
if (ptr) {
|
143
94
|
disjoint_union_data *disjoint_union = ptr;
|
144
|
-
|
145
|
-
freeDynamicArray(disjoint_union->rank);
|
146
|
-
|
147
|
-
free(disjoint_union->forest);
|
148
|
-
disjoint_union->forest = NULL;
|
149
|
-
|
150
|
-
free(disjoint_union->rank);
|
151
|
-
disjoint_union->rank = NULL;
|
152
|
-
|
95
|
+
cleanup(disjoint_union->pairs);
|
153
96
|
xfree(disjoint_union);
|
154
97
|
}
|
155
98
|
}
|
@@ -162,8 +105,7 @@ static void disjoint_union_free(void *ptr) {
|
|
162
105
|
* Is the given element already a member of the universe?
|
163
106
|
*/
|
164
107
|
static int present_p(disjoint_union_data *disjoint_union, size_t element) {
|
165
|
-
|
166
|
-
return (forest->size > element && (forest->array[element] != forest->default_val));
|
108
|
+
return (size(disjoint_union->pairs) > element && (parent(disjoint_union, element) != DEFAULT_PARENT));
|
167
109
|
}
|
168
110
|
|
169
111
|
/*
|
@@ -172,6 +114,13 @@ static int present_p(disjoint_union_data *disjoint_union, size_t element) {
|
|
172
114
|
static void assert_membership(disjoint_union_data *disjoint_union, size_t element) {
|
173
115
|
if (!present_p(disjoint_union, element)) {
|
174
116
|
rb_raise(eSharedDataError, "Value %zu is not part of the universe", element);
|
117
|
+
/* rb_raise( */
|
118
|
+
/* eSharedDataError, */
|
119
|
+
/* "Value %zu is not part of the universe, size = %zu, forest_val = %lu", */
|
120
|
+
/* element, */
|
121
|
+
/* size(disjoint_union->pairs), */
|
122
|
+
/* get(disjoint_union->pairs, element)->parent */
|
123
|
+
/* ); */
|
175
124
|
}
|
176
125
|
}
|
177
126
|
|
@@ -185,47 +134,52 @@ static void add_new_element(disjoint_union_data *disjoint_union, size_t element)
|
|
185
134
|
rb_raise(eSharedDataError, "Element %zu already present in the universe", element);
|
186
135
|
}
|
187
136
|
|
188
|
-
|
189
|
-
|
137
|
+
// Expand the underlying vector if necessary
|
138
|
+
size_t sz = size(disjoint_union->pairs);
|
139
|
+
if (sz <= element) {
|
140
|
+
resize(disjoint_union->pairs, element + 1);
|
141
|
+
for (size_t i = sz + 1; i <= element; i++) {
|
142
|
+
lval(disjoint_union->pairs, i) = default_pair;
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
lval(disjoint_union->pairs, element) = make_data_pair(element, 0l);
|
190
147
|
disjoint_union->subset_count++;
|
191
148
|
}
|
192
149
|
|
193
150
|
/*
|
194
|
-
* Find the canonical representative of the given element. This is the root of the tree
|
151
|
+
* Find the canonical representative of the given element. This is the root of the tree containing it.
|
195
152
|
*
|
196
153
|
* Two elements are in the same subset exactly when their canonical representatives are equal.
|
197
154
|
*/
|
198
155
|
static size_t find(disjoint_union_data *disjoint_union, size_t element) {
|
199
156
|
assert_membership(disjoint_union, element);
|
200
157
|
|
201
|
-
// We
|
202
|
-
long *d = disjoint_union->forest->array; // the actual forest data
|
158
|
+
// We use "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
|
203
159
|
size_t x = element;
|
204
|
-
|
205
|
-
|
160
|
+
long p, gp; // parent and grandparent
|
161
|
+
while (p = parent(disjoint_union, x), gp = parent(disjoint_union, p), p != gp) {
|
162
|
+
parent(disjoint_union, p) = gp;
|
163
|
+
x = gp;
|
206
164
|
}
|
207
|
-
return
|
165
|
+
return parent(disjoint_union, x);
|
208
166
|
}
|
209
167
|
|
210
168
|
/*
|
211
|
-
* "Link"
|
169
|
+
* "Link" the two given elements so that they are in the same subset now.
|
212
170
|
*
|
213
171
|
* In other words, merge the subtrees containing the two elements.
|
214
172
|
*
|
215
|
-
*
|
216
|
-
* though we don't check that here.
|
173
|
+
* elt1 and elt2 must be distinct and must be the roots of their trees, though we don't check that here.
|
217
174
|
*/
|
218
175
|
static void link_roots(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
} else if (rank[elt1] == rank[elt2]) {
|
225
|
-
forest[elt2] = elt1;
|
226
|
-
rank[elt1]++;
|
176
|
+
if (rank(disjoint_union, elt1) > rank(disjoint_union, elt2)) {
|
177
|
+
parent(disjoint_union, elt2) = elt1;
|
178
|
+
} else if (rank(disjoint_union, elt1) == rank(disjoint_union, elt2)) {
|
179
|
+
parent(disjoint_union, elt2) = elt1;
|
180
|
+
rank(disjoint_union, elt1)++;
|
227
181
|
} else {
|
228
|
-
|
182
|
+
parent(disjoint_union, elt1) = elt2;
|
229
183
|
}
|
230
184
|
|
231
185
|
disjoint_union->subset_count--;
|
@@ -263,7 +217,9 @@ static void unite(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2)
|
|
263
217
|
static size_t disjoint_union_memsize(const void *ptr) {
|
264
218
|
if (ptr) {
|
265
219
|
const disjoint_union_data *du = ptr;
|
266
|
-
|
220
|
+
|
221
|
+
// See https://github.com/JacksonAllan/CC/issues/3
|
222
|
+
return sizeof( cc_vec_hdr_ty ) + cap( du->pairs ) * CC_EL_SIZE( *(du->pairs) );
|
267
223
|
} else {
|
268
224
|
return 0;
|
269
225
|
}
|
@@ -286,21 +242,7 @@ static const rb_data_type_t disjoint_union_type = {
|
|
286
242
|
};
|
287
243
|
|
288
244
|
/*
|
289
|
-
*
|
290
|
-
*/
|
291
|
-
static unsigned long checked_nonneg_fixnum(VALUE val) {
|
292
|
-
Check_Type(val, T_FIXNUM);
|
293
|
-
long c_val = FIX2LONG(val);
|
294
|
-
|
295
|
-
if (c_val < 0) {
|
296
|
-
rb_raise(eSharedDataError, "Value must be non-negative");
|
297
|
-
}
|
298
|
-
|
299
|
-
return c_val;
|
300
|
-
}
|
301
|
-
|
302
|
-
/*
|
303
|
-
* Unwrap a Rubyfied disjoint union to get the C struct inside.
|
245
|
+
* Unwrap a Ruby-side disjoint union object to get the C struct inside.
|
304
246
|
*/
|
305
247
|
static disjoint_union_data *unwrapped(VALUE self) {
|
306
248
|
disjoint_union_data *disjoint_union;
|
@@ -333,9 +275,13 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
333
275
|
size_t initial_size = checked_nonneg_fixnum(argv[0]);
|
334
276
|
disjoint_union_data *disjoint_union = unwrapped(self);
|
335
277
|
|
278
|
+
pair_vector *pair_vec = disjoint_union->pairs;
|
279
|
+
resize(pair_vec, initial_size);
|
280
|
+
|
336
281
|
for (size_t i = 0; i < initial_size; i++) {
|
337
|
-
|
282
|
+
lval(pair_vec, i) = make_data_pair(i, 0);
|
338
283
|
}
|
284
|
+
disjoint_union->subset_count = initial_size;
|
339
285
|
}
|
340
286
|
return self;
|
341
287
|
}
|
@@ -343,7 +289,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
343
289
|
/**
|
344
290
|
* And now the simple wrappers around the Disjoint Union C functionality. In each case we
|
345
291
|
* - unwrap a 'VALUE self',
|
346
|
-
* - i.e.,
|
292
|
+
* - i.e., the CDisjointUnion instance on the Ruby side;
|
347
293
|
* - munge any other arguments into longs;
|
348
294
|
* - call the appropriate C function to act on the struct; and
|
349
295
|
* - return an appropriate VALUE that the Ruby runtime can use.
|
@@ -354,7 +300,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
354
300
|
/*
|
355
301
|
* Add a new subset to the universe containing the element +new_v+.
|
356
302
|
*
|
357
|
-
* @param the new element, starting in its own singleton subset
|
303
|
+
* @param arg the new element, starting in its own singleton subset
|
358
304
|
* - it must be a non-negative integer, not already part of the universe of elements.
|
359
305
|
*/
|
360
306
|
static VALUE disjoint_union_make_set(VALUE self, VALUE arg) {
|
@@ -412,7 +358,7 @@ static VALUE disjoint_union_unite(VALUE self, VALUE arg1, VALUE arg2) {
|
|
412
358
|
* - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
|
413
359
|
*/
|
414
360
|
void Init_c_disjoint_union() {
|
415
|
-
VALUE mDataStructuresRMolinari = rb_define_module("DataStructuresRMolinari");
|
361
|
+
//VALUE mDataStructuresRMolinari = rb_define_module("DataStructuresRMolinari");
|
416
362
|
VALUE cDisjointUnion = rb_define_class_under(mDataStructuresRMolinari, "CDisjointUnion", rb_cObject);
|
417
363
|
|
418
364
|
rb_define_alloc_func(cDisjointUnion, disjoint_union_alloc);
|
@@ -3,10 +3,15 @@ require 'mkmf'
|
|
3
3
|
abort 'missing malloc()' unless have_func "malloc"
|
4
4
|
abort 'missing realloc()' unless have_func "realloc"
|
5
5
|
|
6
|
-
if try_cflags('-
|
7
|
-
append_cflags('-
|
6
|
+
if try_cflags('-O3')
|
7
|
+
append_cflags('-O3')
|
8
8
|
end
|
9
9
|
|
10
10
|
extension_name = "c_disjoint_union"
|
11
11
|
dir_config(extension_name)
|
12
|
+
|
13
|
+
$srcs = ["disjoint_union.c", "../shared.c"]
|
14
|
+
$INCFLAGS << " -I$(srcdir)/.."
|
15
|
+
$VPATH << "$(srcdir)/.."
|
16
|
+
|
12
17
|
create_makefile("data_structures_rmolinari/c_disjoint_union")
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
|
3
|
+
abort 'missing malloc()' unless have_func "malloc"
|
4
|
+
abort 'missing realloc()' unless have_func "realloc"
|
5
|
+
|
6
|
+
if try_cflags('-O3')
|
7
|
+
append_cflags('-O3')
|
8
|
+
end
|
9
|
+
|
10
|
+
extension_name = "c_segment_tree_template"
|
11
|
+
dir_config(extension_name)
|
12
|
+
|
13
|
+
$srcs = ["segment_tree_template.c", "../shared.c"]
|
14
|
+
$INCFLAGS << " -I$(srcdir)/.."
|
15
|
+
$VPATH << "$(srcdir)/.."
|
16
|
+
|
17
|
+
create_makefile("data_structures_rmolinari/c_segment_tree_template")
|