data_structures_rmolinari 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +119 -33
- data/Rakefile +6 -4
- data/ext/c_disjoint_union/disjoint_union.c +75 -129
- data/ext/c_disjoint_union/extconf.rb +7 -2
- data/ext/c_segment_tree_template/extconf.rb +17 -0
- data/ext/c_segment_tree_template/segment_tree_template.c +363 -0
- data/ext/shared.c +32 -0
- data/lib/data_structures_rmolinari/algorithms.rb +5 -5
- data/lib/data_structures_rmolinari/c_segment_tree_template_impl.rb +15 -0
- data/lib/data_structures_rmolinari/disjoint_union.rb +2 -0
- data/lib/data_structures_rmolinari/segment_tree.rb +126 -0
- data/lib/data_structures_rmolinari/segment_tree_template.rb +11 -8
- data/lib/data_structures_rmolinari.rb +5 -62
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7682f6d3b0779f347ce0797f55f33b9d7dcc7bd9c2039fc2fd6f865eb72e085a
|
4
|
+
data.tar.gz: d717e5e36f79ddc4ecb605a59b475b7114359dea7476445590deb300f7915bd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3ffd9a4f67f55b7a2df1c949cf2288c06fcae416d5ff03a10307a1b79c3dae1daa74e2576d5e190c989adeea47b046426fad8c3c64199aadf22ba500b317f36
|
7
|
+
data.tar.gz: 8380d6117f2955da9362395f8315f5121b4f7afba2f69aabb1981a01b675cbbed81d07c10b5745409080c2588c92df3d676ec36efa128571234a74dceef0e20d
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [Unreleased]
|
4
|
+
|
5
|
+
## [0.5.0] 2023-02-03
|
6
|
+
|
7
|
+
- SegmentTree
|
8
|
+
- Reorganize the code into a SegmentTree submodule.
|
9
|
+
- Provide a convenience method for getting concrete instances.
|
10
|
+
|
11
|
+
- README.md
|
12
|
+
- Add some simple example code for the data types.
|
13
|
+
|
14
|
+
## [0.4.4] 2023-02-02
|
15
|
+
|
16
|
+
- Disjoint Union
|
17
|
+
- C extension: use Convenient Containers rather than my janky Dynamic Array attempt.
|
18
|
+
|
19
|
+
- Segment Tree
|
20
|
+
- Add a C implementation as CSegmentTreeTemplate.
|
21
|
+
|
3
22
|
## [0.4.3] 2023-01-27
|
4
23
|
|
5
24
|
- Fix bad directive in Rakefile for DisjointUnion C extension
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ This is a small collection of Ruby data structures that I have implemented for m
|
|
4
4
|
structure is almost always more educational than simply reading about it and is usually fun. I wrote some of them while
|
5
5
|
participating in the Advent of Code (https://adventofcode.com/).
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
The implementations are based on the expository descriptions and pseudo-code I found as I read about each structure and so are not
|
8
|
+
as fast as possible.
|
9
9
|
|
10
10
|
The code is available as a gem: https://rubygems.org/gems/data_structures_rmolinari.
|
11
11
|
|
@@ -14,18 +14,6 @@ The code is available as a gem: https://rubygems.org/gems/data_structures_rmolin
|
|
14
14
|
The right way to organize the code is not obvious to me. For now the data structures are all defined in the module
|
15
15
|
`DataStructuresRMolinari` to avoid polluting the global namespace.
|
16
16
|
|
17
|
-
Example usage after the gem is installed:
|
18
|
-
```
|
19
|
-
require 'data_structures_rmolinari`
|
20
|
-
|
21
|
-
# Pull what we need out of the namespace
|
22
|
-
MaxPrioritySearchTree = DataStructuresRMolinari::MaxPrioritySearchTree
|
23
|
-
Point = DataStructuresRMolinari::Point # anything responding to :x and :y is fine
|
24
|
-
|
25
|
-
pst = MaxPrioritySearchTree.new([Point.new(1, 1)])
|
26
|
-
puts pst.largest_y_in_ne(0, 0) # "Point(1,1)"
|
27
|
-
```
|
28
|
-
|
29
17
|
# Implementations
|
30
18
|
|
31
19
|
## Disjoint Union
|
@@ -42,8 +30,22 @@ It also provides
|
|
42
30
|
For more details see https://en.wikipedia.org/wiki/Disjoint-set_data_structure and the paper [[TvL1984]](#references) by Tarjan and
|
43
31
|
van Leeuwen.
|
44
32
|
|
45
|
-
|
46
|
-
|
33
|
+
``` ruby
|
34
|
+
require 'data_structures_rmolinari'
|
35
|
+
DisjointUnion = DataStructuresRMolinari::DisjointUnion
|
36
|
+
|
37
|
+
# Create an instance over the "universe" 0, 1, ..., 9.
|
38
|
+
du = DisjointUnion.new(10)
|
39
|
+
du.subset_count # => 10; each element starts out in its own subset
|
40
|
+
|
41
|
+
du.unite(2, 3) # say that 2 and 3 are actually in the same subset
|
42
|
+
du.subset_count # => 9
|
43
|
+
du.find(2) == du.find(3) # => true
|
44
|
+
|
45
|
+
du.unite(4, 5)
|
46
|
+
du.unite(3, 4) # now 2, 3, 4, and 5 are all in the same subset
|
47
|
+
du.subset_count # => 7
|
48
|
+
```
|
47
49
|
|
48
50
|
## Heap
|
49
51
|
|
@@ -66,6 +68,24 @@ allows the insertion of duplicate items (which is sometimes useful) and slightly
|
|
66
68
|
|
67
69
|
See https://en.wikipedia.org/wiki/Binary_heap and the paper by Edelkamp, Elmasry, and Katajainen [[EEK2017]](#references).
|
68
70
|
|
71
|
+
``` ruby
|
72
|
+
require 'data_structures_rmolinari'
|
73
|
+
Heap = DataStructuresRMolinari::Heap
|
74
|
+
|
75
|
+
data = [4, 3, 2, 1]
|
76
|
+
|
77
|
+
heap = Heap.new
|
78
|
+
|
79
|
+
# Insert the elements of data, each with itself as priority.
|
80
|
+
data.each { |v| heap.insert(v, v) }
|
81
|
+
|
82
|
+
heap.top # => 1, since we have a min-heap.
|
83
|
+
heap.pop # => 1
|
84
|
+
heap.top # => 2; with 1 gone, this is the element with least priority
|
85
|
+
heap.update(3, -3)
|
86
|
+
heap.top # => 3; now 3 is the element with least priority
|
87
|
+
```
|
88
|
+
|
69
89
|
## Priority Search Tree
|
70
90
|
|
71
91
|
A PST stores a set P of two-dimensional points in a way that allows certain queries about P to be answered efficiently. The data
|
@@ -84,41 +104,81 @@ pointing north.
|
|
84
104
|
|
85
105
|
There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0, y0)`.
|
86
106
|
|
107
|
+
(These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
|
108
|
+
[[McC1985]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
|
109
|
+
|
87
110
|
The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
|
88
111
|
the number of points actually enumerated.
|
89
112
|
|
90
113
|
The implementation is in `MaxPrioritySearchTree` (MaxPST for short), so called because internally the structure is, among other
|
91
114
|
things, a max-heap on the y-coordinates.
|
92
115
|
|
93
|
-
These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
|
94
|
-
[[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.
|
95
|
-
|
96
116
|
We also provide a `MinPrioritySearchTree`, which answers analogous queries in the southward-infinite quadrants and 3-sided
|
97
117
|
regions.
|
98
118
|
|
99
119
|
By default these data structures are immutable: once constructed they cannot be changed. But there is a constructor option that
|
100
120
|
makes the instance "dynamic". This allows us to delete the element at the root of the tree - the one with largest y value (smallest
|
101
121
|
for MinPST) - with the `delete_top!` method. This operation is important in certain algorithms, such as enumerating all maximal
|
102
|
-
empty rectangles (see the second paper by De et al
|
122
|
+
empty rectangles (see the second paper by De et al. [[DMNS2013]](#references)). Note that points still cannot be added to the PST in
|
103
123
|
any case, and choosing the dynamic option makes certain internal bookkeeping operations slower.
|
104
124
|
|
105
125
|
In [[DMNS2013]](#references) De et al. generalize the in-place structure to a _Min-max Priority Search Tree_ (MinmaxPST) that can
|
106
126
|
answer queries in all four quadrants and both "kinds" of 3-sided boxes. Having one of these would save the trouble of constructing
|
107
127
|
both a MaxPST and MinPST. But the presentation is hard to follow in places and the paper's pseudocode is buggy.[^minmaxpst]
|
108
128
|
|
129
|
+
``` ruby
|
130
|
+
require 'data_structures_rmolinari'
|
131
|
+
MaxPST = DataStructuresRMolinari::MaxPrioritySearchTree
|
132
|
+
Point = Shared::Point # simple (x, y) struct. Anything responding to #x and #y will work
|
133
|
+
|
134
|
+
data = [Point.new(0, 0), Point.new(1, 2), Point.new(2, 1)]
|
135
|
+
pst = MaxPST.new(data)
|
136
|
+
|
137
|
+
pst.largest_y_in_ne(0, 0) # => #<struct Shared::Point x=1, y=2>
|
138
|
+
pst.largest_y_in_ne(1, 1) # => #<struct Shared::Point x=1, y=2>
|
139
|
+
pst.largest_y_in_ne(1.5, 1) # => #<struct Shared::Point x=2, y=1>
|
140
|
+
pst.largest_y_in_3_sided(-0.5, 0.5, 0) # => #<struct Shared::Point x=0, y=0>
|
141
|
+
```
|
142
|
+
|
109
143
|
## Segment Tree
|
110
144
|
|
111
|
-
|
112
|
-
elements in an arbitrary subinterval A
|
113
|
-
of A in such a way that the values we store in the nodes can be combined efficiently to determine the desired result for
|
114
|
-
subarrays.
|
145
|
+
A segment tree stores information related to subintervals of a certain array. For example, a segment tree can be used to find the
|
146
|
+
sum of the elements in an arbitrary subinterval A(i..j) of an array A(0..n) in O(log n) time. Each node in the tree corresponds to a
|
147
|
+
subarray of A in such a way that the values we store in the nodes can be combined efficiently to determine the desired result for
|
148
|
+
arbitrary subarrays.
|
115
149
|
|
116
150
|
An excellent description of the idea is found at https://cp-algorithms.com/data_structures/segment_tree.html.
|
117
151
|
|
118
|
-
Generic code is provided in `SegmentTreeTemplate
|
119
|
-
|
120
|
-
|
121
|
-
|
152
|
+
Generic code is provided in `SegmentTree::SegmentTreeTemplate` and its equivalent (and faster) C-based sibling,
|
153
|
+
`SegmentTree::CSegmentTreeTemplate` (see [below](#c-extensions)).
|
154
|
+
|
155
|
+
Writing a concrete segment tree class just means providing some simple lambdas and constants to the template class's
|
156
|
+
initializer. Figuring out the details requires some knowledge of the internal mechanisms of a segment tree, for which the link at
|
157
|
+
cp-algorithms.com is very helpful. See the implementations of the concrete classes `MaxValSegmentTree` and
|
158
|
+
`IndexOfMaxValSegmentTree` for examples.
|
159
|
+
|
160
|
+
Since there are several concrete "types" and two underlying generic implementations there is a convenience method on the `SegmentTree`
|
161
|
+
module to get instances.
|
162
|
+
|
163
|
+
``` ruby
|
164
|
+
require 'data_structures_rmolinari'
|
165
|
+
SegmentTree = DataStructuresRMolinari::SegmentTree # namespace module
|
166
|
+
|
167
|
+
data = [1, -3, 2, 1, 5, -9]
|
168
|
+
|
169
|
+
# Get a segment tree instance that will answer "max over this subinterval" questions about data.
|
170
|
+
# Here we get one using the ruby implementation of the generic functionality.
|
171
|
+
#
|
172
|
+
# We offer :index_of_max as an alternative to :max. This will construct an instance that answers
|
173
|
+
# questions of the form "an index of the maximum value over this subinterval".
|
174
|
+
#
|
175
|
+
# To use the version written in C, put :c instead of :ruby.
|
176
|
+
seg_tree = SegmentTree.construct(data, :max, :ruby)
|
177
|
+
|
178
|
+
seg_tree.max_on(0, 2) # => 2
|
179
|
+
seg_tree.max_on(1, 4) # => 5
|
180
|
+
# ..etc..
|
181
|
+
```
|
122
182
|
|
123
183
|
## Algorithms
|
124
184
|
|
@@ -131,11 +191,37 @@ The Algorithms submodule contains some algorithms using the data structures.
|
|
131
191
|
[left, right, bottom, top].
|
132
192
|
- The algorithm is due to [[DMNS2013]](#references).
|
133
193
|
|
194
|
+
# C Extensions
|
195
|
+
|
196
|
+
As another learning process I have implemented several of these data structures as C extensions. The APIs are the same.
|
197
|
+
|
198
|
+
## Disjoint Union
|
199
|
+
|
200
|
+
The C version is called `CDisjointUnion`. A benchmark suggests that a long sequence of `unite` operations is about 3 times as fast
|
201
|
+
with `CDisjointUnion` as with `DisjointUnion`.
|
202
|
+
|
203
|
+
The implementation uses the remarkable Convenient Containers library from Jackson Allan [[Allan]](#references).
|
204
|
+
|
205
|
+
## Segment Tree
|
206
|
+
|
207
|
+
`CSegmentTreeTemplate` is the C implementation of the generic class. Concrete classes are built on top of this in Ruby, just as with
|
208
|
+
the pure Ruby `SegmentTreeTemplate` class.
|
209
|
+
|
210
|
+
A benchmark suggests that a long sequence of `max_on` operations against a max-val Segment Tree is about 4 times as fast with C as
|
211
|
+
with Ruby. I'm a bit surprised the improvement isn't larger, but remember that the C code must still interact with the Ruby objects in
|
212
|
+
the underlying data array, and must combine them, etc., via Ruby lambdas.
|
213
|
+
|
134
214
|
# References
|
135
|
-
- [
|
136
|
-
- [
|
137
|
-
|
138
|
-
- [
|
139
|
-
-
|
215
|
+
- [Allan] Allan, J., _CC: Convenient Containers_, https://github.com/JacksonAllan/CC, (retrieved 2023-02-01).
|
216
|
+
- [TvL1984] Tarjan, Robert E., van Leeuwen, J., _Worst-case Analysis of Set Union Algorithms_, Journal of the ACM, v31:2 (1984), pp
|
217
|
+
245–281, https://dl.acm.org/doi/10.1145/62.2160 (retrieved 2022-02-01).
|
218
|
+
- [EEK2017] Edelkamp, S., Elmasry, A., Katajainen, J., _Optimizing Binary Heaps_, Theory Comput Syst (2017), vol 61, pp 606-636, DOI
|
219
|
+
10.1007/s00224-017-9760-2, https://kclpure.kcl.ac.uk/portal/files/87388857/TheoryComputingSzstems.pdf (retrieved 2022-02-02).
|
220
|
+
- [McC1985] McCreight, E. M., _Priority Search Trees_, SIAM J. Comput., 14(2):257-276, 1985,
|
221
|
+
http://www.cs.duke.edu/courses/fall08/cps234/handouts/SMJ000257.pdf (retrieved 2023-02-02).
|
222
|
+
- [DMNS2011] De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Priority Search Tree_, 23rd Canadian Conference on
|
223
|
+
Computational Geometry, 2011, http://www.cs.carleton.ca/~michiel/inplace_pst.pdf (retrieved 2023-02-02).
|
224
|
+
- [DMNS2013] De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46
|
225
|
+
(2013), pp 310-327, https://people.scs.carleton.ca/~michiel/MinMaxPST.pdf (retrieved 2023-02-02).
|
140
226
|
|
141
227
|
[^minmaxpst]: See the comments in the fragmentary class `MinMaxPrioritySearchTree` for further details.
|
data/Rakefile
CHANGED
@@ -2,10 +2,12 @@ require 'rubygems'
|
|
2
2
|
require 'rake/testtask'
|
3
3
|
require 'rake/extensiontask'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
# Define one rake-compiler task per C extension. Each extension's sources live in
# ext/<name> and its compiled library is placed under lib/data_structures_rmolinari/.
%w[c_disjoint_union c_segment_tree_template].each do |extension_name|
  task_name = "data_structures_rmolinari/#{extension_name}"

  Rake::ExtensionTask.new(task_name) do |task|
    task.name = extension_name
    task.ext_dir = "ext/#{extension_name}"
    task.lib_dir = 'lib/data_structures_rmolinari/'
  end
end
|
10
12
|
|
11
13
|
Rake::TestTask.new do |t|
|
@@ -16,118 +16,69 @@
|
|
16
16
|
*/
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
-
|
20
|
-
|
21
|
-
#define mShared rb_define_module("Shared")
|
22
|
-
#define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
|
19
|
+
#include "cc.h" // Convenient Containers
|
20
|
+
#include "shared.h"
|
23
21
|
|
24
22
|
/**
|
25
|
-
*
|
26
|
-
*
|
27
|
-
* Dynamic array of longs, with an initial value for otherwise uninitialized elements.
|
28
|
-
* Based on https://stackoverflow.com/questions/3536153/c-dynamically-growing-array
|
29
|
-
*/
|
30
|
-
typedef struct {
|
31
|
-
long *array;
|
32
|
-
size_t size;
|
33
|
-
long default_val;
|
34
|
-
} DynamicArray;
|
35
|
-
|
36
|
-
/*
|
37
|
-
* Initialize a DynamicArray struct with the given initial size and with all values set to the default value.
|
38
|
-
*
|
39
|
-
* The default value is stored and used to initialize new array sections if and when the array needs to be expanded.
|
40
|
-
*/
|
41
|
-
void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
|
42
|
-
a->array = malloc(initial_size * sizeof(long));
|
43
|
-
a->size = initial_size;
|
44
|
-
a->default_val = default_val;
|
45
|
-
|
46
|
-
for (size_t i = 0; i < initial_size; i++) {
|
47
|
-
a->array[i] = default_val;
|
48
|
-
}
|
49
|
-
}
|
50
|
-
|
51
|
-
/*
|
52
|
-
* Assign +value+ to the the +index+-th element of the array, expanding the available space if necessary.
|
23
|
+
* Data type for the (parent, rank) pair, and some accessor helpers for the vec() container we are going to be using.
|
53
24
|
*/
|
54
|
-
void assignInDynamicArray(DynamicArray *a, unsigned long index, long value) {
|
55
|
-
if (a->size <= index) {
|
56
|
-
size_t new_size = a->size;
|
57
|
-
while (new_size <= index) {
|
58
|
-
new_size = 8 * new_size / 5 + 8; // 8/5 gives "Fibonnacci-like" growth; adding 8 to avoid small arrays having to reallocate
|
59
|
-
// too often as they grow. Who knows if it's worth being "clever".
|
60
|
-
}
|
61
25
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
26
|
+
/*
 * The record kept for each element of the universe.
 *
 * - parent: index of the element's parent in its membership tree, or DEFAULT_PARENT when the slot does not hold a member of the
 *           universe (this is what present_p() tests).
 * - rank:   the element's rank, consulted when two trees are linked.
 */
typedef struct data_pair {
  long parent;
  unsigned long rank;
} data_pair;

/* Values for a slot that is not (yet) part of the universe. The negative constant is parenthesized so that it expands safely
 * inside any surrounding expression. */
#define DEFAULT_PARENT (-1)
#define DEFAULT_RANK 0

/* A ready-made "empty slot" record, copied into vector entries that have been allocated but do not hold an element. */
static data_pair default_pair = { .parent = DEFAULT_PARENT, .rank = DEFAULT_RANK };
|
74
34
|
|
75
|
-
|
35
|
+
static data_pair make_data_pair(long parent, unsigned long rank) {
|
36
|
+
data_pair pair = { .parent = parent, .rank = rank };
|
37
|
+
return pair;
|
76
38
|
}
|
77
39
|
|
78
|
-
|
79
|
-
|
80
|
-
a->array = NULL;
|
81
|
-
a->size = 0;
|
82
|
-
}
|
40
|
+
/* The vector generic from Convenient Containers */
|
41
|
+
/* A growable array of data_pair records; entry i is the (parent, rank) record for element i of the universe. */
typedef vec(data_pair) pair_vector;
|
83
42
|
|
84
|
-
|
85
|
-
|
86
|
-
}
|
43
|
+
/* Lvalue for the parent of element idx in the given disjoint union. The macro now uses its own first argument instead of
 * silently capturing a variable that happens to be named `disjoint_union` at the call site. */
#define parent(disjoint_union_ptr, idx) (get((disjoint_union_ptr)->pairs, (idx))->parent)
|
44
|
+
/* Lvalue for the rank of element idx in the given disjoint union. The macro now uses its own first argument instead of
 * silently capturing a variable that happens to be named `disjoint_union` at the call site. */
#define rank(disjoint_union_ptr, idx) (get((disjoint_union_ptr)->pairs, (idx))->rank)
|
87
45
|
|
88
46
|
/**
|
89
47
|
* The C implementation of a Disjoint Union
|
90
48
|
*
|
91
|
-
* See
|
49
|
+
* See the paper for optimizations we use to get almost constant time for find() and unite().
|
50
|
+
*
|
51
|
+
* Tarjan, Robert E., van Leeuwen, J., _Worst-case Analysis of Set Union Algorithms_, Journal of the ACM, v31:2 (1984), pp 245–281.
|
92
52
|
*/
|
93
53
|
|
94
54
|
/*
|
95
55
|
* The Disjoint Union struct.
|
96
|
-
* -
|
97
|
-
* -
|
98
|
-
*
|
56
|
+
* - pairs: a vector (dynamic array) of pairs, the i-th of which contains
|
57
|
+
* - the "parent" of element i in its membership tree
|
58
|
+
* - An element e is the root of its tree just when it is its own parent
|
59
|
+
* - Two elements are in the same subset just when they are in the same tree in the forest.
|
99
60
|
* - So the key idea is that we can check this by navigating via parents from each element to their roots. Clever optimizations
|
100
61
|
* keep the trees flat and so most nodes are close to their roots.
|
101
|
-
*
|
102
|
-
*
|
103
|
-
* Leeuwen
|
62
|
+
* - the "rank" of element i
|
63
|
+
* - this value is used to guide the "linking" of trees when subsets are being merged to keep the trees flat.
|
104
64
|
* - subset_count: the number of (disjoint) subsets.
|
105
65
|
* - it isn't needed internally but may be useful to client code.
|
106
66
|
*/
|
107
67
|
typedef struct du_data {
|
108
|
-
|
109
|
-
DynamicArray *rank; // the "ranks" of the elements, used when uniting subsets
|
68
|
+
pair_vector *pairs; // The generic vector container from the amazing Convenient Containers library
|
110
69
|
size_t subset_count;
|
111
70
|
} disjoint_union_data;
|
112
71
|
|
113
72
|
/*
|
114
73
|
* Create one (on the heap).
|
115
|
-
*
|
116
|
-
* The dynamic arrays are initialized with a size of 100 because I didn't have a better idea. This will end up getting called from
|
117
|
-
* the Ruby #allocate method, which happens before #initialize. Thus we don't know the calling code's desired initial size.
|
118
74
|
*/
|
119
|
-
#define INITIAL_SIZE 100
|
120
75
|
static disjoint_union_data *create_disjoint_union() {
|
121
76
|
disjoint_union_data *disjoint_union = (disjoint_union_data *)malloc(sizeof(disjoint_union_data));
|
122
77
|
|
123
78
|
// Allocate the structures
|
124
|
-
|
125
|
-
|
126
|
-
initDynamicArray(forest, INITIAL_SIZE, -1);
|
127
|
-
initDynamicArray(rank, INITIAL_SIZE, 0);
|
79
|
+
disjoint_union->pairs = malloc(sizeof(pair_vector));
|
80
|
+
init(disjoint_union->pairs);
|
128
81
|
|
129
|
-
disjoint_union->forest = forest;
|
130
|
-
disjoint_union->rank = rank;
|
131
82
|
disjoint_union->subset_count = 0;
|
132
83
|
|
133
84
|
return disjoint_union;
|
@@ -141,15 +92,7 @@ static disjoint_union_data *create_disjoint_union() {
|
|
141
92
|
static void disjoint_union_free(void *ptr) {
|
142
93
|
if (ptr) {
|
143
94
|
disjoint_union_data *disjoint_union = ptr;
|
144
|
-
|
145
|
-
freeDynamicArray(disjoint_union->rank);
|
146
|
-
|
147
|
-
free(disjoint_union->forest);
|
148
|
-
disjoint_union->forest = NULL;
|
149
|
-
|
150
|
-
free(disjoint_union->rank);
|
151
|
-
disjoint_union->rank = NULL;
|
152
|
-
|
95
|
+
cleanup(disjoint_union->pairs);
|
153
96
|
xfree(disjoint_union);
|
154
97
|
}
|
155
98
|
}
|
@@ -162,8 +105,7 @@ static void disjoint_union_free(void *ptr) {
|
|
162
105
|
* Is the given element already a member of the universe?
|
163
106
|
*/
|
164
107
|
static int present_p(disjoint_union_data *disjoint_union, size_t element) {
  // Anything at or beyond the end of the vector has never been added.
  if (size(disjoint_union->pairs) <= element) {
    return 0;
  }

  // Inside the vector, a slot counts as a member unless its parent is still the "empty" marker.
  return parent(disjoint_union, element) != DEFAULT_PARENT;
}
|
168
110
|
|
169
111
|
/*
|
@@ -172,6 +114,13 @@ static int present_p(disjoint_union_data *disjoint_union, size_t element) {
|
|
172
114
|
static void assert_membership(disjoint_union_data *disjoint_union, size_t element) {
  if (present_p(disjoint_union, element)) {
    return;
  }

  // Not a member: raise Shared::DataError on the Ruby side.
  rb_raise(eSharedDataError, "Value %zu is not part of the universe", element);
}
|
177
126
|
|
@@ -185,47 +134,52 @@ static void add_new_element(disjoint_union_data *disjoint_union, size_t element)
|
|
185
134
|
rb_raise(eSharedDataError, "Element %zu already present in the universe", element);
|
186
135
|
}
|
187
136
|
|
188
|
-
|
189
|
-
|
137
|
+
// Expand the underlying vector if necessary
|
138
|
+
size_t sz = size(disjoint_union->pairs);
|
139
|
+
if (sz <= element) {
|
140
|
+
resize(disjoint_union->pairs, element + 1);
|
141
|
+
for (size_t i = sz + 1; i <= element; i++) {
|
142
|
+
lval(disjoint_union->pairs, i) = default_pair;
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
lval(disjoint_union->pairs, element) = make_data_pair(element, 0l);
|
190
147
|
disjoint_union->subset_count++;
|
191
148
|
}
|
192
149
|
|
193
150
|
/*
|
194
|
-
* Find the canonical representative of the given element. This is the root of the tree
|
151
|
+
* Find the canonical representative of the given element. This is the root of the tree containing it.
|
195
152
|
*
|
196
153
|
* Two elements are in the same subset exactly when their canonical representatives are equal.
|
197
154
|
*/
|
198
155
|
static size_t find(disjoint_union_data *disjoint_union, size_t element) {
  assert_membership(disjoint_union, element);

  // We use "halving" to shrink the length of paths to the root: every other node on the path is re-pointed at its
  // grandparent. See Tarjan and van Leeuwen p 252.
  size_t x = element;
  for (;;) {
    long p = parent(disjoint_union, x);  // parent of x
    long gp = parent(disjoint_union, p); // grandparent of x

    if (p == gp) {
      return p; // p is its own parent, so it is the root of x's tree
    }

    // It is x, not p, that must be re-pointed: p's parent is already gp, so assigning to p would leave the tree unchanged
    // and no path would ever get shorter.
    parent(disjoint_union, x) = gp;
    x = gp;
  }
}
}
|
209
167
|
|
210
168
|
/*
|
211
|
-
* "Link"
|
169
|
+
* "Link" the two given elements so that they are in the same subset now.
|
212
170
|
*
|
213
171
|
* In other words, merge the subtrees containing the two elements.
|
214
172
|
*
|
215
|
-
*
|
216
|
-
* though we don't check that here.
|
173
|
+
* elt1 and elt2 must be distinct and must each be the root of its tree, though we don't check that here.
|
217
174
|
*/
|
218
175
|
static void link_roots(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
} else if (rank[elt1] == rank[elt2]) {
|
225
|
-
forest[elt2] = elt1;
|
226
|
-
rank[elt1]++;
|
176
|
+
if (rank(disjoint_union, elt1) > rank(disjoint_union, elt2)) {
|
177
|
+
parent(disjoint_union, elt2) = elt1;
|
178
|
+
} else if (rank(disjoint_union, elt1) == rank(disjoint_union, elt2)) {
|
179
|
+
parent(disjoint_union, elt2) = elt1;
|
180
|
+
rank(disjoint_union, elt1)++;
|
227
181
|
} else {
|
228
|
-
|
182
|
+
parent(disjoint_union, elt1) = elt2;
|
229
183
|
}
|
230
184
|
|
231
185
|
disjoint_union->subset_count--;
|
@@ -263,7 +217,9 @@ static void unite(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2)
|
|
263
217
|
static size_t disjoint_union_memsize(const void *ptr) {
|
264
218
|
if (ptr) {
|
265
219
|
const disjoint_union_data *du = ptr;
|
266
|
-
|
220
|
+
|
221
|
+
// See https://github.com/JacksonAllan/CC/issues/3
|
222
|
+
return sizeof( cc_vec_hdr_ty ) + cap( du->pairs ) * CC_EL_SIZE( *(du->pairs) );
|
267
223
|
} else {
|
268
224
|
return 0;
|
269
225
|
}
|
@@ -286,21 +242,7 @@ static const rb_data_type_t disjoint_union_type = {
|
|
286
242
|
};
|
287
243
|
|
288
244
|
/*
|
289
|
-
*
|
290
|
-
*/
|
291
|
-
static unsigned long checked_nonneg_fixnum(VALUE val) {
|
292
|
-
Check_Type(val, T_FIXNUM);
|
293
|
-
long c_val = FIX2LONG(val);
|
294
|
-
|
295
|
-
if (c_val < 0) {
|
296
|
-
rb_raise(eSharedDataError, "Value must be non-negative");
|
297
|
-
}
|
298
|
-
|
299
|
-
return c_val;
|
300
|
-
}
|
301
|
-
|
302
|
-
/*
|
303
|
-
* Unwrap a Rubyfied disjoint union to get the C struct inside.
|
245
|
+
* Unwrap a Ruby-side disjoint union object to get the C struct inside.
|
304
246
|
*/
|
305
247
|
static disjoint_union_data *unwrapped(VALUE self) {
|
306
248
|
disjoint_union_data *disjoint_union;
|
@@ -333,9 +275,13 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
333
275
|
size_t initial_size = checked_nonneg_fixnum(argv[0]);
|
334
276
|
disjoint_union_data *disjoint_union = unwrapped(self);
|
335
277
|
|
278
|
+
pair_vector *pair_vec = disjoint_union->pairs;
|
279
|
+
resize(pair_vec, initial_size);
|
280
|
+
|
336
281
|
for (size_t i = 0; i < initial_size; i++) {
|
337
|
-
|
282
|
+
lval(pair_vec, i) = make_data_pair(i, 0);
|
338
283
|
}
|
284
|
+
disjoint_union->subset_count = initial_size;
|
339
285
|
}
|
340
286
|
return self;
|
341
287
|
}
|
@@ -343,7 +289,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
343
289
|
/**
|
344
290
|
* And now the simple wrappers around the Disjoint Union C functionality. In each case we
|
345
291
|
* - unwrap a 'VALUE self',
|
346
|
-
* - i.e.,
|
292
|
+
* - i.e., the CDisjointUnion instance on the Ruby side;
|
347
293
|
* - munge any other arguments into longs;
|
348
294
|
* - call the appropriate C function to act on the struct; and
|
349
295
|
* - return an appropriate VALUE that the Ruby runtime can use.
|
@@ -354,7 +300,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
354
300
|
/*
|
355
301
|
* Add a new subset to the universe containing the element +new_v+.
|
356
302
|
*
|
357
|
-
* @param the new element, starting in its own singleton subset
|
303
|
+
* @param arg the new element, starting in its own singleton subset
|
358
304
|
* - it must be a non-negative integer, not already part of the universe of elements.
|
359
305
|
*/
|
360
306
|
static VALUE disjoint_union_make_set(VALUE self, VALUE arg) {
|
@@ -412,7 +358,7 @@ static VALUE disjoint_union_unite(VALUE self, VALUE arg1, VALUE arg2) {
|
|
412
358
|
* - Tarjan, Robert E., van Leeuwen, Jan (1984). _Worst-case analysis of set union algorithms_. Journal of the ACM. 31 (2): 245–281.
|
413
359
|
*/
|
414
360
|
void Init_c_disjoint_union() {
|
415
|
-
VALUE mDataStructuresRMolinari = rb_define_module("DataStructuresRMolinari");
|
361
|
+
//VALUE mDataStructuresRMolinari = rb_define_module("DataStructuresRMolinari");
|
416
362
|
VALUE cDisjointUnion = rb_define_class_under(mDataStructuresRMolinari, "CDisjointUnion", rb_cObject);
|
417
363
|
|
418
364
|
rb_define_alloc_func(cDisjointUnion, disjoint_union_alloc);
|
@@ -3,10 +3,15 @@ require 'mkmf'
|
|
3
3
|
abort 'missing malloc()' unless have_func "malloc"
|
4
4
|
abort 'missing realloc()' unless have_func "realloc"
|
5
5
|
|
6
|
-
if try_cflags('-
|
7
|
-
append_cflags('-
|
6
|
+
if try_cflags('-O3')
|
7
|
+
append_cflags('-O3')
|
8
8
|
end
|
9
9
|
|
10
10
|
extension_name = "c_disjoint_union"
|
11
11
|
dir_config(extension_name)
|
12
|
+
|
13
|
+
$srcs = ["disjoint_union.c", "../shared.c"]
|
14
|
+
$INCFLAGS << " -I$(srcdir)/.."
|
15
|
+
$VPATH << "$(srcdir)/.."
|
16
|
+
|
12
17
|
create_makefile("data_structures_rmolinari/c_disjoint_union")
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Build configuration for the c_segment_tree_template extension.
require 'mkmf'

# The extension cannot be built without these libc functions.
%w[malloc realloc].each do |function_name|
  abort "missing #{function_name}()" unless have_func function_name
end

# Ask for aggressive optimization when the compiler accepts the flag.
optimization_flag = '-O3'
append_cflags(optimization_flag) if try_cflags(optimization_flag)

extension_name = "c_segment_tree_template"
dir_config(extension_name)

# shared.c lives one directory up, so add it to the source list and make the parent directory
# visible to both the include path and make's VPATH.
parent_dir = "$(srcdir)/.."
$srcs = ["segment_tree_template.c", "../shared.c"]
$INCFLAGS << " -I#{parent_dir}"
$VPATH << parent_dir

create_makefile("data_structures_rmolinari/c_segment_tree_template")
|