data_structures_rmolinari 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Rakefile +1 -1
- data/ext/c_disjoint_union/disjoint_union.c +57 -45
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9022e9531472d1125c6172025c2d10c5d4ef4f9c43e326a43f1c5b4f0721263
|
4
|
+
data.tar.gz: '0212619be7fe32e68b63d2087730f81ffd6b4179b8b8bf63aa0026e4e3056224'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7f9258eeed2dc7e7fa5713aaecfcdf44e061bb161aa3d0d2662fb662bfb6b2685c61be221b4a109792982c3b2aa6215da75b51ae299d4a9237b6226000612e4
|
7
|
+
data.tar.gz: e585a245f753ef731895163eedba802e3fe2f6000720d10705b5a0cd02a12642a35220eea57c8b71f504b66db7cb06161fdfca1660edc0dc132ee026dd83be4d
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require 'rake/testtask'
|
|
3
3
|
require 'rake/extensiontask'
|
4
4
|
|
5
5
|
Rake::ExtensionTask.new('data_structures_rmolinari/c_disjoint_union') do |ext|
|
6
|
-
ext.name = '
|
6
|
+
ext.name = 'c_disjoint_union'
|
7
7
|
ext.ext_dir = 'ext/c_disjoint_union'
|
8
8
|
ext.lib_dir = 'lib/data_structures_rmolinari/'
|
9
9
|
end
|
@@ -17,10 +17,9 @@
|
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
19
|
|
20
|
-
// The Shared::DataError exception type in the Ruby code. We only need it when we detect a runtime error, so a macro
|
21
|
-
// just fine.
|
20
|
+
// The Shared::DataError exception type in the Ruby code. We only need it when we detect a runtime error, so a macro should be fine.
|
22
21
|
#define mShared rb_define_module("Shared")
|
23
|
-
#define
|
22
|
+
#define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
|
24
23
|
|
25
24
|
/**
|
26
25
|
* It's been so long since I've written non-trivial C that I need to copy examples from online.
|
@@ -34,6 +33,11 @@ typedef struct {
|
|
34
33
|
long default_val;
|
35
34
|
} DynamicArray;
|
36
35
|
|
36
|
+
/*
|
37
|
+
* Initialize a DynamicArray struct with the given initial size and with all values set to the default value.
|
38
|
+
*
|
39
|
+
* The default value is stored and used to initialize new array sections if and when the array needs to be expanded.
|
40
|
+
*/
|
37
41
|
void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
|
38
42
|
a->array = malloc(initial_size * sizeof(long));
|
39
43
|
a->size = initial_size;
|
@@ -44,15 +48,18 @@ void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
|
|
44
48
|
}
|
45
49
|
}
|
46
50
|
|
47
|
-
|
51
|
+
/*
|
52
|
+
* Assign +value+ to the +index+-th element of the array, expanding the available space if necessary.
|
53
|
+
*/
|
54
|
+
void assignInDynamicArray(DynamicArray *a, unsigned long index, long value) {
|
48
55
|
if (a->size <= index) {
|
49
56
|
size_t new_size = a->size;
|
50
57
|
while (new_size <= index) {
|
51
58
|
new_size = 8 * new_size / 5 + 8; // 8/5 gives "Fibonacci-like" growth; adding 8 to avoid small arrays having to reallocate
|
52
|
-
// too often. Who knows if it's worth being "clever".
|
59
|
+
// too often as they grow. Who knows if it's worth being "clever".
|
53
60
|
}
|
54
61
|
|
55
|
-
long*
|
62
|
+
long *new_array = realloc(a->array, new_size * sizeof(long));
|
56
63
|
if (!new_array) {
|
57
64
|
rb_raise(rb_eRuntimeError, "Cannot allocate memory to expand DynamicArray!");
|
58
65
|
}
|
@@ -65,7 +72,7 @@ void insertDynamicArray(DynamicArray *a, unsigned long index, long element) {
|
|
65
72
|
a->size = new_size;
|
66
73
|
}
|
67
74
|
|
68
|
-
a->array[index] =
|
75
|
+
a->array[index] = value;
|
69
76
|
}
|
70
77
|
|
71
78
|
void freeDynamicArray(DynamicArray *a) {
|
@@ -74,6 +81,10 @@ void freeDynamicArray(DynamicArray *a) {
|
|
74
81
|
a->size = 0;
|
75
82
|
}
|
76
83
|
|
84
|
+
size_t _size_of(DynamicArray *a) {
|
85
|
+
return a->size * sizeof(a->default_val);
|
86
|
+
}
|
87
|
+
|
77
88
|
/**
|
78
89
|
* The C implementation of a Disjoint Union
|
79
90
|
*
|
@@ -82,7 +93,7 @@ void freeDynamicArray(DynamicArray *a) {
|
|
82
93
|
|
83
94
|
/*
|
84
95
|
* The Disjoint Union struct.
|
85
|
-
* - forest: an array of longs giving, for each element, the
|
96
|
+
* - forest: an array of longs giving, for each element, the element's parent.
|
86
97
|
* - An element e is the root of its tree just when forest[e] == e.
|
87
98
|
* - Two elements are in the same subset just when they are in the same tree in the forest.
|
88
99
|
* - So the key idea is that we can check this by navigating via parents from each element to their roots. Clever optimizations
|
@@ -94,24 +105,24 @@ void freeDynamicArray(DynamicArray *a) {
|
|
94
105
|
* - it isn't needed internally but may be useful to client code.
|
95
106
|
*/
|
96
107
|
typedef struct du_data {
|
97
|
-
DynamicArray*
|
98
|
-
DynamicArray*
|
108
|
+
DynamicArray *forest; // the forest that describes the unified subsets
|
109
|
+
DynamicArray *rank; // the "ranks" of the elements, used when uniting subsets
|
99
110
|
size_t subset_count;
|
100
111
|
} disjoint_union_data;
|
101
112
|
|
102
113
|
/*
|
103
|
-
* Create one.
|
114
|
+
* Create one (on the heap).
|
104
115
|
*
|
105
116
|
* The dynamic arrays are initialized with a size of 100 because I didn't have a better idea. This will end up getting called from
|
106
117
|
* the Ruby #allocate method, which happens before #initialize. Thus we don't know the calling code's desired initial size.
|
107
118
|
*/
|
108
119
|
#define INITIAL_SIZE 100
|
109
|
-
static disjoint_union_data*
|
110
|
-
disjoint_union_data*
|
120
|
+
static disjoint_union_data *create_disjoint_union() {
|
121
|
+
disjoint_union_data *disjoint_union = (disjoint_union_data *)malloc(sizeof(disjoint_union_data));
|
111
122
|
|
112
123
|
// Allocate the structures
|
113
|
-
DynamicArray*
|
114
|
-
DynamicArray*
|
124
|
+
DynamicArray *forest = (DynamicArray *)malloc(sizeof(DynamicArray));
|
125
|
+
DynamicArray *rank = (DynamicArray *)malloc(sizeof(DynamicArray));
|
115
126
|
initDynamicArray(forest, INITIAL_SIZE, -1);
|
116
127
|
initDynamicArray(rank, INITIAL_SIZE, 0);
|
117
128
|
|
@@ -123,7 +134,9 @@ static disjoint_union_data* create_disjoint_union() {
|
|
123
134
|
}
|
124
135
|
|
125
136
|
/*
|
126
|
-
* Free the memory associated with a disjoint union.
|
137
|
+
* Free the memory associated with a disjoint union.
|
138
|
+
*
|
139
|
+
* This will end up getting triggered by the Ruby garbage collector. Ruby learns about it via the disjoint_union_type struct below.
|
127
140
|
*/
|
128
141
|
static void disjoint_union_free(void *ptr) {
|
129
142
|
if (ptr) {
|
@@ -137,7 +150,7 @@ static void disjoint_union_free(void *ptr) {
|
|
137
150
|
free(disjoint_union->rank);
|
138
151
|
disjoint_union->rank = NULL;
|
139
152
|
|
140
|
-
|
153
|
+
xfree(disjoint_union);
|
141
154
|
}
|
142
155
|
}
|
143
156
|
|
@@ -148,17 +161,17 @@ static void disjoint_union_free(void *ptr) {
|
|
148
161
|
/*
|
149
162
|
* Is the given element already a member of the universe?
|
150
163
|
*/
|
151
|
-
static int present_p(disjoint_union_data*
|
152
|
-
DynamicArray*
|
164
|
+
static int present_p(disjoint_union_data *disjoint_union, size_t element) {
|
165
|
+
DynamicArray *forest = (DynamicArray *)disjoint_union->forest;
|
153
166
|
return (forest->size > element && (forest->array[element] != forest->default_val));
|
154
167
|
}
|
155
168
|
|
156
169
|
/*
|
157
170
|
* Check that the given element is a member of the universe and raise Shared::DataError (ruby-side) if not
|
158
171
|
*/
|
159
|
-
static void assert_membership(disjoint_union_data*
|
172
|
+
static void assert_membership(disjoint_union_data *disjoint_union, size_t element) {
|
160
173
|
if (!present_p(disjoint_union, element)) {
|
161
|
-
rb_raise(
|
174
|
+
rb_raise(eSharedDataError, "Value %zu is not part of the universe", element);
|
162
175
|
}
|
163
176
|
}
|
164
177
|
|
@@ -167,13 +180,13 @@ static void assert_membership(disjoint_union_data* disjoint_union, size_t elemen
|
|
167
180
|
*
|
168
181
|
* Shared::DataError is raised if it is already an element.
|
169
182
|
*/
|
170
|
-
static void add_new_element(disjoint_union_data*
|
183
|
+
static void add_new_element(disjoint_union_data *disjoint_union, size_t element) {
|
171
184
|
if (present_p(disjoint_union, element)) {
|
172
|
-
rb_raise(
|
185
|
+
rb_raise(eSharedDataError, "Element %zu already present in the universe", element);
|
173
186
|
}
|
174
187
|
|
175
|
-
|
176
|
-
|
188
|
+
assignInDynamicArray(disjoint_union->forest, element, element);
|
189
|
+
assignInDynamicArray(disjoint_union->rank, element, 0);
|
177
190
|
disjoint_union->subset_count++;
|
178
191
|
}
|
179
192
|
|
@@ -182,11 +195,11 @@ static void add_new_element(disjoint_union_data* disjoint_union, size_t element)
|
|
182
195
|
*
|
183
196
|
* Two elements are in the same subset exactly when their canonical representatives are equal.
|
184
197
|
*/
|
185
|
-
static size_t find(disjoint_union_data*
|
198
|
+
static size_t find(disjoint_union_data *disjoint_union, size_t element) {
|
186
199
|
assert_membership(disjoint_union, element);
|
187
200
|
|
188
201
|
// We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
|
189
|
-
long*
|
202
|
+
long *d = disjoint_union->forest->array; // the actual forest data
|
190
203
|
size_t x = element;
|
191
204
|
while (d[d[x]] != d[x]) {
|
192
205
|
x = d[x] = d[d[x]];
|
@@ -202,9 +215,9 @@ static size_t find(disjoint_union_data* disjoint_union, size_t element) {
|
|
202
215
|
* Good performance (see Tarjan and van Leeuwen) assumes that elt1 and elt2 are distinct and already the roots of their trees,
|
203
216
|
* though we don't check that here.
|
204
217
|
*/
|
205
|
-
static void link_roots(disjoint_union_data*
|
206
|
-
long*
|
207
|
-
long*
|
218
|
+
static void link_roots(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
|
219
|
+
long *rank = disjoint_union->rank->array;
|
220
|
+
long *forest = disjoint_union->forest->array;
|
208
221
|
|
209
222
|
if (rank[elt1] > rank[elt2]) {
|
210
223
|
forest[elt2] = elt1;
|
@@ -221,12 +234,12 @@ static void link_roots(disjoint_union_data* disjoint_union, size_t elt1, size_t
|
|
221
234
|
/*
|
222
235
|
* "Unite" or merge the subsets containing elt1 and elt2.
|
223
236
|
*/
|
224
|
-
static void unite(disjoint_union_data*
|
237
|
+
static void unite(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
|
225
238
|
assert_membership(disjoint_union, elt1);
|
226
239
|
assert_membership(disjoint_union, elt2);
|
227
240
|
|
228
241
|
if (elt1 == elt2) {
|
229
|
-
rb_raise(
|
242
|
+
rb_raise(eSharedDataError, "Uniting an element with itself is meaningless");
|
230
243
|
}
|
231
244
|
|
232
245
|
size_t root1 = find(disjoint_union, elt1);
|
@@ -249,8 +262,8 @@ static void unite(disjoint_union_data* disjoint_union, size_t elt1, size_t elt2)
|
|
249
262
|
// deciding how aggressive to be during garbage collection and such.
|
250
263
|
static size_t disjoint_union_memsize(const void *ptr) {
|
251
264
|
if (ptr) {
|
252
|
-
const disjoint_union_data *
|
253
|
-
return (
|
265
|
+
const disjoint_union_data *du = ptr;
|
266
|
+
return sizeof(disjoint_union_data) + _size_of(du->forest) + _size_of(du->rank);
|
254
267
|
} else {
|
255
268
|
return 0;
|
256
269
|
}
|
@@ -273,16 +286,14 @@ static const rb_data_type_t disjoint_union_type = {
|
|
273
286
|
};
|
274
287
|
|
275
288
|
/*
|
276
|
-
* Helper: check that a Ruby value is a non-negative Fixnum and convert it to a
|
277
|
-
*
|
278
|
-
* TODO: can we return an size_t or unsigned long instead?
|
289
|
+
* Helper: check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
|
279
290
|
*/
|
280
|
-
static long checked_nonneg_fixnum(VALUE val) {
|
291
|
+
static unsigned long checked_nonneg_fixnum(VALUE val) {
|
281
292
|
Check_Type(val, T_FIXNUM);
|
282
293
|
long c_val = FIX2LONG(val);
|
283
294
|
|
284
295
|
if (c_val < 0) {
|
285
|
-
rb_raise(
|
296
|
+
rb_raise(eSharedDataError, "Value must be non-negative");
|
286
297
|
}
|
287
298
|
|
288
299
|
return c_val;
|
@@ -291,8 +302,8 @@ static long checked_nonneg_fixnum(VALUE val) {
|
|
291
302
|
/*
|
292
303
|
* Unwrap a Rubyfied disjoint union to get the C struct inside.
|
293
304
|
*/
|
294
|
-
static disjoint_union_data*
|
295
|
-
disjoint_union_data*
|
305
|
+
static disjoint_union_data *unwrapped(VALUE self) {
|
306
|
+
disjoint_union_data *disjoint_union;
|
296
307
|
TypedData_Get_Struct((self), disjoint_union_data, &disjoint_union_type, disjoint_union);
|
297
308
|
return disjoint_union;
|
298
309
|
}
|
@@ -301,7 +312,9 @@ static disjoint_union_data* unwrapped(VALUE self) {
|
|
301
312
|
* This is for CDisjointUnion.allocate on the Ruby side
|
302
313
|
*/
|
303
314
|
static VALUE disjoint_union_alloc(VALUE klass) {
|
304
|
-
|
315
|
+
// Get one on the heap
|
316
|
+
disjoint_union_data *disjoint_union = create_disjoint_union();
|
317
|
+
// Wrap it up into a Ruby object
|
305
318
|
return TypedData_Wrap_Struct(klass, &disjoint_union_type, disjoint_union);
|
306
319
|
}
|
307
320
|
|
@@ -318,7 +331,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
318
331
|
rb_raise(rb_eArgError, "wrong number of arguments");
|
319
332
|
} else {
|
320
333
|
size_t initial_size = checked_nonneg_fixnum(argv[0]);
|
321
|
-
disjoint_union_data*
|
334
|
+
disjoint_union_data *disjoint_union = unwrapped(self);
|
322
335
|
|
323
336
|
for (size_t i = 0; i < initial_size; i++) {
|
324
337
|
add_new_element(disjoint_union, i);
|
@@ -389,8 +402,7 @@ static VALUE disjoint_union_unite(VALUE self, VALUE arg1, VALUE arg2) {
|
|
389
402
|
* The data structure provides efficient actions to merge two disjoint subsets, i.e., replace them by their union, and determine if
|
390
403
|
* two elements are in the same subset.
|
391
404
|
*
|
392
|
-
* The elements of the set are
|
393
|
-
* representatives.
|
405
|
+
* The elements of the set are non-negative integers. Client code can map its data to these representatives.
|
394
406
|
*
|
395
407
|
* See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
|
396
408
|
*
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_structures_rmolinari
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rory Molinari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: must_be
|