data_structures_rmolinari 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Rakefile +1 -1
- data/ext/c_disjoint_union/disjoint_union.c +57 -45
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9022e9531472d1125c6172025c2d10c5d4ef4f9c43e326a43f1c5b4f0721263
|
4
|
+
data.tar.gz: '0212619be7fe32e68b63d2087730f81ffd6b4179b8b8bf63aa0026e4e3056224'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7f9258eeed2dc7e7fa5713aaecfcdf44e061bb161aa3d0d2662fb662bfb6b2685c61be221b4a109792982c3b2aa6215da75b51ae299d4a9237b6226000612e4
|
7
|
+
data.tar.gz: e585a245f753ef731895163eedba802e3fe2f6000720d10705b5a0cd02a12642a35220eea57c8b71f504b66db7cb06161fdfca1660edc0dc132ee026dd83be4d
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require 'rake/testtask'
|
|
3
3
|
require 'rake/extensiontask'
|
4
4
|
|
5
5
|
Rake::ExtensionTask.new('data_structures_rmolinari/c_disjoint_union') do |ext|
|
6
|
-
ext.name = '
|
6
|
+
ext.name = 'c_disjoint_union'
|
7
7
|
ext.ext_dir = 'ext/c_disjoint_union'
|
8
8
|
ext.lib_dir = 'lib/data_structures_rmolinari/'
|
9
9
|
end
|
@@ -17,10 +17,9 @@
|
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
19
|
|
20
|
-
// The Shared::DataError exception type in the Ruby code. We only need it when we detect a runtime error, so a macro
|
21
|
-
// just fine.
|
20
|
+
// The Shared::DataError exception type in the Ruby code. We only need it when we detect a runtime error, so a macro should be fine.
|
22
21
|
#define mShared rb_define_module("Shared")
|
23
|
-
#define
|
22
|
+
#define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
|
24
23
|
|
25
24
|
/**
|
26
25
|
* It's been so long since I've written non-trivial C that I need to copy examples from online.
|
@@ -34,6 +33,11 @@ typedef struct {
|
|
34
33
|
long default_val;
|
35
34
|
} DynamicArray;
|
36
35
|
|
36
|
+
/*
|
37
|
+
* Initialize a DynamicArray struct with the given initial size and with all values set to the default value.
|
38
|
+
*
|
39
|
+
* The default value is stored and used to initialize new array sections if and when the array needs to be expanded.
|
40
|
+
*/
|
37
41
|
void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
|
38
42
|
a->array = malloc(initial_size * sizeof(long));
|
39
43
|
a->size = initial_size;
|
@@ -44,15 +48,18 @@ void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
|
|
44
48
|
}
|
45
49
|
}
|
46
50
|
|
47
|
-
|
51
|
+
/*
|
52
|
+
* Assign +value+ to the +index+-th element of the array, expanding the available space if necessary.
|
53
|
+
*/
|
54
|
+
void assignInDynamicArray(DynamicArray *a, unsigned long index, long value) {
|
48
55
|
if (a->size <= index) {
|
49
56
|
size_t new_size = a->size;
|
50
57
|
while (new_size <= index) {
|
51
58
|
new_size = 8 * new_size / 5 + 8; // 8/5 gives "Fibonacci-like" growth; adding 8 to avoid small arrays having to reallocate
|
52
|
-
// too often. Who knows if it's worth being "clever".
|
59
|
+
// too often as they grow. Who knows if it's worth being "clever".
|
53
60
|
}
|
54
61
|
|
55
|
-
long*
|
62
|
+
long *new_array = realloc(a->array, new_size * sizeof(long));
|
56
63
|
if (!new_array) {
|
57
64
|
rb_raise(rb_eRuntimeError, "Cannot allocate memory to expand DynamicArray!");
|
58
65
|
}
|
@@ -65,7 +72,7 @@ void insertDynamicArray(DynamicArray *a, unsigned long index, long element) {
|
|
65
72
|
a->size = new_size;
|
66
73
|
}
|
67
74
|
|
68
|
-
a->array[index] =
|
75
|
+
a->array[index] = value;
|
69
76
|
}
|
70
77
|
|
71
78
|
void freeDynamicArray(DynamicArray *a) {
|
@@ -74,6 +81,10 @@ void freeDynamicArray(DynamicArray *a) {
|
|
74
81
|
a->size = 0;
|
75
82
|
}
|
76
83
|
|
84
|
+
size_t _size_of(DynamicArray *a) {
|
85
|
+
return a->size * sizeof(a->default_val);
|
86
|
+
}
|
87
|
+
|
77
88
|
/**
|
78
89
|
* The C implementation of a Disjoint Union
|
79
90
|
*
|
@@ -82,7 +93,7 @@ void freeDynamicArray(DynamicArray *a) {
|
|
82
93
|
|
83
94
|
/*
|
84
95
|
* The Disjoint Union struct.
|
85
|
-
* - forest: an array of longs giving, for each element, the
|
96
|
+
* - forest: an array of longs giving, for each element, the element's parent.
|
86
97
|
* - An element e is the root of its tree just when forest[e] == e.
|
87
98
|
* - Two elements are in the same subset just when they are in the same tree in the forest.
|
88
99
|
* - So the key idea is that we can check this by navigating via parents from each element to their roots. Clever optimizations
|
@@ -94,24 +105,24 @@ void freeDynamicArray(DynamicArray *a) {
|
|
94
105
|
* - it isn't needed internally but may be useful to client code.
|
95
106
|
*/
|
96
107
|
typedef struct du_data {
|
97
|
-
DynamicArray*
|
98
|
-
DynamicArray*
|
108
|
+
DynamicArray *forest; // the forest that describes the unified subsets
|
109
|
+
DynamicArray *rank; // the "ranks" of the elements, used when uniting subsets
|
99
110
|
size_t subset_count;
|
100
111
|
} disjoint_union_data;
|
101
112
|
|
102
113
|
/*
|
103
|
-
* Create one.
|
114
|
+
* Create one (on the heap).
|
104
115
|
*
|
105
116
|
* The dynamic arrays are initialized with a size of 100 because I didn't have a better idea. This will end up getting called from
|
106
117
|
* the Ruby #allocate method, which happens before #initialize. Thus we don't know the calling code's desired initial size.
|
107
118
|
*/
|
108
119
|
#define INITIAL_SIZE 100
|
109
|
-
static disjoint_union_data*
|
110
|
-
disjoint_union_data*
|
120
|
+
static disjoint_union_data *create_disjoint_union() {
|
121
|
+
disjoint_union_data *disjoint_union = (disjoint_union_data *)malloc(sizeof(disjoint_union_data));
|
111
122
|
|
112
123
|
// Allocate the structures
|
113
|
-
DynamicArray*
|
114
|
-
DynamicArray*
|
124
|
+
DynamicArray *forest = (DynamicArray *)malloc(sizeof(DynamicArray));
|
125
|
+
DynamicArray *rank = (DynamicArray *)malloc(sizeof(DynamicArray));
|
115
126
|
initDynamicArray(forest, INITIAL_SIZE, -1);
|
116
127
|
initDynamicArray(rank, INITIAL_SIZE, 0);
|
117
128
|
|
@@ -123,7 +134,9 @@ static disjoint_union_data* create_disjoint_union() {
|
|
123
134
|
}
|
124
135
|
|
125
136
|
/*
|
126
|
-
* Free the memory associated with a disjoint union.
|
137
|
+
* Free the memory associated with a disjoint union.
|
138
|
+
*
|
139
|
+
* This will end up getting triggered by the Ruby garbage collector. Ruby learns about it via the disjoint_union_type struct below.
|
127
140
|
*/
|
128
141
|
static void disjoint_union_free(void *ptr) {
|
129
142
|
if (ptr) {
|
@@ -137,7 +150,7 @@ static void disjoint_union_free(void *ptr) {
|
|
137
150
|
free(disjoint_union->rank);
|
138
151
|
disjoint_union->rank = NULL;
|
139
152
|
|
140
|
-
|
153
|
+
xfree(disjoint_union);
|
141
154
|
}
|
142
155
|
}
|
143
156
|
|
@@ -148,17 +161,17 @@ static void disjoint_union_free(void *ptr) {
|
|
148
161
|
/*
|
149
162
|
* Is the given element already a member of the universe?
|
150
163
|
*/
|
151
|
-
static int present_p(disjoint_union_data*
|
152
|
-
DynamicArray*
|
164
|
+
static int present_p(disjoint_union_data *disjoint_union, size_t element) {
|
165
|
+
DynamicArray *forest = (DynamicArray *)disjoint_union->forest;
|
153
166
|
return (forest->size > element && (forest->array[element] != forest->default_val));
|
154
167
|
}
|
155
168
|
|
156
169
|
/*
|
157
170
|
* Check that the given element is a member of the universe and raise Shared::DataError (ruby-side) if not
|
158
171
|
*/
|
159
|
-
static void assert_membership(disjoint_union_data*
|
172
|
+
static void assert_membership(disjoint_union_data *disjoint_union, size_t element) {
|
160
173
|
if (!present_p(disjoint_union, element)) {
|
161
|
-
rb_raise(
|
174
|
+
rb_raise(eSharedDataError, "Value %zu is not part of the universe", element);
|
162
175
|
}
|
163
176
|
}
|
164
177
|
|
@@ -167,13 +180,13 @@ static void assert_membership(disjoint_union_data* disjoint_union, size_t elemen
|
|
167
180
|
*
|
168
181
|
* Shared::DataError is raised if it is already an element.
|
169
182
|
*/
|
170
|
-
static void add_new_element(disjoint_union_data*
|
183
|
+
static void add_new_element(disjoint_union_data *disjoint_union, size_t element) {
|
171
184
|
if (present_p(disjoint_union, element)) {
|
172
|
-
rb_raise(
|
185
|
+
rb_raise(eSharedDataError, "Element %zu already present in the universe", element);
|
173
186
|
}
|
174
187
|
|
175
|
-
|
176
|
-
|
188
|
+
assignInDynamicArray(disjoint_union->forest, element, element);
|
189
|
+
assignInDynamicArray(disjoint_union->rank, element, 0);
|
177
190
|
disjoint_union->subset_count++;
|
178
191
|
}
|
179
192
|
|
@@ -182,11 +195,11 @@ static void add_new_element(disjoint_union_data* disjoint_union, size_t element)
|
|
182
195
|
*
|
183
196
|
* Two elements are in the same subset exactly when their canonical representatives are equal.
|
184
197
|
*/
|
185
|
-
static size_t find(disjoint_union_data*
|
198
|
+
static size_t find(disjoint_union_data *disjoint_union, size_t element) {
|
186
199
|
assert_membership(disjoint_union, element);
|
187
200
|
|
188
201
|
// We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
|
189
|
-
long*
|
202
|
+
long *d = disjoint_union->forest->array; // the actual forest data
|
190
203
|
size_t x = element;
|
191
204
|
while (d[d[x]] != d[x]) {
|
192
205
|
x = d[x] = d[d[x]];
|
@@ -202,9 +215,9 @@ static size_t find(disjoint_union_data* disjoint_union, size_t element) {
|
|
202
215
|
* Good performance (see Tarjan and van Leeuwen) assumes that elt1 and elt2 are distinct and already the roots of their trees,
|
203
216
|
* though we don't check that here.
|
204
217
|
*/
|
205
|
-
static void link_roots(disjoint_union_data*
|
206
|
-
long*
|
207
|
-
long*
|
218
|
+
static void link_roots(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
|
219
|
+
long *rank = disjoint_union->rank->array;
|
220
|
+
long *forest = disjoint_union->forest->array;
|
208
221
|
|
209
222
|
if (rank[elt1] > rank[elt2]) {
|
210
223
|
forest[elt2] = elt1;
|
@@ -221,12 +234,12 @@ static void link_roots(disjoint_union_data* disjoint_union, size_t elt1, size_t
|
|
221
234
|
/*
|
222
235
|
* "Unite" or merge the subsets containing elt1 and elt2.
|
223
236
|
*/
|
224
|
-
static void unite(disjoint_union_data*
|
237
|
+
static void unite(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
|
225
238
|
assert_membership(disjoint_union, elt1);
|
226
239
|
assert_membership(disjoint_union, elt2);
|
227
240
|
|
228
241
|
if (elt1 == elt2) {
|
229
|
-
rb_raise(
|
242
|
+
rb_raise(eSharedDataError, "Uniting an element with itself is meaningless");
|
230
243
|
}
|
231
244
|
|
232
245
|
size_t root1 = find(disjoint_union, elt1);
|
@@ -249,8 +262,8 @@ static void unite(disjoint_union_data* disjoint_union, size_t elt1, size_t elt2)
|
|
249
262
|
// deciding how aggressive to be during garbage collection and such.
|
250
263
|
static size_t disjoint_union_memsize(const void *ptr) {
|
251
264
|
if (ptr) {
|
252
|
-
const disjoint_union_data *
|
253
|
-
return (
|
265
|
+
const disjoint_union_data *du = ptr;
|
266
|
+
return sizeof(disjoint_union_data) + _size_of(du->forest) + _size_of(du->rank);
|
254
267
|
} else {
|
255
268
|
return 0;
|
256
269
|
}
|
@@ -273,16 +286,14 @@ static const rb_data_type_t disjoint_union_type = {
|
|
273
286
|
};
|
274
287
|
|
275
288
|
/*
|
276
|
-
* Helper: check that a Ruby value is a non-negative Fixnum and convert it to a
|
277
|
-
*
|
278
|
-
* TODO: can we return an size_t or unsigned long instead?
|
289
|
+
* Helper: check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
|
279
290
|
*/
|
280
|
-
static long checked_nonneg_fixnum(VALUE val) {
|
291
|
+
static unsigned long checked_nonneg_fixnum(VALUE val) {
|
281
292
|
Check_Type(val, T_FIXNUM);
|
282
293
|
long c_val = FIX2LONG(val);
|
283
294
|
|
284
295
|
if (c_val < 0) {
|
285
|
-
rb_raise(
|
296
|
+
rb_raise(eSharedDataError, "Value must be non-negative");
|
286
297
|
}
|
287
298
|
|
288
299
|
return c_val;
|
@@ -291,8 +302,8 @@ static long checked_nonneg_fixnum(VALUE val) {
|
|
291
302
|
/*
|
292
303
|
* Unwrap a Rubyfied disjoint union to get the C struct inside.
|
293
304
|
*/
|
294
|
-
static disjoint_union_data*
|
295
|
-
disjoint_union_data*
|
305
|
+
static disjoint_union_data *unwrapped(VALUE self) {
|
306
|
+
disjoint_union_data *disjoint_union;
|
296
307
|
TypedData_Get_Struct((self), disjoint_union_data, &disjoint_union_type, disjoint_union);
|
297
308
|
return disjoint_union;
|
298
309
|
}
|
@@ -301,7 +312,9 @@ static disjoint_union_data* unwrapped(VALUE self) {
|
|
301
312
|
* This is for CDisjointUnion.allocate on the Ruby side
|
302
313
|
*/
|
303
314
|
static VALUE disjoint_union_alloc(VALUE klass) {
|
304
|
-
|
315
|
+
// Get one on the heap
|
316
|
+
disjoint_union_data *disjoint_union = create_disjoint_union();
|
317
|
+
// Wrap it up into a Ruby object
|
305
318
|
return TypedData_Wrap_Struct(klass, &disjoint_union_type, disjoint_union);
|
306
319
|
}
|
307
320
|
|
@@ -318,7 +331,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
|
|
318
331
|
rb_raise(rb_eArgError, "wrong number of arguments");
|
319
332
|
} else {
|
320
333
|
size_t initial_size = checked_nonneg_fixnum(argv[0]);
|
321
|
-
disjoint_union_data*
|
334
|
+
disjoint_union_data *disjoint_union = unwrapped(self);
|
322
335
|
|
323
336
|
for (size_t i = 0; i < initial_size; i++) {
|
324
337
|
add_new_element(disjoint_union, i);
|
@@ -389,8 +402,7 @@ static VALUE disjoint_union_unite(VALUE self, VALUE arg1, VALUE arg2) {
|
|
389
402
|
* The data structure provides efficient actions to merge two disjoint subsets, i.e., replace them by their union, and determine if
|
390
403
|
* two elements are in the same subset.
|
391
404
|
*
|
392
|
-
* The elements of the set are
|
393
|
-
* representatives.
|
405
|
+
* The elements of the set are non-negative integers. Client code can map its data to these representatives.
|
394
406
|
*
|
395
407
|
* See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
|
396
408
|
*
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_structures_rmolinari
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rory Molinari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: must_be
|