data_structures_rmolinari 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c912d4ddf3a7cfc721b7f298a966f7e0d4cbd4249797506457605a44774523a0
4
- data.tar.gz: c168b7096178e496f76fa53f5b8566cd2ac26897fd5b362c3c37da5314f2a6db
3
+ metadata.gz: c9022e9531472d1125c6172025c2d10c5d4ef4f9c43e326a43f1c5b4f0721263
4
+ data.tar.gz: '0212619be7fe32e68b63d2087730f81ffd6b4179b8b8bf63aa0026e4e3056224'
5
5
  SHA512:
6
- metadata.gz: 0c88c1ad7c07fe6358e3eefd21406b4bbd33a89e89731edb3997bb027efca52a7d312ffee1435af960be73c5cd5212950a854a11c6f2105dfccc47ed4ae00c2b
7
- data.tar.gz: 9bf6e4570017217b59f4f3a0b1d9e23d7752ba4e4b5dc11a988826726367956c6564763044f23b5105293213c4844667249a7f88726861f792264c1d634256ae
6
+ metadata.gz: a7f9258eeed2dc7e7fa5713aaecfcdf44e061bb161aa3d0d2662fb662bfb6b2685c61be221b4a109792982c3b2aa6215da75b51ae299d4a9237b6226000612e4
7
+ data.tar.gz: e585a245f753ef731895163eedba802e3fe2f6000720d10705b5a0cd02a12642a35220eea57c8b71f504b66db7cb06161fdfca1660edc0dc132ee026dd83be4d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.3] 2023-01-27
4
+
5
+ - Fix bad directive in Rakefile for DisjointUnion C extension
6
+
3
7
  ## [0.4.2] 2023-01-26
4
8
 
5
9
  ### Added
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'rake/testtask'
3
3
  require 'rake/extensiontask'
4
4
 
5
5
  Rake::ExtensionTask.new('data_structures_rmolinari/c_disjoint_union') do |ext|
6
- ext.name = 'CDisjointUnion'
6
+ ext.name = 'c_disjoint_union'
7
7
  ext.ext_dir = 'ext/c_disjoint_union'
8
8
  ext.lib_dir = 'lib/data_structures_rmolinari/'
9
9
  end
@@ -17,10 +17,9 @@
17
17
 
18
18
  #include "ruby.h"
19
19
 
20
- // The Shared::DataError exception type in the Ruby code. We only need it when we detect a runtime error, so a macro is simplest and
21
- // just fine.
20
+ // The Shared::DataError exception type in the Ruby code. We only need it when we detect a runtime error, so a macro should be fine.
22
21
  #define mShared rb_define_module("Shared")
23
- #define eDataError rb_const_get(mShared, rb_intern_const("DataError"))
22
+ #define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
24
23
 
25
24
  /**
26
25
  * It's been so long since I've written non-trivial C that I need to copy examples from online.
@@ -34,6 +33,11 @@ typedef struct {
34
33
  long default_val;
35
34
  } DynamicArray;
36
35
 
36
+ /*
37
+ * Initialize a DynamicArray struct with the given initial size and with all values set to the default value.
38
+ *
39
+ * The default value is stored and used to initialize new array sections if and when the array needs to be expanded.
40
+ */
37
41
  void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
38
42
  a->array = malloc(initial_size * sizeof(long));
39
43
  a->size = initial_size;
@@ -44,15 +48,18 @@ void initDynamicArray(DynamicArray *a, size_t initial_size, long default_val) {
44
48
  }
45
49
  }
46
50
 
47
- void insertDynamicArray(DynamicArray *a, unsigned long index, long element) {
51
+ /*
52
+ * Assign +value+ to the +index+-th element of the array, expanding the available space if necessary.
53
+ */
54
+ void assignInDynamicArray(DynamicArray *a, unsigned long index, long value) {
48
55
  if (a->size <= index) {
49
56
  size_t new_size = a->size;
50
57
  while (new_size <= index) {
51
58
  new_size = 8 * new_size / 5 + 8; // 8/5 gives "Fibonacci-like" growth; adding 8 to avoid small arrays having to reallocate
52
- // too often. Who knows if it's worth being "clever"."
59
+ // too often as they grow. Who knows if it's worth being "clever".
53
60
  }
54
61
 
55
- long* new_array = realloc(a->array, new_size * sizeof(long));
62
+ long *new_array = realloc(a->array, new_size * sizeof(long));
56
63
  if (!new_array) {
57
64
  rb_raise(rb_eRuntimeError, "Cannot allocate memory to expand DynamicArray!");
58
65
  }
@@ -65,7 +72,7 @@ void insertDynamicArray(DynamicArray *a, unsigned long index, long element) {
65
72
  a->size = new_size;
66
73
  }
67
74
 
68
- a->array[index] = element;
75
+ a->array[index] = value;
69
76
  }
70
77
 
71
78
  void freeDynamicArray(DynamicArray *a) {
@@ -74,6 +81,10 @@ void freeDynamicArray(DynamicArray *a) {
74
81
  a->size = 0;
75
82
  }
76
83
 
84
+ size_t _size_of(DynamicArray *a) {
85
+ return a->size * sizeof(a->default_val);
86
+ }
87
+
77
88
  /**
78
89
  * The C implementation of a Disjoint Union
79
90
  *
@@ -82,7 +93,7 @@ void freeDynamicArray(DynamicArray *a) {
82
93
 
83
94
  /*
84
95
  * The Disjoint Union struct.
85
- * - forest: an array of longs giving, for each element, the parent element of its tree.
96
+ * - forest: an array of longs giving, for each element, the element's parent.
86
97
  * - An element e is the root of its tree just when forest[e] == e.
87
98
  * - Two elements are in the same subset just when they are in the same tree in the forest.
88
99
  * - So the key idea is that we can check this by navigating via parents from each element to their roots. Clever optimizations
@@ -94,24 +105,24 @@ void freeDynamicArray(DynamicArray *a) {
94
105
  * - it isn't needed internally but may be useful to client code.
95
106
  */
96
107
  typedef struct du_data {
97
- DynamicArray* forest; // the forest that describes the unified subsets
98
- DynamicArray* rank; // the "ranks" of the elements, used when uniting subsets
108
+ DynamicArray *forest; // the forest that describes the unified subsets
109
+ DynamicArray *rank; // the "ranks" of the elements, used when uniting subsets
99
110
  size_t subset_count;
100
111
  } disjoint_union_data;
101
112
 
102
113
  /*
103
- * Create one.
114
+ * Create one (on the heap).
104
115
  *
105
116
  * The dynamic arrays are initialized with a size of 100 because I didn't have a better idea. This will end up getting called from
106
117
  * the Ruby #allocate method, which happens before #initialize. Thus we don't know the calling code's desired initial size.
107
118
  */
108
119
  #define INITIAL_SIZE 100
109
- static disjoint_union_data* create_disjoint_union() {
110
- disjoint_union_data* disjoint_union = malloc(sizeof(disjoint_union_data));
120
+ static disjoint_union_data *create_disjoint_union() {
121
+ disjoint_union_data *disjoint_union = (disjoint_union_data *)malloc(sizeof(disjoint_union_data));
111
122
 
112
123
  // Allocate the structures
113
- DynamicArray* forest = malloc(sizeof(DynamicArray));
114
- DynamicArray* rank = malloc(sizeof(DynamicArray));
124
+ DynamicArray *forest = (DynamicArray *)malloc(sizeof(DynamicArray));
125
+ DynamicArray *rank = (DynamicArray *)malloc(sizeof(DynamicArray));
115
126
  initDynamicArray(forest, INITIAL_SIZE, -1);
116
127
  initDynamicArray(rank, INITIAL_SIZE, 0);
117
128
 
@@ -123,7 +134,9 @@ static disjoint_union_data* create_disjoint_union() {
123
134
  }
124
135
 
125
136
  /*
126
- * Free the memory associated with a disjoint union. This will end up getting triggered by the Ruby garbage collector.
137
+ * Free the memory associated with a disjoint union.
138
+ *
139
+ * This will end up getting triggered by the Ruby garbage collector. Ruby learns about it via the disjoint_union_type struct below.
127
140
  */
128
141
  static void disjoint_union_free(void *ptr) {
129
142
  if (ptr) {
@@ -137,7 +150,7 @@ static void disjoint_union_free(void *ptr) {
137
150
  free(disjoint_union->rank);
138
151
  disjoint_union->rank = NULL;
139
152
 
140
- free(disjoint_union);
153
+ xfree(disjoint_union);
141
154
  }
142
155
  }
143
156
 
@@ -148,17 +161,17 @@ static void disjoint_union_free(void *ptr) {
148
161
  /*
149
162
  * Is the given element already a member of the universe?
150
163
  */
151
- static int present_p(disjoint_union_data* disjoint_union, size_t element) {
152
- DynamicArray* forest = disjoint_union->forest;
164
+ static int present_p(disjoint_union_data *disjoint_union, size_t element) {
165
+ DynamicArray *forest = (DynamicArray *)disjoint_union->forest;
153
166
  return (forest->size > element && (forest->array[element] != forest->default_val));
154
167
  }
155
168
 
156
169
  /*
157
170
  * Check that the given element is a member of the universe and raise Shared::DataError (ruby-side) if not
158
171
  */
159
- static void assert_membership(disjoint_union_data* disjoint_union, size_t element) {
172
+ static void assert_membership(disjoint_union_data *disjoint_union, size_t element) {
160
173
  if (!present_p(disjoint_union, element)) {
161
- rb_raise(eDataError, "Value %zu is not part of the universe", element);
174
+ rb_raise(eSharedDataError, "Value %zu is not part of the universe", element);
162
175
  }
163
176
  }
164
177
 
@@ -167,13 +180,13 @@ static void assert_membership(disjoint_union_data* disjoint_union, size_t elemen
167
180
  *
168
181
  * Shared::DataError is raised if it is already an element.
169
182
  */
170
- static void add_new_element(disjoint_union_data* disjoint_union, size_t element) {
183
+ static void add_new_element(disjoint_union_data *disjoint_union, size_t element) {
171
184
  if (present_p(disjoint_union, element)) {
172
- rb_raise(eDataError, "Element %zu already present in the universe", element);
185
+ rb_raise(eSharedDataError, "Element %zu already present in the universe", element);
173
186
  }
174
187
 
175
- insertDynamicArray(disjoint_union->forest, element, element);
176
- insertDynamicArray(disjoint_union->rank, element, 0);
188
+ assignInDynamicArray(disjoint_union->forest, element, element);
189
+ assignInDynamicArray(disjoint_union->rank, element, 0);
177
190
  disjoint_union->subset_count++;
178
191
  }
179
192
 
@@ -182,11 +195,11 @@ static void add_new_element(disjoint_union_data* disjoint_union, size_t element)
182
195
  *
183
196
  * Two elements are in the same subset exactly when their canonical representatives are equal.
184
197
  */
185
- static size_t find(disjoint_union_data* disjoint_union, size_t element) {
198
+ static size_t find(disjoint_union_data *disjoint_union, size_t element) {
186
199
  assert_membership(disjoint_union, element);
187
200
 
188
201
  // We implement find with "halving" to shrink the length of paths to the root. See Tarjan and van Leeuwen p 252.
189
- long* d = disjoint_union->forest->array; // the actual forest data
202
+ long *d = disjoint_union->forest->array; // the actual forest data
190
203
  size_t x = element;
191
204
  while (d[d[x]] != d[x]) {
192
205
  x = d[x] = d[d[x]];
@@ -202,9 +215,9 @@ static size_t find(disjoint_union_data* disjoint_union, size_t element) {
202
215
  * Good performance (see Tarjan and van Leeuwen) assumes that elt1 and elt2 are distinct and already the roots of their trees,
203
216
  * though we don't check that here.
204
217
  */
205
- static void link_roots(disjoint_union_data* disjoint_union, size_t elt1, size_t elt2) {
206
- long* rank = disjoint_union->rank->array;
207
- long* forest = disjoint_union->forest->array;
218
+ static void link_roots(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
219
+ long *rank = disjoint_union->rank->array;
220
+ long *forest = disjoint_union->forest->array;
208
221
 
209
222
  if (rank[elt1] > rank[elt2]) {
210
223
  forest[elt2] = elt1;
@@ -221,12 +234,12 @@ static void link_roots(disjoint_union_data* disjoint_union, size_t elt1, size_t
221
234
  /*
222
235
  * "Unite" or merge the subsets containing elt1 and elt2.
223
236
  */
224
- static void unite(disjoint_union_data* disjoint_union, size_t elt1, size_t elt2) {
237
+ static void unite(disjoint_union_data *disjoint_union, size_t elt1, size_t elt2) {
225
238
  assert_membership(disjoint_union, elt1);
226
239
  assert_membership(disjoint_union, elt2);
227
240
 
228
241
  if (elt1 == elt2) {
229
- rb_raise(eDataError, "Uniting an element with itself is meaningless");
242
+ rb_raise(eSharedDataError, "Uniting an element with itself is meaningless");
230
243
  }
231
244
 
232
245
  size_t root1 = find(disjoint_union, elt1);
@@ -249,8 +262,8 @@ static void unite(disjoint_union_data* disjoint_union, size_t elt1, size_t elt2)
249
262
  // deciding how aggressive to be during garbage collection and such.
250
263
  static size_t disjoint_union_memsize(const void *ptr) {
251
264
  if (ptr) {
252
- const disjoint_union_data *disjoint_union = ptr;
253
- return (2 * disjoint_union->forest->size * sizeof(long)); // disjoint_union->rank is the same size
265
+ const disjoint_union_data *du = ptr;
266
+ return sizeof(disjoint_union_data) + _size_of(du->forest) + _size_of(du->rank);
254
267
  } else {
255
268
  return 0;
256
269
  }
@@ -273,16 +286,14 @@ static const rb_data_type_t disjoint_union_type = {
273
286
  };
274
287
 
275
288
  /*
276
- * Helper: check that a Ruby value is a non-negative Fixnum and convert it to a nice C long
277
- *
278
- * TODO: can we return an size_t or unsigned long instead?
289
+ * Helper: check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
279
290
  */
280
- static long checked_nonneg_fixnum(VALUE val) {
291
+ static unsigned long checked_nonneg_fixnum(VALUE val) {
281
292
  Check_Type(val, T_FIXNUM);
282
293
  long c_val = FIX2LONG(val);
283
294
 
284
295
  if (c_val < 0) {
285
- rb_raise(eDataError, "Value must be non-negative");
296
+ rb_raise(eSharedDataError, "Value must be non-negative");
286
297
  }
287
298
 
288
299
  return c_val;
@@ -291,8 +302,8 @@ static long checked_nonneg_fixnum(VALUE val) {
291
302
  /*
292
303
  * Unwrap a Rubyfied disjoint union to get the C struct inside.
293
304
  */
294
- static disjoint_union_data* unwrapped(VALUE self) {
295
- disjoint_union_data* disjoint_union;
305
+ static disjoint_union_data *unwrapped(VALUE self) {
306
+ disjoint_union_data *disjoint_union;
296
307
  TypedData_Get_Struct((self), disjoint_union_data, &disjoint_union_type, disjoint_union);
297
308
  return disjoint_union;
298
309
  }
@@ -301,7 +312,9 @@ static disjoint_union_data* unwrapped(VALUE self) {
301
312
  * This is for CDisjointUnion.allocate on the Ruby side
302
313
  */
303
314
  static VALUE disjoint_union_alloc(VALUE klass) {
304
- disjoint_union_data* disjoint_union = create_disjoint_union();
315
+ // Get one on the heap
316
+ disjoint_union_data *disjoint_union = create_disjoint_union();
317
+ // Wrap it up into a Ruby object
305
318
  return TypedData_Wrap_Struct(klass, &disjoint_union_type, disjoint_union);
306
319
  }
307
320
 
@@ -318,7 +331,7 @@ static VALUE disjoint_union_init(int argc, VALUE *argv, VALUE self) {
318
331
  rb_raise(rb_eArgError, "wrong number of arguments");
319
332
  } else {
320
333
  size_t initial_size = checked_nonneg_fixnum(argv[0]);
321
- disjoint_union_data* disjoint_union = unwrapped(self);
334
+ disjoint_union_data *disjoint_union = unwrapped(self);
322
335
 
323
336
  for (size_t i = 0; i < initial_size; i++) {
324
337
  add_new_element(disjoint_union, i);
@@ -389,8 +402,7 @@ static VALUE disjoint_union_unite(VALUE self, VALUE arg1, VALUE arg2) {
389
402
  * The data structure provides efficient actions to merge two disjoint subsets, i.e., replace them by their union, and determine if
390
403
  * two elements are in the same subset.
391
404
  *
392
- * The elements of the set are 0, 1, ..., n-1, where n is the size of the universe. Client code can map its data to these
393
- * representatives.
405
+ * The elements of the set are non-negative integers. Client code can map its data to these representatives.
394
406
  *
395
407
  * See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a good introduction.
396
408
  *
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-26 00:00:00.000000000 Z
11
+ date: 2023-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be