data_structures_rmolinari 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +119 -33
- data/Rakefile +6 -4
- data/ext/c_disjoint_union/disjoint_union.c +75 -129
- data/ext/c_disjoint_union/extconf.rb +7 -2
- data/ext/c_segment_tree_template/extconf.rb +17 -0
- data/ext/c_segment_tree_template/segment_tree_template.c +363 -0
- data/ext/shared.c +32 -0
- data/lib/data_structures_rmolinari/algorithms.rb +5 -5
- data/lib/data_structures_rmolinari/c_segment_tree_template_impl.rb +15 -0
- data/lib/data_structures_rmolinari/disjoint_union.rb +2 -0
- data/lib/data_structures_rmolinari/segment_tree.rb +126 -0
- data/lib/data_structures_rmolinari/segment_tree_template.rb +11 -8
- data/lib/data_structures_rmolinari.rb +5 -62
- metadata +8 -2
@@ -0,0 +1,363 @@
|
|
1
|
+
/*
|
2
|
+
* This is a C implementation of a Segment Tree data structure.
|
3
|
+
*
|
4
|
+
* More specifically, it is the C version of the SegmentTreeTemplate Ruby class, for which see elsewhere in the repo.
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include "ruby.h"
|
8
|
+
#include "shared.h"
|
9
|
+
|
10
|
+
#define single_cell_val_at(seg_tree, idx) rb_funcall(seg_tree->single_cell_array_val_lambda, rb_intern("call"), 1, LONG2FIX(idx))
|
11
|
+
#define combined_val(seg_tree, v1, v2) rb_funcall(seg_tree->combine_lambda, rb_intern("call"), 2, (v1), (v2))
|
12
|
+
|
13
|
+
/**
|
14
|
+
* The C implementation of a generic Segment Tree
|
15
|
+
*/
|
16
|
+
|
17
|
+
typedef struct {
  VALUE *tree; // The 1-based implicit binary tree in which the data structure lives; NULL until setup() allocates it
  VALUE single_cell_array_val_lambda; // Ruby callable, called with an index i to get the value stored for the single cell i
  VALUE combine_lambda; // Ruby callable, called with two node values to get the value of their parent node
  VALUE identity; // returned by query_on when the requested interval is empty
  size_t size; // the size of the underlying data array
  size_t tree_alloc_size; // the size of the VALUE* tree array, counted in VALUE slots (not bytes)
} segment_tree_data;
|
25
|
+
|
26
|
+
/************************************************************
|
27
|
+
* Memory Management
|
28
|
+
*
|
29
|
+
*/
|
30
|
+
|
31
|
+
/*
|
32
|
+
* Create one (on the heap).
|
33
|
+
*/
|
34
|
+
static segment_tree_data *create_segment_tree() {
|
35
|
+
segment_tree_data *segment_tree = malloc(sizeof(segment_tree_data));
|
36
|
+
|
37
|
+
// Allocate the structures
|
38
|
+
segment_tree->tree = NULL; // we don't yet know how much space we need
|
39
|
+
|
40
|
+
segment_tree->single_cell_array_val_lambda = 0;
|
41
|
+
segment_tree->combine_lambda = 0;
|
42
|
+
segment_tree->size = 0; // we don't know the right value yet
|
43
|
+
|
44
|
+
return segment_tree;
|
45
|
+
}
|
46
|
+
|
47
|
+
/*
|
48
|
+
* Free the memory associated with a segment_tree.
|
49
|
+
*
|
50
|
+
* This will end up getting triggered by the Ruby garbage collector. Ruby learns about it via the segment_tree_type struct below.
|
51
|
+
*/
|
52
|
+
static void segment_tree_free(void *ptr) {
|
53
|
+
if (ptr) {
|
54
|
+
segment_tree_data *segment_tree = ptr;
|
55
|
+
xfree(segment_tree->tree);
|
56
|
+
xfree(segment_tree);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
/*
|
61
|
+
* How much memory (roughly) does a segment_tree_data instance consume?
|
62
|
+
*
|
63
|
+
* I guess the Ruby runtime can use this information when deciding how agressive to be during garbage collection and such.
|
64
|
+
*/
|
65
|
+
static size_t segment_tree_memsize(const void *ptr) {
|
66
|
+
if (ptr) {
|
67
|
+
const segment_tree_data *st = ptr;
|
68
|
+
|
69
|
+
// for the tree array plus the size of the segment_tree_data struct itself.
|
70
|
+
return sizeof( VALUE ) * st->tree_alloc_size * 4 + sizeof(segment_tree_data);
|
71
|
+
} else {
|
72
|
+
return 0;
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
/*
|
77
|
+
* Mark the Ruby objects we hold so that the Ruby garbage collector knows that they are still in use.
|
78
|
+
*/
|
79
|
+
static void segment_tree_mark(void *ptr) {
|
80
|
+
segment_tree_data *st = ptr;
|
81
|
+
|
82
|
+
rb_gc_mark(st->combine_lambda);
|
83
|
+
rb_gc_mark(st->single_cell_array_val_lambda);
|
84
|
+
rb_gc_mark(st->identity);
|
85
|
+
|
86
|
+
for (size_t i = 0; i < st->tree_alloc_size; i++) {
|
87
|
+
VALUE value = st->tree[i];
|
88
|
+
if (value) {
|
89
|
+
rb_gc_mark(value);
|
90
|
+
}
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
|
95
|
+
/*
|
96
|
+
* A configuration struct that tells the Ruby runtime how to deal with a segment_tree_data object.
|
97
|
+
*
|
98
|
+
* https://docs.ruby-lang.org/en/master/extension_rdoc.html#label-Encapsulate+C+data+into+a+Ruby+object
|
99
|
+
*/
|
100
|
+
static const rb_data_type_t segment_tree_type = {
|
101
|
+
.wrap_struct_name = "segment_tree_template",
|
102
|
+
{ // help for the Ruby garbage collector
|
103
|
+
.dmark = segment_tree_mark, // dmark, for marking other Ruby objects.
|
104
|
+
.dfree = segment_tree_free, // how to free the memory associated with an object
|
105
|
+
.dsize = segment_tree_memsize, // roughly how much space does the object consume?
|
106
|
+
},
|
107
|
+
.data = NULL, // a data field we could use for something here if we wanted. Ruby ignores it
|
108
|
+
.flags = 0 // GC-related flag values.
|
109
|
+
};
|
110
|
+
|
111
|
+
/*
|
112
|
+
* End memory management functions.
|
113
|
+
************************************************************/
|
114
|
+
|
115
|
+
|
116
|
+
/************************************************************
|
117
|
+
* Wrapping and unwrapping the C struct and other things.
|
118
|
+
*
|
119
|
+
*/
|
120
|
+
|
121
|
+
/*
|
122
|
+
* Unwrap a Ruby-side disjoint union object to get the C struct inside.
|
123
|
+
*
|
124
|
+
* TODO: consider a macro in a shared header
|
125
|
+
*/
|
126
|
+
static segment_tree_data *unwrapped(VALUE self) {
|
127
|
+
segment_tree_data *segment_tree;
|
128
|
+
TypedData_Get_Struct((self), segment_tree_data, &segment_tree_type, segment_tree);
|
129
|
+
return segment_tree;
|
130
|
+
}
|
131
|
+
|
132
|
+
/*
|
133
|
+
* Allocate a segment_tree_data struct and wrap it for the Ruby runtime.
|
134
|
+
*
|
135
|
+
* This is for CSegmentTreeTemplate.allocate on the Ruby side.
|
136
|
+
*/
|
137
|
+
static VALUE segment_tree_alloc(VALUE klass) {
|
138
|
+
// Get one on the heap
|
139
|
+
segment_tree_data *segment_tree = create_segment_tree();
|
140
|
+
// ...and wrap it into a Ruby object
|
141
|
+
return TypedData_Wrap_Struct(klass, &segment_tree_type, segment_tree);
|
142
|
+
}
|
143
|
+
|
144
|
+
/*
|
145
|
+
* End wrapping and unwrapping functions.
|
146
|
+
************************************************************/
|
147
|
+
|
148
|
+
/************************************************************
|
149
|
+
* The Segment Tree API on the C side.
|
150
|
+
*
|
151
|
+
* We wrap these in the Ruby-ready functions below
|
152
|
+
*/
|
153
|
+
|
154
|
+
/*
|
155
|
+
* Recursively build the internal tree data structure.
|
156
|
+
*
|
157
|
+
* - tree_idx: the index into the tree array of the node being calculated
|
158
|
+
* - [tree_l, tree_r]: the sub-interval of the underlying array data corresponding to the tree node being calculated.
|
159
|
+
*/
|
160
|
+
static void build(segment_tree_data *segment_tree, size_t tree_idx, size_t tree_l, size_t tree_r) {
|
161
|
+
VALUE *tree = segment_tree->tree;
|
162
|
+
|
163
|
+
if (tree_l == tree_r) {
|
164
|
+
// Base case: the node corresponds to a subarray of length 1.
|
165
|
+
segment_tree->tree[tree_idx] = single_cell_val_at(segment_tree, tree_l);
|
166
|
+
} else {
|
167
|
+
// Build to two child nodes, and then combine their values for this node.
|
168
|
+
size_t mid = midpoint(tree_l, tree_r);
|
169
|
+
size_t left = left_child(tree_idx);
|
170
|
+
size_t right = right_child(tree_idx);
|
171
|
+
|
172
|
+
build(segment_tree, left, tree_l, mid);
|
173
|
+
build(segment_tree, right, mid + 1, tree_r);
|
174
|
+
|
175
|
+
VALUE comb_val = combined_val(segment_tree, tree[left], tree[right]);
|
176
|
+
segment_tree->tree[tree_idx] = comb_val;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
|
180
|
+
/*
|
181
|
+
* Set up the internals with the arguments we get from #initialize.
|
182
|
+
*
|
183
|
+
* - combine: must be callable
|
184
|
+
* - single_cell_array_val: must be callable
|
185
|
+
* - size: must be a positive integer
|
186
|
+
* - identity: we don't care what it is.
|
187
|
+
* - maybe we should check at least that it is not 0. But Qnil is fine.
|
188
|
+
*/
|
189
|
+
static void setup(segment_tree_data* seg_tree, VALUE combine, VALUE single_cell_array_val, VALUE size, VALUE identity) {
|
190
|
+
VALUE idCall = rb_intern("call");
|
191
|
+
|
192
|
+
if (!rb_obj_respond_to(combine, idCall, TRUE)) {
|
193
|
+
rb_raise(rb_eArgError, "wrong type argument %"PRIsVALUE" (should be callable)", rb_obj_class(combine));
|
194
|
+
}
|
195
|
+
|
196
|
+
if (!rb_obj_respond_to(single_cell_array_val, idCall, TRUE)) {
|
197
|
+
rb_raise(rb_eArgError, "wrong type argument %"PRIsVALUE" (should be callable)", rb_obj_class(single_cell_array_val));
|
198
|
+
}
|
199
|
+
|
200
|
+
seg_tree->combine_lambda = combine;
|
201
|
+
seg_tree->single_cell_array_val_lambda = single_cell_array_val;
|
202
|
+
seg_tree->identity = identity;
|
203
|
+
seg_tree->size = checked_nonneg_fixnum(size);
|
204
|
+
|
205
|
+
if (seg_tree->size == 0) {
|
206
|
+
rb_raise(rb_eArgError, "size must be positive.");
|
207
|
+
}
|
208
|
+
|
209
|
+
// Implicit binary tree with n leaves and straightforward left() and right() may use indices up to 4n. But see here for a way to
|
210
|
+
// reduce the requirement to 2n: https://cp-algorithms.com/data_structures/segment_tree.html#memory-efficient-implementation
|
211
|
+
size_t tree_size = 1 + 4 * seg_tree->size;
|
212
|
+
seg_tree->tree = calloc(tree_size, sizeof(VALUE));
|
213
|
+
seg_tree->tree_alloc_size = tree_size;
|
214
|
+
|
215
|
+
build(seg_tree, TREE_ROOT, 0, seg_tree->size - 1);
|
216
|
+
}
|
217
|
+
|
218
|
+
|
219
|
+
/*
|
220
|
+
* Determine the value for the subarray A(left, right).
|
221
|
+
*
|
222
|
+
* - tree_idx: the index in the array of the node we are currently visiting
|
223
|
+
* - tree_l..tree_r: the subarray handled by the current node.
|
224
|
+
* - left..right: the subarray whose value we are currently looking for.
|
225
|
+
*
|
226
|
+
* As an invariant we have left..right \subset tree_l..tree_r.
|
227
|
+
*
|
228
|
+
* We start out with
|
229
|
+
* - tree_idx = TREE_ROOT
|
230
|
+
* - tree_l..tree_r = 0..(size - 1), and
|
231
|
+
* - left..right given by the client code's query
|
232
|
+
*
|
233
|
+
* If [tree_l, tree_r] = [left, right] then the current node gives the desired answer. Otherwise we decend the tree with one or two
|
234
|
+
* recursive calls.
|
235
|
+
*
|
236
|
+
* If left..right is contained the the bottom or top half of tree_l..tree_r we decend to the corresponding child with one recursive
|
237
|
+
* call. Otherwise we split left..right at the midpoint of tree_l..tree_r, make two recursive calls, and then combine the results.
|
238
|
+
*/
|
239
|
+
static VALUE determine_val(segment_tree_data* seg_tree, size_t tree_idx, size_t left, size_t right, size_t tree_l, size_t tree_r) {
|
240
|
+
// Does the current tree node exactly serve up the interval we're interested in?
|
241
|
+
if (left == tree_l && right == tree_r) {
|
242
|
+
return seg_tree->tree[tree_idx];
|
243
|
+
}
|
244
|
+
|
245
|
+
// We need to go further down the tree */
|
246
|
+
size_t mid = midpoint(tree_l, tree_r);
|
247
|
+
if (mid >= right) {
|
248
|
+
// Our interval is contained by the left child's interval
|
249
|
+
return determine_val(seg_tree, left_child(tree_idx), left, right, tree_l, mid);
|
250
|
+
} else if (mid + 1 <= left) {
|
251
|
+
// Our interval is contained by the right child's interval
|
252
|
+
return determine_val(seg_tree, right_child(tree_idx), left, right, mid + 1, tree_r);
|
253
|
+
} else {
|
254
|
+
// Our interval is split between the two, so we need to combine the results from the children.
|
255
|
+
return rb_funcall(
|
256
|
+
seg_tree->combine_lambda, rb_intern("call"), 2,
|
257
|
+
determine_val(seg_tree, left_child(tree_idx), left, mid, tree_l, mid),
|
258
|
+
determine_val(seg_tree, right_child(tree_idx), mid + 1, right, mid + 1, tree_r)
|
259
|
+
);
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
/*
|
264
|
+
* Update the structure to reflect the change in the underlying array at index idx.
|
265
|
+
*
|
266
|
+
* - idx: the index at which the underlying array data has changed.
|
267
|
+
* - tree_id: the index in the internal datastructure of the node we are currently visiting.
|
268
|
+
* - tree_l..tree_r: the range handled by the current node
|
269
|
+
*/
|
270
|
+
static void update_val_at(segment_tree_data *seg_tree, size_t idx, size_t tree_idx, size_t tree_l, size_t tree_r) {
|
271
|
+
if (tree_l == tree_r) {
|
272
|
+
// We have found the base case of our update
|
273
|
+
if (tree_l != idx) {
|
274
|
+
rb_raise(
|
275
|
+
eSharedInternalLogicError,
|
276
|
+
"tree_l == tree_r == %lu but they do not agree with the idx %lu holding the updated value",
|
277
|
+
tree_r, idx
|
278
|
+
);
|
279
|
+
}
|
280
|
+
seg_tree->tree[tree_idx] = single_cell_val_at(seg_tree, tree_l);
|
281
|
+
} else {
|
282
|
+
// Recursively update the appropriate subtree...
|
283
|
+
size_t mid = midpoint(tree_l, tree_r);
|
284
|
+
size_t left = left_child(tree_idx);
|
285
|
+
size_t right = right_child(tree_idx);
|
286
|
+
if (mid >= idx) {
|
287
|
+
update_val_at(seg_tree, idx, left, tree_l, mid);
|
288
|
+
} else {
|
289
|
+
update_val_at(seg_tree, idx, right, mid + 1, tree_r);
|
290
|
+
}
|
291
|
+
// ...and ourself to incorporate the change
|
292
|
+
seg_tree->tree[tree_idx] = combined_val(seg_tree, seg_tree->tree[left], seg_tree->tree[right]);
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
/*
|
297
|
+
* End C implementation of the Segment Tree API
|
298
|
+
************************************************************/
|
299
|
+
|
300
|
+
/**
|
301
|
+
* And now the wrappers around the C functionality.
|
302
|
+
*/
|
303
|
+
|
304
|
+
/*
|
305
|
+
* CSegmentTreeTemplate#c_initialize.
|
306
|
+
*
|
307
|
+
* (see CSegmentTreeTemplate#initialize).
|
308
|
+
*/
|
309
|
+
static VALUE segment_tree_init(VALUE self, VALUE combine, VALUE single_cell_array_val, VALUE size, VALUE identity) {
|
310
|
+
setup(unwrapped(self), combine, single_cell_array_val, size, identity);
|
311
|
+
return self;
|
312
|
+
}
|
313
|
+
|
314
|
+
/*
|
315
|
+
* (see SegmentTreeTemplate#query_on)
|
316
|
+
*/
|
317
|
+
static VALUE segment_tree_query_on(VALUE self, VALUE left, VALUE right) {
|
318
|
+
segment_tree_data* seg_tree = unwrapped(self);
|
319
|
+
size_t c_left = checked_nonneg_fixnum(left);
|
320
|
+
size_t c_right = checked_nonneg_fixnum(right);
|
321
|
+
|
322
|
+
if (c_right >= seg_tree->size) {
|
323
|
+
rb_raise(eSharedDataError, "Bad query interval %lu..%lu (size = %lu)", c_left, c_right, seg_tree->size);
|
324
|
+
}
|
325
|
+
|
326
|
+
if (left > right) {
|
327
|
+
// empty interval.
|
328
|
+
return seg_tree->identity;
|
329
|
+
}
|
330
|
+
|
331
|
+
return determine_val(seg_tree, TREE_ROOT, c_left, c_right, 0, seg_tree->size - 1);
|
332
|
+
}
|
333
|
+
|
334
|
+
/*
|
335
|
+
* (see SegmentTreeTemplate#update_at)
|
336
|
+
*/
|
337
|
+
static VALUE segment_tree_update_at(VALUE self, VALUE idx) {
|
338
|
+
segment_tree_data *seg_tree = unwrapped(self);
|
339
|
+
size_t c_idx = checked_nonneg_fixnum(idx);
|
340
|
+
|
341
|
+
if (c_idx >= seg_tree->size) {
|
342
|
+
rb_raise(eSharedDataError, "Cannot update value at index %lu, size = %lu", c_idx, seg_tree->size);
|
343
|
+
}
|
344
|
+
|
345
|
+
update_val_at(seg_tree, c_idx, TREE_ROOT, 0, seg_tree->size - 1);
|
346
|
+
|
347
|
+
return Qnil;
|
348
|
+
}
|
349
|
+
|
350
|
+
/*
|
351
|
+
* A generic Segment Tree template, written in C.
|
352
|
+
*
|
353
|
+
* (see SegmentTreeTemplate)
|
354
|
+
*/
|
355
|
+
void Init_c_segment_tree_template() {
|
356
|
+
VALUE mSegmentTree = rb_define_module_under(mDataStructuresRMolinari, "SegmentTree");
|
357
|
+
VALUE cSegmentTreeTemplate = rb_define_class_under(mSegmentTree, "CSegmentTreeTemplate", rb_cObject);
|
358
|
+
|
359
|
+
rb_define_alloc_func(cSegmentTreeTemplate, segment_tree_alloc);
|
360
|
+
rb_define_method(cSegmentTreeTemplate, "c_initialize", segment_tree_init, 4);
|
361
|
+
rb_define_method(cSegmentTreeTemplate, "query_on", segment_tree_query_on, 2);
|
362
|
+
rb_define_method(cSegmentTreeTemplate, "update_at", segment_tree_update_at, 1);
|
363
|
+
}
|
data/ext/shared.c
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#include "shared.h"
|
2
|
+
|
3
|
+
/*
|
4
|
+
* Arithmetic for in-array binary tree
|
5
|
+
*/
|
6
|
+
/*
 * The midpoint of left and right, rounded down: floor((left + right) / 2).
 *
 * Computed without forming left + right, which could wrap around for large arguments. The bits the two values share contribute in
 * full and the bits in which they differ contribute half, so the result equals (left + right) / 2 whenever that sum does not
 * overflow, whichever argument is larger.
 */
size_t midpoint(size_t left, size_t right) {
  return (left & right) + ((left ^ right) >> 1);
}
|
9
|
+
|
10
|
+
/* Index of the left child of node i in the 1-based in-array binary tree: 2i. */
size_t left_child(size_t i) {
  return 2 * i;
}
|
13
|
+
|
14
|
+
/* Index of the right child of node i in the 1-based in-array binary tree: 2i + 1. */
size_t right_child(size_t i) {
  return 2 * i + 1;
}
|
17
|
+
|
18
|
+
/*
|
19
|
+
* Check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
|
20
|
+
*/
|
21
|
+
unsigned long checked_nonneg_fixnum(VALUE val) {
|
22
|
+
Check_Type(val, T_FIXNUM);
|
23
|
+
long c_val = FIX2LONG(val);
|
24
|
+
|
25
|
+
if (c_val < 0) {
|
26
|
+
rb_raise(eSharedDataError, "Value must be non-negative");
|
27
|
+
}
|
28
|
+
|
29
|
+
return c_val;
|
30
|
+
}
|
31
|
+
|
32
|
+
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Algorithms that use the module's data structures but don't belong as a method on one of the data structures
|
2
2
|
module DataStructuresRMolinari::Algorithms
|
3
3
|
include Shared
|
4
4
|
|
@@ -11,12 +11,12 @@ module DataStructuresRMolinari::Algorithms
|
|
11
11
|
#
|
12
12
|
# A _maximal empty rectangle_ (MER) for P is an empty rectangle for P not properly contained in any other.
|
13
13
|
#
|
14
|
-
# We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top)
|
15
|
-
#
|
16
|
-
#
|
14
|
+
# We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top). The algorithm is due to De, M.,
|
15
|
+
# Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013), pp
|
16
|
+
# 310-327.
|
17
17
|
#
|
18
18
|
# It runs in O(m log n) time, where m is the number of MERs enumerated and n is the number of points in P. (Constructing the
|
19
|
-
# MaxPST
|
19
|
+
# MaxPST takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
|
20
20
|
#
|
21
21
|
# @param points [Array] an array of points in the x-y plane. Each must respond to +x+ and +y+.
|
22
22
|
def self.maximal_empty_rectangles(points)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'must_be'
|
2
|
+
|
3
|
+
require_relative 'shared'
|
4
|
+
require_relative 'c_segment_tree_template'
|
5
|
+
|
6
|
+
# The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
|
7
|
+
#
|
8
|
+
# See SegmentTreeTemplate for more information.
|
9
|
+
class DataStructuresRMolinari::CSegmentTreeTemplate
  # NOTE(review): the C extension defines CSegmentTreeTemplate (and its c_initialize method) under
  # DataStructuresRMolinari::SegmentTree, whereas this file opens DataStructuresRMolinari::CSegmentTreeTemplate. Confirm that this
  # is the intended class.

  # Accepts the constructor arguments as keywords and forwards them positionally to c_initialize.
  #
  # (see SegmentTreeTemplate::initialize)
  def initialize(combine:, single_cell_array_val:, size:, identity:)
    # having sorted out the keyword arguments, pass them more easily to the C layer.
    c_initialize(combine, single_cell_array_val, size, identity)
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require_relative 'shared'
|
2
|
+
|
3
|
+
# A namespace to hold the various bits and bobs related to the SegmentTree implementation
|
4
|
+
module DataStructuresRMolinari::SegmentTree
|
5
|
+
end
|
6
|
+
|
7
|
+
require_relative 'segment_tree_template' # Ruby implementation of the generic API
|
8
|
+
require_relative 'c_segment_tree_template' # C implementation of the generic API
|
9
|
+
|
10
|
+
# Segment Tree: various concrete implementations
|
11
|
+
#
|
12
|
+
# There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
|
13
|
+
# Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
|
14
|
+
# called an "interval tree."
|
15
|
+
#
|
16
|
+
# For more details (and some close-to-metal analysis of run time, especially for large datasets) see
|
17
|
+
# https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
|
18
|
+
# which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
|
19
|
+
# Ruby.
|
20
|
+
#
|
21
|
+
# Here we provide several concrete segment tree implementations built on top of the template (generic) versions. Each instance is
|
22
|
+
# backed either by the pure Ruby SegmentTreeTemplate or its C-based sibling CSegmentTreeTemplate
|
23
|
+
module DataStructuresRMolinari
|
24
|
+
module SegmentTree
|
25
|
+
# Build a Segment Tree over an array A(0...size) that answers the kind of question named by +operation+, backed by the template
# implementation written in +lang+.
#
# - @param data: the array A.
#   - It must respond to +#size+ and to +#[]+ with non-negative integer arguments.
# - @param operation: a supported "style" of Segment Tree
#   - for now, must be one of these (but you can write your own concrete version)
#     - +:max+: implementing +max_on(i, j)+, returning the maximum value in A(i..j)
#     - +:index_of_max+: implementing +index_of_max_val_on(i, j)+, returning an index corresponding to the maximum value in
#       A(i..j).
# - @param lang: the language in which the underlying "template" is written
#   - +:c+ or +:ruby+
#   - the C version will run faster but for now may be buggier and harder to debug
module_function def construct(data, operation, lang)
  operation.must_be_in [:max, :index_of_max]
  lang.must_be_in [:ruby, :c]

  tree_class =
    if operation == :max
      MaxValSegmentTree
    else
      IndexOfMaxValSegmentTree
    end

  backing_template =
    if lang == :ruby
      SegmentTreeTemplate
    else
      CSegmentTreeTemplate
    end

  tree_class.new(backing_template, data)
end
|
47
|
+
|
48
|
+
# A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
# in O(log n) time.
class MaxValSegmentTree
  extend Forwardable

  # Tell the tree that the value at idx has changed
  def_delegator :@structure, :update_at

  # @param template_klass the "template" class that provides the generic implementation of the Segment Tree functionality.
  # @param data an object that contains values at integer indices based at 0, via +data[i]+.
  #   - This will usually be an Array. It must be Enumerable and respond to +size+ and +[]+.
  def initialize(template_klass, data)
    data.must_be_a Enumerable

    larger_of = lambda { |a, b| [a, b].max }
    value_at = lambda { |i| data[i] }

    @structure = template_klass.new(
      combine: larger_of,
      single_cell_array_val: value_at,
      size: data.size,
      identity: -Shared::INFINITY
    )
  end

  # The maximum value in A(i..j).
  #
  # The arguments must be integers in 0...(A.size)
  # @return the largest value in A(i..j) or -Infinity if i > j.
  def max_on(i, j)
    @structure.query_on(i, j)
  end
end
|
78
|
+
|
79
|
+
# A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
# subinterval A(i..j)?" in O(log n) time.
class IndexOfMaxValSegmentTree
  extend Forwardable

  # Tell the tree that the value at idx has changed
  def_delegator :@structure, :update_at

  # Each tree node stores an [index, value] pair so that the index of the maximum can be reported.
  #
  # @param (see MaxValSegmentTree#initialize)
  def initialize(template_klass, data)
    data.must_be_a Enumerable

    # On a tie the first argument wins.
    pair_with_larger_value = lambda { |p1, p2| p1[1] >= p2[1] ? p1 : p2 }
    index_value_pair_at = lambda { |i| [i, data[i]] }

    @structure = template_klass.new(
      combine: pair_with_larger_value,
      single_cell_array_val: index_value_pair_at,
      size: data.size,
      identity: nil
    )
  end

  # The index of the maximum value in A(i..j)
  #
  # The arguments must be integers in 0...(A.size)
  # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
  #   - If there is more than one entry with that value, return one of the indices. There is no guarantee as to which one.
  #   - Return +nil+ if i > j
  def index_of_max_val_on(i, j)
    pair = @structure.query_on(i, j)
    pair&.first # discard the value part of the pair, which is only bookkeeping
  end
end
|
109
|
+
|
110
|
+
# The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
#
# See SegmentTreeTemplate for more information.
#
# Implementation note
#
# The functionality is entirely written in C. But we write the constructor in Ruby because keyword arguments are difficult to
# parse on the C side.
class CSegmentTreeTemplate
  # Accepts the constructor arguments as keywords and forwards them positionally to c_initialize.
  #
  # (see SegmentTreeTemplate::initialize)
  def initialize(combine:, single_cell_array_val:, size:, identity:)
    # having sorted out the keyword arguments, pass them more easily to the C layer.
    c_initialize(combine, single_cell_array_val, size, identity)
  end
end
|
125
|
+
end
|
126
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative 'shared'
|
2
2
|
|
3
|
-
#
|
4
|
-
# max) on a arbitrary subarray of a given array.
|
3
|
+
# A generic implementation of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the
|
4
|
+
# sum (or min or max) on an arbitrary subarray of a given array.
|
5
5
|
#
|
6
6
|
# There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
|
7
7
|
# Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
|
@@ -16,7 +16,8 @@ require_relative 'shared'
|
|
16
16
|
# initializer and the definitions of concrete realisations like MaxValSegmentTree.
|
17
17
|
#
|
18
18
|
# We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
|
19
|
-
class DataStructuresRMolinari::SegmentTreeTemplate
|
19
|
+
class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
|
20
|
+
include Shared
|
20
21
|
include Shared::BinaryTreeArithmetic
|
21
22
|
|
22
23
|
# Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
|
@@ -47,27 +48,29 @@ class DataStructuresRMolinari::SegmentTreeTemplate
|
|
47
48
|
end
|
48
49
|
|
49
50
|
# The desired value (max, sum, etc.) on the subinterval left..right.
|
51
|
+
#
|
50
52
|
# @param left the left end of the subinterval.
|
51
53
|
# @param right the right end (inclusive) of the subinterval.
|
52
54
|
#
|
55
|
+
# It must be that left..right is contained in 0...size.
|
56
|
+
#
|
53
57
|
# The type of the return value depends on the concrete instance of the segment tree. We return the _identity_ element provided at
|
54
58
|
# construction time if the interval is empty.
|
55
59
|
def query_on(left, right)
|
56
|
-
raise DataError, "Bad query interval #{left}..#{right}"
|
60
|
+
raise DataError, "Bad query interval #{left}..#{right} (size = #{@size})" unless (0...@size).cover?(left..right)
|
57
61
|
|
58
62
|
return @identity if left > right # empty interval
|
59
63
|
|
60
64
|
determine_val(root, left, right, 0, @size - 1)
|
61
65
|
end
|
62
66
|
|
63
|
-
#
|
67
|
+
# Reflect the fact that the underlying array has been updated at the given idx
|
64
68
|
#
|
65
69
|
# @param idx an index in the underlying data array.
|
66
70
|
#
|
67
71
|
# Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
|
68
72
|
# construction.
|
69
73
|
def update_at(idx)
|
70
|
-
raise DataError, 'Cannot update an index outside the initial range of the underlying data' unless (0...@size).cover?(idx)
|
71
74
|
|
72
75
|
update_val_at(idx, root, 0, @size - 1)
|
73
76
|
end
|
@@ -105,9 +108,9 @@ class DataStructuresRMolinari::SegmentTreeTemplate
|
|
105
108
|
left = left(tree_idx)
|
106
109
|
right = right(tree_idx)
|
107
110
|
if mid >= idx
|
108
|
-
update_val_at(idx, left
|
111
|
+
update_val_at(idx, left, tree_l, mid)
|
109
112
|
else
|
110
|
-
update_val_at(idx, right
|
113
|
+
update_val_at(idx, right, mid + 1, tree_r)
|
111
114
|
end
|
112
115
|
@tree[tree_idx] = @combine.call(@tree[left], @tree[right])
|
113
116
|
end
|