data_structures_rmolinari 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +119 -33
- data/Rakefile +6 -4
- data/ext/c_disjoint_union/disjoint_union.c +75 -129
- data/ext/c_disjoint_union/extconf.rb +7 -2
- data/ext/c_segment_tree_template/extconf.rb +17 -0
- data/ext/c_segment_tree_template/segment_tree_template.c +363 -0
- data/ext/shared.c +32 -0
- data/lib/data_structures_rmolinari/algorithms.rb +5 -5
- data/lib/data_structures_rmolinari/c_segment_tree_template_impl.rb +15 -0
- data/lib/data_structures_rmolinari/disjoint_union.rb +2 -0
- data/lib/data_structures_rmolinari/segment_tree.rb +126 -0
- data/lib/data_structures_rmolinari/segment_tree_template.rb +11 -8
- data/lib/data_structures_rmolinari.rb +5 -62
- metadata +8 -2
@@ -0,0 +1,363 @@
|
|
1
|
+
/*
|
2
|
+
* This is a C implementation of a Segment Tree data structure.
|
3
|
+
*
|
4
|
+
* More specifically, it is the C version of the SegmentTreeTemplate Ruby class, for which see elsewhere in the repo.
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include "ruby.h"
|
8
|
+
#include "shared.h"
|
9
|
+
|
10
|
+
/*
 * Helpers for invoking the two client-supplied callables held in a segment_tree_data.
 *
 * - single_cell_val_at: calls single_cell_array_val_lambda with the (Fixnum-converted) index idx.
 * - combined_val: calls combine_lambda with the two values v1 and v2.
 *
 * Every macro argument is parenthesized so that an arbitrary pointer expression can be passed as seg_tree.
 */
#define single_cell_val_at(seg_tree, idx) rb_funcall((seg_tree)->single_cell_array_val_lambda, rb_intern("call"), 1, LONG2FIX(idx))
#define combined_val(seg_tree, v1, v2) rb_funcall((seg_tree)->combine_lambda, rb_intern("call"), 2, (v1), (v2))
|
12
|
+
|
13
|
+
/**
|
14
|
+
* The C implementation of a generic Segment Tree
|
15
|
+
*/
|
16
|
+
|
17
|
+
typedef struct {
|
18
|
+
VALUE *tree; // The 1-based implicit binary tree in which the data structure lives
|
19
|
+
VALUE single_cell_array_val_lambda;
|
20
|
+
VALUE combine_lambda;
|
21
|
+
VALUE identity;
|
22
|
+
size_t size; // the size of the underlying data array
|
23
|
+
size_t tree_alloc_size; // the size of the VALUE* tree array
|
24
|
+
} segment_tree_data;
|
25
|
+
|
26
|
+
/************************************************************
|
27
|
+
* Memory Management
|
28
|
+
*
|
29
|
+
*/
|
30
|
+
|
31
|
+
/*
|
32
|
+
* Create one (on the heap).
|
33
|
+
*/
|
34
|
+
static segment_tree_data *create_segment_tree() {
|
35
|
+
segment_tree_data *segment_tree = malloc(sizeof(segment_tree_data));
|
36
|
+
|
37
|
+
// Allocate the structures
|
38
|
+
segment_tree->tree = NULL; // we don't yet know how much space we need
|
39
|
+
|
40
|
+
segment_tree->single_cell_array_val_lambda = 0;
|
41
|
+
segment_tree->combine_lambda = 0;
|
42
|
+
segment_tree->size = 0; // we don't know the right value yet
|
43
|
+
|
44
|
+
return segment_tree;
|
45
|
+
}
|
46
|
+
|
47
|
+
/*
|
48
|
+
* Free the memory associated with a segment_tree.
|
49
|
+
*
|
50
|
+
* This will end up getting triggered by the Ruby garbage collector. Ruby learns about it via the segment_tree_type struct below.
|
51
|
+
*/
|
52
|
+
static void segment_tree_free(void *ptr) {
|
53
|
+
if (ptr) {
|
54
|
+
segment_tree_data *segment_tree = ptr;
|
55
|
+
xfree(segment_tree->tree);
|
56
|
+
xfree(segment_tree);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
/*
|
61
|
+
* How much memory (roughly) does a segment_tree_data instance consume?
|
62
|
+
*
|
63
|
+
* I guess the Ruby runtime can use this information when deciding how agressive to be during garbage collection and such.
|
64
|
+
*/
|
65
|
+
static size_t segment_tree_memsize(const void *ptr) {
|
66
|
+
if (ptr) {
|
67
|
+
const segment_tree_data *st = ptr;
|
68
|
+
|
69
|
+
// for the tree array plus the size of the segment_tree_data struct itself.
|
70
|
+
return sizeof( VALUE ) * st->tree_alloc_size * 4 + sizeof(segment_tree_data);
|
71
|
+
} else {
|
72
|
+
return 0;
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
/*
|
77
|
+
* Mark the Ruby objects we hold so that the Ruby garbage collector knows that they are still in use.
|
78
|
+
*/
|
79
|
+
static void segment_tree_mark(void *ptr) {
|
80
|
+
segment_tree_data *st = ptr;
|
81
|
+
|
82
|
+
rb_gc_mark(st->combine_lambda);
|
83
|
+
rb_gc_mark(st->single_cell_array_val_lambda);
|
84
|
+
rb_gc_mark(st->identity);
|
85
|
+
|
86
|
+
for (size_t i = 0; i < st->tree_alloc_size; i++) {
|
87
|
+
VALUE value = st->tree[i];
|
88
|
+
if (value) {
|
89
|
+
rb_gc_mark(value);
|
90
|
+
}
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
|
95
|
+
/*
|
96
|
+
* A configuration struct that tells the Ruby runtime how to deal with a segment_tree_data object.
|
97
|
+
*
|
98
|
+
* https://docs.ruby-lang.org/en/master/extension_rdoc.html#label-Encapsulate+C+data+into+a+Ruby+object
|
99
|
+
*/
|
100
|
+
static const rb_data_type_t segment_tree_type = {
|
101
|
+
.wrap_struct_name = "segment_tree_template",
|
102
|
+
{ // help for the Ruby garbage collector
|
103
|
+
.dmark = segment_tree_mark, // dmark, for marking other Ruby objects.
|
104
|
+
.dfree = segment_tree_free, // how to free the memory associated with an object
|
105
|
+
.dsize = segment_tree_memsize, // roughly how much space does the object consume?
|
106
|
+
},
|
107
|
+
.data = NULL, // a data field we could use for something here if we wanted. Ruby ignores it
|
108
|
+
.flags = 0 // GC-related flag values.
|
109
|
+
};
|
110
|
+
|
111
|
+
/*
|
112
|
+
* End memory management functions.
|
113
|
+
************************************************************/
|
114
|
+
|
115
|
+
|
116
|
+
/************************************************************
|
117
|
+
* Wrapping and unwrapping the C struct and other things.
|
118
|
+
*
|
119
|
+
*/
|
120
|
+
|
121
|
+
/*
|
122
|
+
* Unwrap a Ruby-side disjoint union object to get the C struct inside.
|
123
|
+
*
|
124
|
+
* TODO: consider a macro in a shared header
|
125
|
+
*/
|
126
|
+
static segment_tree_data *unwrapped(VALUE self) {
|
127
|
+
segment_tree_data *segment_tree;
|
128
|
+
TypedData_Get_Struct((self), segment_tree_data, &segment_tree_type, segment_tree);
|
129
|
+
return segment_tree;
|
130
|
+
}
|
131
|
+
|
132
|
+
/*
|
133
|
+
* Allocate a segment_tree_data struct and wrap it for the Ruby runtime.
|
134
|
+
*
|
135
|
+
* This is for CSegmentTreeTemplate.allocate on the Ruby side.
|
136
|
+
*/
|
137
|
+
static VALUE segment_tree_alloc(VALUE klass) {
|
138
|
+
// Get one on the heap
|
139
|
+
segment_tree_data *segment_tree = create_segment_tree();
|
140
|
+
// ...and wrap it into a Ruby object
|
141
|
+
return TypedData_Wrap_Struct(klass, &segment_tree_type, segment_tree);
|
142
|
+
}
|
143
|
+
|
144
|
+
/*
|
145
|
+
* End wrapping and unwrapping functions.
|
146
|
+
************************************************************/
|
147
|
+
|
148
|
+
/************************************************************
|
149
|
+
* The Segment Tree API on the C side.
|
150
|
+
*
|
151
|
+
* We wrap these in the Ruby-ready functions below
|
152
|
+
*/
|
153
|
+
|
154
|
+
/*
|
155
|
+
* Recursively build the internal tree data structure.
|
156
|
+
*
|
157
|
+
* - tree_idx: the index into the tree array of the node being calculated
|
158
|
+
* - [tree_l, tree_r]: the sub-interval of the underlying array data corresponding to the tree node being calculated.
|
159
|
+
*/
|
160
|
+
static void build(segment_tree_data *segment_tree, size_t tree_idx, size_t tree_l, size_t tree_r) {
|
161
|
+
VALUE *tree = segment_tree->tree;
|
162
|
+
|
163
|
+
if (tree_l == tree_r) {
|
164
|
+
// Base case: the node corresponds to a subarray of length 1.
|
165
|
+
segment_tree->tree[tree_idx] = single_cell_val_at(segment_tree, tree_l);
|
166
|
+
} else {
|
167
|
+
// Build to two child nodes, and then combine their values for this node.
|
168
|
+
size_t mid = midpoint(tree_l, tree_r);
|
169
|
+
size_t left = left_child(tree_idx);
|
170
|
+
size_t right = right_child(tree_idx);
|
171
|
+
|
172
|
+
build(segment_tree, left, tree_l, mid);
|
173
|
+
build(segment_tree, right, mid + 1, tree_r);
|
174
|
+
|
175
|
+
VALUE comb_val = combined_val(segment_tree, tree[left], tree[right]);
|
176
|
+
segment_tree->tree[tree_idx] = comb_val;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
|
180
|
+
/*
|
181
|
+
* Set up the internals with the arguments we get from #initialize.
|
182
|
+
*
|
183
|
+
* - combine: must be callable
|
184
|
+
* - single_cell_array_val: must be callable
|
185
|
+
* - size: must be a positive integer
|
186
|
+
* - identity: we don't care what it is.
|
187
|
+
* - maybe we should check at least that it is not 0. But Qnil is fine.
|
188
|
+
*/
|
189
|
+
static void setup(segment_tree_data* seg_tree, VALUE combine, VALUE single_cell_array_val, VALUE size, VALUE identity) {
|
190
|
+
VALUE idCall = rb_intern("call");
|
191
|
+
|
192
|
+
if (!rb_obj_respond_to(combine, idCall, TRUE)) {
|
193
|
+
rb_raise(rb_eArgError, "wrong type argument %"PRIsVALUE" (should be callable)", rb_obj_class(combine));
|
194
|
+
}
|
195
|
+
|
196
|
+
if (!rb_obj_respond_to(single_cell_array_val, idCall, TRUE)) {
|
197
|
+
rb_raise(rb_eArgError, "wrong type argument %"PRIsVALUE" (should be callable)", rb_obj_class(single_cell_array_val));
|
198
|
+
}
|
199
|
+
|
200
|
+
seg_tree->combine_lambda = combine;
|
201
|
+
seg_tree->single_cell_array_val_lambda = single_cell_array_val;
|
202
|
+
seg_tree->identity = identity;
|
203
|
+
seg_tree->size = checked_nonneg_fixnum(size);
|
204
|
+
|
205
|
+
if (seg_tree->size == 0) {
|
206
|
+
rb_raise(rb_eArgError, "size must be positive.");
|
207
|
+
}
|
208
|
+
|
209
|
+
// Implicit binary tree with n leaves and straightforward left() and right() may use indices up to 4n. But see here for a way to
|
210
|
+
// reduce the requirement to 2n: https://cp-algorithms.com/data_structures/segment_tree.html#memory-efficient-implementation
|
211
|
+
size_t tree_size = 1 + 4 * seg_tree->size;
|
212
|
+
seg_tree->tree = calloc(tree_size, sizeof(VALUE));
|
213
|
+
seg_tree->tree_alloc_size = tree_size;
|
214
|
+
|
215
|
+
build(seg_tree, TREE_ROOT, 0, seg_tree->size - 1);
|
216
|
+
}
|
217
|
+
|
218
|
+
|
219
|
+
/*
|
220
|
+
* Determine the value for the subarray A(left, right).
|
221
|
+
*
|
222
|
+
* - tree_idx: the index in the array of the node we are currently visiting
|
223
|
+
* - tree_l..tree_r: the subarray handled by the current node.
|
224
|
+
* - left..right: the subarray whose value we are currently looking for.
|
225
|
+
*
|
226
|
+
* As an invariant we have left..right \subset tree_l..tree_r.
|
227
|
+
*
|
228
|
+
* We start out with
|
229
|
+
* - tree_idx = TREE_ROOT
|
230
|
+
* - tree_l..tree_r = 0..(size - 1), and
|
231
|
+
* - left..right given by the client code's query
|
232
|
+
*
|
233
|
+
* If [tree_l, tree_r] = [left, right] then the current node gives the desired answer. Otherwise we decend the tree with one or two
|
234
|
+
* recursive calls.
|
235
|
+
*
|
236
|
+
* If left..right is contained the the bottom or top half of tree_l..tree_r we decend to the corresponding child with one recursive
|
237
|
+
* call. Otherwise we split left..right at the midpoint of tree_l..tree_r, make two recursive calls, and then combine the results.
|
238
|
+
*/
|
239
|
+
static VALUE determine_val(segment_tree_data* seg_tree, size_t tree_idx, size_t left, size_t right, size_t tree_l, size_t tree_r) {
|
240
|
+
// Does the current tree node exactly serve up the interval we're interested in?
|
241
|
+
if (left == tree_l && right == tree_r) {
|
242
|
+
return seg_tree->tree[tree_idx];
|
243
|
+
}
|
244
|
+
|
245
|
+
// We need to go further down the tree */
|
246
|
+
size_t mid = midpoint(tree_l, tree_r);
|
247
|
+
if (mid >= right) {
|
248
|
+
// Our interval is contained by the left child's interval
|
249
|
+
return determine_val(seg_tree, left_child(tree_idx), left, right, tree_l, mid);
|
250
|
+
} else if (mid + 1 <= left) {
|
251
|
+
// Our interval is contained by the right child's interval
|
252
|
+
return determine_val(seg_tree, right_child(tree_idx), left, right, mid + 1, tree_r);
|
253
|
+
} else {
|
254
|
+
// Our interval is split between the two, so we need to combine the results from the children.
|
255
|
+
return rb_funcall(
|
256
|
+
seg_tree->combine_lambda, rb_intern("call"), 2,
|
257
|
+
determine_val(seg_tree, left_child(tree_idx), left, mid, tree_l, mid),
|
258
|
+
determine_val(seg_tree, right_child(tree_idx), mid + 1, right, mid + 1, tree_r)
|
259
|
+
);
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
/*
|
264
|
+
* Update the structure to reflect the change in the underlying array at index idx.
|
265
|
+
*
|
266
|
+
* - idx: the index at which the underlying array data has changed.
|
267
|
+
* - tree_id: the index in the internal datastructure of the node we are currently visiting.
|
268
|
+
* - tree_l..tree_r: the range handled by the current node
|
269
|
+
*/
|
270
|
+
static void update_val_at(segment_tree_data *seg_tree, size_t idx, size_t tree_idx, size_t tree_l, size_t tree_r) {
|
271
|
+
if (tree_l == tree_r) {
|
272
|
+
// We have found the base case of our update
|
273
|
+
if (tree_l != idx) {
|
274
|
+
rb_raise(
|
275
|
+
eSharedInternalLogicError,
|
276
|
+
"tree_l == tree_r == %lu but they do not agree with the idx %lu holding the updated value",
|
277
|
+
tree_r, idx
|
278
|
+
);
|
279
|
+
}
|
280
|
+
seg_tree->tree[tree_idx] = single_cell_val_at(seg_tree, tree_l);
|
281
|
+
} else {
|
282
|
+
// Recursively update the appropriate subtree...
|
283
|
+
size_t mid = midpoint(tree_l, tree_r);
|
284
|
+
size_t left = left_child(tree_idx);
|
285
|
+
size_t right = right_child(tree_idx);
|
286
|
+
if (mid >= idx) {
|
287
|
+
update_val_at(seg_tree, idx, left, tree_l, mid);
|
288
|
+
} else {
|
289
|
+
update_val_at(seg_tree, idx, right, mid + 1, tree_r);
|
290
|
+
}
|
291
|
+
// ...and ourself to incorporate the change
|
292
|
+
seg_tree->tree[tree_idx] = combined_val(seg_tree, seg_tree->tree[left], seg_tree->tree[right]);
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
/*
|
297
|
+
* End C implementation of the Segment Tree API
|
298
|
+
************************************************************/
|
299
|
+
|
300
|
+
/**
|
301
|
+
* And now the wrappers around the C functionality.
|
302
|
+
*/
|
303
|
+
|
304
|
+
/*
|
305
|
+
* CSegmentTreeTemplate#c_initialize.
|
306
|
+
*
|
307
|
+
* (see CSegmentTreeTemplate#initialize).
|
308
|
+
*/
|
309
|
+
static VALUE segment_tree_init(VALUE self, VALUE combine, VALUE single_cell_array_val, VALUE size, VALUE identity) {
|
310
|
+
setup(unwrapped(self), combine, single_cell_array_val, size, identity);
|
311
|
+
return self;
|
312
|
+
}
|
313
|
+
|
314
|
+
/*
|
315
|
+
* (see SegmentTreeTemplate#query_on)
|
316
|
+
*/
|
317
|
+
static VALUE segment_tree_query_on(VALUE self, VALUE left, VALUE right) {
|
318
|
+
segment_tree_data* seg_tree = unwrapped(self);
|
319
|
+
size_t c_left = checked_nonneg_fixnum(left);
|
320
|
+
size_t c_right = checked_nonneg_fixnum(right);
|
321
|
+
|
322
|
+
if (c_right >= seg_tree->size) {
|
323
|
+
rb_raise(eSharedDataError, "Bad query interval %lu..%lu (size = %lu)", c_left, c_right, seg_tree->size);
|
324
|
+
}
|
325
|
+
|
326
|
+
if (left > right) {
|
327
|
+
// empty interval.
|
328
|
+
return seg_tree->identity;
|
329
|
+
}
|
330
|
+
|
331
|
+
return determine_val(seg_tree, TREE_ROOT, c_left, c_right, 0, seg_tree->size - 1);
|
332
|
+
}
|
333
|
+
|
334
|
+
/*
|
335
|
+
* (see SegmentTreeTemplate#update_at)
|
336
|
+
*/
|
337
|
+
static VALUE segment_tree_update_at(VALUE self, VALUE idx) {
|
338
|
+
segment_tree_data *seg_tree = unwrapped(self);
|
339
|
+
size_t c_idx = checked_nonneg_fixnum(idx);
|
340
|
+
|
341
|
+
if (c_idx >= seg_tree->size) {
|
342
|
+
rb_raise(eSharedDataError, "Cannot update value at index %lu, size = %lu", c_idx, seg_tree->size);
|
343
|
+
}
|
344
|
+
|
345
|
+
update_val_at(seg_tree, c_idx, TREE_ROOT, 0, seg_tree->size - 1);
|
346
|
+
|
347
|
+
return Qnil;
|
348
|
+
}
|
349
|
+
|
350
|
+
/*
|
351
|
+
* A generic Segment Tree template, written in C.
|
352
|
+
*
|
353
|
+
* (see SegmentTreeTemplate)
|
354
|
+
*/
|
355
|
+
void Init_c_segment_tree_template() {
|
356
|
+
VALUE mSegmentTree = rb_define_module_under(mDataStructuresRMolinari, "SegmentTree");
|
357
|
+
VALUE cSegmentTreeTemplate = rb_define_class_under(mSegmentTree, "CSegmentTreeTemplate", rb_cObject);
|
358
|
+
|
359
|
+
rb_define_alloc_func(cSegmentTreeTemplate, segment_tree_alloc);
|
360
|
+
rb_define_method(cSegmentTreeTemplate, "c_initialize", segment_tree_init, 4);
|
361
|
+
rb_define_method(cSegmentTreeTemplate, "query_on", segment_tree_query_on, 2);
|
362
|
+
rb_define_method(cSegmentTreeTemplate, "update_at", segment_tree_update_at, 1);
|
363
|
+
}
|
data/ext/shared.c
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#include "shared.h"
|
2
|
+
|
3
|
+
/*
 * Arithmetic for in-array binary tree
 */

/*
 * The midpoint of left and right, rounded down: floor((left + right) / 2).
 *
 * Computed without ever forming left + right, so the result is correct even when that sum would not fit in a size_t. The last
 * term restores the unit that is lost when both arguments are odd and each is halved separately.
 */
size_t midpoint(size_t left, size_t right) {
  return (left / 2) + (right / 2) + (left & right & 1);
}

/*
 * The index of the left child of node i in a 1-based implicit binary tree.
 */
size_t left_child(size_t i) {
  return i << 1;
}

/*
 * The index of the right child of node i in a 1-based implicit binary tree.
 */
size_t right_child(size_t i) {
  return 1 + (i << 1);
}
|
17
|
+
|
18
|
+
/*
|
19
|
+
* Check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
|
20
|
+
*/
|
21
|
+
unsigned long checked_nonneg_fixnum(VALUE val) {
|
22
|
+
Check_Type(val, T_FIXNUM);
|
23
|
+
long c_val = FIX2LONG(val);
|
24
|
+
|
25
|
+
if (c_val < 0) {
|
26
|
+
rb_raise(eSharedDataError, "Value must be non-negative");
|
27
|
+
}
|
28
|
+
|
29
|
+
return c_val;
|
30
|
+
}
|
31
|
+
|
32
|
+
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Algorithms that use the module's data structures but don't belong as a method on one of the data structures
|
2
2
|
module DataStructuresRMolinari::Algorithms
|
3
3
|
include Shared
|
4
4
|
|
@@ -11,12 +11,12 @@ module DataStructuresRMolinari::Algorithms
|
|
11
11
|
#
|
12
12
|
# A _maximal empty rectangle_ (MER) for P is an empty rectangle for P not properly contained in any other.
|
13
13
|
#
|
14
|
-
# We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top)
|
15
|
-
#
|
16
|
-
#
|
14
|
+
# We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top). The algorithm is due to De, M.,
|
15
|
+
# Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013), pp
|
16
|
+
# 310-327.
|
17
17
|
#
|
18
18
|
# It runs in O(m log n) time, where m is the number of MERs enumerated and n is the number of points in P. (Constructing the
|
19
|
-
# MaxPST
|
19
|
+
# MaxPST takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
|
20
20
|
#
|
21
21
|
# @param points [Array] an array of points in the x-y plane. Each must respond to +x+ and +y+.
|
22
22
|
def self.maximal_empty_rectangles(points)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'must_be'
|
2
|
+
|
3
|
+
require_relative 'shared'
|
4
|
+
require_relative 'c_segment_tree_template'
|
5
|
+
|
6
|
+
# The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
#
# Only the constructor is written in Ruby here; it forwards to +c_initialize+.
#
# See SegmentTreeTemplate for more information.
#
# NOTE(review): the C extension appears to define CSegmentTreeTemplate under DataStructuresRMolinari::SegmentTree rather than
# directly under DataStructuresRMolinari - confirm that this file is still the intended home for the constructor.
class DataStructuresRMolinari::CSegmentTreeTemplate
  # (see SegmentTreeTemplate::initialize)
  def initialize(combine:, single_cell_array_val:, size:, identity:)
    # The keyword arguments have been sorted out on the Ruby side, so the C layer can take them positionally.
    c_initialize(combine, single_cell_array_val, size, identity)
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require_relative 'shared'
|
2
|
+
|
3
|
+
# The namespace in which everything related to the SegmentTree implementation lives. It is declared here, empty, before the
# files that add to it are loaded.
module DataStructuresRMolinari::SegmentTree; end
|
6
|
+
|
7
|
+
require_relative 'segment_tree_template' # Ruby implementation of the generic API
|
8
|
+
require_relative 'c_segment_tree_template' # C implementation of the generic API
|
9
|
+
|
10
|
+
# Segment Tree: various concrete implementations
|
11
|
+
#
|
12
|
+
# There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
|
13
|
+
# Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
|
14
|
+
# called an "interval tree."
|
15
|
+
#
|
16
|
+
# For more details (and some close-to-metal analysis of run time, especially for large datasets) see
|
17
|
+
# https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
|
18
|
+
# which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
|
19
|
+
# Ruby.
|
20
|
+
#
|
21
|
+
# Here we provide several concrete segment tree implementations built on top of the template (generic) versions. Each instance is
|
22
|
+
# backed either by the pure Ruby SegmentTreeTemplate or its C-based sibling CSegmentTreeTemplate
|
23
|
+
module DataStructuresRMolinari
|
24
|
+
module SegmentTree
|
25
|
+
# A convenience method to construct a Segment Tree that, for a given array A(0...size), answers questions of the kind given by
# operation, using the template written in lang
#
# - @param data: the array A.
#   - It must respond to +#size+ and to +#[]+ with non-negative integer arguments.
# - @param operation: a supported "style" of Segment Tree
#   - for now, must be one of these (but you can write your own concrete version)
#     - +:max+: implementing +max_on(i, j)+, returning the maximum value in A(i..j)
#     - +:index_of_max+: implementing +index_of_max_val_on(i, j)+, returning an index corresponding to the maximum value in
#       A(i..j).
# - @param lang: the language in which the underlying "template" is written
#   - +:c+ or +:ruby+
#   - the C version will run faster but for now may be buggier and harder to debug
module_function def construct(data, operation, lang)
  operation.must_be_in [:max, :index_of_max]
  lang.must_be_in [:ruby, :c]

  # Which concrete tree the caller is asking for...
  tree_class =
    if operation == :max
      MaxValSegmentTree
    else
      IndexOfMaxValSegmentTree
    end

  # ...and which generic implementation should sit underneath it.
  backing_template =
    if lang == :ruby
      SegmentTreeTemplate
    else
      CSegmentTreeTemplate
    end

  tree_class.new(backing_template, data)
end
|
47
|
+
|
48
|
+
# A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
# in O(log n) time.
class MaxValSegmentTree
  extend Forwardable

  # Tell the tree that the value at idx has changed
  def_delegator :@structure, :update_at

  # @param template_klass the "template" class that provides the generic implementation of the Segment Tree functionality.
  # @param data an object that contains values at integer indices based at 0, via +data[i]+.
  #   - This will usually be an Array.
  #   - NOTE(review): earlier documentation said it could also be a hash or a proc, but the check below insists on an Enumerable
  #     and +data.size+ is called, so a bare proc does not look usable here - confirm what is intended.
  def initialize(template_klass, data)
    data.must_be_a Enumerable

    larger_of = ->(x, y) { [x, y].max }
    value_at = ->(idx) { data[idx] }

    @structure = template_klass.new(
      combine: larger_of,
      single_cell_array_val: value_at,
      size: data.size,
      identity: -Shared::INFINITY
    )
  end

  # The maximum value in A(i..j).
  #
  # The arguments must be integers in 0...(A.size)
  # @return the largest value in A(i..j) or -Infinity if i > j.
  def max_on(i, j)
    @structure.query_on(i, j)
  end
end
|
78
|
+
|
79
|
+
# A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
# subinterval A(i..j)?" in O(log n) time.
#
# Internally each node holds a pair [index, value]; the value is carried along only so that pairs can be compared.
class IndexOfMaxValSegmentTree
  extend Forwardable

  # Tell the tree that the value at idx has changed
  def_delegator :@structure, :update_at

  # @param (see MaxValSegmentTree#initialize)
  def initialize(template_klass, data)
    data.must_be_a Enumerable

    # Of two [index, value] pairs, keep the one with the larger value; the first pair wins a tie.
    pair_with_larger_value = lambda do |first_pair, second_pair|
      if first_pair[1] >= second_pair[1]
        first_pair
      else
        second_pair
      end
    end
    pair_at = ->(idx) { [idx, data[idx]] }

    @structure = template_klass.new(
      combine: pair_with_larger_value,
      single_cell_array_val: pair_at,
      size: data.size,
      identity: nil
    )
  end

  # The index of the maximum value in A(i..j)
  #
  # The arguments must be integers in 0...(A.size)
  # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
  #   - If there is more than one entry with that value, return one of the indices. There is no guarantee as to which one.
  #   - Return +nil+ if i > j
  def index_of_max_val_on(i, j)
    winning_pair = @structure.query_on(i, j)

    # Discard the value part of the pair, which is only bookkeeping. An empty interval gives nil.
    winning_pair&.first
  end
end
|
109
|
+
|
110
|
+
# The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
#
# See SegmentTreeTemplate for more information.
#
# Implementation note
#
# The functionality is entirely written in C. But we write the constructor in Ruby because keyword arguments are difficult to
# parse on the C side.
class CSegmentTreeTemplate
  # (see SegmentTreeTemplate::initialize)
  def initialize(combine:, single_cell_array_val:, size:, identity:)
    # Ruby has already matched up the keyword arguments, so they can go to the C layer positionally.
    c_initialize(combine, single_cell_array_val, size, identity)
  end
end
|
125
|
+
end
|
126
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative 'shared'
|
2
2
|
|
3
|
-
#
|
4
|
-
# max) on a arbitrary subarray of a given array.
|
3
|
+
# A generic implementation of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the
|
4
|
+
# sum (or min or max) on an arbitrary subarray of a given array.
|
5
5
|
#
|
6
6
|
# There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
|
7
7
|
# Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
|
@@ -16,7 +16,8 @@ require_relative 'shared'
|
|
16
16
|
# initializer and the definitions of concrete realisations like MaxValSegmentTree.
|
17
17
|
#
|
18
18
|
# We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
|
19
|
-
class DataStructuresRMolinari::SegmentTreeTemplate
|
19
|
+
class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
|
20
|
+
include Shared
|
20
21
|
include Shared::BinaryTreeArithmetic
|
21
22
|
|
22
23
|
# Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
|
@@ -47,27 +48,29 @@ class DataStructuresRMolinari::SegmentTreeTemplate
|
|
47
48
|
end
|
48
49
|
|
49
50
|
# The desired value (max, sum, etc.) on the subinterval left..right.
|
51
|
+
#
|
50
52
|
# @param left the left end of the subinterval.
|
51
53
|
# @param right the right end (inclusive) of the subinterval.
|
52
54
|
#
|
55
|
+
# It must be that left..right is contained in 0...size.
|
56
|
+
#
|
53
57
|
# The type of the return value depends on the concrete instance of the segment tree. We return the _identity_ element provided at
|
54
58
|
# construction time if the interval is empty.
|
55
59
|
def query_on(left, right)
|
56
|
-
raise DataError, "Bad query interval #{left}..#{right}"
|
60
|
+
raise DataError, "Bad query interval #{left}..#{right} (size = #{@size})" unless (0...@size).cover?(left..right)
|
57
61
|
|
58
62
|
return @identity if left > right # empty interval
|
59
63
|
|
60
64
|
determine_val(root, left, right, 0, @size - 1)
|
61
65
|
end
|
62
66
|
|
63
|
-
#
|
67
|
+
# Reflect the fact that the underlying array has been updated at the given idx
|
64
68
|
#
|
65
69
|
# @param idx an index in the underlying data array.
|
66
70
|
#
|
67
71
|
# Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
|
68
72
|
# construction.
|
69
73
|
def update_at(idx)
|
70
|
-
raise DataError, 'Cannot update an index outside the initial range of the underlying data' unless (0...@size).cover?(idx)
|
71
74
|
|
72
75
|
update_val_at(idx, root, 0, @size - 1)
|
73
76
|
end
|
@@ -105,9 +108,9 @@ class DataStructuresRMolinari::SegmentTreeTemplate
|
|
105
108
|
left = left(tree_idx)
|
106
109
|
right = right(tree_idx)
|
107
110
|
if mid >= idx
|
108
|
-
update_val_at(idx, left
|
111
|
+
update_val_at(idx, left, tree_l, mid)
|
109
112
|
else
|
110
|
-
update_val_at(idx, right
|
113
|
+
update_val_at(idx, right, mid + 1, tree_r)
|
111
114
|
end
|
112
115
|
@tree[tree_idx] = @combine.call(@tree[left], @tree[right])
|
113
116
|
end
|