d_heap 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +63 -4
- data/ext/d_heap/d_heap.c +202 -91
- data/ext/d_heap/d_heap.h +56 -15
- data/lib/d_heap.rb +2 -30
- data/lib/d_heap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8c06927edbc903bc67c3f52bf6d7027a1c9b04d7860acc712a29b4298fd7adf
|
4
|
+
data.tar.gz: e347372e483f9d2b46deb5ad78cc6eb4ab9f28a427853e01282f035abdf475b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 124b615e62c6f6e58b3f8ff55bd7471f99960eab5e937dc40354f067112750fbebdcb0540585ec615d1d713f005fc40d2840a9b6ff4c9b6e6f62dcf374f56d9c
|
7
|
+
data.tar.gz: 391af9443b13e0902a813cfb9712fdf98ea414d27d9406348ef25aa5ec03a47488b7c2bbbf066bbbfe50e69f04fec95413eb705c5c45c5185172c5b63777c059
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -94,10 +94,69 @@ Read the `rdoc` for more detailed documentation and examples.
|
|
94
94
|
|
95
95
|
_TODO: put benchmarks here._
|
96
96
|
|
97
|
+
## Analysis
|
98
|
+
|
99
|
+
### Time complexity
|
100
|
+
|
101
|
+
Both sift operations can perform (log[d] n = log n / log d) swaps.
|
102
|
+
Swap up performs only a single comparison per swap: O(1).
|
103
|
+
Swap down performs as many as d comparisons per swap: O(d).
|
104
|
+
|
105
|
+
Inserting an item is O(log n / log d).
|
106
|
+
Deleting the root is O(d log n / log d).
|
107
|
+
|
108
|
+
Assuming every inserted item is eventually deleted from the root, d=4 requires
|
109
|
+
the fewest comparisons for combined insert and delete:
|
110
|
+
* (1 + 2) / ln 2 = 4.328085
|
111
|
+
* (1 + 3) / ln 3 = 3.640957
|
112
|
+
* (1 + 4) / ln 4 = 3.606738
|
113
|
+
* (1 + 5) / ln 5 = 3.728010
|
114
|
+
* (1 + 6) / ln 6 = 3.906774
|
115
|
+
* etc...
|
116
|
+
|
117
|
+
Leaf nodes require no comparisons to sift down, and higher values for d have
|
118
|
+
higher percentage of leaf nodes:
|
119
|
+
* d=2 has ~50% leaves,
|
120
|
+
* d=3 has ~67% leaves,
|
121
|
+
* d=4 has ~75% leaves,
|
122
|
+
* and so on...
|
123
|
+
|
124
|
+
See https://en.wikipedia.org/wiki/D-ary_heap#Analysis for deeper analysis.
|
125
|
+
|
126
|
+
### Space complexity
|
127
|
+
|
128
|
+
Because the heap is a complete d-ary tree, space usage is linear, regardless
|
129
|
+
of d. However higher d values may provide better cache locality.
|
130
|
+
|
131
|
+
We can run comparisons much much faster for Numeric or String objects than for
|
132
|
+
ruby objects which delegate comparison to internal Numeric or String objects.
|
133
|
+
And it is often advantageous to use extrinsic scores for uncomparable items.
|
134
|
+
For this, our internal array uses twice as many entries (one for score and one
|
135
|
+
for value) as it would if it only supported intrinsic comparison or used an
|
136
|
+
un-memoized "sort_by" proc.
|
137
|
+
|
138
|
+
### Timers
|
139
|
+
|
140
|
+
Additionally, when used to sort timers, we can reasonably assume that:
|
141
|
+
* New timers usually sort after most existing timers.
|
142
|
+
* Most timers will be canceled before executing.
|
143
|
+
* Canceled timers usually sort after most existing timers.
|
144
|
+
|
145
|
+
So, if we are able to delete an item without searching for it, by keeping a map
|
146
|
+
of positions within the heap, most timers can be inserted and deleted in O(1)
|
147
|
+
time. Canceling a non-leaf timer can be further optimized by marking it as
|
148
|
+
canceled without immediately removing it from the heap. If the timer is
|
149
|
+
rescheduled before we garbage collect, adjusting its position will usually be
|
150
|
+
faster than a delete and re-insert.
|
151
|
+
|
97
152
|
## Alternative data structures
|
98
153
|
|
99
154
|
Depending on what you're doing, maintaining a sorted `Array` using
|
100
|
-
`#bsearch_index` and `#insert` might be faster!
|
155
|
+
`#bsearch_index` and `#insert` might be faster! Although it is technically
|
156
|
+
O(n) for insertions, the implementations for `memcpy` or `memmove` can be *very*
|
157
|
+
fast on modern architectures. Also, it can be faster than O(n) on average, if
|
158
|
+
insertions are usually near the end of the array. You should run benchmarks
|
159
|
+
with your expected scenarios to determine which is right.
|
101
160
|
|
102
161
|
If it is important to be able to quickly enumerate the set or find the ranking
|
103
162
|
of values in it, then you probably want to use a self-balancing binary search
|
@@ -105,9 +164,9 @@ tree (e.g. a red-black tree) or a skip-list.
|
|
105
164
|
|
106
165
|
A Hashed Timing Wheel or Hierarchical Timing Wheels (or some variant in that
|
107
166
|
family of data structures) can be constructed to have effectively O(1) running
|
108
|
-
time in most cases. However, the implementation for that data structure is
|
109
|
-
|
110
|
-
|
167
|
+
time in most cases. However, the implementation for that data structure is more
|
168
|
+
complex than a heap. If a 4-ary heap is good enough for Go's timers, it should
|
169
|
+
be suitable for many use cases.
|
111
170
|
|
112
171
|
## Development
|
113
172
|
|
data/ext/d_heap/d_heap.c
CHANGED
@@ -3,6 +3,26 @@
|
|
3
3
|
ID id_ivar_a;
|
4
4
|
ID id_ivar_d;
|
5
5
|
|
6
|
+
#define DHEAP_GET_A(self) rb_ivar_get(self, id_ivar_a)
|
7
|
+
#define DHEAP_GET_D(self) rb_ivar_get(self, id_ivar_d)
|
8
|
+
|
9
|
+
#define DHEAP_SIZE(ary) (RARRAY_LEN(ary) / 2)
|
10
|
+
#define DHEAP_LAST_IDX(ary) (DHEAP_SIZE(ary) - 1)
|
11
|
+
#define DHEAP_VALUE(ary, idx) rb_ary_entry(ary, idx * 2 + 1)
|
12
|
+
#define DHEAP_SCORE(ary, idx) rb_ary_entry(ary, idx * 2)
|
13
|
+
#define DHEAP_ASSIGN(ary, idx, scr, val) \
|
14
|
+
rb_ary_store(ary, idx * 2, scr); \
|
15
|
+
rb_ary_store(ary, idx * 2 + 1, val);
|
16
|
+
#define DHEAP_APPEND(ary, scr, val) \
|
17
|
+
rb_ary_push(ary, scr); \
|
18
|
+
rb_ary_push(ary, val);
|
19
|
+
#define DHEAP_DROP_LAST(ary) ( \
|
20
|
+
rb_ary_pop(ary), \
|
21
|
+
rb_ary_pop(ary) \
|
22
|
+
) // score, value
|
23
|
+
#define IDX_PARENT(idx) ((idx - 1) / d)
|
24
|
+
#define IDX_CHILD0(idx) ((idx * d) + 1)
|
25
|
+
|
6
26
|
#define DHEAP_Check_d_size(d) \
|
7
27
|
if (d < 2) { \
|
8
28
|
rb_raise(rb_eIndexError, "DHeap d=%d is too small", d); \
|
@@ -11,86 +31,95 @@ ID id_ivar_d;
|
|
11
31
|
rb_raise(rb_eIndexError, "DHeap d=%d is too large", d); \
|
12
32
|
}
|
13
33
|
|
14
|
-
#define DHEAP_Check_Sift_Idx(
|
15
|
-
if (
|
16
|
-
rb_raise(rb_eIndexError, "
|
34
|
+
#define DHEAP_Check_Sift_Idx(sift_index, last_index) \
|
35
|
+
if (sift_index < 0) { \
|
36
|
+
rb_raise(rb_eIndexError, "sift_index %ld too small", sift_index); \
|
17
37
|
} \
|
18
|
-
else if (
|
19
|
-
rb_raise(rb_eIndexError, "
|
38
|
+
else if (last_index < sift_index) { \
|
39
|
+
rb_raise(rb_eIndexError, "sift_index %ld too large", sift_index); \
|
20
40
|
}
|
21
41
|
|
22
|
-
#define
|
23
|
-
Check_Type(dval, T_FIXNUM); \
|
24
|
-
int d = FIX2INT(dval); \
|
25
|
-
long sift_idx = NUM2LONG(idxval);
|
26
|
-
|
27
|
-
#define DHEAP_Check_Sift_Args(heap_array, d, sift_idx) \
|
42
|
+
#define DHEAP_Check_Sift_Args(heap_array, d, sift_index) \
|
28
43
|
DHEAP_Check_d_size(d); \
|
29
44
|
Check_Type(heap_array, T_ARRAY); \
|
30
|
-
long
|
31
|
-
DHEAP_Check_Sift_Idx(
|
45
|
+
long last_index = DHEAP_LAST_IDX(heap_array); \
|
46
|
+
DHEAP_Check_Sift_Idx(sift_index, last_index); \
|
32
47
|
\
|
33
|
-
VALUE
|
48
|
+
VALUE sift_value = DHEAP_VALUE(heap_array, sift_index); \
|
49
|
+
VALUE sift_score = DHEAP_SCORE(heap_array, sift_index);
|
34
50
|
|
35
51
|
VALUE
|
36
|
-
dheap_ary_sift_up(VALUE heap_array, int d, long
|
37
|
-
DHEAP_Check_Sift_Args(heap_array, d,
|
52
|
+
dheap_ary_sift_up(VALUE heap_array, int d, long sift_index) {
|
53
|
+
DHEAP_Check_Sift_Args(heap_array, d, sift_index);
|
54
|
+
struct cmp_opt_data cmp_opt = { 0, 0 };
|
38
55
|
// sift it up to where it belongs
|
39
|
-
for (long
|
40
|
-
|
41
|
-
|
56
|
+
for (long parent_index; 0 < sift_index; sift_index = parent_index) {
|
57
|
+
// puts(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
|
58
|
+
parent_index = IDX_PARENT(sift_index);
|
59
|
+
VALUE parent_score = DHEAP_SCORE(heap_array, parent_index);
|
42
60
|
|
43
61
|
// parent is smaller: heap is restored
|
44
|
-
if (CMP_LTE(
|
62
|
+
if (CMP_LTE(parent_score, sift_score, cmp_opt)) break;
|
45
63
|
|
46
64
|
// parent is larger: swap and continue sifting up
|
47
|
-
|
48
|
-
|
65
|
+
VALUE parent_value = DHEAP_VALUE(heap_array, parent_index);
|
66
|
+
DHEAP_ASSIGN(heap_array, sift_index, parent_score, parent_value);
|
67
|
+
DHEAP_ASSIGN(heap_array, parent_index, sift_score, sift_value);
|
49
68
|
}
|
50
|
-
|
69
|
+
// puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
|
70
|
+
return LONG2NUM(sift_index);
|
51
71
|
}
|
52
72
|
|
53
|
-
|
54
73
|
VALUE
|
55
|
-
dheap_ary_sift_down(VALUE heap_array, int d, long
|
56
|
-
DHEAP_Check_Sift_Args(heap_array, d,
|
74
|
+
dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
|
75
|
+
DHEAP_Check_Sift_Args(heap_array, d, sift_index);
|
76
|
+
struct cmp_opt_data cmp_opt = { 0, 0 };
|
77
|
+
|
57
78
|
// iteratively sift it down to where it belongs
|
58
|
-
for (long
|
79
|
+
for (long child_index; sift_index < last_index; sift_index = child_index) {
|
80
|
+
// puts(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
|
59
81
|
// find first child index, and break if we've reached the last layer
|
60
|
-
long child_idx0 =
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
// find
|
65
|
-
|
66
|
-
|
67
|
-
for (int i = 1; i < d; i
|
68
|
-
long
|
69
|
-
if (
|
70
|
-
|
71
|
-
VALUE
|
72
|
-
|
73
|
-
if (CMP_LT(
|
74
|
-
|
75
|
-
|
82
|
+
long child_idx0 = child_index = IDX_CHILD0(sift_index);
|
83
|
+
if (last_index < child_idx0) break;
|
84
|
+
|
85
|
+
// find the min child (and its child_index)
|
86
|
+
// requires "d" comparisons to find min child and compare to sift_score
|
87
|
+
VALUE child_score = DHEAP_SCORE(heap_array, child_idx0);
|
88
|
+
child_index = child_idx0;
|
89
|
+
for (int i = 1; i < d; ++i) {
|
90
|
+
long sibling_index = child_idx0 + i;
|
91
|
+
if (last_index < sibling_index) break;
|
92
|
+
|
93
|
+
VALUE sibling_score = DHEAP_SCORE(heap_array, sibling_index);
|
94
|
+
|
95
|
+
if (CMP_LT(sibling_score, child_score, cmp_opt)) {
|
96
|
+
child_score = sibling_score;
|
97
|
+
child_index = sibling_index;
|
76
98
|
}
|
77
99
|
}
|
78
100
|
|
79
101
|
// child is larger: heap is restored
|
80
|
-
if (
|
102
|
+
if (CMP_LTE(sift_score, child_score, cmp_opt)) break;
|
81
103
|
|
82
104
|
// child is smaller: swap and continue sifting down
|
83
|
-
|
84
|
-
|
105
|
+
VALUE child_value = DHEAP_VALUE(heap_array, child_index);
|
106
|
+
DHEAP_ASSIGN(heap_array, sift_index, child_score, child_value);
|
107
|
+
DHEAP_ASSIGN(heap_array, child_index, sift_score, sift_value);
|
85
108
|
}
|
86
|
-
|
109
|
+
// puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
|
110
|
+
return LONG2NUM(sift_index);
|
87
111
|
}
|
88
112
|
|
113
|
+
#define DHEAP_Load_Sift_Vals(heap_array, dval, idxval) \
|
114
|
+
Check_Type(dval, T_FIXNUM); \
|
115
|
+
int d = FIX2INT(dval); \
|
116
|
+
long sift_index = NUM2LONG(idxval);
|
117
|
+
|
89
118
|
/*
|
90
119
|
* call-seq:
|
91
|
-
* DHeap.array_sift_up(heap_array, d,
|
120
|
+
* DHeap.array_sift_up(heap_array, d, sift_index)
|
92
121
|
*
|
93
|
-
* Treats +heap_array+ as a +d+-ary heap and sifts up from +
|
122
|
+
* Treats +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to restore
|
94
123
|
* the heap property.
|
95
124
|
*
|
96
125
|
* Time complexity: O(log n / log d). If the average up shifted element sorts
|
@@ -100,14 +129,14 @@ dheap_ary_sift_down(VALUE heap_array, int d, long sift_idx) {
|
|
100
129
|
static VALUE
|
101
130
|
dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
|
102
131
|
DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
|
103
|
-
return dheap_ary_sift_up(heap_array, d,
|
132
|
+
return dheap_ary_sift_up(heap_array, d, sift_index);
|
104
133
|
}
|
105
134
|
|
106
135
|
/*
|
107
136
|
* call-seq:
|
108
|
-
* DHeap.array_sift_down(heap_array, d,
|
137
|
+
* DHeap.array_sift_down(heap_array, d, sift_index)
|
109
138
|
*
|
110
|
-
* Treats +heap_array+ as a +d+-ary heap and sifts down from +
|
139
|
+
* Treats +heap_array+ as a +d+-ary heap and sifts down from +sift_index+ to
|
111
140
|
* restore the heap property.
|
112
141
|
*
|
113
142
|
* Time complexity: O(d log n / log d). If the average down shifted element
|
@@ -117,7 +146,7 @@ dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
|
|
117
146
|
static VALUE
|
118
147
|
dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
|
119
148
|
DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
|
120
|
-
return dheap_ary_sift_down(heap_array, d,
|
149
|
+
return dheap_ary_sift_down(heap_array, d, sift_index);
|
121
150
|
}
|
122
151
|
|
123
152
|
static VALUE
|
@@ -133,11 +162,35 @@ dheap_initialize(int argc, VALUE *argv, VALUE self) {
|
|
133
162
|
return self;
|
134
163
|
}
|
135
164
|
|
136
|
-
static
|
137
|
-
|
165
|
+
static VALUE dheap_size(VALUE self) {
|
166
|
+
VALUE ary = DHEAP_GET_A(self);
|
167
|
+
long size = DHEAP_SIZE(ary);
|
168
|
+
return LONG2NUM(size);
|
169
|
+
}
|
138
170
|
|
139
|
-
static VALUE
|
140
|
-
|
171
|
+
static VALUE dheap_empty_p(VALUE self) {
|
172
|
+
VALUE ary = DHEAP_GET_A(self);
|
173
|
+
long size = DHEAP_SIZE(ary);
|
174
|
+
return size == 0 ? Qtrue : Qfalse;
|
175
|
+
}
|
176
|
+
|
177
|
+
static VALUE dheap_attr_d(VALUE self) { return DHEAP_GET_D(self); }
|
178
|
+
|
179
|
+
static VALUE
|
180
|
+
dheap_freeze(VALUE self) {
|
181
|
+
VALUE ary = DHEAP_GET_A(self);
|
182
|
+
ID id_freeze = rb_intern("freeze");
|
183
|
+
rb_funcall(ary, id_freeze, 0);
|
184
|
+
return rb_call_super(0, NULL);
|
185
|
+
}
|
186
|
+
|
187
|
+
static VALUE
|
188
|
+
dheap_ary_push(VALUE ary, int d, VALUE val, VALUE scr)
|
189
|
+
{
|
190
|
+
DHEAP_APPEND(ary, scr, val);
|
191
|
+
long last_index = DHEAP_LAST_IDX(ary);
|
192
|
+
return dheap_ary_sift_up(ary, d, last_index);
|
193
|
+
}
|
141
194
|
|
142
195
|
/*
|
143
196
|
* Push val onto the end of the heap, then sift up to maintain heap property.
|
@@ -146,12 +199,16 @@ static VALUE dheap_d(VALUE dheap) { return dheap_get_d(dheap); }
|
|
146
199
|
*
|
147
200
|
*/
|
148
201
|
static VALUE
|
149
|
-
dheap_push(VALUE
|
150
|
-
|
151
|
-
VALUE
|
152
|
-
|
153
|
-
|
154
|
-
|
202
|
+
dheap_push(int argc, VALUE *argv, VALUE self) {
|
203
|
+
rb_check_arity(argc, 1, 2);
|
204
|
+
VALUE scr = argv[0];
|
205
|
+
VALUE val = argc < 2 ? scr : argv[1];
|
206
|
+
|
207
|
+
VALUE ary = DHEAP_GET_A(self);
|
208
|
+
VALUE dval = DHEAP_GET_D(self);
|
209
|
+
int d = FIX2INT(dval);
|
210
|
+
|
211
|
+
return dheap_ary_push(ary, d, val, scr);
|
155
212
|
}
|
156
213
|
|
157
214
|
/*
|
@@ -163,39 +220,87 @@ dheap_push(VALUE dheap, VALUE val) {
|
|
163
220
|
*
|
164
221
|
*/
|
165
222
|
static VALUE
|
166
|
-
dheap_left_shift(VALUE
|
167
|
-
dheap_push(
|
168
|
-
return
|
223
|
+
dheap_left_shift(VALUE self, VALUE val) {
|
224
|
+
dheap_push(1, &val, self);
|
225
|
+
return self;
|
226
|
+
}
|
227
|
+
|
228
|
+
#define DHEAP_Pop_Init(self) \
|
229
|
+
VALUE ary = DHEAP_GET_A(self); \
|
230
|
+
VALUE dval = DHEAP_GET_D(self); \
|
231
|
+
long last_index = DHEAP_LAST_IDX(ary); \
|
232
|
+
|
233
|
+
#define DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value) \
|
234
|
+
if (last_index == 0) { DHEAP_DROP_LAST(ary); return pop_value; } \
|
235
|
+
VALUE sift_value = DHEAP_VALUE(ary, last_index); \
|
236
|
+
VALUE sift_score = DHEAP_SCORE(ary, last_index); \
|
237
|
+
DHEAP_ASSIGN(ary, 0, sift_score, sift_value); \
|
238
|
+
DHEAP_DROP_LAST(ary); \
|
239
|
+
dheap_ary_sift_down(ary, FIX2INT(dval), 0);
|
240
|
+
|
241
|
+
static VALUE
|
242
|
+
dheap_peek(VALUE self) {
|
243
|
+
VALUE ary = DHEAP_GET_A(self);
|
244
|
+
return DHEAP_VALUE(ary, 0);
|
169
245
|
}
|
170
246
|
|
171
247
|
/*
|
172
248
|
* Pops the minimum value from the top of the heap, sifting down to maintain
|
173
249
|
* heap property.
|
174
250
|
*
|
175
|
-
* Time complexity: O(log n / log d).
|
251
|
+
* Time complexity: O(d log n / log d).
|
176
252
|
*
|
177
253
|
*/
|
178
254
|
static VALUE
|
179
|
-
dheap_pop(VALUE
|
180
|
-
|
181
|
-
|
182
|
-
|
255
|
+
dheap_pop(VALUE self) {
|
256
|
+
DHEAP_Pop_Init(self);
|
257
|
+
if (last_index < 0) return Qnil;
|
258
|
+
VALUE pop_value = DHEAP_VALUE(ary, 0);
|
259
|
+
|
260
|
+
DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
|
261
|
+
return pop_value;
|
262
|
+
}
|
263
|
+
|
264
|
+
/*
|
265
|
+
* Pops the minimum value from the top of the heap, sifting down to maintain
|
266
|
+
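* heap property. Returns nil, leaving the heap unchanged, if the heap is empty
* or its minimum score is greater than +below_score+.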
|
267
|
+
*
|
268
|
+
* Time complexity: O(d log n / log d).
|
269
|
+
*
|
270
|
+
*/
|
271
|
+
static VALUE
|
272
|
+
dheap_pop_lte(VALUE self, VALUE below_score) {
|
273
|
+
DHEAP_Pop_Init(self);
|
274
|
+
if (last_index < 0) return Qnil;
|
275
|
+
VALUE pop_value = DHEAP_VALUE(ary, 0);
|
276
|
+
|
277
|
+
VALUE pop_score = DHEAP_SCORE(ary, 0);
|
278
|
+
struct cmp_opt_data cmp_opt = { 0, 0 };
|
279
|
+
if (below_score && !CMP_LTE(pop_score, below_score, cmp_opt)) return Qnil;
|
183
280
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
if (last_idx <= 0) return rb_ary_pop(heap_a);
|
281
|
+
DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
|
282
|
+
return pop_value;
|
283
|
+
}
|
188
284
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
285
|
+
/*
|
286
|
+
* Pops the minimum value from the top of the heap, sifting down to maintain
|
287
|
+
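* heap property. Returns nil, leaving the heap unchanged, if the heap is empty
* or its minimum score is not less than +below_score+.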
|
288
|
+
*
|
289
|
+
* Time complexity: O(d log n / log d).
|
290
|
+
*
|
291
|
+
*/
|
292
|
+
static VALUE
|
293
|
+
dheap_pop_lt(VALUE self, VALUE below_score) {
|
294
|
+
DHEAP_Pop_Init(self);
|
295
|
+
if (last_index < 0) return Qnil;
|
296
|
+
VALUE pop_value = DHEAP_VALUE(ary, 0);
|
297
|
+
|
298
|
+
VALUE pop_score = DHEAP_SCORE(ary, 0);
|
299
|
+
struct cmp_opt_data cmp_opt = { 0, 0 };
|
300
|
+
if (below_score && !CMP_LT(pop_score, below_score, cmp_opt)) return Qnil;
|
197
301
|
|
198
|
-
|
302
|
+
DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
|
303
|
+
return pop_value;
|
199
304
|
}
|
200
305
|
|
201
306
|
void
|
@@ -206,14 +311,20 @@ Init_d_heap(void)
|
|
206
311
|
id_ivar_d = rb_intern_const("d");
|
207
312
|
|
208
313
|
rb_cDHeap = rb_define_class("DHeap", rb_cObject);
|
209
|
-
rb_define_method(rb_cDHeap, "initialize", dheap_initialize, -1);
|
210
|
-
rb_define_method(rb_cDHeap, "d", dheap_d, 0);
|
211
|
-
rb_define_private_method(rb_cDHeap, "_ary_", dheap_a, 0);
|
212
|
-
|
213
314
|
rb_define_singleton_method(rb_cDHeap, "heap_sift_down", dheap_sift_down_s, 3);
|
214
315
|
rb_define_singleton_method(rb_cDHeap, "heap_sift_up", dheap_sift_up_s, 3);
|
215
316
|
|
216
|
-
rb_define_method(rb_cDHeap, "
|
217
|
-
rb_define_method(rb_cDHeap, "
|
218
|
-
rb_define_method(rb_cDHeap, "
|
317
|
+
rb_define_method(rb_cDHeap, "initialize", dheap_initialize, -1);
|
318
|
+
rb_define_method(rb_cDHeap, "d", dheap_attr_d, 0);
|
319
|
+
rb_define_method(rb_cDHeap, "freeze", dheap_freeze, 0);
|
320
|
+
|
321
|
+
rb_define_method(rb_cDHeap, "size", dheap_size, 0);
|
322
|
+
rb_define_method(rb_cDHeap, "empty?", dheap_empty_p, 0);
|
323
|
+
|
324
|
+
rb_define_method(rb_cDHeap, "peek", dheap_peek, 0);
|
325
|
+
rb_define_method(rb_cDHeap, "push", dheap_push, -1);
|
326
|
+
rb_define_method(rb_cDHeap, "<<", dheap_left_shift, 1);
|
327
|
+
rb_define_method(rb_cDHeap, "pop", dheap_pop, 0);
|
328
|
+
rb_define_method(rb_cDHeap, "pop_lt", dheap_pop_lt, 1);
|
329
|
+
rb_define_method(rb_cDHeap, "pop_lte", dheap_pop_lte, 1);
|
219
330
|
}
|
data/ext/d_heap/d_heap.h
CHANGED
@@ -3,31 +3,72 @@
|
|
3
3
|
|
4
4
|
#include "ruby.h"
|
5
5
|
|
6
|
-
//
|
7
|
-
// gains from fewer levels can outweight doing this many comparisons per level.
|
8
|
-
// Since the comparisons will still be executed using <=> on ruby objects, it's
|
9
|
-
// likely they will be too slow to make any d > 8 worthwhile.
|
10
|
-
#define DHEAP_MAX_D 128
|
6
|
+
// d=4 uses the fewest comparisons for insert + delete-min (in the worst case).
|
11
7
|
#define DHEAP_DEFAULT_D 4
|
12
8
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
#define
|
20
|
-
(
|
9
|
+
// This is a somewhat arbitrary maximum. But benefits from more leaf nodes
|
10
|
+
// are very unlikely to outweigh the increasingly higher number of worst-case
|
11
|
+
// comparisons as d gets further from 4.
|
12
|
+
#define DHEAP_MAX_D 32
|
13
|
+
|
14
|
+
|
15
|
+
#define CMP_LT(a, b, cmp_opt) \
|
16
|
+
(OPTIMIZED_CMP(a, b, cmp_opt) < 0)
|
17
|
+
#define CMP_LTE(a, b, cmp_opt) \
|
18
|
+
(OPTIMIZED_CMP(a, b, cmp_opt) <= 0)
|
19
|
+
#define CMP_GT(a, b, cmp_opt) \
|
20
|
+
(OPTIMIZED_CMP(a, b, cmp_opt) > 0)
|
21
|
+
#define CMP_GTE(a, b, cmp_opt) \
|
22
|
+
(OPTIMIZED_CMP(a, b, cmp_opt) >= 0)
|
21
23
|
|
22
24
|
VALUE rb_cDHeap;
|
23
25
|
ID id_cmp;
|
24
26
|
|
27
|
+
// from internal/numeric.h
|
28
|
+
#ifndef INTERNAL_NUMERIC_H
|
29
|
+
int rb_float_cmp(VALUE x, VALUE y);
|
30
|
+
#endif /* INTERNAL_NUMERIC_H */
|
31
|
+
|
32
|
+
// from internal/compar.h
|
33
|
+
#ifndef INTERNAL_COMPAR_H
|
34
|
+
#define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
|
35
|
+
|
36
|
+
enum {
|
37
|
+
cmp_opt_Integer,
|
38
|
+
cmp_opt_String,
|
39
|
+
cmp_opt_Float,
|
40
|
+
cmp_optimizable_count
|
41
|
+
};
|
42
|
+
|
43
|
+
struct cmp_opt_data {
|
44
|
+
unsigned int opt_methods;
|
45
|
+
unsigned int opt_inited;
|
46
|
+
};
|
47
|
+
|
48
|
+
#define NEW_CMP_OPT_MEMO(type, value) \
|
49
|
+
NEW_PARTIAL_MEMO_FOR(type, value, cmp_opt)
|
50
|
+
#define CMP_OPTIMIZABLE_BIT(type) (1U << TOKEN_PASTE(cmp_opt_,type))
|
51
|
+
#define CMP_OPTIMIZABLE(data, type) \
|
52
|
+
(((data).opt_inited & CMP_OPTIMIZABLE_BIT(type)) ? \
|
53
|
+
((data).opt_methods & CMP_OPTIMIZABLE_BIT(type)) : \
|
54
|
+
(((data).opt_inited |= CMP_OPTIMIZABLE_BIT(type)), \
|
55
|
+
rb_method_basic_definition_p(TOKEN_PASTE(rb_c,type), id_cmp) && \
|
56
|
+
((data).opt_methods |= CMP_OPTIMIZABLE_BIT(type))))
|
57
|
+
|
58
|
+
#define OPTIMIZED_CMP(a, b, data) \
|
59
|
+
((FIXNUM_P(a) && FIXNUM_P(b) && CMP_OPTIMIZABLE(data, Integer)) ? \
|
60
|
+
(((long)a > (long)b) ? 1 : ((long)a < (long)b) ? -1 : 0) : \
|
61
|
+
(STRING_P(a) && STRING_P(b) && CMP_OPTIMIZABLE(data, String)) ? \
|
62
|
+
rb_str_cmp(a, b) : \
|
63
|
+
(RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b) && CMP_OPTIMIZABLE(data, Float)) ? \
|
64
|
+
rb_float_cmp(a, b) : \
|
65
|
+
rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b))
|
66
|
+
|
25
67
|
#define puts(v) { \
|
26
68
|
ID sym_puts = rb_intern("puts"); \
|
27
69
|
rb_funcall(rb_mKernel, sym_puts, 1, v); \
|
28
70
|
}
|
29
71
|
|
30
|
-
|
31
|
-
VALUE dheap_ary_sift_down(VALUE heap_array, int d, long sift_idx);
|
72
|
+
#endif /* INTERNAL_COMPAR_H */
|
32
73
|
|
33
74
|
#endif /* D_HEAP_H */
|
data/lib/d_heap.rb
CHANGED
@@ -3,36 +3,8 @@ require "d_heap/version"
|
|
3
3
|
|
4
4
|
class DHeap
|
5
5
|
|
6
|
-
def
|
7
|
-
|
8
|
-
_ary_.replace(_ary_.dup)
|
9
|
-
end
|
10
|
-
|
11
|
-
def freeze
|
12
|
-
_ary_.freeze
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
|
-
def peek
|
17
|
-
_ary_[0]
|
18
|
-
end
|
19
|
-
|
20
|
-
def empty?
|
21
|
-
_ary_.empty?
|
22
|
-
end
|
23
|
-
|
24
|
-
def size
|
25
|
-
_ary_.size
|
26
|
-
end
|
27
|
-
|
28
|
-
def each_in_order
|
29
|
-
return to_enum(__method__) unless block_given?
|
30
|
-
heap = dup
|
31
|
-
yield val until heap.emptu?
|
32
|
-
end
|
33
|
-
|
34
|
-
def to_a
|
35
|
-
_ary_.dup
|
6
|
+
def initialize_copy(other)
|
7
|
+
raise NotImplementedError, "initialize_copy should deep copy array"
|
36
8
|
end
|
37
9
|
|
38
10
|
end
|
data/lib/d_heap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: d_heap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- nicholas a. evans
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-12-
|
11
|
+
date: 2020-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
A C extension implementation of a d-ary heap data structure, suitable for
|