d_heap 0.3.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.clang-format +21 -0
- data/.github/workflows/main.yml +18 -3
- data/.gitignore +1 -0
- data/.rubocop.yml +32 -2
- data/.yardopts +10 -0
- data/CHANGELOG.md +93 -0
- data/D +7 -0
- data/README.md +416 -154
- data/d_heap.gemspec +20 -8
- data/docs/benchmarks-2.txt +93 -0
- data/docs/benchmarks-mem.txt +39 -0
- data/docs/benchmarks.txt +686 -0
- data/docs/profile.txt +358 -0
- data/ext/d_heap/.rubocop.yml +7 -0
- data/ext/d_heap/d_heap.c +917 -295
- data/ext/d_heap/extconf.rb +45 -3
- data/images/push_n.png +0 -0
- data/images/push_n_pop_n.png +0 -0
- data/images/push_pop.png +0 -0
- data/images/wikipedia-min-heap.png +0 -0
- data/lib/d_heap.rb +116 -3
- data/lib/d_heap/version.rb +1 -1
- metadata +33 -17
- data/.rspec +0 -3
- data/.travis.yml +0 -6
- data/Gemfile +0 -11
- data/Gemfile.lock +0 -67
- data/Rakefile +0 -20
- data/bin/console +0 -15
- data/bin/rake +0 -29
- data/bin/rspec +0 -29
- data/bin/rubocop +0 -29
- data/bin/setup +0 -8
- data/ext/d_heap/d_heap.h +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b51ed52baf74b585a7ab7799f92a446aef5852431ba10e146658b419657ffbe
|
4
|
+
data.tar.gz: cc7c6786eee78ec13214582b8701448d312f59fb723d12676fb673447ab409a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5de98f8c9084b30694fff5f8154a6e42e7e67d76518c25136ab4fb0c0afb047ad3c923f4544dcf613ded4c3b01417729aa796c973100faaa7ee93051fa630c7d
|
7
|
+
data.tar.gz: e5dbcc90da7adfba7ef45cd9a2da5fd1781a2bd489002a5ffc0a764915c035c178db30ae9b8431a8fc810cfa6f03a1b38ec0a50cbf23c2e1ba5dfc36549c0609
|
data/.clang-format
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
---
|
2
|
+
BasedOnStyle: mozilla
|
3
|
+
IndentWidth: 4
|
4
|
+
PointerAlignment: Right
|
5
|
+
AlignAfterOpenBracket: Align
|
6
|
+
AlignConsecutiveAssignments: true
|
7
|
+
AlignConsecutiveDeclarations: true
|
8
|
+
AlignConsecutiveBitFields: true
|
9
|
+
AlignConsecutiveMacros: true
|
10
|
+
AlignEscapedNewlines: Right
|
11
|
+
AlignOperands: true
|
12
|
+
|
13
|
+
AllowAllConstructorInitializersOnNextLine: false
|
14
|
+
AllowShortIfStatementsOnASingleLine: WithoutElse
|
15
|
+
|
16
|
+
IndentCaseLabels: false
|
17
|
+
IndentPPDirectives: AfterHash
|
18
|
+
|
19
|
+
ForEachMacros:
|
20
|
+
- WHILE_PEEK_LT_P
|
21
|
+
...
|
data/.github/workflows/main.yml
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
name:
|
1
|
+
name: CI
|
2
2
|
|
3
3
|
on: [push,pull_request]
|
4
4
|
|
@@ -7,7 +7,7 @@ jobs:
|
|
7
7
|
strategy:
|
8
8
|
fail-fast: false
|
9
9
|
matrix:
|
10
|
-
ruby: [2.5, 2.6, 2.7, 3.0]
|
10
|
+
ruby: [2.4, 2.5, 2.6, 2.7, 3.0]
|
11
11
|
os: [ubuntu, macos]
|
12
12
|
experimental: [false]
|
13
13
|
runs-on: ${{ matrix.os }}-latest
|
@@ -23,4 +23,19 @@ jobs:
|
|
23
23
|
run: |
|
24
24
|
gem install bundler -v 2.2.3
|
25
25
|
bundle install
|
26
|
-
bundle exec rake
|
26
|
+
bundle exec rake ci
|
27
|
+
|
28
|
+
benchmarks:
|
29
|
+
runs-on: ubuntu-latest
|
30
|
+
steps:
|
31
|
+
- uses: actions/checkout@v2
|
32
|
+
- name: Set up Ruby
|
33
|
+
uses: ruby/setup-ruby@v1
|
34
|
+
with:
|
35
|
+
ruby-version: 2.7
|
36
|
+
bundler-cache: true
|
37
|
+
- name: Run the benchmarks
|
38
|
+
run: |
|
39
|
+
gem install bundler -v 2.2.3
|
40
|
+
bundle install
|
41
|
+
bundle exec rake ci:benchmarks
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -3,9 +3,10 @@ inherit_mode:
|
|
3
3
|
- Exclude
|
4
4
|
|
5
5
|
AllCops:
|
6
|
-
TargetRubyVersion: 2.
|
6
|
+
TargetRubyVersion: 2.4
|
7
7
|
NewCops: disable
|
8
8
|
Exclude:
|
9
|
+
- bin/benchmark-driver
|
9
10
|
- bin/rake
|
10
11
|
- bin/rspec
|
11
12
|
- bin/rubocop
|
@@ -106,26 +107,50 @@ Naming/RescuedExceptionsVariableName: { Enabled: false }
|
|
106
107
|
###########################################################################
|
107
108
|
# Matrics:
|
108
109
|
|
110
|
+
Metrics/CyclomaticComplexity:
|
111
|
+
Max: 10
|
112
|
+
|
109
113
|
# Although it may be better to split specs into multiple files...?
|
110
114
|
Metrics/BlockLength:
|
111
115
|
Exclude:
|
112
116
|
- "spec/**/*_spec.rb"
|
117
|
+
CountAsOne:
|
118
|
+
- array
|
119
|
+
- hash
|
120
|
+
- heredoc
|
121
|
+
|
122
|
+
Metrics/ClassLength:
|
123
|
+
Max: 200
|
124
|
+
CountAsOne:
|
125
|
+
- array
|
126
|
+
- hash
|
127
|
+
- heredoc
|
113
128
|
|
114
129
|
###########################################################################
|
115
130
|
# Style...
|
116
131
|
|
117
132
|
Style/AccessorGrouping: { Enabled: false }
|
118
133
|
Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
|
134
|
+
Style/ClassAndModuleChildren: { Enabled: false }
|
119
135
|
Style/EachWithObject: { Enabled: false }
|
120
136
|
Style/FormatStringToken: { Enabled: false }
|
121
137
|
Style/FloatDivision: { Enabled: false }
|
138
|
+
Style/GuardClause: { Enabled: false } # usually nice to do, but...
|
139
|
+
Style/IfUnlessModifier: { Enabled: false }
|
140
|
+
Style/IfWithSemicolon: { Enabled: false }
|
122
141
|
Style/Lambda: { Enabled: false }
|
123
142
|
Style/LineEndConcatenation: { Enabled: false }
|
124
143
|
Style/MixinGrouping: { Enabled: false }
|
144
|
+
Style/MultilineBlockChain: { Enabled: false }
|
125
145
|
Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
|
126
146
|
Style/RescueStandardError: { Enabled: false }
|
147
|
+
Style/Semicolon: { Enabled: false }
|
127
148
|
Style/SingleLineMethods: { Enabled: false }
|
128
149
|
Style/StabbyLambdaParentheses: { Enabled: false }
|
150
|
+
Style/WhenThen : { Enabled: false }
|
151
|
+
|
152
|
+
# I require trailing commas elsewhere, but these are optional
|
153
|
+
Style/TrailingCommaInArguments: { Enabled: false }
|
129
154
|
|
130
155
|
# If rubocop had an option to only enforce this on constants and literals (e.g.
|
131
156
|
# strings, regexp, range), I'd agree.
|
@@ -149,7 +174,9 @@ Style/BlockDelimiters:
|
|
149
174
|
EnforcedStyle: semantic
|
150
175
|
AllowBracesOnProceduralOneLiners: true
|
151
176
|
IgnoredMethods:
|
152
|
-
- expect
|
177
|
+
- expect # rspec
|
178
|
+
- profile # ruby-prof
|
179
|
+
- ips # benchmark-ips
|
153
180
|
|
154
181
|
|
155
182
|
Style/FormatString:
|
@@ -168,3 +195,6 @@ Style/TrailingCommaInHashLiteral:
|
|
168
195
|
|
169
196
|
Style/TrailingCommaInArrayLiteral:
|
170
197
|
EnforcedStyleForMultiline: consistent_comma
|
198
|
+
|
199
|
+
Style/YodaCondition:
|
200
|
+
EnforcedStyle: forbid_for_equality_operators_only
|
data/.yardopts
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
## Current/Unreleased
|
2
|
+
|
3
|
+
## Release v0.7.0 (2021-01-24)
|
4
|
+
|
5
|
+
* 💥⚡️ **BREAKING**: Uses `double`) for _all_ scores.
|
6
|
+
* 💥 Integers larger than a double mantissa (53-bits) will lose some
|
7
|
+
precision.
|
8
|
+
* ⚡️ big speed up
|
9
|
+
* ⚡️ Much better memory usage
|
10
|
+
* ⚡️ Simplifies score conversion between ruby and C
|
11
|
+
* ✨ Added `DHeap::Map` for ensuring values can only be added once, by `#hash`.
|
12
|
+
* Adding again will update the score.
|
13
|
+
* Adds `DHeap::Map#[]` for quick lookup of existing scores
|
14
|
+
* Adds `DHeap::Map#[]=` for adjustments of existing scores
|
15
|
+
* TODO: `DHeap::Map#delete`
|
16
|
+
* 📝📈 SO MANY BENCHMARKS
|
17
|
+
* ⚡️ Set DEFAULT_D to 6, based on benchmarks.
|
18
|
+
* 🐛♻️ convert all `long` indexes to `size_t`
|
19
|
+
|
20
|
+
## Release v0.6.1 (2021-01-24)
|
21
|
+
|
22
|
+
* 📝 Fix link to CHANGELOG.md in gemspec
|
23
|
+
|
24
|
+
## Release v0.6.0 (2021-01-24)
|
25
|
+
|
26
|
+
* 🔥 **Breaking**: `#initialize` uses a keyword argument for `d`
|
27
|
+
* ✨ Added `#initialize(capacity: capa)` to set initial capacity.
|
28
|
+
* ✨ Added `peek_with_score` and `peek_score`
|
29
|
+
* ✨ Added `pop_with_score` and `each_pop(with_score: true)`
|
30
|
+
* ✨ Added `pop_all_below(max_score, array = [])`
|
31
|
+
* ✨ Added aliases for `shift` and `next`
|
32
|
+
* 📈 Added benchmark charts to README, and `bin/bench_charts` to generate them.
|
33
|
+
* requires `gruff` which requires `rmagick` which requires `imagemagick`
|
34
|
+
* 📝 Many documentation updates and fixes.
|
35
|
+
|
36
|
+
## Release v0.5.0 (2021-01-17)
|
37
|
+
|
38
|
+
* 🔥 **Breaking**: reversed order of `#push` arguments to `value, score`.
|
39
|
+
* ✨ Added `#insert(score, value)` to replace earlier version of `#push`.
|
40
|
+
* ✨ Added `#each_pop` enumerator.
|
41
|
+
* ✨ Added aliases for `deq`, `enq`, `first`, `pop_below`, `length`, and
|
42
|
+
`count`, to mimic other classes in ruby's stdlib.
|
43
|
+
* ⚡️♻️ More performance improvements:
|
44
|
+
* Created an `ENTRY` struct and store both the score and the value pointer in
|
45
|
+
the same `ENTRY *entries` array.
|
46
|
+
* Reduced unnecessary allocations or copies in both sift loops. A similar
|
47
|
+
refactoring also sped up the pure ruby benchmark implementation.
|
48
|
+
* Compiling with `-O3`.
|
49
|
+
* 📝 Updated (and in some cases, fixed) yardoc
|
50
|
+
* ♻️ Moved aliases and less performance sensitive code into ruby.
|
51
|
+
* ♻️ DRY up push/insert methods
|
52
|
+
|
53
|
+
## Release v0.4.0 (2021-01-12)
|
54
|
+
|
55
|
+
* 🔥 **Breaking**: Scores must be `Integer` or convertable to `Float`
|
56
|
+
* ⚠️ `Integer` scores must fit in `-ULONG_LONG_MAX` to `+ULONG_LONG_MAX`.
|
57
|
+
* ⚡️ Big performance improvements, by using C `long double *cscores` array
|
58
|
+
* ⚡️ many many (so many) updates to benchmarks
|
59
|
+
* ✨ Added `DHeap#clear`
|
60
|
+
* 🐛 Fixed `DHeap#initialize_copy` and `#freeze`
|
61
|
+
* ♻️ significant refactoring
|
62
|
+
* 📝 Updated docs (mostly adding benchmarks)
|
63
|
+
|
64
|
+
## Release v0.3.0 (2020-12-29)
|
65
|
+
|
66
|
+
* 🔥 **Breaking**: Removed class methods that operated directly on an array.
|
67
|
+
They weren't compatible with the performance improvements.
|
68
|
+
* ⚡️ Big performance improvements, by converting to a `T_DATA` struct.
|
69
|
+
* ♻️ Major refactoring/rewriting of dheap.c
|
70
|
+
* ✅ Added benchmark specs
|
71
|
+
|
72
|
+
## Release v0.2.2 (2020-12-27)
|
73
|
+
|
74
|
+
* 🐛 fix `optimized_cmp`, avoiding internal symbols
|
75
|
+
* 📝 Update documentation
|
76
|
+
* 💚 fix macos CI
|
77
|
+
* ➕ Add rubocop 👮🎨
|
78
|
+
|
79
|
+
## Release v0.2.1 (2020-12-26)
|
80
|
+
|
81
|
+
* ⬆️ Upgraded rake (and bundler) to support ruby 3.0
|
82
|
+
|
83
|
+
## Release v0.2.0 (2020-12-24)
|
84
|
+
|
85
|
+
* ✨ Add ability to push separate score and value
|
86
|
+
* ⚡️ Big performance gain, by storing scores separately and using ruby's
|
87
|
+
internal `OPTIMIZED_CMP` instead of always directly calling `<=>`
|
88
|
+
|
89
|
+
## Release v0.1.0 (2020-12-22)
|
90
|
+
|
91
|
+
🎉 initial release 🎉
|
92
|
+
|
93
|
+
* ✨ Add basic d-ary Heap implementation
|
data/D
ADDED
data/README.md
CHANGED
@@ -1,199 +1,461 @@
|
|
1
|
-
# DHeap
|
1
|
+
# DHeap - Fast d-ary heap for ruby
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/d_heap.svg)](https://badge.fury.io/rb/d_heap)
|
4
|
+
[![Build Status](https://github.com/nevans/d_heap/workflows/CI/badge.svg)](https://github.com/nevans/d_heap/actions?query=workflow%3ACI)
|
5
|
+
[![Maintainability](https://api.codeclimate.com/v1/badges/ff274acd0683c99c03e1/maintainability)](https://codeclimate.com/github/nevans/d_heap/maintainability)
|
6
|
+
|
7
|
+
A fast [_d_-ary heap][d-ary heap] [priority queue] implementation for ruby,
|
8
|
+
implemented as a C extension.
|
9
|
+
|
10
|
+
A regular queue has "FIFO" behavior: first in, first out. A stack is "LIFO":
|
11
|
+
last in first out. A priority queue pushes each element with a score and pops
|
12
|
+
out in order by score. Priority queues are often used in algorithms for e.g.
|
13
|
+
[scheduling] of timers or bandwidth management, for [Huffman coding], and for
|
14
|
+
various graph search algorithms such as [Dijkstra's algorithm], [A* search], or
|
15
|
+
[Prim's algorithm].
|
16
|
+
|
17
|
+
From [wikipedia](https://en.wikipedia.org/wiki/Heap_(data_structure)):
|
18
|
+
> A heap is a specialized tree-based data structure which is essentially an
|
19
|
+
> almost complete tree that satisfies the heap property: in a min heap, for any
|
20
|
+
> given node C, if P is a parent node of C, then the key (the value) of P is
|
21
|
+
> less than or equal to the key of C. The node at the "top" of the heap (with no
|
22
|
+
> parents) is called the root node.
|
23
|
+
|
24
|
+
![tree representation of a min heap](images/wikipedia-min-heap.png)
|
25
|
+
|
26
|
+
The _d_-ary heap data structure is a generalization of a [binary heap] in which
|
27
|
+
each node has _d_ children instead of 2. This speeds up "push" or "decrease
|
28
|
+
priority" operations (`O(log n / log d)`) with the tradeoff of slower "pop" or
|
29
|
+
"increase priority" (`O(d log n / log d)`). Additionally, _d_-ary heaps can
|
30
|
+
have better memory cache behavior than binary heaps, letting them run more
|
31
|
+
quickly in practice.
|
32
|
+
|
33
|
+
Although the default _d_ value will usually perform best (see the time
|
34
|
+
complexity analysis below), it's always advisable to benchmark your specific
|
35
|
+
use-case. In particular, if you push items more than you pop, higher values for
|
36
|
+
_d_ can give a faster total runtime.
|
37
|
+
|
38
|
+
[d-ary heap]: https://en.wikipedia.org/wiki/D-ary_heap
|
39
|
+
[priority queue]: https://en.wikipedia.org/wiki/Priority_queue
|
40
|
+
[binary heap]: https://en.wikipedia.org/wiki/Binary_heap
|
41
|
+
[scheduling]: https://en.wikipedia.org/wiki/Scheduling_(computing)
|
42
|
+
[Huffman coding]: https://en.wikipedia.org/wiki/Huffman_coding#Compression
|
43
|
+
[Dijkstra's algorithm]: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Using_a_priority_queue
|
44
|
+
[A* search]: https://en.wikipedia.org/wiki/A*_search_algorithm#Description
|
45
|
+
[Prim's algorithm]: https://en.wikipedia.org/wiki/Prim%27s_algorithm
|
2
46
|
|
3
|
-
|
4
|
-
algorithms.
|
47
|
+
## Installation
|
5
48
|
|
6
|
-
|
7
|
-
the nodes have _d_ children instead of 2. This allows for "decrease priority"
|
8
|
-
operations to be performed more quickly with the tradeoff of slower delete
|
9
|
-
minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
|
10
|
-
binary heaps, allowing them to run more quickly in practice despite slower
|
11
|
-
worst-case time complexity. In the worst case, a _d_-ary heap requires only
|
12
|
-
`O(log n / log d)` to push, with the tradeoff that pop is `O(d log n / log d)`.
|
49
|
+
Add this line to your application's Gemfile:
|
13
50
|
|
14
|
-
|
15
|
-
|
51
|
+
```ruby
|
52
|
+
gem 'd_heap'
|
53
|
+
```
|
54
|
+
|
55
|
+
And then execute:
|
56
|
+
|
57
|
+
$ bundle install
|
58
|
+
|
59
|
+
Or install it yourself as:
|
60
|
+
|
61
|
+
$ gem install d_heap
|
16
62
|
|
17
63
|
## Usage
|
18
64
|
|
19
|
-
The
|
20
|
-
|
65
|
+
The basic API is `#push(object, score)` and `#pop`. Please read the [full
|
66
|
+
documentation] for more details. The score must be convertable to a `Float` via
|
67
|
+
`Float(score)` (i.e. it should properly implement `#to_f`).
|
68
|
+
|
69
|
+
Quick reference for the most common methods:
|
70
|
+
|
71
|
+
* `heap << object` adds a value, using `Float(object)` as its intrinsic score.
|
72
|
+
* `heap.push(object, score)` adds a value with an extrinsic score.
|
73
|
+
* `heap.peek` to view the minimum value without popping it.
|
74
|
+
* `heap.pop` removes and returns the value with the minimum score.
|
75
|
+
* `heap.pop_below(max_score)` pops only if the next score is `<` the argument.
|
76
|
+
* `heap.clear` to remove all items from the heap.
|
77
|
+
* `heap.empty?` returns true if the heap is empty.
|
78
|
+
* `heap.size` returns the number of items in the heap.
|
79
|
+
|
80
|
+
### Examples
|
21
81
|
|
22
82
|
```ruby
|
83
|
+
# create some example objects to place in our heap
|
84
|
+
Task = Struct.new(:id, :time) do
|
85
|
+
def to_f; time.to_f end
|
86
|
+
end
|
87
|
+
t1 = Task.new(1, Time.now + 5*60)
|
88
|
+
t2 = Task.new(2, Time.now + 50)
|
89
|
+
t3 = Task.new(3, Time.now + 60)
|
90
|
+
t4 = Task.new(4, Time.now + 5)
|
91
|
+
|
92
|
+
# create the heap
|
23
93
|
require "d_heap"
|
94
|
+
heap = DHeap.new
|
24
95
|
|
25
|
-
|
96
|
+
# push with an explicit score (which might be extrinsic to the value)
|
97
|
+
heap.push t1, t1.to_f
|
26
98
|
|
27
|
-
#
|
28
|
-
heap.push
|
29
|
-
heap.push Time.now + 30, Task.new(2)
|
30
|
-
heap.push Time.now + 60, Task.new(3)
|
31
|
-
heap.push Time.now + 5, Task.new(4)
|
99
|
+
# the score will be implicitly cast with Float, so any object with #to_f
|
100
|
+
heap.push t2, t2
|
32
101
|
|
33
|
-
#
|
34
|
-
heap
|
35
|
-
heap.pop.last # => Task[2]
|
36
|
-
heap.peak.last # => Task[3], but don't pop it
|
37
|
-
heap.pop.last # => Task[3]
|
38
|
-
heap.pop.last # => Task[1]
|
39
|
-
```
|
102
|
+
# if the object has an intrinsic score via #to_f, "<<" is the simplest API
|
103
|
+
heap << t3 << t4
|
40
104
|
|
41
|
-
|
105
|
+
# pop returns the lowest scored item, and removes it from the heap
|
106
|
+
heap.pop # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
|
107
|
+
heap.pop # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
|
42
108
|
|
43
|
-
|
109
|
+
# peek returns the lowest scored item, without removing it from the heap
|
110
|
+
heap.peek # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
111
|
+
heap.pop # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
44
112
|
|
45
|
-
|
113
|
+
# pop_lte handles the common "h.pop if h.peek_score < max" pattern
|
114
|
+
heap.pop_lte(Time.now + 65) # => nil
|
46
115
|
|
47
|
-
|
48
|
-
|
116
|
+
# the heap size can be inspected with size and empty?
|
117
|
+
heap.empty? # => false
|
118
|
+
heap.size # => 1
|
119
|
+
heap.pop # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
|
120
|
+
heap.empty? # => true
|
121
|
+
heap.size # => 0
|
122
|
+
|
123
|
+
# popping from an empty heap returns nil
|
124
|
+
heap.pop # => nil
|
49
125
|
```
|
50
126
|
|
51
|
-
|
127
|
+
Please see the [full documentation] for more methods and more examples.
|
52
128
|
|
53
|
-
|
129
|
+
[full documentation]: https://rubydoc.info/gems/d_heap/DHeap
|
54
130
|
|
55
|
-
|
131
|
+
### DHeap::Map
|
56
132
|
|
57
|
-
|
133
|
+
`DHeap::Map` augments the heap with an internal `Hash`, mapping objects to their
|
134
|
+
index in the heap. For simple push/pop this a bit slower than a normal `DHeap`
|
135
|
+
heap, but it can enable huge speed-ups for algorithms that need to adjust scores
|
136
|
+
after they've been added, e.g. [Dijkstra's algorithm]. It adds the following:
|
58
137
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
One obvious approach is to simply maintain an array in sorted order. And
|
67
|
-
ruby's Array class makes it simple to maintain a sorted array by combining
|
68
|
-
`#bsearch_index` with `#insert`. With certain insert/remove workloads that can
|
69
|
-
perform very well, but in the worst-case an insert or delete can result in O(n),
|
70
|
-
since `#insert` may need to `memcpy` or `memmove` a significant portion of the
|
71
|
-
array.
|
72
|
-
|
73
|
-
But the standard way to efficiently and simply solve this problem is using a
|
74
|
-
binary heap. Although it increases the time for `pop`, it converts the
|
75
|
-
amortized time per push + pop from `O(n)` to `O(d log n / log d)`.
|
76
|
-
|
77
|
-
I was surprised to find that, at least under certain benchmarks, my pure ruby
|
78
|
-
heap implementation was usually slower than inserting into a fully sorted
|
79
|
-
array. While this is a testament to ruby's fine-tuned Array implementationw, a
|
80
|
-
heap implementated in C should easily peform faster than `Array#insert`.
|
81
|
-
|
82
|
-
The biggest issue is that it just takes far too much time to call `<=>` from
|
83
|
-
ruby code: A sorted array only requires `log n / log 2` comparisons to insert
|
84
|
-
and no comparisons to pop. However a _d_-ary heap requires `log n / log d` to
|
85
|
-
insert plus an additional `d log n / log d` to pop. If your queue contains only
|
86
|
-
a few hundred items at once, the overhead of those extra calls to `<=>` is far
|
87
|
-
more than occasionally calling `memcpy`.
|
88
|
-
|
89
|
-
It's likely that MJIT will eventually make the C-extension completely
|
90
|
-
unnecessary. This is definitely hotspot code, and the basic ruby implementation
|
91
|
-
would work fine, if not for that `<=>` overhead. Until then... this gem gets
|
92
|
-
the job done.
|
93
|
-
|
94
|
-
## TODOs...
|
95
|
-
|
96
|
-
_TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
|
97
|
-
~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
|
98
|
-
directly handled as a priority queue. These extension methods are meant to be
|
99
|
-
used similarly to how `#bsearch` and `#bsearch_index` might be used.
|
100
|
-
|
101
|
-
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
|
102
|
-
basic heap with an internal `Hash`, which maps a set of values to scores.
|
103
|
-
loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
|
104
|
-
and adjustments, to achieve faster average runtime for *add* and *cancel*
|
105
|
-
operations.
|
106
|
-
|
107
|
-
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Timers`, which contains some
|
108
|
-
features that are loosely inspired by go's timers. e.g: It lazily sifts its
|
109
|
-
heap after deletion and adjustments, to achieve faster average runtime for *add*
|
110
|
-
and *cancel* operations.
|
111
|
-
|
112
|
-
Additionally, I was inspired by reading go's "timer.go" implementation to
|
113
|
-
experiment with a 4-ary heap instead of the traditional binary heap. In the
|
114
|
-
case of timers, new timers are usually scheduled to run after most of the
|
115
|
-
existing timers. And timers are usually canceled before they have a chance to
|
116
|
-
run. While a binary heap holds 50% of its elements in its last layer, 75% of a
|
117
|
-
4-ary heap will have no children. That diminishes the extra comparison overhead
|
118
|
-
during sift-down.
|
138
|
+
* a uniqueness constraint, by `#hash` value
|
139
|
+
* `#[obj] # => score` or `#score(obj)` in `O(1)`
|
140
|
+
* `#[obj] = new_score` or `#rescore(obj, score)` in `O(d log n / log d)`
|
141
|
+
* TODO:
|
142
|
+
* optionally unique by object identity
|
143
|
+
* `#delete(obj)` in `O(d log n / log d)` (TODO)
|
119
144
|
|
120
|
-
##
|
145
|
+
## Scores
|
121
146
|
|
122
|
-
|
147
|
+
If a score changes while the object is still in the heap, it will not be
|
148
|
+
re-evaluated again.
|
123
149
|
|
124
|
-
|
150
|
+
Constraining scores to `Float` gives enormous performance benefits. n.b.
|
151
|
+
very large `Integer` values will lose precision when converted to `Float`. This
|
152
|
+
is compiler and architecture dependant but with gcc on an IA-64 system, `Float`
|
153
|
+
is 64 bits with a 53-bit mantissa, which gives a range of -9,007,199,254,740,991
|
154
|
+
to +9,007,199,254,740,991, which is _not_ enough to store the precise POSIX
|
155
|
+
time since the epoch in nanoseconds. This can be worked around by adding a
|
156
|
+
bias, but probably it's good enough for most usage.
|
125
157
|
|
126
|
-
|
158
|
+
_Comparing arbitary objects via_ `a <=> b` _was the original design and may be
|
159
|
+
added back in a future version,_ if (and only if) _it can be done without
|
160
|
+
impacting the speed of numeric comparisons._
|
127
161
|
|
128
|
-
|
129
|
-
Swap up performs only a single comparison per swap: O(1).
|
130
|
-
Swap down performs as many as d comparions per swap: O(d).
|
162
|
+
## Thread safety
|
131
163
|
|
132
|
-
|
133
|
-
|
164
|
+
`DHeap` is _not_ thread-safe, so concurrent access from multiple threads need to
|
165
|
+
take precautions such as locking access behind a mutex.
|
134
166
|
|
135
|
-
|
136
|
-
the fewest comparisons for combined insert and delete:
|
137
|
-
* (1 + 2) lg 2 = 4.328085
|
138
|
-
* (1 + 3) lg 3 = 3.640957
|
139
|
-
* (1 + 4) lg 4 = 3.606738
|
140
|
-
* (1 + 5) lg 5 = 3.728010
|
141
|
-
* (1 + 6) lg 6 = 3.906774
|
142
|
-
* etc...
|
167
|
+
## Benchmarks
|
143
168
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
169
|
+
_See full benchmark output in subdirs of `benchmarks`. See also or updated
|
170
|
+
results. These benchmarks were measured with an Intel Core i7-1065G7 8x3.9GHz
|
171
|
+
with d_heap v0.5.0 and ruby 2.7.2 without MJIT enabled._
|
172
|
+
|
173
|
+
### Implementations
|
174
|
+
|
175
|
+
* **findmin** -
|
176
|
+
A very fast `O(1)` push using `Array#push` onto an unsorted Array, but a
|
177
|
+
very slow `O(n)` pop using `Array#min`, `Array#rindex(min)` and
|
178
|
+
`Array#delete_at(min_index)`. Push + pop is still fast for `n < 100`, but
|
179
|
+
unusably slow for `n > 1000`.
|
180
|
+
|
181
|
+
* **bsearch** -
|
182
|
+
A simple implementation with a slow `O(n)` push using `Array#bsearch` +
|
183
|
+
`Array#insert` to maintain a sorted Array, but a very fast `O(1)` pop with
|
184
|
+
`Array#pop`. It is still relatively fast for `n < 10000`, but its linear
|
185
|
+
time complexity really destroys it after that.
|
186
|
+
|
187
|
+
* **rb_heap** -
|
188
|
+
A pure ruby binary min-heap that has been tuned for performance by making
|
189
|
+
few method calls and allocating and assigning as few variables as possible.
|
190
|
+
It runs in `O(log n)` for both push and pop, although pop is slower than
|
191
|
+
push by a constant factor. Its much higher constant factors makes it lose
|
192
|
+
to `bsearch` push + pop for `n < 10000` but it holds steady with very little
|
193
|
+
slowdown even with `n > 10000000`.
|
194
|
+
|
195
|
+
* **c++ stl** -
|
196
|
+
A thin wrapper around the [priority_queue_cxx gem] which uses the [C++ STL
|
197
|
+
priority_queue]. The wrapper is simply to provide compatibility with the
|
198
|
+
other benchmarked implementations, but it should be possible to speed this
|
199
|
+
up a little bit by benchmarking the `priority_queue_cxx` API directly. It
|
200
|
+
has the same time complexity as rb_heap but its much lower constant
|
201
|
+
factors allow it to easily outperform `bsearch`.
|
202
|
+
|
203
|
+
* **c_dheap** -
|
204
|
+
A {DHeap} instance with the default `d` value of `4`. It has the same time
|
205
|
+
complexity as `rb_heap` and `c++ stl`, but is faster than both in every
|
206
|
+
benchmarked scenario.
|
207
|
+
|
208
|
+
[priority_queue_cxx gem]: https://rubygems.org/gems/priority_queue_cxx
|
209
|
+
[C++ STL priority_queue]: http://www.cplusplus.com/reference/queue/priority_queue/
|
210
|
+
|
211
|
+
### Scenarios
|
212
|
+
|
213
|
+
Each benchmark increases N exponentially, either by √1̅0̅ or approximating
|
214
|
+
(alternating between x3 and x3.333) in order to simplify keeping loop counts
|
215
|
+
evenly divisible by N.
|
216
|
+
|
217
|
+
#### push N items
|
218
|
+
|
219
|
+
This measures the _average time per insert_ to create a queue of size N
|
220
|
+
(clearing the queue once it reaches that size). Use cases which push (or
|
221
|
+
decrease) more values than they pop, e.g. [Dijkstra's algorithm] or [Prim's
|
222
|
+
algorithm] when the graph has more edges than verticies, may want to pay more
|
223
|
+
attention to this benchmark.
|
224
|
+
|
225
|
+
![bar graph for push_n_pop_n benchmarks](./images/push_n.png)
|
226
|
+
|
227
|
+
== push N (N=100) ==========================================================
|
228
|
+
push N (c_dheap): 10522662.6 i/s
|
229
|
+
push N (findmin): 9980622.3 i/s - 1.05x slower
|
230
|
+
push N (c++ stl): 7991608.3 i/s - 1.32x slower
|
231
|
+
push N (rb_heap): 4607849.4 i/s - 2.28x slower
|
232
|
+
push N (bsearch): 2769106.2 i/s - 3.80x slower
|
233
|
+
== push N (N=10,000) =======================================================
|
234
|
+
push N (c_dheap): 10444588.3 i/s
|
235
|
+
push N (findmin): 10191797.4 i/s - 1.02x slower
|
236
|
+
push N (c++ stl): 8210895.4 i/s - 1.27x slower
|
237
|
+
push N (rb_heap): 4369252.9 i/s - 2.39x slower
|
238
|
+
push N (bsearch): 1213580.4 i/s - 8.61x slower
|
239
|
+
== push N (N=1,000,000) ====================================================
|
240
|
+
push N (c_dheap): 10342183.7 i/s
|
241
|
+
push N (findmin): 9963898.8 i/s - 1.04x slower
|
242
|
+
push N (c++ stl): 7891924.8 i/s - 1.31x slower
|
243
|
+
push N (rb_heap): 4350116.0 i/s - 2.38x slower
|
244
|
+
|
245
|
+
All three heap implementations have little to no perceptible slowdown for `N >
|
246
|
+
100`. But `DHeap` runs faster than `Array#push` to an unsorted array (findmin)!
|
247
|
+
|
248
|
+
#### push then pop N items
|
249
|
+
|
250
|
+
This measures the _average_ for a push **or** a pop, filling up a queue with N
|
251
|
+
items and then draining that queue until empty. It represents the amortized
|
252
|
+
cost of balanced pushes and pops to fill a heap and drain it.
|
253
|
+
|
254
|
+
![bar graph for push_n_pop_n benchmarks](./images/push_n_pop_n.png)
|
255
|
+
|
256
|
+
== push N then pop N (N=100) ===============================================
|
257
|
+
push N + pop N (c_dheap): 10954469.2 i/s
|
258
|
+
push N + pop N (c++ stl): 9317140.2 i/s - 1.18x slower
|
259
|
+
push N + pop N (bsearch): 4808770.2 i/s - 2.28x slower
|
260
|
+
push N + pop N (findmin): 4321411.9 i/s - 2.53x slower
|
261
|
+
push N + pop N (rb_heap): 2467417.0 i/s - 4.44x slower
|
262
|
+
== push N then pop N (N=10,000) ============================================
|
263
|
+
push N + pop N (c_dheap): 8083962.7 i/s
|
264
|
+
push N + pop N (c++ stl): 7365661.8 i/s - 1.10x slower
|
265
|
+
push N + pop N (bsearch): 2257047.9 i/s - 3.58x slower
|
266
|
+
push N + pop N (rb_heap): 1439204.3 i/s - 5.62x slower
|
267
|
+
== push N then pop N (N=1,000,000) =========================================
|
268
|
+
push N + pop N (c++ stl): 5274657.5 i/s
|
269
|
+
push N + pop N (c_dheap): 4731117.9 i/s - 1.11x slower
|
270
|
+
push N + pop N (rb_heap): 976688.6 i/s - 5.40x slower
|
271
|
+
|
272
|
+
At N=100 findmin still beats a pure-ruby heap. But above that it slows down too
|
273
|
+
much to be useful. At N=10k, bsearch still beats a pure ruby heap, but above
|
274
|
+
30k it slows down too much to be useful. `DHeap` consistently runs 4.5-5.5x
|
275
|
+
faster than the pure ruby heap.
|
276
|
+
|
277
|
+
#### push & pop on N-item heap
|
278
|
+
|
279
|
+
This measures the combined time to push once and pop once, which is done
|
280
|
+
repeatedly while keeping a stable heap size of N. Its an approximation for
|
281
|
+
scenarios which reach a stable size and then plateau with balanced pushes and
|
282
|
+
pops. E.g. timers and timeouts will often reschedule themselves or replace
|
283
|
+
themselves with new timers or timeouts, maintaining a roughly stable total count
|
284
|
+
of timers.
|
285
|
+
|
286
|
+
![bar graph for push_pop benchmarks](./images/push_pop.png)
|
287
|
+
|
288
|
+
push + pop (findmin)
|
289
|
+
N 10: 5480288.0 i/s
|
290
|
+
N 100: 2595178.8 i/s - 2.11x slower
|
291
|
+
N 1000: 224813.9 i/s - 24.38x slower
|
292
|
+
N 10000: 12630.7 i/s - 433.89x slower
|
293
|
+
N 100000: 1097.3 i/s - 4994.31x slower
|
294
|
+
N 1000000: 135.9 i/s - 40313.05x slower
|
295
|
+
N 10000000: 12.9 i/s - 425838.01x slower
|
296
|
+
|
297
|
+
push + pop (bsearch)
|
298
|
+
N 10: 3931408.4 i/s
|
299
|
+
N 100: 2904181.8 i/s - 1.35x slower
|
300
|
+
N 1000: 2203157.1 i/s - 1.78x slower
|
301
|
+
N 10000: 1209584.9 i/s - 3.25x slower
|
302
|
+
N 100000: 81121.4 i/s - 48.46x slower
|
303
|
+
N 1000000: 5356.0 i/s - 734.02x slower
|
304
|
+
N 10000000: 281.9 i/s - 13946.33x slower
|
305
|
+
|
306
|
+
push + pop (rb_heap)
|
307
|
+
N 10: 2325816.5 i/s
|
308
|
+
N 100: 1603540.3 i/s - 1.45x slower
|
309
|
+
N 1000: 1262515.2 i/s - 1.84x slower
|
310
|
+
N 10000: 950389.3 i/s - 2.45x slower
|
311
|
+
N 100000: 732548.8 i/s - 3.17x slower
|
312
|
+
N 1000000: 673577.8 i/s - 3.45x slower
|
313
|
+
N 10000000: 467512.3 i/s - 4.97x slower
|
314
|
+
|
315
|
+
push + pop (c++ stl)
|
316
|
+
N 10: 7706818.6 i/s - 1.01x slower
|
317
|
+
N 100: 7393127.3 i/s - 1.05x slower
|
318
|
+
N 1000: 6898781.3 i/s - 1.13x slower
|
319
|
+
N 10000: 5731130.5 i/s - 1.36x slower
|
320
|
+
N 100000: 4842393.2 i/s - 1.60x slower
|
321
|
+
N 1000000: 4170936.4 i/s - 1.86x slower
|
322
|
+
N 10000000: 2737146.6 i/s - 2.84x slower
|
323
|
+
|
324
|
+
push + pop (c_dheap)
|
325
|
+
N 10: 10196454.1 i/s
|
326
|
+
N 100: 9668679.8 i/s - 1.05x slower
|
327
|
+
N 1000: 9339557.0 i/s - 1.09x slower
|
328
|
+
N 10000: 8045103.0 i/s - 1.27x slower
|
329
|
+
N 100000: 7150276.7 i/s - 1.43x slower
|
330
|
+
N 1000000: 6490261.6 i/s - 1.57x slower
|
331
|
+
N 10000000: 3734856.5 i/s - 2.73x slower
|
332
|
+
|
333
|
+
## Time complexity analysis
|
334
|
+
|
335
|
+
There are two fundamental heap operations: sift-up (used by push or decrease
|
336
|
+
score) and sift-down (used by pop or delete or increase score). Each sift
|
337
|
+
bubbles an item to its correct location in the tree.
|
338
|
+
|
339
|
+
* A _d_-ary heap has `log n / log d` layers, so either sift performs as many as
|
340
|
+
`log n / log d` writes, when a member sifts the entire length of the tree.
|
341
|
+
* Sift-up needs one comparison per layer: `O(log n / log d)`.
|
342
|
+
* Sift-down needs d comparisons per layer: `O(d log n / log d)`.
|
343
|
+
|
344
|
+
So, in the case of a balanced push then pop, as many as `(1 + d) log n / log d`
|
345
|
+
comparisons are made. Looking only at this worst case combo, `d=4` requires the
|
346
|
+
fewest comparisons for a combined push and pop:
|
347
|
+
|
348
|
+
* `(1 + 2) log n / log 2 ≈ 4.328085 log n`
|
349
|
+
* `(1 + 3) log n / log 3 ≈ 3.640957 log n`
|
350
|
+
* `(1 + 4) log n / log 4 ≈ 3.606738 log n`
|
351
|
+
* `(1 + 5) log n / log 5 ≈ 3.728010 log n`
|
352
|
+
* `(1 + 6) log n / log 6 ≈ 3.906774 log n`
|
353
|
+
* `(1 + 7) log n / log 7 ≈ 4.111187 log n`
|
354
|
+
* `(1 + 8) log n / log 8 ≈ 4.328085 log n`
|
355
|
+
* `(1 + 9) log n / log 9 ≈ 4.551196 log n`
|
356
|
+
* `(1 + 10) log n / log 10 ≈ 4.777239 log n`
|
357
|
+
* etc...
|
150
358
|
|
151
359
|
See https://en.wikipedia.org/wiki/D-ary_heap#Analysis for deeper analysis.
|
152
360
|
|
153
|
-
|
361
|
+
However, what this simple count of comparisons misses is the extent to which
|
362
|
+
modern compilers can optimize code (e.g. by unrolling the comparison loop to
|
363
|
+
execute on registers) and, more importantly, how well modern processors perform
|
364
|
+
pipelined speculative execution using branch prediction, etc. Benchmarks should
|
365
|
+
be run on the _exact same_ hardware platform that production code will use,
|
366
|
+
as the sift-down operation is especially sensitive to good pipelining.
|
154
367
|
|
155
|
-
|
156
|
-
of d. However higher d values may provide better cache locality.
|
368
|
+
## Comparison performance
|
157
369
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
for value) as it would if it only supported intrinsic comparison or used an
|
163
|
-
un-memoized "sort_by" proc.
|
370
|
+
It is often useful to use external scores for otherwise uncomparable values.
|
371
|
+
And casting an item or score (via `to_f`) can also be time consuming. So
|
372
|
+
`DHeap` evaluates and stores scores at the time of insertion, and they will be
|
373
|
+
compared directly without needing any further lookup.
|
164
374
|
|
165
|
-
|
375
|
+
Numeric values can be compared _much_ faster than other ruby objects, even if
|
376
|
+
those objects simply delegate comparison to internal Numeric values.
|
377
|
+
Additionally, native C integers or floats can be compared _much_ faster than
|
378
|
+
ruby `Numeric` objects. So scores are converted to Float and stored as
|
379
|
+
`double`, which is 64 bits on an [LP64 64-bit system].
|
166
380
|
|
167
|
-
|
168
|
-
* New timers usually sort after most existing timers.
|
169
|
-
* Most timers will be canceled before executing.
|
170
|
-
* Canceled timers usually sort after most existing timers.
|
171
|
-
|
172
|
-
So, if we are able to delete an item without searching for it, by keeping a map
|
173
|
-
of positions within the heap, most timers can be inserted and deleted in O(1)
|
174
|
-
time. Canceling a non-leaf timer can be further optimized by marking it as
|
175
|
-
canceled without immediately removing it from the heap. If the timer is
|
176
|
-
rescheduled before we garbage collect, adjusting its position will usually be
|
177
|
-
faster than a delete and re-insert.
|
381
|
+
[LP64 64-bit system]: https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models
|
178
382
|
|
179
383
|
## Alternative data structures
|
180
384
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
385
|
+
As always, you should run benchmarks with your expected scenarios to determine
|
386
|
+
which is best for your application.
|
387
|
+
|
388
|
+
Depending on your use-case, using a sorted `Array` using `#bsearch_index`
|
389
|
+
and `#insert` might be just fine! It only takes a couple of lines of code and
|
390
|
+
is probably "Fast Enough".
|
391
|
+
|
392
|
+
More complex heap variants, e.g. the [Fibonacci heap], allow heaps to be split and
|
393
|
+
merged which gives some graph algorithms a lower amortized time complexity. But
|
394
|
+
in practice, _d_-ary heaps have much lower overhead and often run faster.
|
395
|
+
|
396
|
+
[Fibonacci heap]: https://en.wikipedia.org/wiki/Fibonacci_heap
|
187
397
|
|
188
398
|
If it is important to be able to quickly enumerate the set or find the ranking
|
189
|
-
of values in it, then you
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
399
|
+
of values in it, then you may want to use a self-balancing binary search tree
|
400
|
+
(e.g. a [red-black tree]) or a [skip-list].
|
401
|
+
|
402
|
+
[red-black tree]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
403
|
+
[skip-list]: https://en.wikipedia.org/wiki/Skip_list
|
404
|
+
|
405
|
+
[Hashed and Hierarchical Timing Wheels][timing wheel] (or some variant in the
|
406
|
+
timing wheel family of data structures) can have effectively `O(1)` running time
|
407
|
+
in most cases. Although the implementation for that data structure is more
|
408
|
+
complex than a heap, it may be necessary for enormous values of N.
|
409
|
+
|
410
|
+
[timing wheel]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf
|
411
|
+
|
412
|
+
## Supported platforms
|
413
|
+
|
414
|
+
See the [CI workflow] for all supported platforms.
|
415
|
+
|
416
|
+
[CI workflow]: https://github.com/nevans/d_heap/actions?query=workflow%3ACI
|
417
|
+
|
418
|
+
`d_heap` may contain bugs on 32-bit systems. Currently, `d_heap` is only tested
|
419
|
+
on 64-bit x86 CRuby 2.4-3.0 under Linux and Mac OS.
|
420
|
+
|
421
|
+
## Caveats and TODOs (PRs welcome!)
|
422
|
+
|
423
|
+
A `DHeap`'s internal array grows but never shrinks. At the very least, there
|
424
|
+
should be a `#compact` or `#shrink` method, and compaction should occur during `#freeze`. It might make
|
425
|
+
sense to automatically shrink (to no more than 2x the current size) during GC's
|
426
|
+
compact phase.
|
427
|
+
|
428
|
+
Benchmark sift-down min-child comparisons using SSE, AVX2, and AVX512F. This
|
429
|
+
might lead to a different default `d` value (maybe 16 or 24?).
|
430
|
+
|
431
|
+
Shrink scores to 64-bits: either store a type flag with each entry (this could
|
432
|
+
be used to support non-numeric scores) or require users to choose between
|
433
|
+
`Integer` or `Float` at construction time. Reducing memory usage should also
|
434
|
+
improve speed for very large heaps.
|
435
|
+
|
436
|
+
Patches to support JRuby, rubinius, 32-bit systems, or any other platforms are
|
437
|
+
welcome! JRuby and Truffle Ruby ought to be able to use [Java's PriorityQueue]?
|
438
|
+
Other platforms could fallback on the (slower) pure ruby implementation used by
|
439
|
+
the benchmarks.
|
440
|
+
|
441
|
+
[Java's PriorityQueue]: https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/PriorityQueue.html
|
442
|
+
|
443
|
+
Allow a max-heap (or other configurations of the compare function). This can be
|
444
|
+
very easily implemented by just reversing the scores.
|
445
|
+
|
446
|
+
_Maybe_ allow non-numeric scores to be compared with `<=>`, _only_ if the basic
|
447
|
+
numeric use case simplicity and speed can be preserved.
|
448
|
+
|
449
|
+
Consider `DHeap::Monotonic`, which could rely on `#pop_below` for "current time"
|
450
|
+
and move all values below that time onto an Array.
|
451
|
+
|
452
|
+
Consider adding `DHeap::Lazy` or `DHeap.new(lazy: true)` which could contain
|
453
|
+
some features that are loosely inspired by go's timers. Go lazily sifts its
|
454
|
+
heap after deletion or adjustments, to achieve faster amortized runtime.
|
455
|
+
There's no need to actually remove a deleted item from the heap, if you re-add
|
456
|
+
it back before it's next evaluated. A similar trick can be to store "far away"
|
457
|
+
values in an internal `Hash`, assuming many will be deleted before they rise to
|
458
|
+
the top. This could naturally evolve into a [timing wheel] variant.
|
197
459
|
|
198
460
|
## Development
|
199
461
|
|