d_heap 0.2.2 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/.gitignore +1 -0
- data/.rubocop.yml +40 -1
- data/.yardopts +10 -0
- data/CHANGELOG.md +76 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +26 -1
- data/N +7 -0
- data/README.md +358 -147
- data/benchmarks/perf.rb +29 -0
- data/benchmarks/push_n.yml +35 -0
- data/benchmarks/push_n_pop_n.yml +52 -0
- data/benchmarks/push_pop.yml +32 -0
- data/benchmarks/stackprof.rb +31 -0
- data/bin/bench_charts +13 -0
- data/bin/bench_n +7 -0
- data/bin/benchmark-driver +29 -0
- data/bin/benchmarks +10 -0
- data/bin/profile +10 -0
- data/d_heap.gemspec +5 -2
- data/docs/benchmarks-2.txt +75 -0
- data/docs/benchmarks-mem.txt +39 -0
- data/docs/benchmarks.txt +515 -0
- data/docs/profile.txt +392 -0
- data/ext/d_heap/d_heap.c +824 -246
- data/ext/d_heap/extconf.rb +16 -3
- data/images/push_n.png +0 -0
- data/images/push_n_pop_n.png +0 -0
- data/images/push_pop.png +0 -0
- data/images/wikipedia-min-heap.png +0 -0
- data/lib/benchmark_driver/runner/ips_zero_fail.rb +158 -0
- data/lib/d_heap.rb +92 -3
- data/lib/d_heap/benchmarks.rb +112 -0
- data/lib/d_heap/benchmarks/benchmarker.rb +116 -0
- data/lib/d_heap/benchmarks/implementations.rb +224 -0
- data/lib/d_heap/benchmarks/profiler.rb +71 -0
- data/lib/d_heap/benchmarks/rspec_matchers.rb +352 -0
- data/lib/d_heap/version.rb +1 -1
- metadata +60 -6
- data/ext/d_heap/d_heap.h +0 -65
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ad095ff29343f83c8bbe6fd0bc7f4acd79fa9c298aa4f8d007acf02ebedba30
|
4
|
+
data.tar.gz: b2806a066a173a83d12259342c3f7d90900c83dc628063955d861f05acc98796
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 297aad8a8b4c7845fbea64808a2beaf4aa66b8431a23841c3d17952aaf85f41a3377c2dadc7651858e038adc69a35b2fe8e6ca484d45999f026efb41817e281b
|
7
|
+
data.tar.gz: 1e3f123c7f723c752b2e8326c70b4208188ad09c275574bd0cee3dc7a119c7e3f07173f4ad4ed32035d2103a10b1a979400dfa35bdc1dd55272b53bcc8eaa2b9
|
data/.github/workflows/main.yml
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
name:
|
1
|
+
name: CI
|
2
2
|
|
3
3
|
on: [push,pull_request]
|
4
4
|
|
@@ -7,7 +7,7 @@ jobs:
|
|
7
7
|
strategy:
|
8
8
|
fail-fast: false
|
9
9
|
matrix:
|
10
|
-
ruby: [2.5, 2.6, 2.7, 3.0]
|
10
|
+
ruby: [2.4, 2.5, 2.6, 2.7, 3.0]
|
11
11
|
os: [ubuntu, macos]
|
12
12
|
experimental: [false]
|
13
13
|
runs-on: ${{ matrix.os }}-latest
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -3,9 +3,10 @@ inherit_mode:
|
|
3
3
|
- Exclude
|
4
4
|
|
5
5
|
AllCops:
|
6
|
-
TargetRubyVersion: 2.
|
6
|
+
TargetRubyVersion: 2.4
|
7
7
|
NewCops: disable
|
8
8
|
Exclude:
|
9
|
+
- bin/benchmark-driver
|
9
10
|
- bin/rake
|
10
11
|
- bin/rspec
|
11
12
|
- bin/rubocop
|
@@ -44,6 +45,7 @@ Layout/EmptyLineBetweenDefs:
|
|
44
45
|
Layout/EmptyLinesAroundAttributeAccessor:
|
45
46
|
inherit_mode:
|
46
47
|
merge:
|
48
|
+
- Exclude
|
47
49
|
- AllowedMethods
|
48
50
|
Enabled: true
|
49
51
|
AllowedMethods:
|
@@ -105,26 +107,49 @@ Naming/RescuedExceptionsVariableName: { Enabled: false }
|
|
105
107
|
###########################################################################
|
106
108
|
# Metrics:
|
107
109
|
|
110
|
+
Metrics/CyclomaticComplexity:
|
111
|
+
Max: 10
|
112
|
+
|
108
113
|
# Although it may be better to split specs into multiple files...?
|
109
114
|
Metrics/BlockLength:
|
110
115
|
Exclude:
|
111
116
|
- "spec/**/*_spec.rb"
|
117
|
+
CountAsOne:
|
118
|
+
- array
|
119
|
+
- hash
|
120
|
+
- heredoc
|
121
|
+
|
122
|
+
Metrics/ClassLength:
|
123
|
+
Max: 200
|
124
|
+
CountAsOne:
|
125
|
+
- array
|
126
|
+
- hash
|
127
|
+
- heredoc
|
112
128
|
|
113
129
|
###########################################################################
|
114
130
|
# Style...
|
115
131
|
|
116
132
|
Style/AccessorGrouping: { Enabled: false }
|
117
133
|
Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
|
134
|
+
Style/ClassAndModuleChildren: { Enabled: false }
|
118
135
|
Style/EachWithObject: { Enabled: false }
|
119
136
|
Style/FormatStringToken: { Enabled: false }
|
120
137
|
Style/FloatDivision: { Enabled: false }
|
138
|
+
Style/IfUnlessModifier: { Enabled: false }
|
139
|
+
Style/IfWithSemicolon: { Enabled: false }
|
121
140
|
Style/Lambda: { Enabled: false }
|
122
141
|
Style/LineEndConcatenation: { Enabled: false }
|
123
142
|
Style/MixinGrouping: { Enabled: false }
|
143
|
+
Style/MultilineBlockChain: { Enabled: false }
|
124
144
|
Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
|
125
145
|
Style/RescueStandardError: { Enabled: false }
|
146
|
+
Style/Semicolon: { Enabled: false }
|
126
147
|
Style/SingleLineMethods: { Enabled: false }
|
127
148
|
Style/StabbyLambdaParentheses: { Enabled: false }
|
149
|
+
Style/WhenThen : { Enabled: false }
|
150
|
+
|
151
|
+
# I require trailing commas elsewhere, but these are optional
|
152
|
+
Style/TrailingCommaInArguments: { Enabled: false }
|
128
153
|
|
129
154
|
# If rubocop had an option to only enforce this on constants and literals (e.g.
|
130
155
|
# strings, regexp, range), I'd agree.
|
@@ -139,8 +164,19 @@ Style/TernaryParentheses:
|
|
139
164
|
Enabled: false
|
140
165
|
|
141
166
|
Style/BlockDelimiters:
|
167
|
+
inherit_mode:
|
168
|
+
merge:
|
169
|
+
- Exclude
|
170
|
+
- ProceduralMethods
|
171
|
+
- IgnoredMethods
|
172
|
+
- FunctionalMethods
|
142
173
|
EnforcedStyle: semantic
|
143
174
|
AllowBracesOnProceduralOneLiners: true
|
175
|
+
IgnoredMethods:
|
176
|
+
- expect # rspec
|
177
|
+
- profile # ruby-prof
|
178
|
+
- ips # benchmark-ips
|
179
|
+
|
144
180
|
|
145
181
|
Style/FormatString:
|
146
182
|
EnforcedStyle: percent
|
@@ -158,3 +194,6 @@ Style/TrailingCommaInHashLiteral:
|
|
158
194
|
|
159
195
|
Style/TrailingCommaInArrayLiteral:
|
160
196
|
EnforcedStyleForMultiline: consistent_comma
|
197
|
+
|
198
|
+
Style/YodaCondition:
|
199
|
+
EnforcedStyle: forbid_for_equality_operators_only
|
data/.yardopts
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
## Current/Unreleased
|
2
|
+
|
3
|
+
## Release v0.6.1 (2021-01-24)
|
4
|
+
|
5
|
+
* 📝 Fix link to CHANGELOG.md in gemspec
|
6
|
+
|
7
|
+
## Release v0.6.0 (2021-01-24)
|
8
|
+
|
9
|
+
* 🔥 **Breaking**: `#initialize` uses a keyword argument for `d`
|
10
|
+
* ✨ Added `#initialize(capacity: capa)` to set initial capacity.
|
11
|
+
* ✨ Added `peek_with_score` and `peek_score`
|
12
|
+
* ✨ Added `pop_with_score` and `each_pop(with_score: true)`
|
13
|
+
* ✨ Added `pop_all_below(max_score, array = [])`
|
14
|
+
* ✨ Added aliases for `shift` and `next`
|
15
|
+
* 📈 Added benchmark charts to README, and `bin/bench_charts` to generate them.
|
16
|
+
* requires `gruff` which requires `rmagick` which requires `imagemagick`
|
17
|
+
* 📝 Many documentation updates and fixes.
|
18
|
+
|
19
|
+
## Release v0.5.0 (2021-01-17)
|
20
|
+
|
21
|
+
* 🔥 **Breaking**: reversed order of `#push` arguments to `value, score`.
|
22
|
+
* ✨ Added `#insert(score, value)` to replace earlier version of `#push`.
|
23
|
+
* ✨ Added `#each_pop` enumerator.
|
24
|
+
* ✨ Added aliases for `deq`, `enq`, `first`, `pop_below`, `length`, and
|
25
|
+
`count`, to mimic other classes in ruby's stdlib.
|
26
|
+
* ⚡️♻️ More performance improvements:
|
27
|
+
* Created an `ENTRY` struct and store both the score and the value pointer in
|
28
|
+
the same `ENTRY *entries` array.
|
29
|
+
* Reduced unnecessary allocations or copies in both sift loops. A similar
|
30
|
+
refactoring also sped up the pure ruby benchmark implementation.
|
31
|
+
* Compiling with `-O3`.
|
32
|
+
* 📝 Updated (and in some cases, fixed) yardoc
|
33
|
+
* ♻️ Moved aliases and less performance sensitive code into ruby.
|
34
|
+
* ♻️ DRY up push/insert methods
|
35
|
+
|
36
|
+
## Release v0.4.0 (2021-01-12)
|
37
|
+
|
38
|
+
* 🔥 **Breaking**: Scores must be `Integer` or convertible to `Float`
|
39
|
+
* ⚠️ `Integer` scores must fit in `-ULONG_LONG_MAX` to `+ULONG_LONG_MAX`.
|
40
|
+
* ⚡️ Big performance improvements, by using C `long double *cscores` array
|
41
|
+
* ⚡️ many many (so many) updates to benchmarks
|
42
|
+
* ✨ Added `DHeap#clear`
|
43
|
+
* 🐛 Fixed `DHeap#initialize_copy` and `#freeze`
|
44
|
+
* ♻️ significant refactoring
|
45
|
+
* 📝 Updated docs (mostly adding benchmarks)
|
46
|
+
|
47
|
+
## Release v0.3.0 (2020-12-29)
|
48
|
+
|
49
|
+
* 🔥 **Breaking**: Removed class methods that operated directly on an array.
|
50
|
+
They weren't compatible with the performance improvements.
|
51
|
+
* ⚡️ Big performance improvements, by converting to a `T_DATA` struct.
|
52
|
+
* ♻️ Major refactoring/rewriting of dheap.c
|
53
|
+
* ✅ Added benchmark specs
|
54
|
+
|
55
|
+
## Release v0.2.2 (2020-12-27)
|
56
|
+
|
57
|
+
* 🐛 fix `optimized_cmp`, avoiding internal symbols
|
58
|
+
* 📝 Update documentation
|
59
|
+
* 💚 fix macos CI
|
60
|
+
* ➕ Add rubocop 👮🎨
|
61
|
+
|
62
|
+
## Release v0.2.1 (2020-12-26)
|
63
|
+
|
64
|
+
* ⬆️ Upgraded rake (and bundler) to support ruby 3.0
|
65
|
+
|
66
|
+
## Release v0.2.0 (2020-12-24)
|
67
|
+
|
68
|
+
* ✨ Add ability to push separate score and value
|
69
|
+
* ⚡️ Big performance gain, by storing scores separately and using ruby's
|
70
|
+
internal `OPTIMIZED_CMP` instead of always directly calling `<=>`
|
71
|
+
|
72
|
+
## Release v0.1.0 (2020-12-22)
|
73
|
+
|
74
|
+
🎉 initial release 🎉
|
75
|
+
|
76
|
+
* ✨ Add basic d-ary Heap implementation
|
data/Gemfile
CHANGED
@@ -5,7 +5,16 @@ source "https://rubygems.org"
|
|
5
5
|
# Specify your gem's dependencies in d_heap.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
+
gem "pry"
|
8
9
|
gem "rake", "~> 13.0"
|
9
10
|
gem "rake-compiler"
|
10
11
|
gem "rspec", "~> 3.10"
|
11
12
|
gem "rubocop", "~> 1.0"
|
13
|
+
|
14
|
+
install_if -> { RUBY_PLATFORM !~ /darwin/ } do
|
15
|
+
gem "benchmark_driver-output-gruff"
|
16
|
+
end
|
17
|
+
|
18
|
+
gem "perf"
|
19
|
+
gem "priority_queue_cxx"
|
20
|
+
gem "stackprof"
|
data/Gemfile.lock
CHANGED
@@ -1,22 +1,38 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
d_heap (0.
|
4
|
+
d_heap (0.6.1)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
ast (2.4.1)
|
10
|
+
benchmark_driver (0.15.16)
|
11
|
+
benchmark_driver-output-gruff (0.3.1)
|
12
|
+
benchmark_driver (>= 0.12.0)
|
13
|
+
gruff
|
14
|
+
coderay (1.1.3)
|
10
15
|
diff-lcs (1.4.4)
|
16
|
+
gruff (0.12.1)
|
17
|
+
histogram
|
18
|
+
rmagick
|
19
|
+
histogram (0.2.4.1)
|
20
|
+
method_source (1.0.0)
|
11
21
|
parallel (1.19.2)
|
12
22
|
parser (2.7.2.0)
|
13
23
|
ast (~> 2.4.1)
|
24
|
+
perf (0.1.2)
|
25
|
+
priority_queue_cxx (0.3.4)
|
26
|
+
pry (0.13.1)
|
27
|
+
coderay (~> 1.1)
|
28
|
+
method_source (~> 1.0)
|
14
29
|
rainbow (3.0.0)
|
15
30
|
rake (13.0.3)
|
16
31
|
rake-compiler (1.1.1)
|
17
32
|
rake
|
18
33
|
regexp_parser (1.8.2)
|
19
34
|
rexml (3.2.3)
|
35
|
+
rmagick (4.1.2)
|
20
36
|
rspec (3.10.0)
|
21
37
|
rspec-core (~> 3.10.0)
|
22
38
|
rspec-expectations (~> 3.10.0)
|
@@ -41,18 +57,27 @@ GEM
|
|
41
57
|
unicode-display_width (>= 1.4.0, < 2.0)
|
42
58
|
rubocop-ast (1.1.1)
|
43
59
|
parser (>= 2.7.1.5)
|
60
|
+
ruby-prof (1.4.2)
|
44
61
|
ruby-progressbar (1.10.1)
|
62
|
+
stackprof (0.2.16)
|
45
63
|
unicode-display_width (1.7.0)
|
46
64
|
|
47
65
|
PLATFORMS
|
48
66
|
ruby
|
49
67
|
|
50
68
|
DEPENDENCIES
|
69
|
+
benchmark_driver
|
70
|
+
benchmark_driver-output-gruff
|
51
71
|
d_heap!
|
72
|
+
perf
|
73
|
+
priority_queue_cxx
|
74
|
+
pry
|
52
75
|
rake (~> 13.0)
|
53
76
|
rake-compiler
|
54
77
|
rspec (~> 3.10)
|
55
78
|
rubocop (~> 1.0)
|
79
|
+
ruby-prof
|
80
|
+
stackprof
|
56
81
|
|
57
82
|
BUNDLED WITH
|
58
83
|
2.2.3
|
data/N
ADDED
data/README.md
CHANGED
@@ -1,53 +1,134 @@
|
|
1
|
-
# DHeap
|
1
|
+
# DHeap - Fast d-ary heap for ruby
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/d_heap.svg)](https://badge.fury.io/rb/d_heap)
|
4
|
+
[![Build Status](https://github.com/nevans/d_heap/workflows/CI/badge.svg)](https://github.com/nevans/d_heap/actions?query=workflow%3ACI)
|
5
|
+
[![Maintainability](https://api.codeclimate.com/v1/badges/ff274acd0683c99c03e1/maintainability)](https://codeclimate.com/github/nevans/d_heap/maintainability)
|
6
|
+
|
7
|
+
A fast [_d_-ary heap][d-ary heap] [priority queue] implementation for ruby,
|
8
|
+
implemented as a C extension.
|
9
|
+
|
10
|
+
From [wikipedia](https://en.wikipedia.org/wiki/Heap_(data_structure)):
|
11
|
+
> A heap is a specialized tree-based data structure which is essentially an
|
12
|
+
> almost complete tree that satisfies the heap property: in a min heap, for any
|
13
|
+
> given node C, if P is a parent node of C, then the key (the value) of P is
|
14
|
+
> less than or equal to the key of C. The node at the "top" of the heap (with no
|
15
|
+
> parents) is called the root node.
|
16
|
+
|
17
|
+
![tree representation of a min heap](images/wikipedia-min-heap.png)
|
18
|
+
|
19
|
+
With a regular queue, you expect "FIFO" behavior: first in, first out. With a
|
20
|
+
stack you expect "LIFO": last in first out. A priority queue has a score for
|
21
|
+
each element and elements are popped in order by score. Priority queues are
|
22
|
+
often used in algorithms for e.g. [scheduling] of timers or bandwidth
|
23
|
+
management, for [Huffman coding], and various graph search algorithms such as
|
24
|
+
[Dijkstra's algorithm], [A* search], or [Prim's algorithm].
|
25
|
+
|
26
|
+
The _d_-ary heap data structure is a generalization of the [binary heap], in
|
27
|
+
which the nodes have _d_ children instead of 2. This allows for "insert" and
|
28
|
+
"decrease priority" operations to be performed more quickly with the tradeoff of
|
29
|
+
slower delete minimum or "increase priority". Additionally, _d_-ary heaps can
|
30
|
+
have better memory cache behavior than binary heaps, allowing them to run more
|
31
|
+
quickly in practice despite slower worst-case time complexity. In the worst
|
32
|
+
case, a _d_-ary heap requires only `O(log n / log d)` operations to push, with
|
33
|
+
the tradeoff that pop requires `O(d log n / log d)`.
|
34
|
+
|
35
|
+
Although you should probably just use the default _d_ value of `4` (see the
|
36
|
+
analysis below), it's always advisable to benchmark your specific use-case. In
|
37
|
+
particular, if you push items more than you pop, higher values for _d_ can give
|
38
|
+
a faster total runtime.
|
39
|
+
|
40
|
+
[d-ary heap]: https://en.wikipedia.org/wiki/D-ary_heap
|
41
|
+
[priority queue]: https://en.wikipedia.org/wiki/Priority_queue
|
42
|
+
[binary heap]: https://en.wikipedia.org/wiki/Binary_heap
|
43
|
+
[scheduling]: https://en.wikipedia.org/wiki/Scheduling_(computing)
|
44
|
+
[Huffman coding]: https://en.wikipedia.org/wiki/Huffman_coding#Compression
|
45
|
+
[Dijkstra's algorithm]: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Using_a_priority_queue
|
46
|
+
[A* search]: https://en.wikipedia.org/wiki/A*_search_algorithm#Description
|
47
|
+
[Prim's algorithm]: https://en.wikipedia.org/wiki/Prim%27s_algorithm
|
2
48
|
|
3
|
-
|
4
|
-
algorithms.
|
49
|
+
## Usage
|
5
50
|
|
6
|
-
The
|
7
|
-
|
8
|
-
operations to be performed more quickly with the tradeoff of slower delete
|
9
|
-
minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
|
10
|
-
binary heaps, allowing them to run more quickly in practice despite slower
|
11
|
-
worst-case time complexity. In the worst case, a _d_-ary heap requires only
|
12
|
-
`O(log n / log d)` to push, with the tradeoff that pop is `O(d log n / log d)`.
|
51
|
+
The basic API is `#push(object, score)` and `#pop`. Please read the
|
52
|
+
[gem documentation] for more details and other methods.
|
13
53
|
|
14
|
-
|
15
|
-
may be worthwhile to benchmark your specific scenario.
|
54
|
+
Quick reference for some common methods:
|
16
55
|
|
17
|
-
|
56
|
+
* `heap << object` adds a value, with `Float(object)` as its score.
|
57
|
+
* `heap.push(object, score)` adds a value with an extrinsic score.
|
58
|
+
* `heap.pop` removes and returns the value with the minimum score.
|
59
|
+
* `heap.pop_lte(max_score)` pops only if the next score is `<=` the argument.
|
60
|
+
* `heap.peek` to view the minimum value without popping it.
|
61
|
+
* `heap.clear` to remove all items from the heap.
|
62
|
+
* `heap.empty?` returns true if the heap is empty.
|
63
|
+
* `heap.size` returns the number of items in the heap.
|
64
|
+
|
65
|
+
If the score changes while the object is still in the heap, it will not be
|
66
|
+
re-evaluated again.
|
67
|
+
|
68
|
+
The score must either be `Integer` or `Float` or convertible to a `Float` via
|
69
|
+
`Float(score)` (i.e. it should implement `#to_f`). Constraining scores to
|
70
|
+
numeric values gives more than 50% speedup under some benchmarks! _n.b._
|
71
|
+
`Integer` _scores must have an absolute value that fits into_ `unsigned long
|
72
|
+
long`. This is compiler and architecture dependent but with gcc on an IA-64
|
73
|
+
system it's 64 bits, which gives a range of -18,446,744,073,709,551,615 to
|
74
|
+
+18,446,744,073,709,551,615, which is more than enough to store e.g. POSIX time
|
75
|
+
in nanoseconds.
|
76
|
+
|
77
|
+
_Comparing arbitrary objects via_ `a <=> b` _was the original design and may be
|
78
|
+
added back in a future version,_ if (and only if) _it can be done without
|
79
|
+
impacting the speed of numeric comparisons. The speedup from this constraint is
|
80
|
+
huge!_
|
81
|
+
|
82
|
+
[gem documentation]: https://rubydoc.info/gems/d_heap/DHeap
|
83
|
+
|
84
|
+
### Examples
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
# create some example objects to place in our heap
|
88
|
+
Task = Struct.new(:id, :time) do
|
89
|
+
def to_f; time.to_f end
|
90
|
+
end
|
91
|
+
t1 = Task.new(1, Time.now + 5*60)
|
92
|
+
t2 = Task.new(2, Time.now + 50)
|
93
|
+
t3 = Task.new(3, Time.now + 60)
|
94
|
+
t4 = Task.new(4, Time.now + 5)
|
95
|
+
|
96
|
+
# create the heap
|
97
|
+
require "d_heap"
|
98
|
+
heap = DHeap.new
|
99
|
+
|
100
|
+
# push with an explicit score (which might be extrinsic to the value)
|
101
|
+
heap.push t1, t1.to_f
|
102
|
+
|
103
|
+
# the score will be implicitly cast with Float, so any object with #to_f
|
104
|
+
heap.push t2, t2
|
18
105
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
heap
|
37
|
-
|
38
|
-
heap
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
more than occasionally calling `memcpy`.
|
46
|
-
|
47
|
-
It's likely that MJIT will eventually make the C-extension completely
|
48
|
-
unnecessary. This is definitely hotspot code, and the basic ruby implementation
|
49
|
-
would work fine, if not for that `<=>` overhead. Until then... this gem gets
|
50
|
-
the job done.
|
106
|
+
# if the object has an intrinsic score via #to_f, "<<" is the simplest API
|
107
|
+
heap << t3 << t4
|
108
|
+
|
109
|
+
# pop returns the lowest scored item, and removes it from the heap
|
110
|
+
heap.pop # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
|
111
|
+
heap.pop # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
|
112
|
+
|
113
|
+
# peek returns the lowest scored item, without removing it from the heap
|
114
|
+
heap.peek # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
115
|
+
heap.pop # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
116
|
+
|
117
|
+
# pop_lte handles the common "h.pop if h.peek_score < max" pattern
|
118
|
+
heap.pop_lte(Time.now + 65) # => nil
|
119
|
+
|
120
|
+
# the heap size can be inspected with size and empty?
|
121
|
+
heap.empty? # => false
|
122
|
+
heap.size # => 1
|
123
|
+
heap.pop # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
|
124
|
+
heap.empty? # => true
|
125
|
+
heap.size # => 0
|
126
|
+
|
127
|
+
# popping from an empty heap returns nil
|
128
|
+
heap.pop # => nil
|
129
|
+
```
|
130
|
+
|
131
|
+
Please see the [gem documentation] for more methods and more examples.
|
51
132
|
|
52
133
|
## Installation
|
53
134
|
|
@@ -65,134 +146,264 @@ Or install it yourself as:
|
|
65
146
|
|
66
147
|
$ gem install d_heap
|
67
148
|
|
68
|
-
##
|
149
|
+
## Motivation
|
69
150
|
|
70
|
-
|
151
|
+
One naive approach to a priority queue is to maintain an array in sorted order.
|
152
|
+
This can be very simply implemented in ruby with `Array#bsearch_index` +
|
153
|
+
`Array#insert`. This can be very fast—`Array#pop` is `O(1)`—but the worst-case
|
154
|
+
for insert is `O(n)` because it may need to `memcpy` a significant portion of
|
155
|
+
the array.
|
71
156
|
|
72
|
-
|
73
|
-
|
157
|
+
The standard way to implement a priority queue is with a binary heap. Although
|
158
|
+
this increases the time complexity for `pop` alone, it reduces the combined time
|
159
|
+
complexity for `push` + `pop`. Using a d-ary heap with d > 2
|
160
|
+
makes the tree shorter but broader, which reduces to `O(log n / log d)` while
|
161
|
+
increasing the comparisons needed by sift-down to `O(d log n/ log d)`.
|
74
162
|
|
75
|
-
|
163
|
+
However, I was disappointed when my best ruby heap implementation ran much more
|
164
|
+
slowly than the naive approach—even for heaps containing ten thousand items.
|
165
|
+
Although it _is_ `O(n)`, `memcpy` is _very_ fast, while calling `<=>` from ruby
|
166
|
+
has _much_ higher overhead. And a _d_-heap needs `d + 1` times more comparisons
|
167
|
+
for each push + pop than `bsearch` + `insert`.
|
76
168
|
|
77
|
-
|
78
|
-
|
79
|
-
heap
|
80
|
-
heap << [Time.now + 60, Task.new(3)]
|
81
|
-
heap << [Time.now + 5, Task.new(4)]
|
169
|
+
Additionally, when researching how other systems handle their scheduling, I was
|
170
|
+
inspired by reading go's "timer.go" implementation to experiment with a 4-ary
|
171
|
+
heap instead of the traditional binary heap.
|
82
172
|
|
83
|
-
|
84
|
-
heap.pop.last # => Task[4]
|
85
|
-
heap.pop.last # => Task[2]
|
86
|
-
heap.peak.last # => Task[3]
|
87
|
-
heap.pop.last # => Task[3]
|
88
|
-
heap.pop.last # => Task[1]
|
89
|
-
```
|
173
|
+
## Benchmarks
|
90
174
|
|
91
|
-
|
175
|
+
_See `bin/benchmarks` and `docs/benchmarks.txt`, as well as `bin/profile` and
|
176
|
+
`docs/profile.txt` for much more detail or updated results. These benchmarks
|
177
|
+
were measured with v0.5.0 and ruby 2.7.2 without MJIT enabled._
|
178
|
+
|
179
|
+
These benchmarks use very simple implementations for a pure-ruby heap and an
|
180
|
+
array that is kept sorted using `Array#bsearch_index` and `Array#insert`. For
|
181
|
+
comparison, I also compare to the [priority_queue_cxx gem] which uses the [C++
|
182
|
+
STL priority_queue], and another naive implementation that uses `Array#min` and
|
183
|
+
`Array#delete_at` with an unsorted array.
|
184
|
+
|
185
|
+
In these benchmarks, `DHeap` runs faster than all other implementations for
|
186
|
+
every scenario and every value of N, although the difference is usually more
|
187
|
+
noticeable at higher values of N. The pure ruby heap implementation is
|
188
|
+
competitive for `push` alone at every value of N, but is significantly slower
|
189
|
+
than bsearch + insert for push + pop, until N is _very_ large (somewhere between
|
190
|
+
10k and 100k)!
|
191
|
+
|
192
|
+
[priority_queue_cxx gem]: https://rubygems.org/gems/priority_queue_cxx
|
193
|
+
[C++ STL priority_queue]: http://www.cplusplus.com/reference/queue/priority_queue/
|
194
|
+
|
195
|
+
Three different scenarios are measured:
|
196
|
+
|
197
|
+
### push N items onto an empty heap
|
198
|
+
|
199
|
+
...but never pop (clearing between each set of pushes).
|
200
|
+
|
201
|
+
![bar graph for push_n_pop_n benchmarks](./images/push_n.png)
|
202
|
+
|
203
|
+
### push N items onto an empty heap then pop all N
|
204
|
+
|
205
|
+
Although this could be used for heap sort, we're unlikely to choose heap sort
|
206
|
+
over Ruby's quick sort implementation. I'm using this scenario to represent
|
207
|
+
the amortized cost of creating a heap and (eventually) draining it.
|
208
|
+
|
209
|
+
![bar graph for push_n_pop_n benchmarks](./images/push_n_pop_n.png)
|
210
|
+
|
211
|
+
### push and pop on a heap with N values
|
212
|
+
|
213
|
+
Repeatedly push and pop while keeping a stable heap size. This is a _very
|
214
|
+
simplistic_ approximation for how most scheduler/timer heaps might be used.
|
215
|
+
Usually when a timer fires it will be quickly replaced by a new timer, and the
|
216
|
+
overall count of timers will remain roughly stable.
|
217
|
+
|
218
|
+
![bar graph for push_pop benchmarks](./images/push_pop.png)
|
219
|
+
|
220
|
+
### numbers
|
221
|
+
|
222
|
+
Even for very small values of N, `DHeap` runs faster
|
223
|
+
than the other implementations for each scenario, although the difference is
|
224
|
+
still relatively small. The pure ruby binary heap is 2x or more slower than
|
225
|
+
bsearch + insert for common push/pop scenario.
|
226
|
+
|
227
|
+
== push N (N=5) ==========================================================
|
228
|
+
push N (c_dheap): 1969700.7 i/s
|
229
|
+
push N (c++ stl): 1049738.1 i/s - 1.88x slower
|
230
|
+
push N (rb_heap): 928435.2 i/s - 2.12x slower
|
231
|
+
push N (bsearch): 921060.0 i/s - 2.14x slower
|
232
|
+
|
233
|
+
== push N then pop N (N=5) ===============================================
|
234
|
+
push N + pop N (c_dheap): 1375805.0 i/s
|
235
|
+
push N + pop N (c++ stl): 1134997.5 i/s - 1.21x slower
|
236
|
+
push N + pop N (findmin): 862913.1 i/s - 1.59x slower
|
237
|
+
push N + pop N (bsearch): 762887.1 i/s - 1.80x slower
|
238
|
+
push N + pop N (rb_heap): 506890.4 i/s - 2.71x slower
|
239
|
+
|
240
|
+
== Push/pop with pre-filled queue of size=N (N=5) ========================
|
241
|
+
push + pop (c_dheap): 9044435.5 i/s
|
242
|
+
push + pop (c++ stl): 7534583.4 i/s - 1.20x slower
|
243
|
+
push + pop (findmin): 5026155.1 i/s - 1.80x slower
|
244
|
+
push + pop (bsearch): 4300260.0 i/s - 2.10x slower
|
245
|
+
push + pop (rb_heap): 2299499.7 i/s - 3.93x slower
|
246
|
+
|
247
|
+
By N=21, `DHeap` has pulled significantly ahead of bsearch + insert for all
|
248
|
+
scenarios, but the pure ruby heap is still slower than every other
|
249
|
+
implementation—even resorting the array after every `#push`—in any scenario that
|
250
|
+
uses `#pop`.
|
251
|
+
|
252
|
+
== push N (N=21) =========================================================
|
253
|
+
push N (c_dheap): 464231.4 i/s
|
254
|
+
push N (c++ stl): 305546.7 i/s - 1.52x slower
|
255
|
+
push N (rb_heap): 202803.7 i/s - 2.29x slower
|
256
|
+
push N (bsearch): 168678.7 i/s - 2.75x slower
|
257
|
+
|
258
|
+
== push N then pop N (N=21) ==============================================
|
259
|
+
push N + pop N (c_dheap): 298350.3 i/s
|
260
|
+
push N + pop N (c++ stl): 252227.1 i/s - 1.18x slower
|
261
|
+
push N + pop N (findmin): 161998.7 i/s - 1.84x slower
|
262
|
+
push N + pop N (bsearch): 143432.3 i/s - 2.08x slower
|
263
|
+
push N + pop N (rb_heap): 79622.1 i/s - 3.75x slower
|
264
|
+
|
265
|
+
== Push/pop with pre-filled queue of size=N (N=21) =======================
|
266
|
+
push + pop (c_dheap): 8855093.4 i/s
|
267
|
+
push + pop (c++ stl): 7223079.5 i/s - 1.23x slower
|
268
|
+
push + pop (findmin): 4542913.7 i/s - 1.95x slower
|
269
|
+
push + pop (bsearch): 3461802.4 i/s - 2.56x slower
|
270
|
+
push + pop (rb_heap): 1845488.7 i/s - 4.80x slower
|
271
|
+
|
272
|
+
At higher values of N, a heap's logarithmic growth leads to only a little
|
273
|
+
slowdown of `#push`, while insert's linear growth causes it to run noticeably
|
274
|
+
slower and slower. But because `#pop` is `O(1)` for a sorted array and `O(d log
|
275
|
+
n / log d)` for a heap, scenarios involving both `#push` and `#pop` remain
|
276
|
+
relatively close, and bsearch + insert still runs faster than a pure ruby heap,
|
277
|
+
even up to queues with 10k items. But as queue size increases beyond that,
|
278
|
+
the linear time complexity to keep a sorted array dominates.
|
279
|
+
|
280
|
+
== push + pop (rb_heap)
|
281
|
+
queue size = 10000: 736618.2 i/s
|
282
|
+
queue size = 25000: 670186.8 i/s - 1.10x slower
|
283
|
+
queue size = 50000: 618156.7 i/s - 1.19x slower
|
284
|
+
queue size = 100000: 579250.7 i/s - 1.27x slower
|
285
|
+
queue size = 250000: 572795.0 i/s - 1.29x slower
|
286
|
+
queue size = 500000: 543648.3 i/s - 1.35x slower
|
287
|
+
queue size = 1000000: 513523.4 i/s - 1.43x slower
|
288
|
+
queue size = 2500000: 460848.9 i/s - 1.60x slower
|
289
|
+
queue size = 5000000: 445234.5 i/s - 1.65x slower
|
290
|
+
queue size = 10000000: 423119.0 i/s - 1.74x slower
|
291
|
+
|
292
|
+
== push + pop (bsearch)
|
293
|
+
queue size = 10000: 786334.2 i/s
|
294
|
+
queue size = 25000: 364963.8 i/s - 2.15x slower
|
295
|
+
queue size = 50000: 200520.6 i/s - 3.92x slower
|
296
|
+
queue size = 100000: 88607.0 i/s - 8.87x slower
|
297
|
+
queue size = 250000: 34530.5 i/s - 22.77x slower
|
298
|
+
queue size = 500000: 17965.4 i/s - 43.77x slower
|
299
|
+
queue size = 1000000: 5638.7 i/s - 139.45x slower
|
300
|
+
queue size = 2500000: 1302.0 i/s - 603.93x slower
|
301
|
+
queue size = 5000000: 592.0 i/s - 1328.25x slower
|
302
|
+
queue size = 10000000: 288.8 i/s - 2722.66x slower
|
303
|
+
|
304
|
+
== push + pop (c_dheap)
|
305
|
+
queue size = 10000: 7311366.6 i/s
|
306
|
+
queue size = 50000: 6737824.5 i/s - 1.09x slower
|
307
|
+
queue size = 25000: 6407340.6 i/s - 1.14x slower
|
308
|
+
queue size = 100000: 6254396.3 i/s - 1.17x slower
|
309
|
+
queue size = 250000: 5917684.5 i/s - 1.24x slower
|
310
|
+
queue size = 500000: 5126307.6 i/s - 1.43x slower
|
311
|
+
queue size = 1000000: 4403494.1 i/s - 1.66x slower
|
312
|
+
queue size = 2500000: 3304088.2 i/s - 2.21x slower
|
313
|
+
queue size = 5000000: 2664897.7 i/s - 2.74x slower
|
314
|
+
queue size = 10000000: 2137927.6 i/s - 3.42x slower
|
92
315
|
|
93
|
-
##
|
316
|
+
## Analysis
|
94
317
|
|
95
|
-
|
96
|
-
~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
|
97
|
-
directly handled as a priority queue. These extension methods are meant to be
|
98
|
-
used similarly to how `#bsearch` and `#bsearch_index` might be used.
|
318
|
+
### Time complexity
|
99
319
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
320
|
+
There are two fundamental heap operations: sift-up (used by push) and sift-down
|
321
|
+
(used by pop).
|
322
|
+
|
323
|
+
* A _d_-ary heap will have `log n / log d` layers, so both sift operations can
|
324
|
+
perform as many as `log n / log d` writes, when a member sifts the entire
|
325
|
+
length of the tree.
|
326
|
+
* Sift-up makes one comparison per layer, so push runs in `O(log n / log d)`.
|
327
|
+
* Sift-down makes d comparisons per layer, so pop runs in `O(d log n / log d)`.
|
328
|
+
|
329
|
+
So, in the simplest case of running balanced push/pop while maintaining the same
|
330
|
+
heap size, `(1 + d) log n / log d` comparisons are made. In the worst case,
|
331
|
+
when every sift traverses every layer of the tree, `d=4` requires the fewest
|
332
|
+
comparisons for combined insert and delete:
|
333
|
+
|
334
|
+
* (1 + 2) lg n / lg d ≈ 4.328085 lg n
|
335
|
+
* (1 + 3) lg n / lg d ≈ 3.640957 lg n
|
336
|
+
* (1 + 4) lg n / lg d ≈ 3.606738 lg n
|
337
|
+
* (1 + 5) lg n / lg d ≈ 3.728010 lg n
|
338
|
+
* (1 + 6) lg n / lg d ≈ 3.906774 lg n
|
339
|
+
* (1 + 7) lg n / lg d ≈ 4.111187 lg n
|
340
|
+
* (1 + 8) lg n / lg d ≈ 4.328085 lg n
|
341
|
+
* (1 + 9) lg n / lg d ≈ 4.551196 lg n
|
342
|
+
* (1 + 10) lg n / lg d ≈ 4.777239 lg n
|
343
|
+
* etc...
|
105
344
|
|
106
|
-
|
107
|
-
features that are loosely inspired by go's timers. e.g: It lazily sifts its
|
108
|
-
heap after deletion and adjustments, to achieve faster average runtime for *add*
|
109
|
-
and *cancel* operations.
|
345
|
+
See https://en.wikipedia.org/wiki/D-ary_heap#Analysis for deeper analysis.
|
110
346
|
|
111
|
-
|
112
|
-
experiment with a 4-ary heap instead of the traditional binary heap. In the
|
113
|
-
case of timers, new timers are usually scheduled to run after most of the
|
114
|
-
existing timers. And timers are usually canceled before they have a chance to
|
115
|
-
run. While a binary heap holds 50% of its elements in its last layer, 75% of a
|
116
|
-
4-ary heap will have no children. That diminishes the extra comparison overhead
|
117
|
-
during sift-down.
|
347
|
+
### Space complexity
|
118
348
|
|
119
|
-
|
349
|
+
Space usage is linear, regardless of d. However higher d values may
|
350
|
+
provide better cache locality. Because the heap is a complete d-ary tree, the
|
351
|
+
elements can be stored in an array, without the need for tree or list pointers.
|
120
352
|
|
121
|
-
|
353
|
+
Ruby can compare Numeric values _much_ faster than other ruby objects, even if
|
354
|
+
those objects simply delegate comparison to internal Numeric values. And it is
|
355
|
+
often useful to use external scores for otherwise uncomparable values. So
|
356
|
+
`DHeap` uses twice as many entries (one for score and one for value)
|
357
|
+
as an array which only stores values.
|
122
358
|
|
123
|
-
##
|
359
|
+
## Thread safety
|
124
360
|
|
125
|
-
|
361
|
+
`DHeap` is _not_ thread-safe, so concurrent access from multiple threads needs to
|
362
|
+
take precautions such as locking access behind a mutex.
|
126
363
|
|
127
|
-
|
128
|
-
Swap up performs only a single comparison per swap: O(1).
|
129
|
-
Swap down performs as many as d comparions per swap: O(d).
|
130
|
-
|
131
|
-
Inserting an item is O(log n / log d).
|
132
|
-
Deleting the root is O(d log n / log d).
|
133
|
-
|
134
|
-
Assuming every inserted item is eventually deleted from the root, d=4 requires
|
135
|
-
the fewest comparisons for combined insert and delete:
|
136
|
-
* (1 + 2) lg 2 = 4.328085
|
137
|
-
* (1 + 3) lg 3 = 3.640957
|
138
|
-
* (1 + 4) lg 4 = 3.606738
|
139
|
-
* (1 + 5) lg 5 = 3.728010
|
140
|
-
* (1 + 6) lg 6 = 3.906774
|
141
|
-
* etc...
|
142
|
-
|
143
|
-
Leaf nodes require no comparisons to shift down, and higher values for d have
|
144
|
-
higher percentage of leaf nodes:
|
145
|
-
* d=2 has ~50% leaves,
|
146
|
-
* d=3 has ~67% leaves,
|
147
|
-
* d=4 has ~75% leaves,
|
148
|
-
* and so on...
|
364
|
+
## Alternative data structures
|
149
365
|
|
150
|
-
|
366
|
+
As always, you should run benchmarks with your expected scenarios to determine
|
367
|
+
which is best for your application.
|
151
368
|
|
152
|
-
|
369
|
+
Depending on your use-case, maintaining a sorted `Array` using `#bsearch_index`
|
370
|
+
and `#insert` might be just fine! Even `min` plus `delete` with an unsorted
|
371
|
+
array can be very fast on small queues. Although insertions run with `O(n)`,
|
372
|
+
`memcpy` is so fast on modern hardware that your dataset might not be large
|
373
|
+
enough for it to matter.
|
153
374
|
|
154
|
-
|
155
|
-
|
375
|
+
More complex heap variants, e.g. [Fibonacci heap], allow heaps to be split and
|
376
|
+
merged which gives some graph algorithms a lower amortized time complexity. But
|
377
|
+
in practice, _d_-ary heaps have much lower overhead and often run faster.
|
156
378
|
|
157
|
-
|
158
|
-
ruby objects which delegate comparison to internal Numeric or String objects.
|
159
|
-
And it is often advantageous to use extrinsic scores for uncomparable items.
|
160
|
-
For this, our internal array uses twice as many entries (one for score and one
|
161
|
-
for value) as it would if it only supported intrinsic comparison or used an
|
162
|
-
un-memoized "sort_by" proc.
|
379
|
+
[Fibonacci heap]: https://en.wikipedia.org/wiki/Fibonacci_heap
|
163
380
|
|
164
|
-
|
381
|
+
If it is important to be able to quickly enumerate the set or find the ranking
|
382
|
+
of values in it, then you may want to use a self-balancing binary search tree
|
383
|
+
(e.g. a [red-black tree]) or a [skip-list].
|
165
384
|
|
166
|
-
|
167
|
-
|
168
|
-
* Most timers will be canceled before executing.
|
169
|
-
* Canceled timers usually sort after most existing timers.
|
385
|
+
[red-black tree]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
386
|
+
[skip-list]: https://en.wikipedia.org/wiki/Skip_list
|
170
387
|
|
171
|
-
|
172
|
-
of
|
173
|
-
time
|
174
|
-
|
175
|
-
rescheduled before we garbage collect, adjusting its position will usually be
|
176
|
-
faster than a delete and re-insert.
|
388
|
+
[Hashed and Hierarchical Timing Wheels][timing wheels] (or some variant in that
|
389
|
+
family of data structures) can be constructed to have effectively `O(1)` running
|
390
|
+
time in most cases. Although the implementation for that data structure is more
|
391
|
+
complex than a heap, it may be necessary for enormous values of N.
|
177
392
|
|
178
|
-
|
393
|
+
[timing wheels]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf
|
179
394
|
|
180
|
-
|
181
|
-
`#bsearch_index` and `#insert` might be faster! Although it is technically
|
182
|
-
O(n) for insertions, the implementations for `memcpy` or `memmove` can be *very*
|
183
|
-
fast on modern architectures. Also, it can be faster O(n) on average, if
|
184
|
-
insertions are usually near the end of the array. You should run benchmarks
|
185
|
-
with your expected scenarios to determine which is right.
|
395
|
+
## TODOs...
|
186
396
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
397
|
+
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Map`, which augments the
|
398
|
+
basic heap with an internal `Hash`, which maps objects to their position in the
|
399
|
+
heap. This enforces a uniqueness constraint on items on the heap, and also
|
400
|
+
allows items to be more efficiently deleted or adjusted. However maintaining
|
401
|
+
the hash does lead to a small drop in normal `#push` and `#pop` performance.
|
402
|
+
|
403
|
+
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Lazy`, which contains some
|
404
|
+
features that are loosely inspired by go's timers. e.g: It lazily sifts its
|
405
|
+
heap after deletion and adjustments, to achieve faster average runtime for *add*
|
406
|
+
and *cancel* operations.
|
196
407
|
|
197
408
|
## Development
|
198
409
|
|