d_heap 0.2.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +26 -0
- data/.rubocop.yml +199 -0
- data/CHANGELOG.md +59 -0
- data/Gemfile +10 -2
- data/Gemfile.lock +42 -5
- data/README.md +392 -109
- data/Rakefile +8 -2
- data/benchmarks/perf.rb +29 -0
- data/benchmarks/push_n.yml +31 -0
- data/benchmarks/push_n_pop_n.yml +35 -0
- data/benchmarks/push_pop.yml +27 -0
- data/benchmarks/stackprof.rb +31 -0
- data/bin/bench_n +7 -0
- data/bin/benchmark-driver +29 -0
- data/bin/benchmarks +10 -0
- data/bin/console +1 -0
- data/bin/profile +10 -0
- data/bin/rubocop +29 -0
- data/d_heap.gemspec +11 -6
- data/docs/benchmarks-2.txt +75 -0
- data/docs/benchmarks-mem.txt +39 -0
- data/docs/benchmarks.txt +515 -0
- data/docs/profile.txt +392 -0
- data/ext/d_heap/d_heap.c +555 -225
- data/ext/d_heap/d_heap.h +24 -48
- data/ext/d_heap/extconf.rb +20 -0
- data/lib/benchmark_driver/runner/ips_zero_fail.rb +120 -0
- data/lib/d_heap.rb +40 -2
- data/lib/d_heap/benchmarks.rb +112 -0
- data/lib/d_heap/benchmarks/benchmarker.rb +116 -0
- data/lib/d_heap/benchmarks/implementations.rb +222 -0
- data/lib/d_heap/benchmarks/profiler.rb +71 -0
- data/lib/d_heap/benchmarks/rspec_matchers.rb +374 -0
- data/lib/d_heap/version.rb +4 -1
- metadata +54 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3dd1049e0a8041a328da4ed65622c2f0589475bc386a0eb6f20e466c79587bc5
|
4
|
+
data.tar.gz: ec44970feaa5ce6aef37f511e71e55342ec93b7e1a0b2a3d40f249afa4e9ac25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6b6e192dbe5980b2b79728e4b4bf413151b3e193733d1435119482edb977f0a2edd692fd156614fdfbc86f4fa1dc6ac9f7907ca640c21ca23050d07b9a1caa6
|
7
|
+
data.tar.gz: 27a987139a1fd14f73c16459f72be2bf1059dadbd212e250a28b3691dff2372e708cdf740155e2d2aa21de78cc789816a9d83f80f42d3c396510cd6fce6e6bf2
|
@@ -0,0 +1,26 @@
|
|
1
|
+
name: Ruby
|
2
|
+
|
3
|
+
on: [push,pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
strategy:
|
8
|
+
fail-fast: false
|
9
|
+
matrix:
|
10
|
+
ruby: [2.5, 2.6, 2.7, 3.0]
|
11
|
+
os: [ubuntu, macos]
|
12
|
+
experimental: [false]
|
13
|
+
runs-on: ${{ matrix.os }}-latest
|
14
|
+
continue-on-error: ${{ matrix.experimental }}
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
bundler-cache: true
|
22
|
+
- name: Run the default task
|
23
|
+
run: |
|
24
|
+
gem install bundler -v 2.2.3
|
25
|
+
bundle install
|
26
|
+
bundle exec rake
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
inherit_mode:
|
2
|
+
merge:
|
3
|
+
- Exclude
|
4
|
+
|
5
|
+
AllCops:
|
6
|
+
TargetRubyVersion: 2.5
|
7
|
+
NewCops: disable
|
8
|
+
Exclude:
|
9
|
+
- bin/benchmark-driver
|
10
|
+
- bin/rake
|
11
|
+
- bin/rspec
|
12
|
+
- bin/rubocop
|
13
|
+
|
14
|
+
###########################################################################
|
15
|
+
# rubocop defaults are simply WRONG about many rules... Sorry. It's true.
|
16
|
+
|
17
|
+
###########################################################################
|
18
|
+
# Layout: Alignment. I want these to work, I really do...
|
19
|
+
|
20
|
+
# I wish this worked with "table". but that goes wrong sometimes.
|
21
|
+
Layout/HashAlignment: { Enabled: false }
|
22
|
+
|
23
|
+
# This needs to be configurable so parenthesis calls are aligned with first
|
24
|
+
# parameter, and non-parenthesis calls are aligned with fixed indentation.
|
25
|
+
Layout/ParameterAlignment: { Enabled: false }
|
26
|
+
|
27
|
+
###########################################################################
|
28
|
+
# Layout: Empty lines
|
29
|
+
|
30
|
+
Layout/EmptyLineAfterGuardClause: { Enabled: false }
|
31
|
+
Layout/EmptyLineAfterMagicComment: { Enabled: true }
|
32
|
+
Layout/EmptyLineAfterMultilineCondition: { Enabled: false }
|
33
|
+
Layout/EmptyLines: { Enabled: true }
|
34
|
+
Layout/EmptyLinesAroundAccessModifier: { Enabled: true }
|
35
|
+
Layout/EmptyLinesAroundArguments: { Enabled: true }
|
36
|
+
Layout/EmptyLinesAroundBeginBody: { Enabled: true }
|
37
|
+
Layout/EmptyLinesAroundBlockBody: { Enabled: false }
|
38
|
+
Layout/EmptyLinesAroundExceptionHandlingKeywords: { Enabled: true }
|
39
|
+
Layout/EmptyLinesAroundMethodBody: { Enabled: true }
|
40
|
+
|
41
|
+
Layout/EmptyLineBetweenDefs:
|
42
|
+
Enabled: true
|
43
|
+
AllowAdjacentOneLineDefs: true
|
44
|
+
|
45
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
46
|
+
inherit_mode:
|
47
|
+
merge:
|
48
|
+
- Exclude
|
49
|
+
- AllowedMethods
|
50
|
+
Enabled: true
|
51
|
+
AllowedMethods:
|
52
|
+
- delegate
|
53
|
+
- def_delegator
|
54
|
+
- def_delegators
|
55
|
+
- def_instance_delegators
|
56
|
+
|
57
|
+
# "empty_lines_special" sometimes does the wrong thing and annoys me.
|
58
|
+
# But I've mostly learned to live with it... mostly. 🙁
|
59
|
+
|
60
|
+
Layout/EmptyLinesAroundClassBody:
|
61
|
+
Enabled: true
|
62
|
+
EnforcedStyle: empty_lines_special
|
63
|
+
|
64
|
+
Layout/EmptyLinesAroundModuleBody:
|
65
|
+
Enabled: true
|
66
|
+
EnforcedStyle: empty_lines_special
|
67
|
+
|
68
|
+
###########################################################################
|
69
|
+
# Layout: Space around, before, inside, etc
|
70
|
+
|
71
|
+
Layout/SpaceAroundEqualsInParameterDefault: { Enabled: false }
|
72
|
+
Layout/SpaceBeforeBlockBraces: { Enabled: false }
|
73
|
+
Layout/SpaceBeforeFirstArg: { Enabled: false }
|
74
|
+
Layout/SpaceInLambdaLiteral: { Enabled: false }
|
75
|
+
Layout/SpaceInsideArrayLiteralBrackets: { Enabled: false }
|
76
|
+
Layout/SpaceInsideHashLiteralBraces: { Enabled: false }
|
77
|
+
|
78
|
+
Layout/SpaceInsideBlockBraces:
|
79
|
+
EnforcedStyle: space
|
80
|
+
EnforcedStyleForEmptyBraces: space
|
81
|
+
SpaceBeforeBlockParameters: false
|
82
|
+
|
83
|
+
# I would enable this if it were a bit better at handling alignment.
|
84
|
+
Layout/ExtraSpacing:
|
85
|
+
Enabled: false
|
86
|
+
AllowForAlignment: true
|
87
|
+
AllowBeforeTrailingComments: true
|
88
|
+
|
89
|
+
###########################################################################
|
90
|
+
# Layout: Misc
|
91
|
+
|
92
|
+
Layout/LineLength:
|
93
|
+
Max: 90 # should stay under 80, but we'll allow a little wiggle-room
|
94
|
+
|
95
|
+
Layout/MultilineOperationIndentation: { Enabled: false }
|
96
|
+
|
97
|
+
Layout/MultilineMethodCallIndentation:
|
98
|
+
EnforcedStyle: indented
|
99
|
+
|
100
|
+
###########################################################################
|
101
|
+
# Lint and Naming: rubocop defaults are mostly good, but...
|
102
|
+
|
103
|
+
Lint/UnusedMethodArgument: { Enabled: false }
|
104
|
+
Naming/BinaryOperatorParameterName: { Enabled: false } # def /(denominator)
|
105
|
+
Naming/RescuedExceptionsVariableName: { Enabled: false }
|
106
|
+
|
107
|
+
###########################################################################
|
108
|
+
# Matrics:
|
109
|
+
|
110
|
+
Metrics/CyclomaticComplexity:
|
111
|
+
Max: 10
|
112
|
+
|
113
|
+
# Although it may be better to split specs into multiple files...?
|
114
|
+
Metrics/BlockLength:
|
115
|
+
Exclude:
|
116
|
+
- "spec/**/*_spec.rb"
|
117
|
+
CountAsOne:
|
118
|
+
- array
|
119
|
+
- hash
|
120
|
+
- heredoc
|
121
|
+
|
122
|
+
Metrics/ClassLength:
|
123
|
+
Max: 200
|
124
|
+
CountAsOne:
|
125
|
+
- array
|
126
|
+
- hash
|
127
|
+
- heredoc
|
128
|
+
|
129
|
+
###########################################################################
|
130
|
+
# Style...
|
131
|
+
|
132
|
+
Style/AccessorGrouping: { Enabled: false }
|
133
|
+
Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
|
134
|
+
Style/ClassAndModuleChildren: { Enabled: false }
|
135
|
+
Style/EachWithObject: { Enabled: false }
|
136
|
+
Style/FormatStringToken: { Enabled: false }
|
137
|
+
Style/FloatDivision: { Enabled: false }
|
138
|
+
Style/IfUnlessModifier: { Enabled: false }
|
139
|
+
Style/IfWithSemicolon: { Enabled: false }
|
140
|
+
Style/Lambda: { Enabled: false }
|
141
|
+
Style/LineEndConcatenation: { Enabled: false }
|
142
|
+
Style/MixinGrouping: { Enabled: false }
|
143
|
+
Style/MultilineBlockChain: { Enabled: false }
|
144
|
+
Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
|
145
|
+
Style/RescueStandardError: { Enabled: false }
|
146
|
+
Style/Semicolon: { Enabled: false }
|
147
|
+
Style/SingleLineMethods: { Enabled: false }
|
148
|
+
Style/StabbyLambdaParentheses: { Enabled: false }
|
149
|
+
Style/WhenThen : { Enabled: false }
|
150
|
+
|
151
|
+
# I require trailing commas elsewhere, but these are optional
|
152
|
+
Style/TrailingCommaInArguments: { Enabled: false }
|
153
|
+
|
154
|
+
# If rubocop had an option to only enforce this on constants and literals (e.g.
|
155
|
+
# strings, regexp, range), I'd agree.
|
156
|
+
#
|
157
|
+
# But if you are using it e.g. on method arguments of unknown type, in the same
|
158
|
+
# style that ruby uses it with grep, then you are doing exactly the right thing.
|
159
|
+
Style/CaseEquality: { Enabled: false }
|
160
|
+
|
161
|
+
# I'd enable if "require_parentheses_when_complex" considered unary '!' simple.
|
162
|
+
Style/TernaryParentheses:
|
163
|
+
EnforcedStyle: require_parentheses_when_complex
|
164
|
+
Enabled: false
|
165
|
+
|
166
|
+
Style/BlockDelimiters:
|
167
|
+
inherit_mode:
|
168
|
+
merge:
|
169
|
+
- Exclude
|
170
|
+
- ProceduralMethods
|
171
|
+
- IgnoredMethods
|
172
|
+
- FunctionalMethods
|
173
|
+
EnforcedStyle: semantic
|
174
|
+
AllowBracesOnProceduralOneLiners: true
|
175
|
+
IgnoredMethods:
|
176
|
+
- expect # rspec
|
177
|
+
- profile # ruby-prof
|
178
|
+
- ips # benchmark-ips
|
179
|
+
|
180
|
+
|
181
|
+
Style/FormatString:
|
182
|
+
EnforcedStyle: percent
|
183
|
+
|
184
|
+
Style/StringLiterals:
|
185
|
+
Enabled: true
|
186
|
+
EnforcedStyle: double_quotes
|
187
|
+
|
188
|
+
Style/StringLiteralsInInterpolation:
|
189
|
+
Enabled: true
|
190
|
+
EnforcedStyle: double_quotes
|
191
|
+
|
192
|
+
Style/TrailingCommaInHashLiteral:
|
193
|
+
EnforcedStyleForMultiline: consistent_comma
|
194
|
+
|
195
|
+
Style/TrailingCommaInArrayLiteral:
|
196
|
+
EnforcedStyleForMultiline: consistent_comma
|
197
|
+
|
198
|
+
Style/YodaCondition:
|
199
|
+
EnforcedStyle: forbid_for_equality_operators_only
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
## Current/Unreleased
|
2
|
+
|
3
|
+
## Release v0.5.0 (2021-01-17)
|
4
|
+
|
5
|
+
* 🔥 **Breaking**: reversed order of `#push` arguments to `value, score`.
|
6
|
+
* ✨ Added `#insert(score, value)` to replace earlier version of `#push`.
|
7
|
+
* ✨ Added `#each_pop` enumerator.
|
8
|
+
* ✨ Added aliases for `deq`, `enq`, `first`, `pop_below`, `length`, and
|
9
|
+
`count`, to mimic other classes in ruby's stdlib.
|
10
|
+
* ⚡️♻️ More performance improvements:
|
11
|
+
* Created an `ENTRY` struct and store both the score and the value pointer in
|
12
|
+
the same `ENTRY *entries` array.
|
13
|
+
* Reduced unnecessary allocations or copies in both sift loops. A similar
|
14
|
+
refactoring also sped up the pure ruby benchmark implementation.
|
15
|
+
* Compiling with `-O3`.
|
16
|
+
* 📝 Updated (and in some cases, fixed) yardoc
|
17
|
+
* ♻️ Moved aliases and less performance sensitive code into ruby.
|
18
|
+
* ♻️ DRY up push/insert methods
|
19
|
+
|
20
|
+
## Release v0.4.0 (2021-01-12)
|
21
|
+
|
22
|
+
* ⚡️ Big performance improvements, by using C `long double *cscores` array
|
23
|
+
* ⚡️ Scores must be `Integer` in `-uint64..+uint64`, or convertable to `Float`
|
24
|
+
* ⚡️ many many (so many) updates to benchmarks
|
25
|
+
* ✨ Added `DHeap#clear`
|
26
|
+
* 🐛 Fixed `DHeap#initialize_copy` and `#freeze`
|
27
|
+
* ♻️ significant refactoring
|
28
|
+
* 📝 Updated docs (mostly adding benchmarks)
|
29
|
+
|
30
|
+
## Release v0.3.0 (2020-12-29)
|
31
|
+
|
32
|
+
* 🔥 **Breaking**: Removed class methods that operated directly on an array.
|
33
|
+
They weren't compatible with the performance improvements.
|
34
|
+
* ⚡️ Big performance improvements, by converting to a `T_DATA` struct.
|
35
|
+
* ♻️ Major refactoring/rewriting of dheap.c
|
36
|
+
* ✅ Added benchmark specs
|
37
|
+
|
38
|
+
## Release v0.2.2 (2020-12-27)
|
39
|
+
|
40
|
+
* 🐛 fix `optimized_cmp`, avoiding internal symbols
|
41
|
+
* 📝 Update documentation
|
42
|
+
* 💚 fix macos CI
|
43
|
+
* ➕ Add rubocop 👮🎨
|
44
|
+
|
45
|
+
## Release v0.2.1 (2020-12-26)
|
46
|
+
|
47
|
+
* ⬆️ Upgraded rake (and bundler) to support ruby 3.0
|
48
|
+
|
49
|
+
## Release v0.2.0 (2020-12-24)
|
50
|
+
|
51
|
+
* ✨ Add ability to push separate score and value
|
52
|
+
* ⚡️ Big performance gain, by storing scores separately and using ruby's
|
53
|
+
internal `OPTIMIZED_CMP` instead of always directly calling `<=>`
|
54
|
+
|
55
|
+
## Release v0.1.0 (2020-12-22)
|
56
|
+
|
57
|
+
🎉 initial release 🎉
|
58
|
+
|
59
|
+
* ✨ Add basic d-ary Heap implementation
|
data/Gemfile
CHANGED
@@ -1,8 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
source "https://rubygems.org"
|
2
4
|
|
3
5
|
# Specify your gem's dependencies in d_heap.gemspec
|
4
6
|
gemspec
|
5
7
|
|
6
|
-
gem "
|
8
|
+
gem "pry"
|
9
|
+
gem "rake", "~> 13.0"
|
7
10
|
gem "rake-compiler"
|
8
|
-
gem "rspec", "~> 3.
|
11
|
+
gem "rspec", "~> 3.10"
|
12
|
+
gem "rubocop", "~> 1.0"
|
13
|
+
|
14
|
+
gem "perf"
|
15
|
+
gem "priority_queue_cxx"
|
16
|
+
gem "stackprof"
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,30 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
d_heap (0.
|
4
|
+
d_heap (0.5.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
+
ast (2.4.1)
|
10
|
+
benchmark_driver (0.15.16)
|
11
|
+
coderay (1.1.3)
|
9
12
|
diff-lcs (1.4.4)
|
10
|
-
|
13
|
+
method_source (1.0.0)
|
14
|
+
parallel (1.19.2)
|
15
|
+
parser (2.7.2.0)
|
16
|
+
ast (~> 2.4.1)
|
17
|
+
perf (0.1.2)
|
18
|
+
priority_queue_cxx (0.3.4)
|
19
|
+
pry (0.13.1)
|
20
|
+
coderay (~> 1.1)
|
21
|
+
method_source (~> 1.0)
|
22
|
+
rainbow (3.0.0)
|
23
|
+
rake (13.0.3)
|
11
24
|
rake-compiler (1.1.1)
|
12
25
|
rake
|
26
|
+
regexp_parser (1.8.2)
|
27
|
+
rexml (3.2.3)
|
13
28
|
rspec (3.10.0)
|
14
29
|
rspec-core (~> 3.10.0)
|
15
30
|
rspec-expectations (~> 3.10.0)
|
@@ -23,15 +38,37 @@ GEM
|
|
23
38
|
diff-lcs (>= 1.2.0, < 2.0)
|
24
39
|
rspec-support (~> 3.10.0)
|
25
40
|
rspec-support (3.10.0)
|
41
|
+
rubocop (1.2.0)
|
42
|
+
parallel (~> 1.10)
|
43
|
+
parser (>= 2.7.1.5)
|
44
|
+
rainbow (>= 2.2.2, < 4.0)
|
45
|
+
regexp_parser (>= 1.8)
|
46
|
+
rexml
|
47
|
+
rubocop-ast (>= 1.0.1)
|
48
|
+
ruby-progressbar (~> 1.7)
|
49
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
50
|
+
rubocop-ast (1.1.1)
|
51
|
+
parser (>= 2.7.1.5)
|
52
|
+
ruby-prof (1.4.2)
|
53
|
+
ruby-progressbar (1.10.1)
|
54
|
+
stackprof (0.2.16)
|
55
|
+
unicode-display_width (1.7.0)
|
26
56
|
|
27
57
|
PLATFORMS
|
28
58
|
ruby
|
29
59
|
|
30
60
|
DEPENDENCIES
|
61
|
+
benchmark_driver
|
31
62
|
d_heap!
|
32
|
-
|
63
|
+
perf
|
64
|
+
priority_queue_cxx
|
65
|
+
pry
|
66
|
+
rake (~> 13.0)
|
33
67
|
rake-compiler
|
34
|
-
rspec (~> 3.
|
68
|
+
rspec (~> 3.10)
|
69
|
+
rubocop (~> 1.0)
|
70
|
+
ruby-prof
|
71
|
+
stackprof
|
35
72
|
|
36
73
|
BUNDLED WITH
|
37
|
-
2.
|
74
|
+
2.2.3
|
data/README.md
CHANGED
@@ -1,139 +1,390 @@
|
|
1
1
|
# DHeap
|
2
2
|
|
3
|
-
A fast _d_-ary heap implementation for ruby,
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
3
|
+
A fast [_d_-ary heap][d-ary heap] [priority queue] implementation for ruby,
|
4
|
+
implemented as a C extension.
|
5
|
+
|
6
|
+
With a regular queue, you expect "FIFO" behavior: first in, first out. With a
|
7
|
+
stack you expect "LIFO": last in first out. A priority queue has a score for
|
8
|
+
each element and elements are popped in order by score. Priority queues are
|
9
|
+
often used in algorithms for e.g. [scheduling] of timers or bandwidth
|
10
|
+
management, for [Huffman coding], and various graph search algorithms such as
|
11
|
+
[Dijkstra's algorithm], [A* search], or [Prim's algorithm].
|
12
|
+
|
13
|
+
The _d_-ary heap data structure is a generalization of the [binary heap], in
|
14
|
+
which the nodes have _d_ children instead of 2. This allows for "insert" and
|
15
|
+
"decrease priority" operations to be performed more quickly with the tradeoff of
|
16
|
+
slower delete minimum. Additionally, _d_-ary heaps can have better memory cache
|
17
|
+
behavior than binary heaps, allowing them to run more quickly in practice
|
18
|
+
despite slower worst-case time complexity. In the worst case, a _d_-ary heap
|
19
|
+
requires only `O(log n / log d)` operations to push, with the tradeoff that pop
|
20
|
+
requires `O(d log n / log d)`.
|
21
|
+
|
22
|
+
Although you should probably just use the default _d_ value of `4` (see the
|
23
|
+
analysis below), it's always advisable to benchmark your specific use-case.
|
24
|
+
|
25
|
+
[d-ary heap]: https://en.wikipedia.org/wiki/D-ary_heap
|
26
|
+
[priority queue]: https://en.wikipedia.org/wiki/Priority_queue
|
27
|
+
[binary heap]: https://en.wikipedia.org/wiki/Binary_heap
|
28
|
+
[scheduling]: https://en.wikipedia.org/wiki/Scheduling_(computing)
|
29
|
+
[Huffman coding]: https://en.wikipedia.org/wiki/Huffman_coding#Compression
|
30
|
+
[Dijkstra's algorithm]: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Using_a_priority_queue
|
31
|
+
[A* search]: https://en.wikipedia.org/wiki/A*_search_algorithm#Description
|
32
|
+
[Prim's algorithm]: https://en.wikipedia.org/wiki/Prim%27s_algorithm
|
23
33
|
|
24
|
-
|
25
|
-
loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
|
26
|
-
and adjustments, to achieve faster average runtime for *add* and *cancel*
|
27
|
-
operations.
|
34
|
+
## Usage
|
28
35
|
|
29
|
-
|
36
|
+
Quick reference:
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
slower than inserting into a fully sorted array. On the one hand, this is a
|
40
|
-
testament to ruby's fine-tuned Array implementation. On the other hand, it
|
41
|
-
seemed like a heap implementated in C should easily match the speed of ruby's
|
42
|
-
bsearch + insert.
|
38
|
+
* `heap << object` adds a value, with `Float(object)` as its score.
|
39
|
+
* `heap.push(object, score)` adds a value with an extrinsic score.
|
40
|
+
* `heap.pop` removes and returns the value with the minimum score.
|
41
|
+
* `heap.pop_lte(score)` pops if the minimum score is `<=` the provided score.
|
42
|
+
* `heap.peek` to view the minimum value without popping it.
|
43
|
+
* `heap.clear` to remove all items from the heap.
|
44
|
+
* `heap.empty?` returns true if the heap is empty.
|
45
|
+
* `heap.size` returns the number of items in the heap.
|
43
46
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
4-ary heap will have no children. That diminishes the extra comparison
|
50
|
-
overhead during sift-down.
|
47
|
+
The basic API is `#push(object, score)` and `pop`. If your values behave as
|
48
|
+
their own score, then you can push with `#<<`. If the score changes while the
|
49
|
+
object is still in the heap, it will not be re-evaluated again. The score must
|
50
|
+
either be `Integer` or `Float` or convertable to a `Float` via `Float(score)`
|
51
|
+
(i.e. it should implement `#to_f`).
|
51
52
|
|
52
|
-
|
53
|
+
```ruby
|
54
|
+
require "d_heap"
|
53
55
|
|
54
|
-
|
56
|
+
Task = Struct.new(:id, :time) do
|
57
|
+
def to_f; time.to_f end
|
58
|
+
end
|
59
|
+
t1 = Task.new(1, Time.now + 5*60)
|
60
|
+
t2 = Task.new(2, Time.now + 50)
|
61
|
+
t3 = Task.new(3, Time.now + 60)
|
62
|
+
t4 = Task.new(4, Time.now + 5)
|
63
|
+
|
64
|
+
# if the object returns its own score via #to_f, "<<" is the simplest API
|
65
|
+
heap << t1 << t2
|
66
|
+
|
67
|
+
# or push with an explicit score
|
68
|
+
heap.push t3, t4.to_f
|
69
|
+
heap.push t4, t4 # score can be implicitly cast with Float
|
70
|
+
|
71
|
+
# peek and pop
|
72
|
+
heap.pop # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
|
73
|
+
heap.pop # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
|
74
|
+
heap.peek # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
75
|
+
heap.pop # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
76
|
+
heap.pop # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
|
77
|
+
heap.empty? # => true
|
78
|
+
heap.pop # => nil
|
79
|
+
```
|
80
|
+
|
81
|
+
Constraining scores to numeric values gives more than 50% speedup under some
|
82
|
+
benchmarks! _n.b._ `Integer` _scores must have an absolute value that fits
|
83
|
+
into_ `unsigned long long`. _This is architecture dependant but on an IA-64
|
84
|
+
system this is 64 bits, which gives a range of -18,446,744,073,709,551,615 to
|
85
|
+
+18446744073709551615. Comparing arbitary objects via_ `a <=> b` _was the
|
86
|
+
original design and may be added back in a future version,_ if (and only if) _it
|
87
|
+
can be done without impacting the speed of numeric comparisons._
|
55
88
|
|
56
89
|
```ruby
|
57
|
-
|
90
|
+
heap.clear
|
91
|
+
|
92
|
+
# The score can be derived from the value by using to_f.
|
93
|
+
# "a <=> b" is *much* slower than comparing numbers, so it isn't used.
|
94
|
+
class Event
|
95
|
+
include Comparable
|
96
|
+
attr_reader :time, :payload
|
97
|
+
alias_method :to_time, :time
|
98
|
+
|
99
|
+
def initialize(time, payload)
|
100
|
+
@time = time.to_time
|
101
|
+
@payload = payload
|
102
|
+
freeze
|
103
|
+
end
|
104
|
+
|
105
|
+
def to_f
|
106
|
+
time.to_f
|
107
|
+
end
|
108
|
+
|
109
|
+
def <=>(other)
|
110
|
+
to_f <=> other.to_f
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
heap << comparable_max # sorts last, using <=>
|
115
|
+
heap << comparable_min # sorts first, using <=>
|
116
|
+
heap << comparable_mid # sorts in the middle, using <=>
|
117
|
+
heap.pop # => comparable_min
|
118
|
+
heap.pop # => comparable_mid
|
119
|
+
heap.pop # => comparable_max
|
120
|
+
heap.empty? # => true
|
121
|
+
heap.pop # => nil
|
58
122
|
```
|
59
123
|
|
60
|
-
|
124
|
+
You can also pass a value into `#pop(max)` which will only pop if the minimum
|
125
|
+
score is less than or equal to `max`.
|
61
126
|
|
62
|
-
|
63
|
-
|
64
|
-
Or install it yourself as:
|
127
|
+
Read the [API documentation] for more detailed documentation and examples.
|
65
128
|
|
66
|
-
|
129
|
+
[API documentation]: https://rubydoc.info/gems/d_heap/DHeap
|
67
130
|
|
68
|
-
##
|
131
|
+
## Installation
|
69
132
|
|
70
|
-
|
133
|
+
Add this line to your application's Gemfile:
|
71
134
|
|
72
135
|
```ruby
|
73
|
-
|
136
|
+
gem 'd_heap'
|
137
|
+
```
|
74
138
|
|
75
|
-
|
139
|
+
And then execute:
|
76
140
|
|
77
|
-
|
78
|
-
heap << [Time.now + 5*60, Task.new(1)]
|
79
|
-
heap << [Time.now + 30, Task.new(2)]
|
80
|
-
heap << [Time.now + 60, Task.new(3)]
|
81
|
-
heap << [Time.now + 5, Task.new(4)]
|
141
|
+
$ bundle install
|
82
142
|
|
83
|
-
|
84
|
-
heap.pop.last # => Task[4]
|
85
|
-
heap.pop.last # => Task[2]
|
86
|
-
heap.peak.last # => Task[3]
|
87
|
-
heap.pop.last # => Task[3]
|
88
|
-
heap.pop.last # => Task[1]
|
89
|
-
```
|
143
|
+
Or install it yourself as:
|
90
144
|
|
91
|
-
|
145
|
+
$ gem install d_heap
|
92
146
|
|
93
|
-
##
|
147
|
+
## Motivation
|
94
148
|
|
95
|
-
|
149
|
+
One naive approach to a priority queue is to maintain an array in sorted order.
|
150
|
+
This can be very simply implemented in ruby with `Array#bseach_index` +
|
151
|
+
`Array#insert`. This can be very fast—`Array#pop` is `O(1)`—but the worst-case
|
152
|
+
for insert is `O(n)` because it may need to `memcpy` a significant portion of
|
153
|
+
the array.
|
154
|
+
|
155
|
+
The standard way to implement a priority queue is with a binary heap. Although
|
156
|
+
this increases the time for `pop`, it converts the amortized time per push + pop
|
157
|
+
from `O(n)` to `O(d log n / log d)`.
|
158
|
+
|
159
|
+
However, I was surprised to find that—at least for some benchmarks—my pure ruby
|
160
|
+
heap implementation was much slower than inserting into and popping from a fully
|
161
|
+
sorted array. The reasons for this surprising result: Although it is `O(n)`,
|
162
|
+
`memcpy` has a _very_ small constant factor, and calling `<=>` from ruby code
|
163
|
+
has relatively _much_ larger constant factors. If your queue contains only a
|
164
|
+
few thousand items, the overhead of those extra calls to `<=>` is _far_ more
|
165
|
+
than occasionally calling `memcpy`. In the worst case, a _d_-heap will require
|
166
|
+
`d + 1` times more comparisons for each push + pop than a `bsearch` + `insert`
|
167
|
+
sorted array.
|
168
|
+
|
169
|
+
Moving the sift-up and sift-down code into C helps some. But much more helpful
|
170
|
+
is optimizing the comparison of numeric scores, so `a <=> b` never needs to be
|
171
|
+
called. I'm hopeful that MJIT will eventually obsolete this C-extension. This
|
172
|
+
can be hotspot code, and a basic ruby implementation could perform well if `<=>`
|
173
|
+
had much lower overhead.
|
96
174
|
|
97
175
|
## Analysis
|
98
176
|
|
99
177
|
### Time complexity
|
100
178
|
|
101
|
-
|
102
|
-
|
103
|
-
Swap down performs as many as d comparions per swap: O(d).
|
179
|
+
There are two fundamental heap operations: sift-up (used by push) and sift-down
|
180
|
+
(used by pop).
|
104
181
|
|
105
|
-
|
106
|
-
|
182
|
+
* Both sift operations can perform as many as `log n / log d` swaps, as the
|
183
|
+
element may sift from the bottom of the tree to the top, or vice versa.
|
184
|
+
* Sift-up performs a single comparison per swap: `O(1)`.
|
185
|
+
So pushing a new element is `O(log n / log d)`.
|
186
|
+
* Swap down performs as many as d comparions per swap: `O(d)`.
|
187
|
+
So popping the min element is `O(d log n / log d)`.
|
107
188
|
|
108
|
-
Assuming every inserted
|
109
|
-
the fewest comparisons for combined insert and delete:
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
189
|
+
Assuming every inserted element is eventually deleted from the root, d=4
|
190
|
+
requires the fewest comparisons for combined insert and delete:
|
191
|
+
|
192
|
+
* (1 + 2) lg 2 = 4.328085
|
193
|
+
* (1 + 3) lg 3 = 3.640957
|
194
|
+
* (1 + 4) lg 4 = 3.606738
|
195
|
+
* (1 + 5) lg 5 = 3.728010
|
196
|
+
* (1 + 6) lg 6 = 3.906774
|
197
|
+
* etc...
|
116
198
|
|
117
199
|
Leaf nodes require no comparisons to shift down, and higher values for d have
|
118
200
|
higher percentage of leaf nodes:
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
201
|
+
|
202
|
+
* d=2 has ~50% leaves,
|
203
|
+
* d=3 has ~67% leaves,
|
204
|
+
* d=4 has ~75% leaves,
|
205
|
+
* and so on...
|
123
206
|
|
124
207
|
See https://en.wikipedia.org/wiki/D-ary_heap#Analysis for deeper analysis.
|
125
208
|
|
126
209
|
### Space complexity
|
127
210
|
|
128
|
-
|
129
|
-
|
211
|
+
Space usage is linear, regardless of d. However higher d values may
|
212
|
+
provide better cache locality. Because the heap is a complete binary tree, the
|
213
|
+
elements can be stored in an array, without the need for tree or list pointers.
|
130
214
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
215
|
+
Ruby can compare Numeric values _much_ faster than other ruby objects, even if
|
216
|
+
those objects simply delegate comparison to internal Numeric values. And it is
|
217
|
+
often useful to use external scores for otherwise uncomparable values. So
|
218
|
+
`DHeap` uses twice as many entries (one for score and one for value)
|
219
|
+
as an array which only stores values.
|
220
|
+
|
221
|
+
## Benchmarks
|
222
|
+
|
223
|
+
_See `bin/benchmarks` and `docs/benchmarks.txt`, as well as `bin/profile` and
|
224
|
+
`docs/profile.txt` for more details or updated results. These benchmarks were
|
225
|
+
measured with v0.5.0 and ruby 2.7.2 without MJIT enabled._
|
226
|
+
|
227
|
+
These benchmarks use very simple implementations for a pure-ruby heap and an
|
228
|
+
array that is kept sorted using `Array#bsearch_index` and `Array#insert`. For
|
229
|
+
comparison, an alternate implementation `Array#min` and `Array#delete_at` is
|
230
|
+
also shown.
|
231
|
+
|
232
|
+
Three different scenarios are measured:
|
233
|
+
* push N values but never pop (clearing between each set of pushes).
|
234
|
+
* push N values and then pop N values.
|
235
|
+
Although this could be used for heap sort, we're unlikely to choose heap sort
|
236
|
+
over Ruby's quick sort implementation. I'm using this scenario to represent
|
237
|
+
the amortized cost of creating a heap and (eventually) draining it.
|
238
|
+
* For a heap of size N, repeatedly push and pop while keeping a stable size.
|
239
|
+
This is a _very simple_ approximation for how most scheduler/timer heaps
|
240
|
+
would be used. Usually when a timer fires it will be quickly replaced by a
|
241
|
+
new timer, and the overall count of timers will remain roughly stable.
|
242
|
+
|
243
|
+
In these benchmarks, `DHeap` runs faster than all other implementations for
|
244
|
+
every scenario and every value of N, although the difference is much more
|
245
|
+
noticable at higher values of N. The pure ruby heap implementation is
|
246
|
+
competitive for `push` alone at every value of N, but is significantly slower
|
247
|
+
than bsearch + insert for push + pop until N is _very_ large (somewhere between
|
248
|
+
10k and 100k)!
|
249
|
+
|
250
|
+
For very small N values the benchmark implementations, `DHeap` runs faster than
|
251
|
+
the other implementations for each scenario, although the difference is still
|
252
|
+
relatively small. The pure ruby binary heap is 2x or more slower than bsearch +
|
253
|
+
insert for common common push/pop scenario.
|
254
|
+
|
255
|
+
== push N (N=5) ==========================================================
|
256
|
+
push N (c_dheap): 1969700.7 i/s
|
257
|
+
push N (c++ stl): 1049738.1 i/s - 1.88x slower
|
258
|
+
push N (rb_heap): 928435.2 i/s - 2.12x slower
|
259
|
+
push N (bsearch): 921060.0 i/s - 2.14x slower
|
260
|
+
|
261
|
+
== push N then pop N (N=5) ===============================================
|
262
|
+
push N + pop N (c_dheap): 1375805.0 i/s
|
263
|
+
push N + pop N (c++ stl): 1134997.5 i/s - 1.21x slower
|
264
|
+
push N + pop N (findmin): 862913.1 i/s - 1.59x slower
|
265
|
+
push N + pop N (bsearch): 762887.1 i/s - 1.80x slower
|
266
|
+
push N + pop N (rb_heap): 506890.4 i/s - 2.71x slower
|
267
|
+
|
268
|
+
== Push/pop with pre-filled queue of size=N (N=5) ========================
|
269
|
+
push + pop (c_dheap): 9044435.5 i/s
|
270
|
+
push + pop (c++ stl): 7534583.4 i/s - 1.20x slower
|
271
|
+
push + pop (findmin): 5026155.1 i/s - 1.80x slower
|
272
|
+
push + pop (bsearch): 4300260.0 i/s - 2.10x slower
|
273
|
+
push + pop (rb_heap): 2299499.7 i/s - 3.93x slower
|
274
|
+
|
275
|
+
By N=21, `DHeap` has pulled significantly ahead of bsearch + insert for all
|
276
|
+
scenarios, but the pure ruby heap is still slower than every other
|
277
|
+
implementation—even resorting the array after every `#push`—in any scenario that
|
278
|
+
uses `#pop`.
|
279
|
+
|
280
|
+
== push N (N=21) =========================================================
|
281
|
+
push N (c_dheap): 464231.4 i/s
|
282
|
+
push N (c++ stl): 305546.7 i/s - 1.52x slower
|
283
|
+
push N (rb_heap): 202803.7 i/s - 2.29x slower
|
284
|
+
push N (bsearch): 168678.7 i/s - 2.75x slower
|
285
|
+
|
286
|
+
== push N then pop N (N=21) ==============================================
|
287
|
+
push N + pop N (c_dheap): 298350.3 i/s
|
288
|
+
push N + pop N (c++ stl): 252227.1 i/s - 1.18x slower
|
289
|
+
push N + pop N (findmin): 161998.7 i/s - 1.84x slower
|
290
|
+
push N + pop N (bsearch): 143432.3 i/s - 2.08x slower
|
291
|
+
push N + pop N (rb_heap): 79622.1 i/s - 3.75x slower
|
292
|
+
|
293
|
+
== Push/pop with pre-filled queue of size=N (N=21) =======================
|
294
|
+
push + pop (c_dheap): 8855093.4 i/s
|
295
|
+
push + pop (c++ stl): 7223079.5 i/s - 1.23x slower
|
296
|
+
push + pop (findmin): 4542913.7 i/s - 1.95x slower
|
297
|
+
push + pop (bsearch): 3461802.4 i/s - 2.56x slower
|
298
|
+
push + pop (rb_heap): 1845488.7 i/s - 4.80x slower
|
299
|
+
|
300
|
+
At higher values of N, a heap's logarithmic growth leads to only a little
|
301
|
+
slowdown of `#push`, while insert's linear growth causes it to run noticeably
|
302
|
+
slower and slower. But because `#pop` is `O(1)` for a sorted array and `O(d log
|
303
|
+
n / log d)` for a heap, scenarios involving both `#push` and `#pop` remain
|
304
|
+
relatively close, and bsearch + insert still runs faster than a pure ruby heap,
|
305
|
+
even up to queues with 10k items. But as queue size increases beyond that,
|
306
|
+
the linear time complexity to keep a sorted array dominates.
|
307
|
+
|
308
|
+
== push + pop (rb_heap)
|
309
|
+
queue size = 10000: 736618.2 i/s
|
310
|
+
queue size = 25000: 670186.8 i/s - 1.10x slower
|
311
|
+
queue size = 50000: 618156.7 i/s - 1.19x slower
|
312
|
+
queue size = 100000: 579250.7 i/s - 1.27x slower
|
313
|
+
queue size = 250000: 572795.0 i/s - 1.29x slower
|
314
|
+
queue size = 500000: 543648.3 i/s - 1.35x slower
|
315
|
+
queue size = 1000000: 513523.4 i/s - 1.43x slower
|
316
|
+
queue size = 2500000: 460848.9 i/s - 1.60x slower
|
317
|
+
queue size = 5000000: 445234.5 i/s - 1.65x slower
|
318
|
+
queue size = 10000000: 423119.0 i/s - 1.74x slower
|
319
|
+
|
320
|
+
== push + pop (bsearch)
|
321
|
+
queue size = 10000: 786334.2 i/s
|
322
|
+
queue size = 25000: 364963.8 i/s - 2.15x slower
|
323
|
+
queue size = 50000: 200520.6 i/s - 3.92x slower
|
324
|
+
queue size = 100000: 88607.0 i/s - 8.87x slower
|
325
|
+
queue size = 250000: 34530.5 i/s - 22.77x slower
|
326
|
+
queue size = 500000: 17965.4 i/s - 43.77x slower
|
327
|
+
queue size = 1000000: 5638.7 i/s - 139.45x slower
|
328
|
+
queue size = 2500000: 1302.0 i/s - 603.93x slower
|
329
|
+
queue size = 5000000: 592.0 i/s - 1328.25x slower
|
330
|
+
queue size = 10000000: 288.8 i/s - 2722.66x slower
|
331
|
+
|
332
|
+
== push + pop (c_dheap)
|
333
|
+
queue size = 10000: 7311366.6 i/s
|
334
|
+
queue size = 50000: 6737824.5 i/s - 1.09x slower
|
335
|
+
queue size = 25000: 6407340.6 i/s - 1.14x slower
|
336
|
+
queue size = 100000: 6254396.3 i/s - 1.17x slower
|
337
|
+
queue size = 250000: 5917684.5 i/s - 1.24x slower
|
338
|
+
queue size = 500000: 5126307.6 i/s - 1.43x slower
|
339
|
+
queue size = 1000000: 4403494.1 i/s - 1.66x slower
|
340
|
+
queue size = 2500000: 3304088.2 i/s - 2.21x slower
|
341
|
+
queue size = 5000000: 2664897.7 i/s - 2.74x slower
|
342
|
+
queue size = 10000000: 2137927.6 i/s - 3.42x slower
|
343
|
+
|
344
|
+
## Profiling
|
345
|
+
|
346
|
+
_n.b. `Array#fetch` is reading the input data, external to heap operations.
|
347
|
+
These benchmarks use integers for all scores, which enables significantly faster
|
348
|
+
comparisons. If `a <=> b` were used instead, then the difference between push
|
349
|
+
and pop would be much larger. And ruby's `Tracepoint` impacts these different
|
350
|
+
implementations differently. So we can't use these profiler results for
|
351
|
+
comparisons between implementations. A sampling profiler would be needed for
|
352
|
+
more accurate relative measurements._
|
353
|
+
|
354
|
+
It's informative to look at the `ruby-prof` results for a simple binary search +
|
355
|
+
insert implementation, repeatedly pushing and popping to a large heap. In
|
356
|
+
particular, even with 1000 members, the linear `Array#insert` is _still_ faster
|
357
|
+
than the logarithmic `Array#bsearch_index`. At this scale, ruby comparisons are
|
358
|
+
still (relatively) slow and `memcpy` is (relatively) quite fast!
|
359
|
+
|
360
|
+
%self total self wait child calls name location
|
361
|
+
34.79 2.222 2.222 0.000 0.000 1000000 Array#insert
|
362
|
+
32.59 2.081 2.081 0.000 0.000 1000000 Array#bsearch_index
|
363
|
+
12.84 6.386 0.820 0.000 5.566 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
|
364
|
+
10.38 4.966 0.663 0.000 4.303 1000000 DHeap::Benchmarks::BinarySearchAndInsert#<< d_heap/benchmarks/implementations.rb:61
|
365
|
+
5.38 0.468 0.343 0.000 0.125 1000000 DHeap::Benchmarks::BinarySearchAndInsert#pop d_heap/benchmarks/implementations.rb:70
|
366
|
+
2.06 0.132 0.132 0.000 0.000 1000000 Array#fetch
|
367
|
+
1.95 0.125 0.125 0.000 0.000 1000000 Array#pop
|
368
|
+
|
369
|
+
Contrast this with a simplistic pure-ruby implementation of a binary heap:
|
370
|
+
|
371
|
+
%self total self wait child calls name location
|
372
|
+
48.52 8.487 8.118 0.000 0.369 1000000 DHeap::Benchmarks::NaiveBinaryHeap#pop d_heap/benchmarks/implementations.rb:96
|
373
|
+
42.94 7.310 7.184 0.000 0.126 1000000 DHeap::Benchmarks::NaiveBinaryHeap#<< d_heap/benchmarks/implementations.rb:80
|
374
|
+
4.80 16.732 0.803 0.000 15.929 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
|
375
|
+
|
376
|
+
You can see that it spends more time in pop than it does in push.  That
|
377
|
+
is expected behavior for a heap: although both are O(log n), pop is
|
378
|
+
significantly more complex, and has _d_ comparisons per layer.
|
379
|
+
|
380
|
+
And `DHeap` shows a similar comparison between push and pop, although it spends
|
381
|
+
half of its time in the benchmark code (which is written in ruby):
|
382
|
+
|
383
|
+
%self total self wait child calls name location
|
384
|
+
43.09 1.685 0.726 0.000 0.959 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
|
385
|
+
26.05 0.439 0.439 0.000 0.000 1000000 DHeap#<<
|
386
|
+
23.57 0.397 0.397 0.000 0.000 1000000 DHeap#pop
|
387
|
+
7.29 0.123 0.123 0.000 0.000 1000000 Array#fetch
|
137
388
|
|
138
389
|
### Timers
|
139
390
|
|
@@ -151,22 +402,54 @@ faster than a delete and re-insert.
|
|
151
402
|
|
152
403
|
## Alternative data structures
|
153
404
|
|
405
|
+
As always, you should run benchmarks with your expected scenarios to determine
|
406
|
+
which is right.
|
407
|
+
|
154
408
|
Depending on what you're doing, maintaining a sorted `Array` using
|
155
|
-
`#bsearch_index` and `#insert` might be
|
156
|
-
O(n) for insertions,
|
157
|
-
|
158
|
-
|
159
|
-
|
409
|
+
`#bsearch_index` and `#insert` might be just fine! As discussed above, although
|
410
|
+
it is `O(n)` for insertions, `memcpy` is so fast on modern hardware that this
|
411
|
+
may not matter. Also, if you can arrange for insertions to occur near the end
|
412
|
+
of the array, that could significantly reduce the `memcpy` overhead even more.
|
413
|
+
|
414
|
+
More complex heap variants, e.g. [Fibonacci heap], can allow heaps to be merged
|
415
|
+
as well as lower amortized time.
|
416
|
+
|
417
|
+
[Fibonacci heap]: https://en.wikipedia.org/wiki/Fibonacci_heap
|
160
418
|
|
161
419
|
If it is important to be able to quickly enumerate the set or find the ranking
|
162
|
-
of values in it, then you
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
be
|
420
|
+
of values in it, then you may want to use a self-balancing binary search tree
|
421
|
+
(e.g. a [red-black tree]) or a [skip-list].
|
422
|
+
|
423
|
+
[red-black tree]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
424
|
+
[skip-list]: https://en.wikipedia.org/wiki/Skip_list
|
425
|
+
|
426
|
+
[Hashed and Hierarchical Timing Wheels][timing wheels] (or some variant in that
|
427
|
+
family of data structures) can be constructed to have effectively `O(1)` running
|
428
|
+
time in most cases. Although the implementation for that data structure is more
|
429
|
+
complex than a heap, it may be necessary for enormous values of N.
|
430
|
+
|
431
|
+
[timing wheels]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf
|
432
|
+
|
433
|
+
## TODOs...
|
434
|
+
|
435
|
+
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
|
436
|
+
basic heap with an internal `Hash`, which maps a set of values to scores.
|
437
|
+
Loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
|
438
|
+
and adjustments, to achieve faster average runtime for *add* and *cancel*
|
439
|
+
operations.
|
440
|
+
|
441
|
+
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Lazy`, which contains some
|
442
|
+
features that are loosely inspired by go's timers. e.g: It lazily sifts its
|
443
|
+
heap after deletion and adjustments, to achieve faster average runtime for *add*
|
444
|
+
and *cancel* operations.
|
445
|
+
|
446
|
+
Additionally, I was inspired by reading go's "timer.go" implementation to
|
447
|
+
experiment with a 4-ary heap instead of the traditional binary heap. In the
|
448
|
+
case of timers, new timers are usually scheduled to run after most of the
|
449
|
+
existing timers. And timers are usually canceled before they have a chance to
|
450
|
+
run. While a binary heap holds 50% of its elements in its last layer, 75% of a
|
451
|
+
4-ary heap will have no children. That diminishes the extra comparison overhead
|
452
|
+
during sift-down.
|
170
453
|
|
171
454
|
## Development
|
172
455
|
|