d_heap 0.2.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +26 -0
- data/.rubocop.yml +199 -0
- data/CHANGELOG.md +59 -0
- data/Gemfile +10 -2
- data/Gemfile.lock +42 -5
- data/README.md +392 -109
- data/Rakefile +8 -2
- data/benchmarks/perf.rb +29 -0
- data/benchmarks/push_n.yml +31 -0
- data/benchmarks/push_n_pop_n.yml +35 -0
- data/benchmarks/push_pop.yml +27 -0
- data/benchmarks/stackprof.rb +31 -0
- data/bin/bench_n +7 -0
- data/bin/benchmark-driver +29 -0
- data/bin/benchmarks +10 -0
- data/bin/console +1 -0
- data/bin/profile +10 -0
- data/bin/rubocop +29 -0
- data/d_heap.gemspec +11 -6
- data/docs/benchmarks-2.txt +75 -0
- data/docs/benchmarks-mem.txt +39 -0
- data/docs/benchmarks.txt +515 -0
- data/docs/profile.txt +392 -0
- data/ext/d_heap/d_heap.c +555 -225
- data/ext/d_heap/d_heap.h +24 -48
- data/ext/d_heap/extconf.rb +20 -0
- data/lib/benchmark_driver/runner/ips_zero_fail.rb +120 -0
- data/lib/d_heap.rb +40 -2
- data/lib/d_heap/benchmarks.rb +112 -0
- data/lib/d_heap/benchmarks/benchmarker.rb +116 -0
- data/lib/d_heap/benchmarks/implementations.rb +222 -0
- data/lib/d_heap/benchmarks/profiler.rb +71 -0
- data/lib/d_heap/benchmarks/rspec_matchers.rb +374 -0
- data/lib/d_heap/version.rb +4 -1
- metadata +54 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3dd1049e0a8041a328da4ed65622c2f0589475bc386a0eb6f20e466c79587bc5
|
4
|
+
data.tar.gz: ec44970feaa5ce6aef37f511e71e55342ec93b7e1a0b2a3d40f249afa4e9ac25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6b6e192dbe5980b2b79728e4b4bf413151b3e193733d1435119482edb977f0a2edd692fd156614fdfbc86f4fa1dc6ac9f7907ca640c21ca23050d07b9a1caa6
|
7
|
+
data.tar.gz: 27a987139a1fd14f73c16459f72be2bf1059dadbd212e250a28b3691dff2372e708cdf740155e2d2aa21de78cc789816a9d83f80f42d3c396510cd6fce6e6bf2
|
@@ -0,0 +1,26 @@
|
|
1
|
+
name: Ruby
|
2
|
+
|
3
|
+
on: [push,pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
strategy:
|
8
|
+
fail-fast: false
|
9
|
+
matrix:
|
10
|
+
ruby: [2.5, 2.6, 2.7, 3.0]
|
11
|
+
os: [ubuntu, macos]
|
12
|
+
experimental: [false]
|
13
|
+
runs-on: ${{ matrix.os }}-latest
|
14
|
+
continue-on-error: ${{ matrix.experimental }}
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
bundler-cache: true
|
22
|
+
- name: Run the default task
|
23
|
+
run: |
|
24
|
+
gem install bundler -v 2.2.3
|
25
|
+
bundle install
|
26
|
+
bundle exec rake
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
inherit_mode:
|
2
|
+
merge:
|
3
|
+
- Exclude
|
4
|
+
|
5
|
+
AllCops:
|
6
|
+
TargetRubyVersion: 2.5
|
7
|
+
NewCops: disable
|
8
|
+
Exclude:
|
9
|
+
- bin/benchmark-driver
|
10
|
+
- bin/rake
|
11
|
+
- bin/rspec
|
12
|
+
- bin/rubocop
|
13
|
+
|
14
|
+
###########################################################################
|
15
|
+
# rubocop defaults are simply WRONG about many rules... Sorry. It's true.
|
16
|
+
|
17
|
+
###########################################################################
|
18
|
+
# Layout: Alignment. I want these to work, I really do...
|
19
|
+
|
20
|
+
# I wish this worked with "table". but that goes wrong sometimes.
|
21
|
+
Layout/HashAlignment: { Enabled: false }
|
22
|
+
|
23
|
+
# This needs to be configurable so parenthesis calls are aligned with first
|
24
|
+
# parameter, and non-parenthesis calls are aligned with fixed indentation.
|
25
|
+
Layout/ParameterAlignment: { Enabled: false }
|
26
|
+
|
27
|
+
###########################################################################
|
28
|
+
# Layout: Empty lines
|
29
|
+
|
30
|
+
Layout/EmptyLineAfterGuardClause: { Enabled: false }
|
31
|
+
Layout/EmptyLineAfterMagicComment: { Enabled: true }
|
32
|
+
Layout/EmptyLineAfterMultilineCondition: { Enabled: false }
|
33
|
+
Layout/EmptyLines: { Enabled: true }
|
34
|
+
Layout/EmptyLinesAroundAccessModifier: { Enabled: true }
|
35
|
+
Layout/EmptyLinesAroundArguments: { Enabled: true }
|
36
|
+
Layout/EmptyLinesAroundBeginBody: { Enabled: true }
|
37
|
+
Layout/EmptyLinesAroundBlockBody: { Enabled: false }
|
38
|
+
Layout/EmptyLinesAroundExceptionHandlingKeywords: { Enabled: true }
|
39
|
+
Layout/EmptyLinesAroundMethodBody: { Enabled: true }
|
40
|
+
|
41
|
+
Layout/EmptyLineBetweenDefs:
|
42
|
+
Enabled: true
|
43
|
+
AllowAdjacentOneLineDefs: true
|
44
|
+
|
45
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
46
|
+
inherit_mode:
|
47
|
+
merge:
|
48
|
+
- Exclude
|
49
|
+
- AllowedMethods
|
50
|
+
Enabled: true
|
51
|
+
AllowedMethods:
|
52
|
+
- delegate
|
53
|
+
- def_delegator
|
54
|
+
- def_delegators
|
55
|
+
- def_instance_delegators
|
56
|
+
|
57
|
+
# "empty_lines_special" sometimes does the wrong thing and annoys me.
|
58
|
+
# But I've mostly learned to live with it... mostly. 🙁
|
59
|
+
|
60
|
+
Layout/EmptyLinesAroundClassBody:
|
61
|
+
Enabled: true
|
62
|
+
EnforcedStyle: empty_lines_special
|
63
|
+
|
64
|
+
Layout/EmptyLinesAroundModuleBody:
|
65
|
+
Enabled: true
|
66
|
+
EnforcedStyle: empty_lines_special
|
67
|
+
|
68
|
+
###########################################################################
|
69
|
+
# Layout: Space around, before, inside, etc
|
70
|
+
|
71
|
+
Layout/SpaceAroundEqualsInParameterDefault: { Enabled: false }
|
72
|
+
Layout/SpaceBeforeBlockBraces: { Enabled: false }
|
73
|
+
Layout/SpaceBeforeFirstArg: { Enabled: false }
|
74
|
+
Layout/SpaceInLambdaLiteral: { Enabled: false }
|
75
|
+
Layout/SpaceInsideArrayLiteralBrackets: { Enabled: false }
|
76
|
+
Layout/SpaceInsideHashLiteralBraces: { Enabled: false }
|
77
|
+
|
78
|
+
Layout/SpaceInsideBlockBraces:
|
79
|
+
EnforcedStyle: space
|
80
|
+
EnforcedStyleForEmptyBraces: space
|
81
|
+
SpaceBeforeBlockParameters: false
|
82
|
+
|
83
|
+
# I would enable this if it were a bit better at handling alignment.
|
84
|
+
Layout/ExtraSpacing:
|
85
|
+
Enabled: false
|
86
|
+
AllowForAlignment: true
|
87
|
+
AllowBeforeTrailingComments: true
|
88
|
+
|
89
|
+
###########################################################################
|
90
|
+
# Layout: Misc
|
91
|
+
|
92
|
+
Layout/LineLength:
|
93
|
+
Max: 90 # should stay under 80, but we'll allow a little wiggle-room
|
94
|
+
|
95
|
+
Layout/MultilineOperationIndentation: { Enabled: false }
|
96
|
+
|
97
|
+
Layout/MultilineMethodCallIndentation:
|
98
|
+
EnforcedStyle: indented
|
99
|
+
|
100
|
+
###########################################################################
|
101
|
+
# Lint and Naming: rubocop defaults are mostly good, but...
|
102
|
+
|
103
|
+
Lint/UnusedMethodArgument: { Enabled: false }
|
104
|
+
Naming/BinaryOperatorParameterName: { Enabled: false } # def /(denominator)
|
105
|
+
Naming/RescuedExceptionsVariableName: { Enabled: false }
|
106
|
+
|
107
|
+
###########################################################################
|
108
|
+
# Metrics:
|
109
|
+
|
110
|
+
Metrics/CyclomaticComplexity:
|
111
|
+
Max: 10
|
112
|
+
|
113
|
+
# Although it may be better to split specs into multiple files...?
|
114
|
+
Metrics/BlockLength:
|
115
|
+
Exclude:
|
116
|
+
- "spec/**/*_spec.rb"
|
117
|
+
CountAsOne:
|
118
|
+
- array
|
119
|
+
- hash
|
120
|
+
- heredoc
|
121
|
+
|
122
|
+
Metrics/ClassLength:
|
123
|
+
Max: 200
|
124
|
+
CountAsOne:
|
125
|
+
- array
|
126
|
+
- hash
|
127
|
+
- heredoc
|
128
|
+
|
129
|
+
###########################################################################
|
130
|
+
# Style...
|
131
|
+
|
132
|
+
Style/AccessorGrouping: { Enabled: false }
|
133
|
+
Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
|
134
|
+
Style/ClassAndModuleChildren: { Enabled: false }
|
135
|
+
Style/EachWithObject: { Enabled: false }
|
136
|
+
Style/FormatStringToken: { Enabled: false }
|
137
|
+
Style/FloatDivision: { Enabled: false }
|
138
|
+
Style/IfUnlessModifier: { Enabled: false }
|
139
|
+
Style/IfWithSemicolon: { Enabled: false }
|
140
|
+
Style/Lambda: { Enabled: false }
|
141
|
+
Style/LineEndConcatenation: { Enabled: false }
|
142
|
+
Style/MixinGrouping: { Enabled: false }
|
143
|
+
Style/MultilineBlockChain: { Enabled: false }
|
144
|
+
Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
|
145
|
+
Style/RescueStandardError: { Enabled: false }
|
146
|
+
Style/Semicolon: { Enabled: false }
|
147
|
+
Style/SingleLineMethods: { Enabled: false }
|
148
|
+
Style/StabbyLambdaParentheses: { Enabled: false }
|
149
|
+
Style/WhenThen : { Enabled: false }
|
150
|
+
|
151
|
+
# I require trailing commas elsewhere, but these are optional
|
152
|
+
Style/TrailingCommaInArguments: { Enabled: false }
|
153
|
+
|
154
|
+
# If rubocop had an option to only enforce this on constants and literals (e.g.
|
155
|
+
# strings, regexp, range), I'd agree.
|
156
|
+
#
|
157
|
+
# But if you are using it e.g. on method arguments of unknown type, in the same
|
158
|
+
# style that ruby uses it with grep, then you are doing exactly the right thing.
|
159
|
+
Style/CaseEquality: { Enabled: false }
|
160
|
+
|
161
|
+
# I'd enable if "require_parentheses_when_complex" considered unary '!' simple.
|
162
|
+
Style/TernaryParentheses:
|
163
|
+
EnforcedStyle: require_parentheses_when_complex
|
164
|
+
Enabled: false
|
165
|
+
|
166
|
+
Style/BlockDelimiters:
|
167
|
+
inherit_mode:
|
168
|
+
merge:
|
169
|
+
- Exclude
|
170
|
+
- ProceduralMethods
|
171
|
+
- IgnoredMethods
|
172
|
+
- FunctionalMethods
|
173
|
+
EnforcedStyle: semantic
|
174
|
+
AllowBracesOnProceduralOneLiners: true
|
175
|
+
IgnoredMethods:
|
176
|
+
- expect # rspec
|
177
|
+
- profile # ruby-prof
|
178
|
+
- ips # benchmark-ips
|
179
|
+
|
180
|
+
|
181
|
+
Style/FormatString:
|
182
|
+
EnforcedStyle: percent
|
183
|
+
|
184
|
+
Style/StringLiterals:
|
185
|
+
Enabled: true
|
186
|
+
EnforcedStyle: double_quotes
|
187
|
+
|
188
|
+
Style/StringLiteralsInInterpolation:
|
189
|
+
Enabled: true
|
190
|
+
EnforcedStyle: double_quotes
|
191
|
+
|
192
|
+
Style/TrailingCommaInHashLiteral:
|
193
|
+
EnforcedStyleForMultiline: consistent_comma
|
194
|
+
|
195
|
+
Style/TrailingCommaInArrayLiteral:
|
196
|
+
EnforcedStyleForMultiline: consistent_comma
|
197
|
+
|
198
|
+
Style/YodaCondition:
|
199
|
+
EnforcedStyle: forbid_for_equality_operators_only
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
## Current/Unreleased
|
2
|
+
|
3
|
+
## Release v0.5.0 (2021-01-17)
|
4
|
+
|
5
|
+
* 🔥 **Breaking**: reversed order of `#push` arguments to `value, score`.
|
6
|
+
* ✨ Added `#insert(score, value)` to replace earlier version of `#push`.
|
7
|
+
* ✨ Added `#each_pop` enumerator.
|
8
|
+
* ✨ Added aliases for `deq`, `enq`, `first`, `pop_below`, `length`, and
|
9
|
+
`count`, to mimic other classes in ruby's stdlib.
|
10
|
+
* ⚡️♻️ More performance improvements:
|
11
|
+
* Created an `ENTRY` struct and store both the score and the value pointer in
|
12
|
+
the same `ENTRY *entries` array.
|
13
|
+
* Reduced unnecessary allocations or copies in both sift loops. A similar
|
14
|
+
refactoring also sped up the pure ruby benchmark implementation.
|
15
|
+
* Compiling with `-O3`.
|
16
|
+
* 📝 Updated (and in some cases, fixed) yardoc
|
17
|
+
* ♻️ Moved aliases and less performance sensitive code into ruby.
|
18
|
+
* ♻️ DRY up push/insert methods
|
19
|
+
|
20
|
+
## Release v0.4.0 (2021-01-12)
|
21
|
+
|
22
|
+
* ⚡️ Big performance improvements, by using C `long double *cscores` array
|
23
|
+
* ⚡️ Scores must be `Integer` in `-uint64..+uint64`, or convertible to `Float`
|
24
|
+
* ⚡️ many many (so many) updates to benchmarks
|
25
|
+
* ✨ Added `DHeap#clear`
|
26
|
+
* 🐛 Fixed `DHeap#initialize_copy` and `#freeze`
|
27
|
+
* ♻️ significant refactoring
|
28
|
+
* 📝 Updated docs (mostly adding benchmarks)
|
29
|
+
|
30
|
+
## Release v0.3.0 (2020-12-29)
|
31
|
+
|
32
|
+
* 🔥 **Breaking**: Removed class methods that operated directly on an array.
|
33
|
+
They weren't compatible with the performance improvements.
|
34
|
+
* ⚡️ Big performance improvements, by converting to a `T_DATA` struct.
|
35
|
+
* ♻️ Major refactoring/rewriting of dheap.c
|
36
|
+
* ✅ Added benchmark specs
|
37
|
+
|
38
|
+
## Release v0.2.2 (2020-12-27)
|
39
|
+
|
40
|
+
* 🐛 fix `optimized_cmp`, avoiding internal symbols
|
41
|
+
* 📝 Update documentation
|
42
|
+
* 💚 fix macos CI
|
43
|
+
* ➕ Add rubocop 👮🎨
|
44
|
+
|
45
|
+
## Release v0.2.1 (2020-12-26)
|
46
|
+
|
47
|
+
* ⬆️ Upgraded rake (and bundler) to support ruby 3.0
|
48
|
+
|
49
|
+
## Release v0.2.0 (2020-12-24)
|
50
|
+
|
51
|
+
* ✨ Add ability to push separate score and value
|
52
|
+
* ⚡️ Big performance gain, by storing scores separately and using ruby's
|
53
|
+
internal `OPTIMIZED_CMP` instead of always directly calling `<=>`
|
54
|
+
|
55
|
+
## Release v0.1.0 (2020-12-22)
|
56
|
+
|
57
|
+
🎉 initial release 🎉
|
58
|
+
|
59
|
+
* ✨ Add basic d-ary Heap implementation
|
data/Gemfile
CHANGED
@@ -1,8 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
source "https://rubygems.org"
|
2
4
|
|
3
5
|
# Specify your gem's dependencies in d_heap.gemspec
|
4
6
|
gemspec
|
5
7
|
|
6
|
-
gem "
|
8
|
+
gem "pry"
|
9
|
+
gem "rake", "~> 13.0"
|
7
10
|
gem "rake-compiler"
|
8
|
-
gem "rspec", "~> 3.
|
11
|
+
gem "rspec", "~> 3.10"
|
12
|
+
gem "rubocop", "~> 1.0"
|
13
|
+
|
14
|
+
gem "perf"
|
15
|
+
gem "priority_queue_cxx"
|
16
|
+
gem "stackprof"
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,30 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
d_heap (0.
|
4
|
+
d_heap (0.5.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
+
ast (2.4.1)
|
10
|
+
benchmark_driver (0.15.16)
|
11
|
+
coderay (1.1.3)
|
9
12
|
diff-lcs (1.4.4)
|
10
|
-
|
13
|
+
method_source (1.0.0)
|
14
|
+
parallel (1.19.2)
|
15
|
+
parser (2.7.2.0)
|
16
|
+
ast (~> 2.4.1)
|
17
|
+
perf (0.1.2)
|
18
|
+
priority_queue_cxx (0.3.4)
|
19
|
+
pry (0.13.1)
|
20
|
+
coderay (~> 1.1)
|
21
|
+
method_source (~> 1.0)
|
22
|
+
rainbow (3.0.0)
|
23
|
+
rake (13.0.3)
|
11
24
|
rake-compiler (1.1.1)
|
12
25
|
rake
|
26
|
+
regexp_parser (1.8.2)
|
27
|
+
rexml (3.2.3)
|
13
28
|
rspec (3.10.0)
|
14
29
|
rspec-core (~> 3.10.0)
|
15
30
|
rspec-expectations (~> 3.10.0)
|
@@ -23,15 +38,37 @@ GEM
|
|
23
38
|
diff-lcs (>= 1.2.0, < 2.0)
|
24
39
|
rspec-support (~> 3.10.0)
|
25
40
|
rspec-support (3.10.0)
|
41
|
+
rubocop (1.2.0)
|
42
|
+
parallel (~> 1.10)
|
43
|
+
parser (>= 2.7.1.5)
|
44
|
+
rainbow (>= 2.2.2, < 4.0)
|
45
|
+
regexp_parser (>= 1.8)
|
46
|
+
rexml
|
47
|
+
rubocop-ast (>= 1.0.1)
|
48
|
+
ruby-progressbar (~> 1.7)
|
49
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
50
|
+
rubocop-ast (1.1.1)
|
51
|
+
parser (>= 2.7.1.5)
|
52
|
+
ruby-prof (1.4.2)
|
53
|
+
ruby-progressbar (1.10.1)
|
54
|
+
stackprof (0.2.16)
|
55
|
+
unicode-display_width (1.7.0)
|
26
56
|
|
27
57
|
PLATFORMS
|
28
58
|
ruby
|
29
59
|
|
30
60
|
DEPENDENCIES
|
61
|
+
benchmark_driver
|
31
62
|
d_heap!
|
32
|
-
|
63
|
+
perf
|
64
|
+
priority_queue_cxx
|
65
|
+
pry
|
66
|
+
rake (~> 13.0)
|
33
67
|
rake-compiler
|
34
|
-
rspec (~> 3.
|
68
|
+
rspec (~> 3.10)
|
69
|
+
rubocop (~> 1.0)
|
70
|
+
ruby-prof
|
71
|
+
stackprof
|
35
72
|
|
36
73
|
BUNDLED WITH
|
37
|
-
2.
|
74
|
+
2.2.3
|
data/README.md
CHANGED
@@ -1,139 +1,390 @@
|
|
1
1
|
# DHeap
|
2
2
|
|
3
|
-
A fast _d_-ary heap implementation for ruby,
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
3
|
+
A fast [_d_-ary heap][d-ary heap] [priority queue] implementation for ruby,
|
4
|
+
implemented as a C extension.
|
5
|
+
|
6
|
+
With a regular queue, you expect "FIFO" behavior: first in, first out. With a
|
7
|
+
stack you expect "LIFO": last in first out. A priority queue has a score for
|
8
|
+
each element and elements are popped in order by score. Priority queues are
|
9
|
+
often used in algorithms for e.g. [scheduling] of timers or bandwidth
|
10
|
+
management, for [Huffman coding], and various graph search algorithms such as
|
11
|
+
[Dijkstra's algorithm], [A* search], or [Prim's algorithm].
|
12
|
+
|
13
|
+
The _d_-ary heap data structure is a generalization of the [binary heap], in
|
14
|
+
which the nodes have _d_ children instead of 2. This allows for "insert" and
|
15
|
+
"decrease priority" operations to be performed more quickly with the tradeoff of
|
16
|
+
slower delete minimum. Additionally, _d_-ary heaps can have better memory cache
|
17
|
+
behavior than binary heaps, allowing them to run more quickly in practice
|
18
|
+
despite slower worst-case time complexity. In the worst case, a _d_-ary heap
|
19
|
+
requires only `O(log n / log d)` operations to push, with the tradeoff that pop
|
20
|
+
requires `O(d log n / log d)`.
|
21
|
+
|
22
|
+
Although you should probably just use the default _d_ value of `4` (see the
|
23
|
+
analysis below), it's always advisable to benchmark your specific use-case.
|
24
|
+
|
25
|
+
[d-ary heap]: https://en.wikipedia.org/wiki/D-ary_heap
|
26
|
+
[priority queue]: https://en.wikipedia.org/wiki/Priority_queue
|
27
|
+
[binary heap]: https://en.wikipedia.org/wiki/Binary_heap
|
28
|
+
[scheduling]: https://en.wikipedia.org/wiki/Scheduling_(computing)
|
29
|
+
[Huffman coding]: https://en.wikipedia.org/wiki/Huffman_coding#Compression
|
30
|
+
[Dijkstra's algorithm]: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Using_a_priority_queue
|
31
|
+
[A* search]: https://en.wikipedia.org/wiki/A*_search_algorithm#Description
|
32
|
+
[Prim's algorithm]: https://en.wikipedia.org/wiki/Prim%27s_algorithm
|
23
33
|
|
24
|
-
|
25
|
-
loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
|
26
|
-
and adjustments, to achieve faster average runtime for *add* and *cancel*
|
27
|
-
operations.
|
34
|
+
## Usage
|
28
35
|
|
29
|
-
|
36
|
+
Quick reference:
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
slower than inserting into a fully sorted array. On the one hand, this is a
|
40
|
-
testament to ruby's fine-tuned Array implementation. On the other hand, it
|
41
|
-
seemed like a heap implementated in C should easily match the speed of ruby's
|
42
|
-
bsearch + insert.
|
38
|
+
* `heap << object` adds a value, with `Float(object)` as its score.
|
39
|
+
* `heap.push(object, score)` adds a value with an extrinsic score.
|
40
|
+
* `heap.pop` removes and returns the value with the minimum score.
|
41
|
+
* `heap.pop_lte(score)` pops if the minimum score is `<=` the provided score.
|
42
|
+
* `heap.peek` to view the minimum value without popping it.
|
43
|
+
* `heap.clear` to remove all items from the heap.
|
44
|
+
* `heap.empty?` returns true if the heap is empty.
|
45
|
+
* `heap.size` returns the number of items in the heap.
|
43
46
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
4-ary heap will have no children. That diminishes the extra comparison
|
50
|
-
overhead during sift-down.
|
47
|
+
The basic API is `#push(object, score)` and `pop`. If your values behave as
|
48
|
+
their own score, then you can push with `#<<`. If the score changes while the
|
49
|
+
object is still in the heap, it will not be re-evaluated again. The score must
|
50
|
+
either be `Integer` or `Float` or convertible to a `Float` via `Float(score)`
|
51
|
+
(i.e. it should implement `#to_f`).
|
51
52
|
|
52
|
-
|
53
|
+
```ruby
|
54
|
+
require "d_heap"
|
53
55
|
|
54
|
-
|
56
|
+
Task = Struct.new(:id, :time) do
|
57
|
+
def to_f; time.to_f end
|
58
|
+
end
|
59
|
+
t1 = Task.new(1, Time.now + 5*60)
|
60
|
+
t2 = Task.new(2, Time.now + 50)
|
61
|
+
t3 = Task.new(3, Time.now + 60)
|
62
|
+
t4 = Task.new(4, Time.now + 5)
|
63
|
+
|
64
|
+
# if the object returns its own score via #to_f, "<<" is the simplest API
|
65
|
+
heap << t1 << t2
|
66
|
+
|
67
|
+
# or push with an explicit score
|
68
|
+
heap.push t3, t3.to_f
|
69
|
+
heap.push t4, t4 # score can be implicitly cast with Float
|
70
|
+
|
71
|
+
# peek and pop
|
72
|
+
heap.pop # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
|
73
|
+
heap.pop # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
|
74
|
+
heap.peek # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
75
|
+
heap.pop # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
|
76
|
+
heap.pop # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
|
77
|
+
heap.empty? # => true
|
78
|
+
heap.pop # => nil
|
79
|
+
```
|
80
|
+
|
81
|
+
Constraining scores to numeric values gives more than 50% speedup under some
|
82
|
+
benchmarks! _n.b._ `Integer` _scores must have an absolute value that fits
|
83
|
+
into_ `unsigned long long`. _This is architecture dependent but on an IA-64
|
84
|
+
system this is 64 bits, which gives a range of -18,446,744,073,709,551,615 to
|
85
|
+
+18,446,744,073,709,551,615. Comparing arbitrary objects via_ `a <=> b` _was the
|
86
|
+
original design and may be added back in a future version,_ if (and only if) _it
|
87
|
+
can be done without impacting the speed of numeric comparisons._
|
55
88
|
|
56
89
|
```ruby
|
57
|
-
|
90
|
+
heap.clear
|
91
|
+
|
92
|
+
# The score can be derived from the value by using to_f.
|
93
|
+
# "a <=> b" is *much* slower than comparing numbers, so it isn't used.
|
94
|
+
class Event
|
95
|
+
include Comparable
|
96
|
+
attr_reader :time, :payload
|
97
|
+
alias_method :to_time, :time
|
98
|
+
|
99
|
+
def initialize(time, payload)
|
100
|
+
@time = time.to_time
|
101
|
+
@payload = payload
|
102
|
+
freeze
|
103
|
+
end
|
104
|
+
|
105
|
+
def to_f
|
106
|
+
time.to_f
|
107
|
+
end
|
108
|
+
|
109
|
+
def <=>(other)
|
110
|
+
to_f <=> other.to_f
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
heap << comparable_max # sorts last, using <=>
|
115
|
+
heap << comparable_min # sorts first, using <=>
|
116
|
+
heap << comparable_mid # sorts in the middle, using <=>
|
117
|
+
heap.pop # => comparable_min
|
118
|
+
heap.pop # => comparable_mid
|
119
|
+
heap.pop # => comparable_max
|
120
|
+
heap.empty? # => true
|
121
|
+
heap.pop # => nil
|
58
122
|
```
|
59
123
|
|
60
|
-
|
124
|
+
You can also pass a value into `#pop(max)` which will only pop if the minimum
|
125
|
+
score is less than or equal to `max`.
|
61
126
|
|
62
|
-
|
63
|
-
|
64
|
-
Or install it yourself as:
|
127
|
+
Read the [API documentation] for more detailed documentation and examples.
|
65
128
|
|
66
|
-
|
129
|
+
[API documentation]: https://rubydoc.info/gems/d_heap/DHeap
|
67
130
|
|
68
|
-
##
|
131
|
+
## Installation
|
69
132
|
|
70
|
-
|
133
|
+
Add this line to your application's Gemfile:
|
71
134
|
|
72
135
|
```ruby
|
73
|
-
|
136
|
+
gem 'd_heap'
|
137
|
+
```
|
74
138
|
|
75
|
-
|
139
|
+
And then execute:
|
76
140
|
|
77
|
-
|
78
|
-
heap << [Time.now + 5*60, Task.new(1)]
|
79
|
-
heap << [Time.now + 30, Task.new(2)]
|
80
|
-
heap << [Time.now + 60, Task.new(3)]
|
81
|
-
heap << [Time.now + 5, Task.new(4)]
|
141
|
+
$ bundle install
|
82
142
|
|
83
|
-
|
84
|
-
heap.pop.last # => Task[4]
|
85
|
-
heap.pop.last # => Task[2]
|
86
|
-
heap.peak.last # => Task[3]
|
87
|
-
heap.pop.last # => Task[3]
|
88
|
-
heap.pop.last # => Task[1]
|
89
|
-
```
|
143
|
+
Or install it yourself as:
|
90
144
|
|
91
|
-
|
145
|
+
$ gem install d_heap
|
92
146
|
|
93
|
-
##
|
147
|
+
## Motivation
|
94
148
|
|
95
|
-
|
149
|
+
One naive approach to a priority queue is to maintain an array in sorted order.
|
150
|
+
This can be very simply implemented in ruby with `Array#bsearch_index` +
|
151
|
+
`Array#insert`. This can be very fast—`Array#pop` is `O(1)`—but the worst-case
|
152
|
+
for insert is `O(n)` because it may need to `memcpy` a significant portion of
|
153
|
+
the array.
|
154
|
+
|
155
|
+
The standard way to implement a priority queue is with a binary heap. Although
|
156
|
+
this increases the time for `pop`, it converts the amortized time per push + pop
|
157
|
+
from `O(n)` to `O(d log n / log d)`.
|
158
|
+
|
159
|
+
However, I was surprised to find that—at least for some benchmarks—my pure ruby
|
160
|
+
heap implementation was much slower than inserting into and popping from a fully
|
161
|
+
sorted array. The reasons for this surprising result: Although it is `O(n)`,
|
162
|
+
`memcpy` has a _very_ small constant factor, and calling `<=>` from ruby code
|
163
|
+
has relatively _much_ larger constant factors. If your queue contains only a
|
164
|
+
few thousand items, the overhead of those extra calls to `<=>` is _far_ more
|
165
|
+
than occasionally calling `memcpy`. In the worst case, a _d_-heap will require
|
166
|
+
`d + 1` times more comparisons for each push + pop than a `bsearch` + `insert`
|
167
|
+
sorted array.
|
168
|
+
|
169
|
+
Moving the sift-up and sift-down code into C helps some. But much more helpful
|
170
|
+
is optimizing the comparison of numeric scores, so `a <=> b` never needs to be
|
171
|
+
called. I'm hopeful that MJIT will eventually obsolete this C-extension. This
|
172
|
+
can be hotspot code, and a basic ruby implementation could perform well if `<=>`
|
173
|
+
had much lower overhead.
|
96
174
|
|
97
175
|
## Analysis
|
98
176
|
|
99
177
|
### Time complexity
|
100
178
|
|
101
|
-
|
102
|
-
|
103
|
-
Swap down performs as many as d comparions per swap: O(d).
|
179
|
+
There are two fundamental heap operations: sift-up (used by push) and sift-down
|
180
|
+
(used by pop).
|
104
181
|
|
105
|
-
|
106
|
-
|
182
|
+
* Both sift operations can perform as many as `log n / log d` swaps, as the
|
183
|
+
element may sift from the bottom of the tree to the top, or vice versa.
|
184
|
+
* Sift-up performs a single comparison per swap: `O(1)`.
|
185
|
+
So pushing a new element is `O(log n / log d)`.
|
186
|
+
* Sift-down performs as many as d comparisons per swap: `O(d)`.
|
187
|
+
So popping the min element is `O(d log n / log d)`.
|
107
188
|
|
108
|
-
Assuming every inserted
|
109
|
-
the fewest comparisons for combined insert and delete:
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
189
|
+
Assuming every inserted element is eventually deleted from the root, d=4
|
190
|
+
requires the fewest comparisons for combined insert and delete:
|
191
|
+
|
192
|
+
* (1 + 2) / ln 2 = 4.328085
|
193
|
+
* (1 + 3) / ln 3 = 3.640957
|
194
|
+
* (1 + 4) / ln 4 = 3.606738
|
195
|
+
* (1 + 5) / ln 5 = 3.728010
|
196
|
+
* (1 + 6) / ln 6 = 3.906774
|
197
|
+
* etc...
|
116
198
|
|
117
199
|
Leaf nodes require no comparisons to sift down, and higher values for d have
|
118
200
|
higher percentage of leaf nodes:
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
201
|
+
|
202
|
+
* d=2 has ~50% leaves,
|
203
|
+
* d=3 has ~67% leaves,
|
204
|
+
* d=4 has ~75% leaves,
|
205
|
+
* and so on...
|
123
206
|
|
124
207
|
See https://en.wikipedia.org/wiki/D-ary_heap#Analysis for deeper analysis.
|
125
208
|
|
126
209
|
### Space complexity
|
127
210
|
|
128
|
-
|
129
|
-
|
211
|
+
Space usage is linear, regardless of d. However higher d values may
|
212
|
+
provide better cache locality. Because the heap is a complete _d_-ary tree, the
|
213
|
+
elements can be stored in an array, without the need for tree or list pointers.
|
130
214
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
215
|
+
Ruby can compare Numeric values _much_ faster than other ruby objects, even if
|
216
|
+
those objects simply delegate comparison to internal Numeric values. And it is
|
217
|
+
often useful to use external scores for otherwise uncomparable values. So
|
218
|
+
`DHeap` uses twice as many entries (one for score and one for value)
|
219
|
+
as an array which only stores values.
|
220
|
+
|
221
|
+
## Benchmarks
|
222
|
+
|
223
|
+
_See `bin/benchmarks` and `docs/benchmarks.txt`, as well as `bin/profile` and
|
224
|
+
`docs/profile.txt` for more details or updated results. These benchmarks were
|
225
|
+
measured with v0.5.0 and ruby 2.7.2 without MJIT enabled._
|
226
|
+
|
227
|
+
These benchmarks use very simple implementations for a pure-ruby heap and an
|
228
|
+
array that is kept sorted using `Array#bsearch_index` and `Array#insert`. For
|
229
|
+
comparison, an alternate implementation using `Array#min` and `Array#delete_at` is
|
230
|
+
also shown.
|
231
|
+
|
232
|
+
Three different scenarios are measured:
|
233
|
+
* push N values but never pop (clearing between each set of pushes).
|
234
|
+
* push N values and then pop N values.
|
235
|
+
Although this could be used for heap sort, we're unlikely to choose heap sort
|
236
|
+
over Ruby's quick sort implementation. I'm using this scenario to represent
|
237
|
+
the amortized cost of creating a heap and (eventually) draining it.
|
238
|
+
* For a heap of size N, repeatedly push and pop while keeping a stable size.
|
239
|
+
This is a _very simple_ approximation for how most scheduler/timer heaps
|
240
|
+
would be used. Usually when a timer fires it will be quickly replaced by a
|
241
|
+
new timer, and the overall count of timers will remain roughly stable.
|
242
|
+
|
243
|
+
In these benchmarks, `DHeap` runs faster than all other implementations for
|
244
|
+
every scenario and every value of N, although the difference is much more
|
245
|
+
noticeable at higher values of N. The pure ruby heap implementation is
|
246
|
+
competitive for `push` alone at every value of N, but is significantly slower
|
247
|
+
than bsearch + insert for push + pop until N is _very_ large (somewhere between
|
248
|
+
10k and 100k)!
|
249
|
+
|
250
|
+
For very small N values, `DHeap` runs faster than
|
251
|
+
the other implementations for each scenario, although the difference is still
|
252
|
+
relatively small. The pure ruby binary heap is 2x or more slower than bsearch +
|
253
|
+
insert for the common push/pop scenario.
|
254
|
+
|
255
|
+
== push N (N=5) ==========================================================
|
256
|
+
push N (c_dheap): 1969700.7 i/s
|
257
|
+
push N (c++ stl): 1049738.1 i/s - 1.88x slower
|
258
|
+
push N (rb_heap): 928435.2 i/s - 2.12x slower
|
259
|
+
push N (bsearch): 921060.0 i/s - 2.14x slower
|
260
|
+
|
261
|
+
== push N then pop N (N=5) ===============================================
|
262
|
+
push N + pop N (c_dheap): 1375805.0 i/s
|
263
|
+
push N + pop N (c++ stl): 1134997.5 i/s - 1.21x slower
|
264
|
+
push N + pop N (findmin): 862913.1 i/s - 1.59x slower
|
265
|
+
push N + pop N (bsearch): 762887.1 i/s - 1.80x slower
|
266
|
+
push N + pop N (rb_heap): 506890.4 i/s - 2.71x slower
|
267
|
+
|
268
|
+
== Push/pop with pre-filled queue of size=N (N=5) ========================
|
269
|
+
push + pop (c_dheap): 9044435.5 i/s
|
270
|
+
push + pop (c++ stl): 7534583.4 i/s - 1.20x slower
|
271
|
+
push + pop (findmin): 5026155.1 i/s - 1.80x slower
|
272
|
+
push + pop (bsearch): 4300260.0 i/s - 2.10x slower
|
273
|
+
push + pop (rb_heap): 2299499.7 i/s - 3.93x slower
|
274
|
+
|
275
|
+
By N=21, `DHeap` has pulled significantly ahead of bsearch + insert for all
|
276
|
+
scenarios, but the pure ruby heap is still slower than every other
|
277
|
+
implementation—even resorting the array after every `#push`—in any scenario that
|
278
|
+
uses `#pop`.
|
279
|
+
|
280
|
+
== push N (N=21) =========================================================
|
281
|
+
push N (c_dheap): 464231.4 i/s
|
282
|
+
push N (c++ stl): 305546.7 i/s - 1.52x slower
|
283
|
+
push N (rb_heap): 202803.7 i/s - 2.29x slower
|
284
|
+
push N (bsearch): 168678.7 i/s - 2.75x slower
|
285
|
+
|
286
|
+
== push N then pop N (N=21) ==============================================
|
287
|
+
push N + pop N (c_dheap): 298350.3 i/s
|
288
|
+
push N + pop N (c++ stl): 252227.1 i/s - 1.18x slower
|
289
|
+
push N + pop N (findmin): 161998.7 i/s - 1.84x slower
|
290
|
+
push N + pop N (bsearch): 143432.3 i/s - 2.08x slower
|
291
|
+
push N + pop N (rb_heap): 79622.1 i/s - 3.75x slower
|
292
|
+
|
293
|
+
== Push/pop with pre-filled queue of size=N (N=21) =======================
|
294
|
+
push + pop (c_dheap): 8855093.4 i/s
|
295
|
+
push + pop (c++ stl): 7223079.5 i/s - 1.23x slower
|
296
|
+
push + pop (findmin): 4542913.7 i/s - 1.95x slower
|
297
|
+
push + pop (bsearch): 3461802.4 i/s - 2.56x slower
|
298
|
+
push + pop (rb_heap): 1845488.7 i/s - 4.80x slower
|
299
|
+
|
300
|
+
At higher values of N, a heap's logarithmic growth leads to only a little
|
301
|
+
slowdown of `#push`, while insert's linear growth causes it to run noticeably
|
302
|
+
slower and slower. But because `#pop` is `O(1)` for a sorted array and `O(d log
|
303
|
+
n / log d)` for a heap, scenarios involving both `#push` and `#pop` remain
|
304
|
+
relatively close, and bsearch + insert still runs faster than a pure ruby heap,
|
305
|
+
even up to queues with 10k items. But as queue size increases beyond that,
|
306
|
+
the linear time complexity to keep a sorted array dominates.
|
307
|
+
|
308
|
+
== push + pop (rb_heap)
|
309
|
+
queue size = 10000: 736618.2 i/s
|
310
|
+
queue size = 25000: 670186.8 i/s - 1.10x slower
|
311
|
+
queue size = 50000: 618156.7 i/s - 1.19x slower
|
312
|
+
queue size = 100000: 579250.7 i/s - 1.27x slower
|
313
|
+
queue size = 250000: 572795.0 i/s - 1.29x slower
|
314
|
+
queue size = 500000: 543648.3 i/s - 1.35x slower
|
315
|
+
queue size = 1000000: 513523.4 i/s - 1.43x slower
|
316
|
+
queue size = 2500000: 460848.9 i/s - 1.60x slower
|
317
|
+
queue size = 5000000: 445234.5 i/s - 1.65x slower
|
318
|
+
queue size = 10000000: 423119.0 i/s - 1.74x slower
|
319
|
+
|
320
|
+
== push + pop (bsearch)
|
321
|
+
queue size = 10000: 786334.2 i/s
|
322
|
+
queue size = 25000: 364963.8 i/s - 2.15x slower
|
323
|
+
queue size = 50000: 200520.6 i/s - 3.92x slower
|
324
|
+
queue size = 100000: 88607.0 i/s - 8.87x slower
|
325
|
+
queue size = 250000: 34530.5 i/s - 22.77x slower
|
326
|
+
queue size = 500000: 17965.4 i/s - 43.77x slower
|
327
|
+
queue size = 1000000: 5638.7 i/s - 139.45x slower
|
328
|
+
queue size = 2500000: 1302.0 i/s - 603.93x slower
|
329
|
+
queue size = 5000000: 592.0 i/s - 1328.25x slower
|
330
|
+
queue size = 10000000: 288.8 i/s - 2722.66x slower
|
331
|
+
|
332
|
+
== push + pop (c_dheap)
|
333
|
+
queue size = 10000: 7311366.6 i/s
|
334
|
+
queue size = 50000: 6737824.5 i/s - 1.09x slower
|
335
|
+
queue size = 25000: 6407340.6 i/s - 1.14x slower
|
336
|
+
queue size = 100000: 6254396.3 i/s - 1.17x slower
|
337
|
+
queue size = 250000: 5917684.5 i/s - 1.24x slower
|
338
|
+
queue size = 500000: 5126307.6 i/s - 1.43x slower
|
339
|
+
queue size = 1000000: 4403494.1 i/s - 1.66x slower
|
340
|
+
queue size = 2500000: 3304088.2 i/s - 2.21x slower
|
341
|
+
queue size = 5000000: 2664897.7 i/s - 2.74x slower
|
342
|
+
queue size = 10000000: 2137927.6 i/s - 3.42x slower
|
343
|
+
|
344
|
+
## Profiling
|
345
|
+
|
346
|
+
_n.b. `Array#fetch` is reading the input data, external to heap operations.
|
347
|
+
These benchmarks use integers for all scores, which enables significantly faster
|
348
|
+
comparisons. If `a <=> b` were used instead, then the difference between push
|
349
|
+
and pop would be much larger. And ruby's `TracePoint` impacts these different
|
350
|
+
implementations differently. So we can't use these profiler results for
|
351
|
+
comparisons between implementations. A sampling profiler would be needed for
|
352
|
+
more accurate relative measurements._
|
353
|
+
|
354
|
+
It's informative to look at the `ruby-prof` results for a simple binary search +
|
355
|
+
insert implementation, repeatedly pushing and popping to a large heap. In
|
356
|
+
particular, even with 1000 members, the linear `Array#insert` is _still_ faster
|
357
|
+
than the logarithmic `Array#bsearch_index`. At this scale, ruby comparisons are
|
358
|
+
still (relatively) slow and `memcpy` is (relatively) quite fast!
|
359
|
+
|
360
|
+
%self total self wait child calls name location
|
361
|
+
34.79 2.222 2.222 0.000 0.000 1000000 Array#insert
|
362
|
+
32.59 2.081 2.081 0.000 0.000 1000000 Array#bsearch_index
|
363
|
+
12.84 6.386 0.820 0.000 5.566 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
|
364
|
+
10.38 4.966 0.663 0.000 4.303 1000000 DHeap::Benchmarks::BinarySearchAndInsert#<< d_heap/benchmarks/implementations.rb:61
|
365
|
+
5.38 0.468 0.343 0.000 0.125 1000000 DHeap::Benchmarks::BinarySearchAndInsert#pop d_heap/benchmarks/implementations.rb:70
|
366
|
+
2.06 0.132 0.132 0.000 0.000 1000000 Array#fetch
|
367
|
+
1.95 0.125 0.125 0.000 0.000 1000000 Array#pop
|
368
|
+
|
369
|
+
Contrast this with a simplistic pure-ruby implementation of a binary heap:
|
370
|
+
|
371
|
+
%self total self wait child calls name location
|
372
|
+
48.52 8.487 8.118 0.000 0.369 1000000 DHeap::Benchmarks::NaiveBinaryHeap#pop d_heap/benchmarks/implementations.rb:96
|
373
|
+
42.94 7.310 7.184 0.000 0.126 1000000 DHeap::Benchmarks::NaiveBinaryHeap#<< d_heap/benchmarks/implementations.rb:80
|
374
|
+
4.80 16.732 0.803 0.000 15.929 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
|
375
|
+
|
376
|
+
You can see that it spends somewhat more time in pop than it does in push. That
|
377
|
+
is expected behavior for a heap: although both are O(log n), pop is
|
378
|
+
significantly more complex, and has _d_ comparisons per layer.
|
379
|
+
|
380
|
+
And `DHeap` shows a similar comparison between push and pop, although it spends
|
381
|
+
half of its time in the benchmark code (which is written in ruby):
|
382
|
+
|
383
|
+
%self total self wait child calls name location
|
384
|
+
43.09 1.685 0.726 0.000 0.959 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
|
385
|
+
26.05 0.439 0.439 0.000 0.000 1000000 DHeap#<<
|
386
|
+
23.57 0.397 0.397 0.000 0.000 1000000 DHeap#pop
|
387
|
+
7.29 0.123 0.123 0.000 0.000 1000000 Array#fetch
|
137
388
|
|
138
389
|
### Timers
|
139
390
|
|
@@ -151,22 +402,54 @@ faster than a delete and re-insert.
|
|
151
402
|
|
152
403
|
## Alternative data structures
|
153
404
|
|
405
|
+
As always, you should run benchmarks with your expected scenarios to determine
|
406
|
+
which is right.
|
407
|
+
|
154
408
|
Depending on what you're doing, maintaining a sorted `Array` using
|
155
|
-
`#bsearch_index` and `#insert` might be
|
156
|
-
O(n) for insertions,
|
157
|
-
|
158
|
-
|
159
|
-
|
409
|
+
`#bsearch_index` and `#insert` might be just fine! As discussed above, although
|
410
|
+
it is `O(n)` for insertions, `memcpy` is so fast on modern hardware that this
|
411
|
+
may not matter. Also, if you can arrange for insertions to occur near the end
|
412
|
+
of the array, that could significantly reduce the `memcpy` overhead even more.
|
413
|
+
|
414
|
+
More complex heap variants, e.g. [Fibonacci heap], can allow heaps to be merged
|
415
|
+
as well as lower amortized time.
|
416
|
+
|
417
|
+
[Fibonacci heap]: https://en.wikipedia.org/wiki/Fibonacci_heap
|
160
418
|
|
161
419
|
If it is important to be able to quickly enumerate the set or find the ranking
|
162
|
-
of values in it, then you
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
be
|
420
|
+
of values in it, then you may want to use a self-balancing binary search tree
|
421
|
+
(e.g. a [red-black tree]) or a [skip-list].
|
422
|
+
|
423
|
+
[red-black tree]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
424
|
+
[skip-list]: https://en.wikipedia.org/wiki/Skip_list
|
425
|
+
|
426
|
+
[Hashed and Hierarchical Timing Wheels][timing wheels] (or some variant in that
|
427
|
+
family of data structures) can be constructed to have effectively `O(1)` running
|
428
|
+
time in most cases. Although the implementation for that data structure is more
|
429
|
+
complex than a heap, it may be necessary for enormous values of N.
|
430
|
+
|
431
|
+
[timing wheels]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf
|
432
|
+
|
433
|
+
## TODOs...
|
434
|
+
|
435
|
+
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
|
436
|
+
basic heap with an internal `Hash`, which maps a set of values to scores.
|
437
|
+
It is loosely inspired by go's timers, e.g. it lazily sifts its heap after deletion
|
438
|
+
and adjustments, to achieve faster average runtime for *add* and *cancel*
|
439
|
+
operations.
|
440
|
+
|
441
|
+
_TODO:_ Also ~~included is~~ _will include_ `DHeap::Lazy`, which contains some
|
442
|
+
features that are loosely inspired by go's timers. e.g: It lazily sifts its
|
443
|
+
heap after deletion and adjustments, to achieve faster average runtime for *add*
|
444
|
+
and *cancel* operations.
|
445
|
+
|
446
|
+
Additionally, I was inspired by reading go's "timer.go" implementation to
|
447
|
+
experiment with a 4-ary heap instead of the traditional binary heap. In the
|
448
|
+
case of timers, new timers are usually scheduled to run after most of the
|
449
|
+
existing timers. And timers are usually canceled before they have a chance to
|
450
|
+
run. While a binary heap holds 50% of its elements in its last layer, 75% of a
|
451
|
+
4-ary heap will have no children. That diminishes the extra comparison overhead
|
452
|
+
during sift-down.
|
170
453
|
|
171
454
|
## Development
|
172
455
|
|