d_heap 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8c06927edbc903bc67c3f52bf6d7027a1c9b04d7860acc712a29b4298fd7adf
4
- data.tar.gz: e347372e483f9d2b46deb5ad78cc6eb4ab9f28a427853e01282f035abdf475b7
3
+ metadata.gz: 3dd1049e0a8041a328da4ed65622c2f0589475bc386a0eb6f20e466c79587bc5
4
+ data.tar.gz: ec44970feaa5ce6aef37f511e71e55342ec93b7e1a0b2a3d40f249afa4e9ac25
5
5
  SHA512:
6
- metadata.gz: 124b615e62c6f6e58b3f8ff55bd7471f99960eab5e937dc40354f067112750fbebdcb0540585ec615d1d713f005fc40d2840a9b6ff4c9b6e6f62dcf374f56d9c
7
- data.tar.gz: 391af9443b13e0902a813cfb9712fdf98ea414d27d9406348ef25aa5ec03a47488b7c2bbbf066bbbfe50e69f04fec95413eb705c5c45c5185172c5b63777c059
6
+ metadata.gz: a6b6e192dbe5980b2b79728e4b4bf413151b3e193733d1435119482edb977f0a2edd692fd156614fdfbc86f4fa1dc6ac9f7907ca640c21ca23050d07b9a1caa6
7
+ data.tar.gz: 27a987139a1fd14f73c16459f72be2bf1059dadbd212e250a28b3691dff2372e708cdf740155e2d2aa21de78cc789816a9d83f80f42d3c396510cd6fce6e6bf2
@@ -0,0 +1,26 @@
1
+ name: Ruby
2
+
3
+ on: [push,pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ strategy:
8
+ fail-fast: false
9
+ matrix:
10
+ ruby: [2.5, 2.6, 2.7, 3.0]
11
+ os: [ubuntu, macos]
12
+ experimental: [false]
13
+ runs-on: ${{ matrix.os }}-latest
14
+ continue-on-error: ${{ matrix.experimental }}
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ bundler-cache: true
22
+ - name: Run the default task
23
+ run: |
24
+ gem install bundler -v 2.2.3
25
+ bundle install
26
+ bundle exec rake
@@ -0,0 +1,199 @@
1
+ inherit_mode:
2
+ merge:
3
+ - Exclude
4
+
5
+ AllCops:
6
+ TargetRubyVersion: 2.5
7
+ NewCops: disable
8
+ Exclude:
9
+ - bin/benchmark-driver
10
+ - bin/rake
11
+ - bin/rspec
12
+ - bin/rubocop
13
+
14
+ ###########################################################################
15
+ # rubocop defaults are simply WRONG about many rules... Sorry. It's true.
16
+
17
+ ###########################################################################
18
+ # Layout: Alignment. I want these to work, I really do...
19
+
20
+ # I wish this worked with "table". but that goes wrong sometimes.
21
+ Layout/HashAlignment: { Enabled: false }
22
+
23
+ # This needs to be configurable so parenthesis calls are aligned with first
24
+ # parameter, and non-parenthesis calls are aligned with fixed indentation.
25
+ Layout/ParameterAlignment: { Enabled: false }
26
+
27
+ ###########################################################################
28
+ # Layout: Empty lines
29
+
30
+ Layout/EmptyLineAfterGuardClause: { Enabled: false }
31
+ Layout/EmptyLineAfterMagicComment: { Enabled: true }
32
+ Layout/EmptyLineAfterMultilineCondition: { Enabled: false }
33
+ Layout/EmptyLines: { Enabled: true }
34
+ Layout/EmptyLinesAroundAccessModifier: { Enabled: true }
35
+ Layout/EmptyLinesAroundArguments: { Enabled: true }
36
+ Layout/EmptyLinesAroundBeginBody: { Enabled: true }
37
+ Layout/EmptyLinesAroundBlockBody: { Enabled: false }
38
+ Layout/EmptyLinesAroundExceptionHandlingKeywords: { Enabled: true }
39
+ Layout/EmptyLinesAroundMethodBody: { Enabled: true }
40
+
41
+ Layout/EmptyLineBetweenDefs:
42
+ Enabled: true
43
+ AllowAdjacentOneLineDefs: true
44
+
45
+ Layout/EmptyLinesAroundAttributeAccessor:
46
+ inherit_mode:
47
+ merge:
48
+ - Exclude
49
+ - AllowedMethods
50
+ Enabled: true
51
+ AllowedMethods:
52
+ - delegate
53
+ - def_delegator
54
+ - def_delegators
55
+ - def_instance_delegators
56
+
57
+ # "empty_lines_special" sometimes does the wrong thing and annoys me.
58
+ # But I've mostly learned to live with it... mostly. 🙁
59
+
60
+ Layout/EmptyLinesAroundClassBody:
61
+ Enabled: true
62
+ EnforcedStyle: empty_lines_special
63
+
64
+ Layout/EmptyLinesAroundModuleBody:
65
+ Enabled: true
66
+ EnforcedStyle: empty_lines_special
67
+
68
+ ###########################################################################
69
+ # Layout: Space around, before, inside, etc
70
+
71
+ Layout/SpaceAroundEqualsInParameterDefault: { Enabled: false }
72
+ Layout/SpaceBeforeBlockBraces: { Enabled: false }
73
+ Layout/SpaceBeforeFirstArg: { Enabled: false }
74
+ Layout/SpaceInLambdaLiteral: { Enabled: false }
75
+ Layout/SpaceInsideArrayLiteralBrackets: { Enabled: false }
76
+ Layout/SpaceInsideHashLiteralBraces: { Enabled: false }
77
+
78
+ Layout/SpaceInsideBlockBraces:
79
+ EnforcedStyle: space
80
+ EnforcedStyleForEmptyBraces: space
81
+ SpaceBeforeBlockParameters: false
82
+
83
+ # I would enable this if it were a bit better at handling alignment.
84
+ Layout/ExtraSpacing:
85
+ Enabled: false
86
+ AllowForAlignment: true
87
+ AllowBeforeTrailingComments: true
88
+
89
+ ###########################################################################
90
+ # Layout: Misc
91
+
92
+ Layout/LineLength:
93
+ Max: 90 # should stay under 80, but we'll allow a little wiggle-room
94
+
95
+ Layout/MultilineOperationIndentation: { Enabled: false }
96
+
97
+ Layout/MultilineMethodCallIndentation:
98
+ EnforcedStyle: indented
99
+
100
+ ###########################################################################
101
+ # Lint and Naming: rubocop defaults are mostly good, but...
102
+
103
+ Lint/UnusedMethodArgument: { Enabled: false }
104
+ Naming/BinaryOperatorParameterName: { Enabled: false } # def /(denominator)
105
+ Naming/RescuedExceptionsVariableName: { Enabled: false }
106
+
107
+ ###########################################################################
108
+ # Matrics:
109
+
110
+ Metrics/CyclomaticComplexity:
111
+ Max: 10
112
+
113
+ # Although it may be better to split specs into multiple files...?
114
+ Metrics/BlockLength:
115
+ Exclude:
116
+ - "spec/**/*_spec.rb"
117
+ CountAsOne:
118
+ - array
119
+ - hash
120
+ - heredoc
121
+
122
+ Metrics/ClassLength:
123
+ Max: 200
124
+ CountAsOne:
125
+ - array
126
+ - hash
127
+ - heredoc
128
+
129
+ ###########################################################################
130
+ # Style...
131
+
132
+ Style/AccessorGrouping: { Enabled: false }
133
+ Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
134
+ Style/ClassAndModuleChildren: { Enabled: false }
135
+ Style/EachWithObject: { Enabled: false }
136
+ Style/FormatStringToken: { Enabled: false }
137
+ Style/FloatDivision: { Enabled: false }
138
+ Style/IfUnlessModifier: { Enabled: false }
139
+ Style/IfWithSemicolon: { Enabled: false }
140
+ Style/Lambda: { Enabled: false }
141
+ Style/LineEndConcatenation: { Enabled: false }
142
+ Style/MixinGrouping: { Enabled: false }
143
+ Style/MultilineBlockChain: { Enabled: false }
144
+ Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
145
+ Style/RescueStandardError: { Enabled: false }
146
+ Style/Semicolon: { Enabled: false }
147
+ Style/SingleLineMethods: { Enabled: false }
148
+ Style/StabbyLambdaParentheses: { Enabled: false }
149
+ Style/WhenThen : { Enabled: false }
150
+
151
+ # I require trailing commas elsewhere, but these are optional
152
+ Style/TrailingCommaInArguments: { Enabled: false }
153
+
154
+ # If rubocop had an option to only enforce this on constants and literals (e.g.
155
+ # strings, regexp, range), I'd agree.
156
+ #
157
+ # But if you are using it e.g. on method arguments of unknown type, in the same
158
+ # style that ruby uses it with grep, then you are doing exactly the right thing.
159
+ Style/CaseEquality: { Enabled: false }
160
+
161
+ # I'd enable if "require_parentheses_when_complex" considered unary '!' simple.
162
+ Style/TernaryParentheses:
163
+ EnforcedStyle: require_parentheses_when_complex
164
+ Enabled: false
165
+
166
+ Style/BlockDelimiters:
167
+ inherit_mode:
168
+ merge:
169
+ - Exclude
170
+ - ProceduralMethods
171
+ - IgnoredMethods
172
+ - FunctionalMethods
173
+ EnforcedStyle: semantic
174
+ AllowBracesOnProceduralOneLiners: true
175
+ IgnoredMethods:
176
+ - expect # rspec
177
+ - profile # ruby-prof
178
+ - ips # benchmark-ips
179
+
180
+
181
+ Style/FormatString:
182
+ EnforcedStyle: percent
183
+
184
+ Style/StringLiterals:
185
+ Enabled: true
186
+ EnforcedStyle: double_quotes
187
+
188
+ Style/StringLiteralsInInterpolation:
189
+ Enabled: true
190
+ EnforcedStyle: double_quotes
191
+
192
+ Style/TrailingCommaInHashLiteral:
193
+ EnforcedStyleForMultiline: consistent_comma
194
+
195
+ Style/TrailingCommaInArrayLiteral:
196
+ EnforcedStyleForMultiline: consistent_comma
197
+
198
+ Style/YodaCondition:
199
+ EnforcedStyle: forbid_for_equality_operators_only
@@ -0,0 +1,59 @@
1
+ ## Current/Unreleased
2
+
3
+ ## Release v0.5.0 (2021-01-17)
4
+
5
+ * 🔥 **Breaking**: reversed order of `#push` arguments to `value, score`.
6
+ * ✨ Added `#insert(score, value)` to replace earlier version of `#push`.
7
+ * ✨ Added `#each_pop` enumerator.
8
+ * ✨ Added aliases for `deq`, `enq`, `first`, `pop_below`, `length`, and
9
+ `count`, to mimic other classes in ruby's stdlib.
10
+ * ⚡️♻️ More performance improvements:
11
+ * Created an `ENTRY` struct and store both the score and the value pointer in
12
+ the same `ENTRY *entries` array.
13
+ * Reduced unnecessary allocations or copies in both sift loops. A similar
14
+ refactoring also sped up the pure ruby benchmark implementation.
15
+ * Compiling with `-O3`.
16
+ * 📝 Updated (and in some cases, fixed) yardoc
17
+ * ♻️ Moved aliases and less performance sensitive code into ruby.
18
+ * ♻️ DRY up push/insert methods
19
+
20
+ ## Release v0.4.0 (2021-01-12)
21
+
22
+ * ⚡️ Big performance improvements, by using C `long double *cscores` array
23
+ * ⚡️ Scores must be `Integer` in `-uint64..+uint64`, or convertable to `Float`
24
+ * ⚡️ many many (so many) updates to benchmarks
25
+ * ✨ Added `DHeap#clear`
26
+ * 🐛 Fixed `DHeap#initialize_copy` and `#freeze`
27
+ * ♻️ significant refactoring
28
+ * 📝 Updated docs (mostly adding benchmarks)
29
+
30
+ ## Release v0.3.0 (2020-12-29)
31
+
32
+ * 🔥 **Breaking**: Removed class methods that operated directly on an array.
33
+ They weren't compatible with the performance improvements.
34
+ * ⚡️ Big performance improvements, by converting to a `T_DATA` struct.
35
+ * ♻️ Major refactoring/rewriting of dheap.c
36
+ * ✅ Added benchmark specs
37
+
38
+ ## Release v0.2.2 (2020-12-27)
39
+
40
+ * 🐛 fix `optimized_cmp`, avoiding internal symbols
41
+ * 📝 Update documentation
42
+ * 💚 fix macos CI
43
+ * ➕ Add rubocop 👮🎨
44
+
45
+ ## Release v0.2.1 (2020-12-26)
46
+
47
+ * ⬆️ Upgraded rake (and bundler) to support ruby 3.0
48
+
49
+ ## Release v0.2.0 (2020-12-24)
50
+
51
+ * ✨ Add ability to push separate score and value
52
+ * ⚡️ Big performance gain, by storing scores separately and using ruby's
53
+ internal `OPTIMIZED_CMP` instead of always directly calling `<=>`
54
+
55
+ ## Release v0.1.0 (2020-12-22)
56
+
57
+ 🎉 initial release 🎉
58
+
59
+ * ✨ Add basic d-ary Heap implementation
data/Gemfile CHANGED
@@ -1,8 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  # Specify your gem's dependencies in d_heap.gemspec
4
6
  gemspec
5
7
 
6
- gem "rake", "~> 12.0"
8
+ gem "pry"
9
+ gem "rake", "~> 13.0"
7
10
  gem "rake-compiler"
8
- gem "rspec", "~> 3.0"
11
+ gem "rspec", "~> 3.10"
12
+ gem "rubocop", "~> 1.0"
13
+
14
+ gem "perf"
15
+ gem "priority_queue_cxx"
16
+ gem "stackprof"
@@ -1,15 +1,30 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- d_heap (0.2.0)
4
+ d_heap (0.5.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ ast (2.4.1)
10
+ benchmark_driver (0.15.16)
11
+ coderay (1.1.3)
9
12
  diff-lcs (1.4.4)
10
- rake (12.3.3)
13
+ method_source (1.0.0)
14
+ parallel (1.19.2)
15
+ parser (2.7.2.0)
16
+ ast (~> 2.4.1)
17
+ perf (0.1.2)
18
+ priority_queue_cxx (0.3.4)
19
+ pry (0.13.1)
20
+ coderay (~> 1.1)
21
+ method_source (~> 1.0)
22
+ rainbow (3.0.0)
23
+ rake (13.0.3)
11
24
  rake-compiler (1.1.1)
12
25
  rake
26
+ regexp_parser (1.8.2)
27
+ rexml (3.2.3)
13
28
  rspec (3.10.0)
14
29
  rspec-core (~> 3.10.0)
15
30
  rspec-expectations (~> 3.10.0)
@@ -23,15 +38,37 @@ GEM
23
38
  diff-lcs (>= 1.2.0, < 2.0)
24
39
  rspec-support (~> 3.10.0)
25
40
  rspec-support (3.10.0)
41
+ rubocop (1.2.0)
42
+ parallel (~> 1.10)
43
+ parser (>= 2.7.1.5)
44
+ rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 1.8)
46
+ rexml
47
+ rubocop-ast (>= 1.0.1)
48
+ ruby-progressbar (~> 1.7)
49
+ unicode-display_width (>= 1.4.0, < 2.0)
50
+ rubocop-ast (1.1.1)
51
+ parser (>= 2.7.1.5)
52
+ ruby-prof (1.4.2)
53
+ ruby-progressbar (1.10.1)
54
+ stackprof (0.2.16)
55
+ unicode-display_width (1.7.0)
26
56
 
27
57
  PLATFORMS
28
58
  ruby
29
59
 
30
60
  DEPENDENCIES
61
+ benchmark_driver
31
62
  d_heap!
32
- rake (~> 12.0)
63
+ perf
64
+ priority_queue_cxx
65
+ pry
66
+ rake (~> 13.0)
33
67
  rake-compiler
34
- rspec (~> 3.0)
68
+ rspec (~> 3.10)
69
+ rubocop (~> 1.0)
70
+ ruby-prof
71
+ stackprof
35
72
 
36
73
  BUNDLED WITH
37
- 2.1.4
74
+ 2.2.3
data/README.md CHANGED
@@ -1,139 +1,390 @@
1
1
  # DHeap
2
2
 
3
- A fast _d_-ary heap implementation for ruby, useful in priority queues and graph
4
- algorithms.
5
-
6
- The _d_-ary heap data structure is a generalization of the binary heap, in which
7
- the nodes have _d_ children instead of 2. This allows for "decrease priority"
8
- operations to be performed more quickly with the tradeoff of slower delete
9
- minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
10
- binary heaps, allowing them to run more quickly in practice despite slower
11
- worst-case time complexity.
12
-
13
- _TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
14
- ~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
15
- directly handled as a priority queue. These extension methods are meant to be
16
- used similarly to how `#bsearch` and `#bsearch_index` might be used.
17
-
18
- _TODO:_ Also included is `DHeap::Set`, which augments the basic heap with an
19
- internal `Hash`, which maps a set of values to scores.
20
- loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
21
- and adjustments, to achieve faster average runtime for *add* and *cancel*
22
- operations.
3
+ A fast [_d_-ary heap][d-ary heap] [priority queue] implementation for ruby,
4
+ implemented as a C extension.
5
+
6
+ With a regular queue, you expect "FIFO" behavior: first in, first out. With a
7
+ stack you expect "LIFO": last in first out. A priority queue has a score for
8
+ each element and elements are popped in order by score. Priority queues are
9
+ often used in algorithms for e.g. [scheduling] of timers or bandwidth
10
+ management, for [Huffman coding], and various graph search algorithms such as
11
+ [Dijkstra's algorithm], [A* search], or [Prim's algorithm].
12
+
13
+ The _d_-ary heap data structure is a generalization of the [binary heap], in
14
+ which the nodes have _d_ children instead of 2. This allows for "insert" and
15
+ "decrease priority" operations to be performed more quickly with the tradeoff of
16
+ slower delete minimum. Additionally, _d_-ary heaps can have better memory cache
17
+ behavior than binary heaps, allowing them to run more quickly in practice
18
+ despite slower worst-case time complexity. In the worst case, a _d_-ary heap
19
+ requires only `O(log n / log d)` operations to push, with the tradeoff that pop
20
+ requires `O(d log n / log d)`.
21
+
22
+ Although you should probably just use the default _d_ value of `4` (see the
23
+ analysis below), it's always advisable to benchmark your specific use-case.
24
+
25
+ [d-ary heap]: https://en.wikipedia.org/wiki/D-ary_heap
26
+ [priority queue]: https://en.wikipedia.org/wiki/Priority_queue
27
+ [binary heap]: https://en.wikipedia.org/wiki/Binary_heap
28
+ [scheduling]: https://en.wikipedia.org/wiki/Scheduling_(computing)
29
+ [Huffman coding]: https://en.wikipedia.org/wiki/Huffman_coding#Compression
30
+ [Dijkstra's algorithm]: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Using_a_priority_queue
31
+ [A* search]: https://en.wikipedia.org/wiki/A*_search_algorithm#Description
32
+ [Prim's algorithm]: https://en.wikipedia.org/wiki/Prim%27s_algorithm
23
33
 
24
- _TODO:_ Also included is `DHeap::Timers`, which contains some features that are
25
- loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
26
- and adjustments, to achieve faster average runtime for *add* and *cancel*
27
- operations.
34
+ ## Usage
28
35
 
29
- ## Motivation
36
+ Quick reference:
30
37
 
31
- Ruby's Array class comes with some helpful methods for maintaining a sorted
32
- array, by combining `#bsearch_index` with `#insert`. With certain insert/remove
33
- workloads that can perform very well, but in the worst-case an insert or delete
34
- can result in O(n), since it may need to memcopy a significant portion of the
35
- array. Knowing that priority queues are usually implemented with a heap, and
36
- that the heap is a relatively simple data structure, I set out to replace my
37
- `#bsearch_index` and `#insert` code with a one. I was surprised to find that,
38
- at least under certain benchmarks, my ruby Heap implementation was tied with or
39
- slower than inserting into a fully sorted array. On the one hand, this is a
40
- testament to ruby's fine-tuned Array implementation. On the other hand, it
41
- seemed like a heap implementated in C should easily match the speed of ruby's
42
- bsearch + insert.
38
+ * `heap << object` adds a value, with `Float(object)` as its score.
39
+ * `heap.push(object, score)` adds a value with an extrinsic score.
40
+ * `heap.pop` removes and returns the value with the minimum score.
41
+ * `heap.pop_lte(score)` pops if the minimum score is `<=` the provided score.
42
+ * `heap.peek` to view the minimum value without popping it.
43
+ * `heap.clear` to remove all items from the heap.
44
+ * `heap.empty?` returns true if the heap is empty.
45
+ * `heap.size` returns the number of items in the heap.
43
46
 
44
- Additionally, I was inspired by reading go's "timer.go" implementation to
45
- experiment with a 4-ary heap, instead of the traditional binary heap. In the
46
- case of timers, new timers are usually scheduled to run after most of the
47
- existing timers and timers are usually canceled before they have a chance to
48
- run. While a binary heap holds 50% of its elements in its last layer, 75% of a
49
- 4-ary heap will have no children. That diminishes the extra comparison
50
- overhead during sift-down.
47
+ The basic API is `#push(object, score)` and `pop`. If your values behave as
48
+ their own score, then you can push with `#<<`. If the score changes while the
49
+ object is still in the heap, it will not be re-evaluated again. The score must
50
+ either be `Integer` or `Float` or convertable to a `Float` via `Float(score)`
51
+ (i.e. it should implement `#to_f`).
51
52
 
52
- ## Installation
53
+ ```ruby
54
+ require "d_heap"
53
55
 
54
- Add this line to your application's Gemfile:
56
+ Task = Struct.new(:id, :time) do
57
+ def to_f; time.to_f end
58
+ end
59
+ t1 = Task.new(1, Time.now + 5*60)
60
+ t2 = Task.new(2, Time.now + 50)
61
+ t3 = Task.new(3, Time.now + 60)
62
+ t4 = Task.new(4, Time.now + 5)
63
+
64
+ # if the object returns its own score via #to_f, "<<" is the simplest API
65
+ heap << t1 << t2
66
+
67
+ # or push with an explicit score
68
+ heap.push t3, t4.to_f
69
+ heap.push t4, t4 # score can be implicitly cast with Float
70
+
71
+ # peek and pop
72
+ heap.pop # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
73
+ heap.pop # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
74
+ heap.peek # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
75
+ heap.pop # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
76
+ heap.pop # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
77
+ heap.empty? # => true
78
+ heap.pop # => nil
79
+ ```
80
+
81
+ Constraining scores to numeric values gives more than 50% speedup under some
82
+ benchmarks! _n.b._ `Integer` _scores must have an absolute value that fits
83
+ into_ `unsigned long long`. _This is architecture dependant but on an IA-64
84
+ system this is 64 bits, which gives a range of -18,446,744,073,709,551,615 to
85
+ +18446744073709551615. Comparing arbitary objects via_ `a <=> b` _was the
86
+ original design and may be added back in a future version,_ if (and only if) _it
87
+ can be done without impacting the speed of numeric comparisons._
55
88
 
56
89
  ```ruby
57
- gem 'd_heap'
90
+ heap.clear
91
+
92
+ # The score can be derived from the value by using to_f.
93
+ # "a <=> b" is *much* slower than comparing numbers, so it isn't used.
94
+ class Event
95
+ include Comparable
96
+ attr_reader :time, :payload
97
+ alias_method :to_time, :time
98
+
99
+ def initialize(time, payload)
100
+ @time = time.to_time
101
+ @payload = payload
102
+ freeze
103
+ end
104
+
105
+ def to_f
106
+ time.to_f
107
+ end
108
+
109
+ def <=>(other)
110
+ to_f <=> other.to_f
111
+ end
112
+ end
113
+
114
+ heap << comparable_max # sorts last, using <=>
115
+ heap << comparable_min # sorts first, using <=>
116
+ heap << comparable_mid # sorts in the middle, using <=>
117
+ heap.pop # => comparable_min
118
+ heap.pop # => comparable_mid
119
+ heap.pop # => comparable_max
120
+ heap.empty? # => true
121
+ heap.pop # => nil
58
122
  ```
59
123
 
60
- And then execute:
124
+ You can also pass a value into `#pop(max)` which will only pop if the minimum
125
+ score is less than or equal to `max`.
61
126
 
62
- $ bundle install
63
-
64
- Or install it yourself as:
127
+ Read the [API documentation] for more detailed documentation and examples.
65
128
 
66
- $ gem install d_heap
129
+ [API documentation]: https://rubydoc.info/gems/d_heap/DHeap
67
130
 
68
- ## Usage
131
+ ## Installation
69
132
 
70
- The simplest way to use it is simply with `#push` and `#pop`. Push will
133
+ Add this line to your application's Gemfile:
71
134
 
72
135
  ```ruby
73
- require "d_heap"
136
+ gem 'd_heap'
137
+ ```
74
138
 
75
- heap = DHeap.new # defaults to a 4-ary heap
139
+ And then execute:
76
140
 
77
- # storing [time, task] tuples
78
- heap << [Time.now + 5*60, Task.new(1)]
79
- heap << [Time.now + 30, Task.new(2)]
80
- heap << [Time.now + 60, Task.new(3)]
81
- heap << [Time.now + 5, Task.new(4)]
141
+ $ bundle install
82
142
 
83
- # peeking and popping (using last to get the task and ignore the time)
84
- heap.pop.last # => Task[4]
85
- heap.pop.last # => Task[2]
86
- heap.peak.last # => Task[3]
87
- heap.pop.last # => Task[3]
88
- heap.pop.last # => Task[1]
89
- ```
143
+ Or install it yourself as:
90
144
 
91
- Read the `rdoc` for more detailed documentation and examples.
145
+ $ gem install d_heap
92
146
 
93
- ## Benchmarks
147
+ ## Motivation
94
148
 
95
- _TODO: put benchmarks here._
149
+ One naive approach to a priority queue is to maintain an array in sorted order.
150
+ This can be very simply implemented in ruby with `Array#bseach_index` +
151
+ `Array#insert`. This can be very fast—`Array#pop` is `O(1)`—but the worst-case
152
+ for insert is `O(n)` because it may need to `memcpy` a significant portion of
153
+ the array.
154
+
155
+ The standard way to implement a priority queue is with a binary heap. Although
156
+ this increases the time for `pop`, it converts the amortized time per push + pop
157
+ from `O(n)` to `O(d log n / log d)`.
158
+
159
+ However, I was surprised to find that—at least for some benchmarks—my pure ruby
160
+ heap implementation was much slower than inserting into and popping from a fully
161
+ sorted array. The reasons for this surprising result: Although it is `O(n)`,
162
+ `memcpy` has a _very_ small constant factor, and calling `<=>` from ruby code
163
+ has relatively _much_ larger constant factors. If your queue contains only a
164
+ few thousand items, the overhead of those extra calls to `<=>` is _far_ more
165
+ than occasionally calling `memcpy`. In the worst case, a _d_-heap will require
166
+ `d + 1` times more comparisons for each push + pop than a `bsearch` + `insert`
167
+ sorted array.
168
+
169
+ Moving the sift-up and sift-down code into C helps some. But much more helpful
170
+ is optimizing the comparison of numeric scores, so `a <=> b` never needs to be
171
+ called. I'm hopeful that MJIT will eventually obsolete this C-extension. This
172
+ can be hotspot code, and a basic ruby implementation could perform well if `<=>`
173
+ had much lower overhead.
96
174
 
97
175
  ## Analysis
98
176
 
99
177
  ### Time complexity
100
178
 
101
- Both sift operations can perform (log[d] n = log n / log d) swaps.
102
- Swap up performs only a single comparison per swap: O(1).
103
- Swap down performs as many as d comparions per swap: O(d).
179
+ There are two fundamental heap operations: sift-up (used by push) and sift-down
180
+ (used by pop).
104
181
 
105
- Inserting an item is O(log n / log d).
106
- Deleting the root is O(d log n / log d).
182
+ * Both sift operations can perform as many as `log n / log d` swaps, as the
183
+ element may sift from the bottom of the tree to the top, or vice versa.
184
+ * Sift-up performs a single comparison per swap: `O(1)`.
185
+ So pushing a new element is `O(log n / log d)`.
186
+ * Swap down performs as many as d comparions per swap: `O(d)`.
187
+ So popping the min element is `O(d log n / log d)`.
107
188
 
108
- Assuming every inserted item is eventually deleted from the root, d=4 requires
109
- the fewest comparisons for combined insert and delete:
110
- * (1 + 2) lg 2 = 4.328085
111
- * (1 + 3) lg 3 = 3.640957
112
- * (1 + 4) lg 4 = 3.606738
113
- * (1 + 5) lg 5 = 3.728010
114
- * (1 + 6) lg 6 = 3.906774
115
- * etc...
189
+ Assuming every inserted element is eventually deleted from the root, d=4
190
+ requires the fewest comparisons for combined insert and delete:
191
+
192
+ * (1 + 2) lg 2 = 4.328085
193
+ * (1 + 3) lg 3 = 3.640957
194
+ * (1 + 4) lg 4 = 3.606738
195
+ * (1 + 5) lg 5 = 3.728010
196
+ * (1 + 6) lg 6 = 3.906774
197
+ * etc...
116
198
 
117
199
  Leaf nodes require no comparisons to shift down, and higher values for d have
118
200
  higher percentage of leaf nodes:
119
- * d=2 has ~50% leaves,
120
- * d=3 has ~67% leaves,
121
- * d=4 has ~75% leaves,
122
- * and so on...
201
+
202
+ * d=2 has ~50% leaves,
203
+ * d=3 has ~67% leaves,
204
+ * d=4 has ~75% leaves,
205
+ * and so on...
123
206
 
124
207
  See https://en.wikipedia.org/wiki/D-ary_heap#Analysis for deeper analysis.
125
208
 
126
209
  ### Space complexity
127
210
 
128
- Because the heap is a complete binary tree, space usage is linear, regardless
129
- of d. However higher d values may provide better cache locality.
211
+ Space usage is linear, regardless of d. However higher d values may
212
+ provide better cache locality. Because the heap is a complete binary tree, the
213
+ elements can be stored in an array, without the need for tree or list pointers.
130
214
 
131
- We can run comparisons much much faster for Numeric or String objects than for
132
- ruby objects which delegate comparison to internal Numeric or String objects.
133
- And it is often advantageous to use extrinsic scores for uncomparable items.
134
- For this, our internal array uses twice as many entries (one for score and one
135
- for value) as it would if it only supported intrinsic comparison or used an
136
- un-memoized "sort_by" proc.
215
+ Ruby can compare Numeric values _much_ faster than other ruby objects, even if
216
+ those objects simply delegate comparison to internal Numeric values. And it is
217
+ often useful to use external scores for otherwise uncomparable values. So
218
+ `DHeap` uses twice as many entries (one for score and one for value)
219
+ as an array which only stores values.
220
+
221
+ ## Benchmarks
222
+
223
+ _See `bin/benchmarks` and `docs/benchmarks.txt`, as well as `bin/profile` and
224
+ `docs/profile.txt` for more details or updated results. These benchmarks were
225
+ measured with v0.5.0 and ruby 2.7.2 without MJIT enabled._
226
+
227
+ These benchmarks use very simple implementations for a pure-ruby heap and an
228
+ array that is kept sorted using `Array#bsearch_index` and `Array#insert`. For
229
+ comparison, an alternate implementation `Array#min` and `Array#delete_at` is
230
+ also shown.
231
+
232
+ Three different scenarios are measured:
233
+ * push N values but never pop (clearing between each set of pushes).
234
+ * push N values and then pop N values.
235
+ Although this could be used for heap sort, we're unlikely to choose heap sort
236
+ over Ruby's quick sort implementation. I'm using this scenario to represent
237
+ the amortized cost of creating a heap and (eventually) draining it.
238
+ * For a heap of size N, repeatedly push and pop while keeping a stable size.
239
+ This is a _very simple_ approximation for how most scheduler/timer heaps
240
+ would be used. Usually when a timer fires it will be quickly replaced by a
241
+ new timer, and the overall count of timers will remain roughly stable.
242
+
243
+ In these benchmarks, `DHeap` runs faster than all other implementations for
244
+ every scenario and every value of N, although the difference is much more
245
+ noticable at higher values of N. The pure ruby heap implementation is
246
+ competitive for `push` alone at every value of N, but is significantly slower
247
+ than bsearch + insert for push + pop until N is _very_ large (somewhere between
248
+ 10k and 100k)!
249
+
250
+ For very small N values the benchmark implementations, `DHeap` runs faster than
251
+ the other implementations for each scenario, although the difference is still
252
+ relatively small. The pure ruby binary heap is 2x or more slower than bsearch +
253
+ insert for common common push/pop scenario.
254
+
255
+ == push N (N=5) ==========================================================
256
+ push N (c_dheap): 1969700.7 i/s
257
+ push N (c++ stl): 1049738.1 i/s - 1.88x slower
258
+ push N (rb_heap): 928435.2 i/s - 2.12x slower
259
+ push N (bsearch): 921060.0 i/s - 2.14x slower
260
+
261
+ == push N then pop N (N=5) ===============================================
262
+ push N + pop N (c_dheap): 1375805.0 i/s
263
+ push N + pop N (c++ stl): 1134997.5 i/s - 1.21x slower
264
+ push N + pop N (findmin): 862913.1 i/s - 1.59x slower
265
+ push N + pop N (bsearch): 762887.1 i/s - 1.80x slower
266
+ push N + pop N (rb_heap): 506890.4 i/s - 2.71x slower
267
+
268
+ == Push/pop with pre-filled queue of size=N (N=5) ========================
269
+ push + pop (c_dheap): 9044435.5 i/s
270
+ push + pop (c++ stl): 7534583.4 i/s - 1.20x slower
271
+ push + pop (findmin): 5026155.1 i/s - 1.80x slower
272
+ push + pop (bsearch): 4300260.0 i/s - 2.10x slower
273
+ push + pop (rb_heap): 2299499.7 i/s - 3.93x slower
274
+
275
+ By N=21, `DHeap` has pulled significantly ahead of bsearch + insert for all
276
+ scenarios, but the pure ruby heap is still slower than every other
277
+ implementation—even resorting the array after every `#push`—in any scenario that
278
+ uses `#pop`.
279
+
280
+ == push N (N=21) =========================================================
281
+ push N (c_dheap): 464231.4 i/s
282
+ push N (c++ stl): 305546.7 i/s - 1.52x slower
283
+ push N (rb_heap): 202803.7 i/s - 2.29x slower
284
+ push N (bsearch): 168678.7 i/s - 2.75x slower
285
+
286
+ == push N then pop N (N=21) ==============================================
287
+ push N + pop N (c_dheap): 298350.3 i/s
288
+ push N + pop N (c++ stl): 252227.1 i/s - 1.18x slower
289
+ push N + pop N (findmin): 161998.7 i/s - 1.84x slower
290
+ push N + pop N (bsearch): 143432.3 i/s - 2.08x slower
291
+ push N + pop N (rb_heap): 79622.1 i/s - 3.75x slower
292
+
293
+ == Push/pop with pre-filled queue of size=N (N=21) =======================
294
+ push + pop (c_dheap): 8855093.4 i/s
295
+ push + pop (c++ stl): 7223079.5 i/s - 1.23x slower
296
+ push + pop (findmin): 4542913.7 i/s - 1.95x slower
297
+ push + pop (bsearch): 3461802.4 i/s - 2.56x slower
298
+ push + pop (rb_heap): 1845488.7 i/s - 4.80x slower
299
+
300
+ At higher values of N, a heap's logarithmic growth leads to only a little
301
+ slowdown of `#push`, while insert's linear growth causes it to run noticeably
302
+ slower and slower. But because `#pop` is `O(1)` for a sorted array and `O(d log
303
+ n / log d)` for a heap, scenarios involving both `#push` and `#pop` remain
304
+ relatively close, and bsearch + insert still runs faster than a pure ruby heap,
305
+ even up to queues with 10k items. But as queue size increases beyond that,
306
+ the linear time complexity to keep a sorted array dominates.
307
+
308
+ == push + pop (rb_heap)
309
+ queue size = 10000: 736618.2 i/s
310
+ queue size = 25000: 670186.8 i/s - 1.10x slower
311
+ queue size = 50000: 618156.7 i/s - 1.19x slower
312
+ queue size = 100000: 579250.7 i/s - 1.27x slower
313
+ queue size = 250000: 572795.0 i/s - 1.29x slower
314
+ queue size = 500000: 543648.3 i/s - 1.35x slower
315
+ queue size = 1000000: 513523.4 i/s - 1.43x slower
316
+ queue size = 2500000: 460848.9 i/s - 1.60x slower
317
+ queue size = 5000000: 445234.5 i/s - 1.65x slower
318
+ queue size = 10000000: 423119.0 i/s - 1.74x slower
319
+
320
+ == push + pop (bsearch)
321
+ queue size = 10000: 786334.2 i/s
322
+ queue size = 25000: 364963.8 i/s - 2.15x slower
323
+ queue size = 50000: 200520.6 i/s - 3.92x slower
324
+ queue size = 100000: 88607.0 i/s - 8.87x slower
325
+ queue size = 250000: 34530.5 i/s - 22.77x slower
326
+ queue size = 500000: 17965.4 i/s - 43.77x slower
327
+ queue size = 1000000: 5638.7 i/s - 139.45x slower
328
+ queue size = 2500000: 1302.0 i/s - 603.93x slower
329
+ queue size = 5000000: 592.0 i/s - 1328.25x slower
330
+ queue size = 10000000: 288.8 i/s - 2722.66x slower
331
+
332
+ == push + pop (c_dheap)
333
+ queue size = 10000: 7311366.6 i/s
334
+ queue size = 50000: 6737824.5 i/s - 1.09x slower
335
+ queue size = 25000: 6407340.6 i/s - 1.14x slower
336
+ queue size = 100000: 6254396.3 i/s - 1.17x slower
337
+ queue size = 250000: 5917684.5 i/s - 1.24x slower
338
+ queue size = 500000: 5126307.6 i/s - 1.43x slower
339
+ queue size = 1000000: 4403494.1 i/s - 1.66x slower
340
+ queue size = 2500000: 3304088.2 i/s - 2.21x slower
341
+ queue size = 5000000: 2664897.7 i/s - 2.74x slower
342
+ queue size = 10000000: 2137927.6 i/s - 3.42x slower
343
+
344
+ ## Profiling
345
+
346
+ _n.b. `Array#fetch` is reading the input data, external to heap operations.
347
+ These benchmarks use integers for all scores, which enables significantly faster
348
+ comparisons. If `a <=> b` were used instead, then the difference between push
349
+ and pop would be much larger. And ruby's `Tracepoint` impacts these different
350
+ implementations differently. So we can't use these profiler results for
351
+ comparisons between implementations. A sampling profiler would be needed for
352
+ more accurate relative measurements._
353
+
354
+ It's informative to look at the `ruby-prof` results for a simple binary search +
355
+ insert implementation, repeatedly pushing and popping to a large heap. In
356
+ particular, even with 1000 members, the linear `Array#insert` is _still_ faster
357
+ than the logarithmic `Array#bsearch_index`. At this scale, ruby comparisons are
358
+ still (relatively) slow and `memcpy` is (relatively) quite fast!
359
+
360
+ %self total self wait child calls name location
361
+ 34.79 2.222 2.222 0.000 0.000 1000000 Array#insert
362
+ 32.59 2.081 2.081 0.000 0.000 1000000 Array#bsearch_index
363
+ 12.84 6.386 0.820 0.000 5.566 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
364
+ 10.38 4.966 0.663 0.000 4.303 1000000 DHeap::Benchmarks::BinarySearchAndInsert#<< d_heap/benchmarks/implementations.rb:61
365
+ 5.38 0.468 0.343 0.000 0.125 1000000 DHeap::Benchmarks::BinarySearchAndInsert#pop d_heap/benchmarks/implementations.rb:70
366
+ 2.06 0.132 0.132 0.000 0.000 1000000 Array#fetch
367
+ 1.95 0.125 0.125 0.000 0.000 1000000 Array#pop
368
+
369
+ Contrast this with a simplistic pure-ruby implementation of a binary heap:
370
+
371
+ %self total self wait child calls name location
372
+ 48.52 8.487 8.118 0.000 0.369 1000000 DHeap::Benchmarks::NaiveBinaryHeap#pop d_heap/benchmarks/implementations.rb:96
373
+ 42.94 7.310 7.184 0.000 0.126 1000000 DHeap::Benchmarks::NaiveBinaryHeap#<< d_heap/benchmarks/implementations.rb:80
374
+ 4.80 16.732 0.803 0.000 15.929 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
375
+
376
+ You can see that it spends slightly more time in pop than it does in push. That
377
+ is expected behavior for a heap: although both are O(log n), pop is
378
+ significantly more complex, and has _d_ comparisons per layer.
379
+
380
+ And `DHeap` shows a similar comparison between push and pop, although it spends
381
+ half of its time in the benchmark code (which is written in ruby):
382
+
383
+ %self total self wait child calls name location
384
+ 43.09 1.685 0.726 0.000 0.959 1 DHeap::Benchmarks::Scenarios#repeated_push_pop d_heap/benchmarks.rb:77
385
+ 26.05 0.439 0.439 0.000 0.000 1000000 DHeap#<<
386
+ 23.57 0.397 0.397 0.000 0.000 1000000 DHeap#pop
387
+ 7.29 0.123 0.123 0.000 0.000 1000000 Array#fetch
137
388
 
138
389
  ### Timers
139
390
 
@@ -151,22 +402,54 @@ faster than a delete and re-insert.
151
402
 
152
403
  ## Alternative data structures
153
404
 
405
+ As always, you should run benchmarks with your expected scenarios to determine
406
+ which is right.
407
+
154
408
  Depending on what you're doing, maintaining a sorted `Array` using
155
- `#bsearch_index` and `#insert` might be faster! Although it is technically
156
- O(n) for insertions, the implementations for `memcpy` or `memmove` can be *very*
157
- fast on modern architectures. Also, it can be faster O(n) on average, if
158
- insertions are usually near the end of the array. You should run benchmarks
159
- with your expected scenarios to determine which is right.
409
+ `#bsearch_index` and `#insert` might be just fine! As discussed above, although
410
+ it is `O(n)` for insertions, `memcpy` is so fast on modern hardware that this
411
+ may not matter. Also, if you can arrange for insertions to occur near the end
412
+ of the array, that could significantly reduce the `memcpy` overhead even more.
413
+
414
+ More complex heap variants, e.g. [Fibonacci heap], can allow heaps to be merged
415
+ as well as lower amortized time.
416
+
417
+ [Fibonacci heap]: https://en.wikipedia.org/wiki/Fibonacci_heap
160
418
 
161
419
  If it is important to be able to quickly enumerate the set or find the ranking
162
- of values in it, then you probably want to use a self-balancing binary search
163
- tree (e.g. a red-black tree) or a skip-list.
164
-
165
- A Hashed Timing Wheel or Heirarchical Timing Wheels (or some variant in that
166
- family of data structures) can be constructed to have effectively O(1) running
167
- time in most cases. However, the implementation for that data structure is more
168
- complex than a heap. If a 4-ary heap is good enough for go's timers, it should
169
- be suitable for many use cases.
420
+ of values in it, then you may want to use a self-balancing binary search tree
421
+ (e.g. a [red-black tree]) or a [skip-list].
422
+
423
+ [red-black tree]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
424
+ [skip-list]: https://en.wikipedia.org/wiki/Skip_list
425
+
426
+ [Hashed and Hierarchical Timing Wheels][timing wheels] (or some variant in that
427
+ family of data structures) can be constructed to have effectively `O(1)` running
428
+ time in most cases. Although the implementation for that data structure is more
429
+ complex than a heap's, it may be necessary for enormous values of N.
430
+
431
+ [timing wheels]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf
432
+
433
+ ## TODOs...
434
+
435
+ _TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
436
+ basic heap with an internal `Hash`, which maps a set of values to scores.
437
+ It is loosely inspired by go's timers; e.g., it lazily sifts its heap after deletion
438
+ and adjustments, to achieve faster average runtime for *add* and *cancel*
439
+ operations.
440
+
441
+ _TODO:_ Also ~~included is~~ _will include_ `DHeap::Lazy`, which contains some
442
+ features that are loosely inspired by go's timers. e.g: It lazily sifts its
443
+ heap after deletion and adjustments, to achieve faster average runtime for *add*
444
+ and *cancel* operations.
445
+
446
+ Additionally, I was inspired by reading go's "timer.go" implementation to
447
+ experiment with a 4-ary heap instead of the traditional binary heap. In the
448
+ case of timers, new timers are usually scheduled to run after most of the
449
+ existing timers. And timers are usually canceled before they have a chance to
450
+ run. While a binary heap holds 50% of its elements in its last layer, 75% of a
451
+ 4-ary heap will have no children. That diminishes the extra comparison overhead
452
+ during sift-down.
170
453
 
171
454
  ## Development
172
455