d_heap 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 89808cd668688e16b5dd1e7e6b1ce9e57651217089b0686b4aeb83d41c565020
4
- data.tar.gz: 43e11d72c7143061d0b424f8290e4ef7f66dfaedc8df2f5eff11e20cce2f7796
3
+ metadata.gz: f549e01dd83eb6b48c1190443495a628ed1dd64ead7eb94851e661aef2607e14
4
+ data.tar.gz: 492ce5c17ace9ecc9deaccf8fcd47883835da28d4e5f32328aef60989186b2ff
5
5
  SHA512:
6
- metadata.gz: 7ce7b4a755692a99fdee3d30e27f7dd02f6f90c12fe478a5d20f5e224183f82c5d7f82141c80d229edee48a1dcf6a90878c5648b3bb2107d093dcef5884abf59
7
- data.tar.gz: 03daf597a17aee15f67f2bf3aa37625e84c9d2f759c22313a9e1010b9f3878c1e8d2dc005b0f82fca5f1792ee57abf7f93d3ab176edad5e29686ca7dedaae905
6
+ metadata.gz: 85521dee7f2a9992980935756571e87afbe8ae13347b5b3fbad17b501b5709111972b98ad0c9e1fca6d318c4be20ce2983086dfd84f7c0e73636ac9e4f11f253
7
+ data.tar.gz: e1daac5b02fcc817b3c6c6a99395e3ca0b92f42bb14bd813fedbb3037eed698bda335c2898d5b0131b48fecd73e4ccf1615943a52287c36f73764b08bf8b1969
@@ -0,0 +1,26 @@
1
+ name: Ruby
2
+
3
+ on: [push,pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ strategy:
8
+ fail-fast: false
9
+ matrix:
10
+ ruby: [2.5, 2.6, 2.7, 3.0]
11
+ os: [ubuntu, macos]
12
+ experimental: [false]
13
+ runs-on: ${{ matrix.os }}-latest
14
+ continue-on-error: ${{ matrix.experimental }}
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ bundler-cache: true
22
+ - name: Run the default task
23
+ run: |
24
+ gem install bundler -v 2.2.3
25
+ bundle install
26
+ bundle exec rake
@@ -0,0 +1,160 @@
1
+ inherit_mode:
2
+ merge:
3
+ - Exclude
4
+
5
+ AllCops:
6
+ TargetRubyVersion: 2.5
7
+ NewCops: disable
8
+ Exclude:
9
+ - bin/rake
10
+ - bin/rspec
11
+ - bin/rubocop
12
+
13
+ ###########################################################################
14
+ # rubocop defaults are simply WRONG about many rules... Sorry. It's true.
15
+
16
+ ###########################################################################
17
+ # Layout: Alignment. I want these to work, I really do...
18
+
19
+ # I wish this worked with "table". but that goes wrong sometimes.
20
+ Layout/HashAlignment: { Enabled: false }
21
+
22
+ # This needs to be configurable so parenthesis calls are aligned with first
23
+ # parameter, and non-parenthesis calls are aligned with fixed indentation.
24
+ Layout/ParameterAlignment: { Enabled: false }
25
+
26
+ ###########################################################################
27
+ # Layout: Empty lines
28
+
29
+ Layout/EmptyLineAfterGuardClause: { Enabled: false }
30
+ Layout/EmptyLineAfterMagicComment: { Enabled: true }
31
+ Layout/EmptyLineAfterMultilineCondition: { Enabled: false }
32
+ Layout/EmptyLines: { Enabled: true }
33
+ Layout/EmptyLinesAroundAccessModifier: { Enabled: true }
34
+ Layout/EmptyLinesAroundArguments: { Enabled: true }
35
+ Layout/EmptyLinesAroundBeginBody: { Enabled: true }
36
+ Layout/EmptyLinesAroundBlockBody: { Enabled: false }
37
+ Layout/EmptyLinesAroundExceptionHandlingKeywords: { Enabled: true }
38
+ Layout/EmptyLinesAroundMethodBody: { Enabled: true }
39
+
40
+ Layout/EmptyLineBetweenDefs:
41
+ Enabled: true
42
+ AllowAdjacentOneLineDefs: true
43
+
44
+ Layout/EmptyLinesAroundAttributeAccessor:
45
+ inherit_mode:
46
+ merge:
47
+ - AllowedMethods
48
+ Enabled: true
49
+ AllowedMethods:
50
+ - delegate
51
+ - def_delegator
52
+ - def_delegators
53
+ - def_instance_delegators
54
+
55
+ # "empty_lines_special" sometimes does the wrong thing and annoys me.
56
+ # But I've mostly learned to live with it... mostly. 🙁
57
+
58
+ Layout/EmptyLinesAroundClassBody:
59
+ Enabled: true
60
+ EnforcedStyle: empty_lines_special
61
+
62
+ Layout/EmptyLinesAroundModuleBody:
63
+ Enabled: true
64
+ EnforcedStyle: empty_lines_special
65
+
66
+ ###########################################################################
67
+ # Layout: Space around, before, inside, etc
68
+
69
+ Layout/SpaceAroundEqualsInParameterDefault: { Enabled: false }
70
+ Layout/SpaceBeforeBlockBraces: { Enabled: false }
71
+ Layout/SpaceBeforeFirstArg: { Enabled: false }
72
+ Layout/SpaceInLambdaLiteral: { Enabled: false }
73
+ Layout/SpaceInsideArrayLiteralBrackets: { Enabled: false }
74
+ Layout/SpaceInsideHashLiteralBraces: { Enabled: false }
75
+
76
+ Layout/SpaceInsideBlockBraces:
77
+ EnforcedStyle: space
78
+ EnforcedStyleForEmptyBraces: space
79
+ SpaceBeforeBlockParameters: false
80
+
81
+ # I would enable this if it were a bit better at handling alignment.
82
+ Layout/ExtraSpacing:
83
+ Enabled: false
84
+ AllowForAlignment: true
85
+ AllowBeforeTrailingComments: true
86
+
87
+ ###########################################################################
88
+ # Layout: Misc
89
+
90
+ Layout/LineLength:
91
+ Max: 90 # should stay under 80, but we'll allow a little wiggle-room
92
+
93
+ Layout/MultilineOperationIndentation: { Enabled: false }
94
+
95
+ Layout/MultilineMethodCallIndentation:
96
+ EnforcedStyle: indented
97
+
98
+ ###########################################################################
99
+ # Lint and Naming: rubocop defaults are mostly good, but...
100
+
101
+ Lint/UnusedMethodArgument: { Enabled: false }
102
+ Naming/BinaryOperatorParameterName: { Enabled: false } # def /(denominator)
103
+ Naming/RescuedExceptionsVariableName: { Enabled: false }
104
+
105
+ ###########################################################################
106
+ # Metrics:
107
+
108
+ # Although it may be better to split specs into multiple files...?
109
+ Metrics/BlockLength:
110
+ Exclude:
111
+ - "spec/**/*_spec.rb"
112
+
113
+ ###########################################################################
114
+ # Style...
115
+
116
+ Style/AccessorGrouping: { Enabled: false }
117
+ Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
118
+ Style/EachWithObject: { Enabled: false }
119
+ Style/FormatStringToken: { Enabled: false }
120
+ Style/FloatDivision: { Enabled: false }
121
+ Style/Lambda: { Enabled: false }
122
+ Style/LineEndConcatenation: { Enabled: false }
123
+ Style/MixinGrouping: { Enabled: false }
124
+ Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
125
+ Style/RescueStandardError: { Enabled: false }
126
+ Style/SingleLineMethods: { Enabled: false }
127
+ Style/StabbyLambdaParentheses: { Enabled: false }
128
+
129
+ # If rubocop had an option to only enforce this on constants and literals (e.g.
130
+ # strings, regexp, range), I'd agree.
131
+ #
132
+ # But if you are using it e.g. on method arguments of unknown type, in the same
133
+ # style that ruby uses it with grep, then you are doing exactly the right thing.
134
+ Style/CaseEquality: { Enabled: false }
135
+
136
+ # I'd enable if "require_parentheses_when_complex" considered unary '!' simple.
137
+ Style/TernaryParentheses:
138
+ EnforcedStyle: require_parentheses_when_complex
139
+ Enabled: false
140
+
141
+ Style/BlockDelimiters:
142
+ EnforcedStyle: semantic
143
+ AllowBracesOnProceduralOneLiners: true
144
+
145
+ Style/FormatString:
146
+ EnforcedStyle: percent
147
+
148
+ Style/StringLiterals:
149
+ Enabled: true
150
+ EnforcedStyle: double_quotes
151
+
152
+ Style/StringLiteralsInInterpolation:
153
+ Enabled: true
154
+ EnforcedStyle: double_quotes
155
+
156
+ Style/TrailingCommaInHashLiteral:
157
+ EnforcedStyleForMultiline: consistent_comma
158
+
159
+ Style/TrailingCommaInArrayLiteral:
160
+ EnforcedStyleForMultiline: consistent_comma
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  # Specify your gem's dependencies in d_heap.gemspec
@@ -6,3 +8,4 @@ gemspec
6
8
  gem "rake", "~> 13.0"
7
9
  gem "rake-compiler"
8
10
  gem "rspec", "~> 3.10"
11
+ gem "rubocop", "~> 1.0"
@@ -1,15 +1,22 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- d_heap (0.2.1)
4
+ d_heap (0.2.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ ast (2.4.1)
9
10
  diff-lcs (1.4.4)
11
+ parallel (1.19.2)
12
+ parser (2.7.2.0)
13
+ ast (~> 2.4.1)
14
+ rainbow (3.0.0)
10
15
  rake (13.0.3)
11
16
  rake-compiler (1.1.1)
12
17
  rake
18
+ regexp_parser (1.8.2)
19
+ rexml (3.2.3)
13
20
  rspec (3.10.0)
14
21
  rspec-core (~> 3.10.0)
15
22
  rspec-expectations (~> 3.10.0)
@@ -23,6 +30,19 @@ GEM
23
30
  diff-lcs (>= 1.2.0, < 2.0)
24
31
  rspec-support (~> 3.10.0)
25
32
  rspec-support (3.10.0)
33
+ rubocop (1.2.0)
34
+ parallel (~> 1.10)
35
+ parser (>= 2.7.1.5)
36
+ rainbow (>= 2.2.2, < 4.0)
37
+ regexp_parser (>= 1.8)
38
+ rexml
39
+ rubocop-ast (>= 1.0.1)
40
+ ruby-progressbar (~> 1.7)
41
+ unicode-display_width (>= 1.4.0, < 2.0)
42
+ rubocop-ast (1.1.1)
43
+ parser (>= 2.7.1.5)
44
+ ruby-progressbar (1.10.1)
45
+ unicode-display_width (1.7.0)
26
46
 
27
47
  PLATFORMS
28
48
  ruby
@@ -32,6 +52,7 @@ DEPENDENCIES
32
52
  rake (~> 13.0)
33
53
  rake-compiler
34
54
  rspec (~> 3.10)
55
+ rubocop (~> 1.0)
35
56
 
36
57
  BUNDLED WITH
37
58
  2.2.3
data/README.md CHANGED
@@ -8,46 +8,46 @@ the nodes have _d_ children instead of 2. This allows for "decrease priority"
8
8
  operations to be performed more quickly with the tradeoff of slower delete
9
9
  minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
10
10
  binary heaps, allowing them to run more quickly in practice despite slower
11
- worst-case time complexity.
11
+ worst-case time complexity. In the worst case, a _d_-ary heap requires only
12
+ `O(log n / log d)` to push, with the tradeoff that pop is `O(d log n / log d)`.
12
13
 
13
- _TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
14
- ~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
15
- directly handled as a priority queue. These extension methods are meant to be
16
- used similarly to how `#bsearch` and `#bsearch_index` might be used.
17
-
18
- _TODO:_ Also included is `DHeap::Set`, which augments the basic heap with an
19
- internal `Hash`, which maps a set of values to scores.
20
- loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
21
- and adjustments, to achieve faster average runtime for *add* and *cancel*
22
- operations.
23
-
24
- _TODO:_ Also included is `DHeap::Timers`, which contains some features that are
25
- loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
26
- and adjustments, to achieve faster average runtime for *add* and *cancel*
27
- operations.
14
+ Although you should probably just stick with the default _d_ value of `4`, it
15
+ may be worthwhile to benchmark your specific scenario.
28
16
 
29
17
  ## Motivation
30
18
 
31
- Ruby's Array class comes with some helpful methods for maintaining a sorted
32
- array, by combining `#bsearch_index` with `#insert`. With certain insert/remove
33
- workloads that can perform very well, but in the worst-case an insert or delete
34
- can result in O(n), since it may need to memcopy a significant portion of the
35
- array. Knowing that priority queues are usually implemented with a heap, and
36
- that the heap is a relatively simple data structure, I set out to replace my
37
- `#bsearch_index` and `#insert` code with a one. I was surprised to find that,
38
- at least under certain benchmarks, my ruby Heap implementation was tied with or
39
- slower than inserting into a fully sorted array. On the one hand, this is a
40
- testament to ruby's fine-tuned Array implementation. On the other hand, it
41
- seemed like a heap implementated in C should easily match the speed of ruby's
42
- bsearch + insert.
43
-
44
- Additionally, I was inspired by reading go's "timer.go" implementation to
45
- experiment with a 4-ary heap, instead of the traditional binary heap. In the
46
- case of timers, new timers are usually scheduled to run after most of the
47
- existing timers and timers are usually canceled before they have a chance to
48
- run. While a binary heap holds 50% of its elements in its last layer, 75% of a
49
- 4-ary heap will have no children. That diminishes the extra comparison
50
- overhead during sift-down.
19
+ Sometimes you just need a priority queue, right? With a regular queue, you
20
+ expect "FIFO" behavior: first in, first out. With a priority queue, you push
21
+ with a score (or your elements are comparable), and you want to be able to
22
+ efficiently pop off the minimum (or maximum) element.
23
+
24
+ One obvious approach is to simply maintain an array in sorted order. And
25
+ ruby's Array class makes it simple to maintain a sorted array by combining
26
+ `#bsearch_index` with `#insert`. With certain insert/remove workloads that can
27
+ perform very well, but in the worst-case an insert or delete can result in O(n),
28
+ since `#insert` may need to `memcpy` or `memmove` a significant portion of the
29
+ array.
30
+
31
+ But the standard way to efficiently and simply solve this problem is using a
32
+ binary heap. Although it increases the time for `pop`, it converts the
33
+ amortized time per push + pop from `O(n)` to `O(d log n / log d)`.
34
+
35
+ I was surprised to find that, at least under certain benchmarks, my pure ruby
36
+ heap implementation was usually slower than inserting into a fully sorted
37
+ array. While this is a testament to ruby's fine-tuned Array implementation, a
38
+ heap implemented in C should easily perform faster than `Array#insert`.
39
+
40
+ The biggest issue is that it just takes far too much time to call `<=>` from
41
+ ruby code: A sorted array only requires `log n / log 2` comparisons to insert
42
+ and no comparisons to pop. However a _d_-ary heap requires `log n / log d` to
43
+ insert plus an additional `d log n / log d` to pop. If your queue contains only
44
+ a few hundred items at once, the overhead of those extra calls to `<=>` is far
45
+ more than occasionally calling `memcpy`.
46
+
47
+ It's likely that MJIT will eventually make the C-extension completely
48
+ unnecessary. This is definitely hotspot code, and the basic ruby implementation
49
+ would work fine, if not for that `<=>` overhead. Until then... this gem gets
50
+ the job done.
51
51
 
52
52
  ## Installation
53
53
 
@@ -90,6 +90,32 @@ heap.pop.last # => Task[1]
90
90
 
91
91
  Read the `rdoc` for more detailed documentation and examples.
92
92
 
93
+ ## TODOs...
94
+
95
+ _TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
96
+ ~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
97
+ directly handled as a priority queue. These extension methods are meant to be
98
+ used similarly to how `#bsearch` and `#bsearch_index` might be used.
99
+
100
+ _TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
101
+ basic heap with an internal `Hash`, which maps a set of values to scores.
102
+ loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
103
+ and adjustments, to achieve faster average runtime for *add* and *cancel*
104
+ operations.
105
+
106
+ _TODO:_ Also ~~included is~~ _will include_ `DHeap::Timers`, which contains some
107
+ features that are loosely inspired by go's timers. e.g: It lazily sifts its
108
+ heap after deletion and adjustments, to achieve faster average runtime for *add*
109
+ and *cancel* operations.
110
+
111
+ Additionally, I was inspired by reading go's "timer.go" implementation to
112
+ experiment with a 4-ary heap instead of the traditional binary heap. In the
113
+ case of timers, new timers are usually scheduled to run after most of the
114
+ existing timers. And timers are usually canceled before they have a chance to
115
+ run. While a binary heap holds 50% of its elements in its last layer, 75% of a
116
+ 4-ary heap will have no children. That diminishes the extra comparison overhead
117
+ during sift-down.
118
+
93
119
  ## Benchmarks
94
120
 
95
121
  _TODO: put benchmarks here._
data/Rakefile CHANGED
@@ -1,14 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rspec/core/rake_task"
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
6
12
  require "rake/extensiontask"
7
13
 
8
- task :build => :compile
14
+ task build: :compile
9
15
 
10
16
  Rake::ExtensionTask.new("d_heap") do |ext|
11
17
  ext.lib_dir = "lib/d_heap"
12
18
  end
13
19
 
14
- task :default => [:clobber, :compile, :spec]
20
+ task default: %i[clobber compile spec rubocop]
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require "bundler/setup"
4
5
  require "d_heap"
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rubocop' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rubocop", "rubocop")
@@ -1,4 +1,6 @@
1
- require_relative 'lib/d_heap/version'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/d_heap/version"
2
4
 
3
5
  Gem::Specification.new do |spec|
4
6
  spec.name = "d_heap"
@@ -6,7 +8,7 @@ Gem::Specification.new do |spec|
6
8
  spec.authors = ["nicholas a. evans"]
7
9
  spec.email = ["nicholas.evans@gmail.com"]
8
10
 
9
- spec.summary = %q{A d-ary heap implementation, for priority queues}
11
+ spec.summary = "A d-ary heap implementation, for priority queues"
10
12
  spec.description = <<~DESC
11
13
  A C extension implementation of a d-ary heap data structure, suitable for
12
14
  use in e.g. priority queues or Djikstra's algorithm.
@@ -21,11 +23,11 @@ Gem::Specification.new do |spec|
21
23
 
22
24
  # Specify which files should be added to the gem when it is released.
23
25
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
24
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
25
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
26
- end
26
+ spec.files = Dir.chdir(File.expand_path(__dir__)) {
27
+ `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
28
+ }
27
29
  spec.bindir = "exe"
28
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
+ spec.executables = spec.files.grep(%r{^exe/}) {|f| File.basename(f) }
29
31
  spec.require_paths = ["lib"]
30
32
  spec.extensions = ["ext/d_heap/extconf.rb"]
31
33
  end
@@ -25,10 +25,10 @@ ID id_ivar_d;
25
25
 
26
26
  #define DHEAP_Check_d_size(d) \
27
27
  if (d < 2) { \
28
- rb_raise(rb_eIndexError, "DHeap d=%d is too small", d); \
28
+ rb_raise(rb_eArgError, "DHeap d=%d is too small", d); \
29
29
  } \
30
30
  if (d > DHEAP_MAX_D) { \
31
- rb_raise(rb_eIndexError, "DHeap d=%d is too large", d); \
31
+ rb_raise(rb_eArgError, "DHeap d=%d is too large", d); \
32
32
  }
33
33
 
34
34
  #define DHEAP_Check_Sift_Idx(sift_index, last_index) \
@@ -51,33 +51,31 @@ ID id_ivar_d;
51
51
  VALUE
52
52
  dheap_ary_sift_up(VALUE heap_array, int d, long sift_index) {
53
53
  DHEAP_Check_Sift_Args(heap_array, d, sift_index);
54
- struct cmp_opt_data cmp_opt = { 0, 0 };
55
54
  // sift it up to where it belongs
56
55
  for (long parent_index; 0 < sift_index; sift_index = parent_index) {
57
- // puts(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
56
+ debug(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
58
57
  parent_index = IDX_PARENT(sift_index);
59
58
  VALUE parent_score = DHEAP_SCORE(heap_array, parent_index);
60
59
 
61
60
  // parent is smaller: heap is restored
62
- if (CMP_LTE(parent_score, sift_score, cmp_opt)) break;
61
+ if (CMP_LTE(parent_score, sift_score)) break;
63
62
 
64
63
  // parent is larger: swap and continue sifting up
65
64
  VALUE parent_value = DHEAP_VALUE(heap_array, parent_index);
66
65
  DHEAP_ASSIGN(heap_array, sift_index, parent_score, parent_value);
67
66
  DHEAP_ASSIGN(heap_array, parent_index, sift_score, sift_value);
68
67
  }
69
- // puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
68
+ debug(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
70
69
  return LONG2NUM(sift_index);
71
70
  }
72
71
 
73
72
  VALUE
74
73
  dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
75
74
  DHEAP_Check_Sift_Args(heap_array, d, sift_index);
76
- struct cmp_opt_data cmp_opt = { 0, 0 };
77
75
 
78
76
  // iteratively sift it down to where it belongs
79
77
  for (long child_index; sift_index < last_index; sift_index = child_index) {
80
- // puts(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
78
+ debug(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
81
79
  // find first child index, and break if we've reached the last layer
82
80
  long child_idx0 = child_index = IDX_CHILD0(sift_index);
83
81
  if (last_index < child_idx0) break;
@@ -92,63 +90,81 @@ dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
92
90
 
93
91
  VALUE sibling_score = DHEAP_SCORE(heap_array, sibling_index);
94
92
 
95
- if (CMP_LT(sibling_score, child_score, cmp_opt)) {
93
+ if (CMP_LT(sibling_score, child_score)) {
96
94
  child_score = sibling_score;
97
95
  child_index = sibling_index;
98
96
  }
99
97
  }
100
98
 
101
99
  // child is larger: heap is restored
102
- if (CMP_LTE(sift_score, child_score, cmp_opt)) break;
100
+ if (CMP_LTE(sift_score, child_score)) break;
103
101
 
104
102
  // child is smaller: swap and continue sifting down
105
103
  VALUE child_value = DHEAP_VALUE(heap_array, child_index);
106
104
  DHEAP_ASSIGN(heap_array, sift_index, child_score, child_value);
107
105
  DHEAP_ASSIGN(heap_array, child_index, sift_score, sift_value);
108
106
  }
109
- // puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
107
+ debug(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
110
108
  return LONG2NUM(sift_index);
111
109
  }
112
110
 
113
111
  #define DHEAP_Load_Sift_Vals(heap_array, dval, idxval) \
114
112
  Check_Type(dval, T_FIXNUM); \
115
- int d = FIX2INT(dval); \
116
- long sift_index = NUM2LONG(idxval);
113
+ int dint = FIX2INT(dval); \
114
+ long idx = NUM2LONG(idxval);
117
115
 
118
116
  /*
119
- * call-seq:
120
- * DHeap.array_sift_up(heap_array, d, sift_index)
117
+ * Treats a +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to
118
+ * restore the heap property for all nodes between it and the root of the tree.
121
119
  *
122
- * Treats +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to restore
123
- * the heap property.
120
+ * The array is interpreted as holding two entries for each node, a score and a
121
+ * value. The scores will be held in every even-numbered array index and the
122
+ * values in every odd numbered index. The array is flat, not an array of
123
+ * length=2 arrays.
124
124
  *
125
- * Time complexity: O(d log n / log d). If the average up shifted element sorts
126
- * into the bottom layer (e.g. new timers), this can avg O(1).
125
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
127
126
  *
127
+ * @param heap_array [Array] the array which is treated as a heap.
128
+ * @param d [Integer] the maximum number of children per parent
129
+ * @param sift_index [Integer] the index to start sifting from
130
+ * @return [Integer] the new index for the object that starts at +sift_index+.
128
131
  */
129
132
  static VALUE
130
- dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
131
- DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
132
- return dheap_ary_sift_up(heap_array, d, sift_index);
133
+ dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE d, VALUE sift_index) {
134
+ DHEAP_Load_Sift_Vals(heap_array, d, sift_index);
135
+ return dheap_ary_sift_up(heap_array, dint, idx);
133
136
  }
134
137
 
135
138
  /*
136
- * call-seq:
137
- * DHeap.array_sift_down(heap_array, d, sift_index)
138
- *
139
139
  * Treats +heap_array+ as a +d+-ary heap and sifts down from +sift_index+ to
140
- * restore the heap property.
140
+ * restore the heap property. If all _d_ subtrees below +sift_index+ are already
141
+ * heaps, this method ensures the entire subtree rooted at +sift_index+ will be
142
+ * a heap.
143
+ *
144
+ * The array is interpreted as holding two entries for each node, a score and a
145
+ * value. The scores will be held in every even-numbered array index and the
146
+ * values in every odd numbered index. The array is flat, not an array of
147
+ * length=2 arrays.
141
148
  *
142
- * Time complexity: O(d log n / log d). If the average down shifted element
143
- * sorts into the bottom layer (e.g. canceled timers), this can avg O(1).
149
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
144
150
  *
151
+ * @param heap_array [Array] the array which is treated as a heap.
152
+ * @param d [Integer] the maximum number of children per parent
153
+ * @param sift_index [Integer] the index to start sifting down from
154
+ * @return [Integer] the new index for the object that starts at +sift_index+.
145
155
  */
146
156
  static VALUE
147
- dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
148
- DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
149
- return dheap_ary_sift_down(heap_array, d, sift_index);
157
+ dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE d, VALUE sift_index) {
158
+ DHEAP_Load_Sift_Vals(heap_array, d, sift_index);
159
+ return dheap_ary_sift_down(heap_array, dint, idx);
150
160
  }
151
161
 
162
+ /*
163
+ * @overload initialize(d = DHeap::DEFAULT_D)
164
+ * Initialize a _d_-ary min-heap.
165
+ *
166
+ * @param d [Integer] maximum number of children per parent
167
+ */
152
168
  static VALUE
153
169
  dheap_initialize(int argc, VALUE *argv, VALUE self) {
154
170
  rb_check_arity(argc, 0, 1);
@@ -162,20 +178,35 @@ dheap_initialize(int argc, VALUE *argv, VALUE self) {
162
178
  return self;
163
179
  }
164
180
 
181
+ /*
182
+ * @return [Integer] the number of elements in the heap
183
+ */
165
184
  static VALUE dheap_size(VALUE self) {
166
185
  VALUE ary = DHEAP_GET_A(self);
167
186
  long size = DHEAP_SIZE(ary);
168
187
  return LONG2NUM(size);
169
188
  }
170
189
 
190
+ /*
191
+ * @return [Boolean] is the heap empty?
192
+ */
171
193
  static VALUE dheap_empty_p(VALUE self) {
172
194
  VALUE ary = DHEAP_GET_A(self);
173
195
  long size = DHEAP_SIZE(ary);
174
196
  return size == 0 ? Qtrue : Qfalse;
175
197
  }
176
198
 
199
+ /*
200
+ * @return [Integer] the maximum number of children per parent
201
+ */
177
202
  static VALUE dheap_attr_d(VALUE self) { return DHEAP_GET_D(self); }
178
203
 
204
+ /*
205
+ * Freezes the heap as well as its underlying array, but does <i>not</i>
206
+ * deep-freeze the elements in the heap.
207
+ *
208
+ * @return [self]
209
+ */
179
210
  static VALUE
180
211
  dheap_freeze(VALUE self) {
181
212
  VALUE ary = DHEAP_GET_A(self);
@@ -193,10 +224,19 @@ dheap_ary_push(VALUE ary, int d, VALUE val, VALUE scr)
193
224
  }
194
225
 
195
226
  /*
196
- * Push val onto the end of the heap, then sift up to maintain heap property.
227
+ * @overload push(score, value = score)
228
+ *
229
+ * Push a value onto the heap, using a score to determine sort-order.
230
+ *
231
+ * Ideally, the score should be a frozen value that can be efficiently compared
232
+ * to other scores, e.g. an Integer or Float or (maybe) a String
233
+ *
234
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
197
235
  *
198
- * Returns the index of the value's final position.
236
+ * @param score [#<=>] a value that can be compared to other scores.
237
+ * @param value [Object] an object that is associated with the score.
199
238
  *
239
+ * @return [Integer] the index of the value's final position.
200
240
  */
201
241
  static VALUE
202
242
  dheap_push(int argc, VALUE *argv, VALUE self) {
@@ -212,16 +252,18 @@ dheap_push(int argc, VALUE *argv, VALUE self) {
212
252
  }
213
253
 
214
254
  /*
215
- * Push val onto the end of the heap, then sift up to maintain heap property.
255
+ * Pushes a comparable value onto the heap.
216
256
  *
217
- * Time complexity: O(d log n / log d).
257
+ * The value will be its own score.
218
258
  *
219
- * Returns +self+.
259
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
220
260
  *
261
+ * @param value [#<=>] a value that can be compared to other heap members.
262
+ * @return [self]
221
263
  */
222
264
  static VALUE
223
- dheap_left_shift(VALUE self, VALUE val) {
224
- dheap_push(1, &val, self);
265
+ dheap_left_shift(VALUE self, VALUE value) {
266
+ dheap_push(1, &value, self);
225
267
  return self;
226
268
  }
227
269
 
@@ -238,6 +280,12 @@ dheap_left_shift(VALUE self, VALUE val) {
238
280
  DHEAP_DROP_LAST(ary); \
239
281
  dheap_ary_sift_down(ary, FIX2INT(dval), 0);
240
282
 
283
+ /*
284
+ * Returns the next value on the heap to be popped without popping it.
285
+ *
286
+ * Time complexity: <b>O(1)</b> <i>(worst-case)</i>
287
+ * @return [Object] the next value to be popped without popping it.
288
+ */
241
289
  static VALUE
242
290
  dheap_peek(VALUE self) {
243
291
  VALUE ary = DHEAP_GET_A(self);
@@ -245,11 +293,9 @@ dheap_peek(VALUE self) {
245
293
  }
246
294
 
247
295
  /*
248
- * Pops the minimum value from the top of the heap, sifting down to maintain
249
- * heap property.
250
- *
251
- * Time complexity: O(d log n / log d).
296
+ * Pops the minimum value from the top of the heap
252
297
  *
298
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
253
299
  */
254
300
  static VALUE
255
301
  dheap_pop(VALUE self) {
@@ -262,42 +308,40 @@ dheap_pop(VALUE self) {
262
308
  }
263
309
 
264
310
  /*
265
- * Pops the minimum value from the top of the heap, sifting down to maintain
266
- * heap property.
311
+ * Pops the minimum value only if it is less than or equal to a max score.
267
312
  *
268
- * Time complexity: O(d log n / log d).
313
+ * @param max_score [#<=>] the maximum score to be popped
269
314
  *
315
+ * @see #pop
270
316
  */
271
317
  static VALUE
272
- dheap_pop_lte(VALUE self, VALUE below_score) {
318
+ dheap_pop_lte(VALUE self, VALUE max_score) {
273
319
  DHEAP_Pop_Init(self);
274
320
  if (last_index < 0) return Qnil;
275
321
  VALUE pop_value = DHEAP_VALUE(ary, 0);
276
322
 
277
323
  VALUE pop_score = DHEAP_SCORE(ary, 0);
278
- struct cmp_opt_data cmp_opt = { 0, 0 };
279
- if (below_score && !CMP_LTE(pop_score, below_score, cmp_opt)) return Qnil;
324
+ if (max_score && !CMP_LTE(pop_score, max_score)) return Qnil;
280
325
 
281
326
  DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
282
327
  return pop_value;
283
328
  }
284
329
 
285
330
  /*
286
- * Pops the minimum value from the top of the heap, sifting down to maintain
287
- * heap property.
331
+ * Pops the minimum value only if it is less than a max score.
288
332
  *
289
- * Time complexity: O(d log n / log d).
333
+ * @param max_score [#<=>] the maximum score to be popped
290
334
  *
335
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
291
336
  */
292
337
  static VALUE
293
- dheap_pop_lt(VALUE self, VALUE below_score) {
338
+ dheap_pop_lt(VALUE self, VALUE max_score) {
294
339
  DHEAP_Pop_Init(self);
295
340
  if (last_index < 0) return Qnil;
296
341
  VALUE pop_value = DHEAP_VALUE(ary, 0);
297
342
 
298
343
  VALUE pop_score = DHEAP_SCORE(ary, 0);
299
- struct cmp_opt_data cmp_opt = { 0, 0 };
300
- if (below_score && !CMP_LT(pop_score, below_score, cmp_opt)) return Qnil;
344
+ if (max_score && !CMP_LT(pop_score, max_score)) return Qnil;
301
345
 
302
346
  DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
303
347
  return pop_value;
@@ -311,6 +355,9 @@ Init_d_heap(void)
311
355
  id_ivar_d = rb_intern_const("d");
312
356
 
313
357
  rb_cDHeap = rb_define_class("DHeap", rb_cObject);
358
+ rb_define_const(rb_cDHeap, "MAX_D", INT2NUM(DHEAP_MAX_D));
359
+ rb_define_const(rb_cDHeap, "DEFAULT_D", INT2NUM(DHEAP_DEFAULT_D));
360
+
314
361
  rb_define_singleton_method(rb_cDHeap, "heap_sift_down", dheap_sift_down_s, 3);
315
362
  rb_define_singleton_method(rb_cDHeap, "heap_sift_up", dheap_sift_up_s, 3);
316
363
 
@@ -11,64 +11,55 @@
11
11
  // comparisons as d gets further from 4.
12
12
  #define DHEAP_MAX_D 32
13
13
 
14
+ VALUE rb_cDHeap;
14
15
 
15
- #define CMP_LT(a, b, cmp_opt) \
16
- (OPTIMIZED_CMP(a, b, cmp_opt) < 0)
17
- #define CMP_LTE(a, b, cmp_opt) \
18
- (OPTIMIZED_CMP(a, b, cmp_opt) <= 0)
19
- #define CMP_GT(a, b, cmp_opt) \
20
- (OPTIMIZED_CMP(a, b, cmp_opt) > 0)
21
- #define CMP_GTE(a, b, cmp_opt) \
22
- (OPTIMIZED_CMP(a, b, cmp_opt) >= 0)
16
+ #define CMP_LT(a, b) (optimized_cmp(a, b) < 0)
17
+ #define CMP_LTE(a, b) (optimized_cmp(a, b) <= 0)
18
+ #define CMP_GT(a, b) (optimized_cmp(a, b) > 0)
19
+ #define CMP_GTE(a, b) (optimized_cmp(a, b) >= 0)
23
20
 
24
- VALUE rb_cDHeap;
21
+ // <=>
25
22
  ID id_cmp;
26
23
 
27
- // from internal/numeric.h
28
- #ifndef INTERNAL_NUMERIC_H
29
- int rb_float_cmp(VALUE x, VALUE y);
30
- #endif /* INTERNAL_NUMERIC_H */
31
-
32
24
  // from internal/compar.h
33
- #ifndef INTERNAL_COMPAR_H
34
25
  #define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
35
26
 
36
- enum {
37
- cmp_opt_Integer,
38
- cmp_opt_String,
39
- cmp_opt_Float,
40
- cmp_optimizable_count
41
- };
42
-
43
- struct cmp_opt_data {
44
- unsigned int opt_methods;
45
- unsigned int opt_inited;
46
- };
27
+ /*
28
+ * short-circuit evaluation for a few basic types.
29
+ *
30
+ * Only Integer, Float, and String are optimized,
31
+ * and only when both arguments are the same type.
32
+ */
33
+ static inline int
34
+ optimized_cmp(VALUE a, VALUE b) {
35
+ if (a == b) // Fixnum equality and object equality
36
+ return 0;
37
+ if (FIXNUM_P(a) && FIXNUM_P(b))
38
+ return (FIX2LONG(a) < FIX2LONG(b)) ? -1 : 1;
39
+ if (RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b))
40
+ {
41
+ double x, y;
42
+ x = RFLOAT_VALUE(a);
43
+ y = RFLOAT_VALUE(b);
44
+ if (isnan(x) || isnan(y)) rb_cmperr(a, b); // raise ArgumentError
45
+ return (x < y) ? -1 : ((x == y) ? 0 : 1);
46
+ }
47
+ if (RB_TYPE_P(a, T_BIGNUM) && RB_TYPE_P(b, T_BIGNUM))
48
+ return FIX2INT(rb_big_cmp(a, b));
49
+ if (STRING_P(a) && STRING_P(b))
50
+ return rb_str_cmp(a, b);
47
51
 
48
- #define NEW_CMP_OPT_MEMO(type, value) \
49
- NEW_PARTIAL_MEMO_FOR(type, value, cmp_opt)
50
- #define CMP_OPTIMIZABLE_BIT(type) (1U << TOKEN_PASTE(cmp_opt_,type))
51
- #define CMP_OPTIMIZABLE(data, type) \
52
- (((data).opt_inited & CMP_OPTIMIZABLE_BIT(type)) ? \
53
- ((data).opt_methods & CMP_OPTIMIZABLE_BIT(type)) : \
54
- (((data).opt_inited |= CMP_OPTIMIZABLE_BIT(type)), \
55
- rb_method_basic_definition_p(TOKEN_PASTE(rb_c,type), id_cmp) && \
56
- ((data).opt_methods |= CMP_OPTIMIZABLE_BIT(type))))
57
-
58
- #define OPTIMIZED_CMP(a, b, data) \
59
- ((FIXNUM_P(a) && FIXNUM_P(b) && CMP_OPTIMIZABLE(data, Integer)) ? \
60
- (((long)a > (long)b) ? 1 : ((long)a < (long)b) ? -1 : 0) : \
61
- (STRING_P(a) && STRING_P(b) && CMP_OPTIMIZABLE(data, String)) ? \
62
- rb_str_cmp(a, b) : \
63
- (RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b) && CMP_OPTIMIZABLE(data, Float)) ? \
64
- rb_float_cmp(a, b) : \
65
- rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b))
52
+ // give up on an optimized version and just call (a <=> b)
53
+ return rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b);
54
+ }
66
55
 
67
- #define puts(v) { \
56
+ #ifdef __D_HEAP_DEBUG
57
+ #define debug(v) { \
68
58
  ID sym_puts = rb_intern("puts"); \
69
59
  rb_funcall(rb_mKernel, sym_puts, 1, v); \
70
60
  }
71
-
72
- #endif /* INTERNAL_COMPAR_H */
61
+ #else
62
+ #define debug(v)
63
+ #endif
73
64
 
74
65
  #endif /* D_HEAP_H */
@@ -1,3 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "mkmf"
2
4
 
5
+ # if /darwin/ =~ RUBY_PLATFORM
6
+ # $CFLAGS << " -D__D_HEAP_DEBUG"
7
+ # end
8
+
3
9
  create_makefile("d_heap/d_heap")
@@ -1,6 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "d_heap/d_heap"
2
4
  require "d_heap/version"
3
5
 
6
+ # A fast _d_-ary heap implementation for ruby, useful in priority queues and graph
7
+ # algorithms.
8
+ #
9
+ # The _d_-ary heap data structure is a generalization of the binary heap, in which
10
+ # the nodes have _d_ children instead of 2. This allows for "decrease priority"
11
+ # operations to be performed more quickly with the tradeoff of slower delete
12
+ # minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
13
+ # binary heaps, allowing them to run more quickly in practice despite slower
14
+ # worst-case time complexity.
15
+ #
4
16
  class DHeap
5
17
 
6
18
  def initialize_copy(other)
@@ -1,3 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DHeap
2
- VERSION = "0.2.1"
4
+ VERSION = "0.2.2"
5
+
3
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: d_heap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - nicholas a. evans
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-12-26 00:00:00.000000000 Z
11
+ date: 2020-12-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  A C extension implementation of a d-ary heap data structure, suitable for
@@ -20,8 +20,10 @@ extensions:
20
20
  - ext/d_heap/extconf.rb
21
21
  extra_rdoc_files: []
22
22
  files:
23
+ - ".github/workflows/main.yml"
23
24
  - ".gitignore"
24
25
  - ".rspec"
26
+ - ".rubocop.yml"
25
27
  - ".travis.yml"
26
28
  - CODE_OF_CONDUCT.md
27
29
  - Gemfile
@@ -32,6 +34,7 @@ files:
32
34
  - bin/console
33
35
  - bin/rake
34
36
  - bin/rspec
37
+ - bin/rubocop
35
38
  - bin/setup
36
39
  - d_heap.gemspec
37
40
  - ext/d_heap/d_heap.c
@@ -61,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
64
  - !ruby/object:Gem::Version
62
65
  version: '0'
63
66
  requirements: []
64
- rubygems_version: 3.2.3
67
+ rubygems_version: 3.1.4
65
68
  signing_key:
66
69
  specification_version: 4
67
70
  summary: A d-ary heap implementation, for priority queues