d_heap 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 89808cd668688e16b5dd1e7e6b1ce9e57651217089b0686b4aeb83d41c565020
4
- data.tar.gz: 43e11d72c7143061d0b424f8290e4ef7f66dfaedc8df2f5eff11e20cce2f7796
3
+ metadata.gz: f549e01dd83eb6b48c1190443495a628ed1dd64ead7eb94851e661aef2607e14
4
+ data.tar.gz: 492ce5c17ace9ecc9deaccf8fcd47883835da28d4e5f32328aef60989186b2ff
5
5
  SHA512:
6
- metadata.gz: 7ce7b4a755692a99fdee3d30e27f7dd02f6f90c12fe478a5d20f5e224183f82c5d7f82141c80d229edee48a1dcf6a90878c5648b3bb2107d093dcef5884abf59
7
- data.tar.gz: 03daf597a17aee15f67f2bf3aa37625e84c9d2f759c22313a9e1010b9f3878c1e8d2dc005b0f82fca5f1792ee57abf7f93d3ab176edad5e29686ca7dedaae905
6
+ metadata.gz: 85521dee7f2a9992980935756571e87afbe8ae13347b5b3fbad17b501b5709111972b98ad0c9e1fca6d318c4be20ce2983086dfd84f7c0e73636ac9e4f11f253
7
+ data.tar.gz: e1daac5b02fcc817b3c6c6a99395e3ca0b92f42bb14bd813fedbb3037eed698bda335c2898d5b0131b48fecd73e4ccf1615943a52287c36f73764b08bf8b1969
@@ -0,0 +1,26 @@
1
+ name: Ruby
2
+
3
+ on: [push,pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ strategy:
8
+ fail-fast: false
9
+ matrix:
10
+ ruby: [2.5, 2.6, 2.7, 3.0]
11
+ os: [ubuntu, macos]
12
+ experimental: [false]
13
+ runs-on: ${{ matrix.os }}-latest
14
+ continue-on-error: ${{ matrix.experimental }}
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ bundler-cache: true
22
+ - name: Run the default task
23
+ run: |
24
+ gem install bundler -v 2.2.3
25
+ bundle install
26
+ bundle exec rake
@@ -0,0 +1,160 @@
1
+ inherit_mode:
2
+ merge:
3
+ - Exclude
4
+
5
+ AllCops:
6
+ TargetRubyVersion: 2.5
7
+ NewCops: disable
8
+ Exclude:
9
+ - bin/rake
10
+ - bin/rspec
11
+ - bin/rubocop
12
+
13
+ ###########################################################################
14
+ # rubocop defaults are simply WRONG about many rules... Sorry. It's true.
15
+
16
+ ###########################################################################
17
+ # Layout: Alignment. I want these to work, I really do...
18
+
19
+ # I wish this worked with "table". but that goes wrong sometimes.
20
+ Layout/HashAlignment: { Enabled: false }
21
+
22
+ # This needs to be configurable so parenthesis calls are aligned with first
23
+ # parameter, and non-parenthesis calls are aligned with fixed indentation.
24
+ Layout/ParameterAlignment: { Enabled: false }
25
+
26
+ ###########################################################################
27
+ # Layout: Empty lines
28
+
29
+ Layout/EmptyLineAfterGuardClause: { Enabled: false }
30
+ Layout/EmptyLineAfterMagicComment: { Enabled: true }
31
+ Layout/EmptyLineAfterMultilineCondition: { Enabled: false }
32
+ Layout/EmptyLines: { Enabled: true }
33
+ Layout/EmptyLinesAroundAccessModifier: { Enabled: true }
34
+ Layout/EmptyLinesAroundArguments: { Enabled: true }
35
+ Layout/EmptyLinesAroundBeginBody: { Enabled: true }
36
+ Layout/EmptyLinesAroundBlockBody: { Enabled: false }
37
+ Layout/EmptyLinesAroundExceptionHandlingKeywords: { Enabled: true }
38
+ Layout/EmptyLinesAroundMethodBody: { Enabled: true }
39
+
40
+ Layout/EmptyLineBetweenDefs:
41
+ Enabled: true
42
+ AllowAdjacentOneLineDefs: true
43
+
44
+ Layout/EmptyLinesAroundAttributeAccessor:
45
+ inherit_mode:
46
+ merge:
47
+ - AllowedMethods
48
+ Enabled: true
49
+ AllowedMethods:
50
+ - delegate
51
+ - def_delegator
52
+ - def_delegators
53
+ - def_instance_delegators
54
+
55
+ # "empty_lines_special" sometimes does the wrong thing and annoys me.
56
+ # But I've mostly learned to live with it... mostly. 🙁
57
+
58
+ Layout/EmptyLinesAroundClassBody:
59
+ Enabled: true
60
+ EnforcedStyle: empty_lines_special
61
+
62
+ Layout/EmptyLinesAroundModuleBody:
63
+ Enabled: true
64
+ EnforcedStyle: empty_lines_special
65
+
66
+ ###########################################################################
67
+ # Layout: Space around, before, inside, etc
68
+
69
+ Layout/SpaceAroundEqualsInParameterDefault: { Enabled: false }
70
+ Layout/SpaceBeforeBlockBraces: { Enabled: false }
71
+ Layout/SpaceBeforeFirstArg: { Enabled: false }
72
+ Layout/SpaceInLambdaLiteral: { Enabled: false }
73
+ Layout/SpaceInsideArrayLiteralBrackets: { Enabled: false }
74
+ Layout/SpaceInsideHashLiteralBraces: { Enabled: false }
75
+
76
+ Layout/SpaceInsideBlockBraces:
77
+ EnforcedStyle: space
78
+ EnforcedStyleForEmptyBraces: space
79
+ SpaceBeforeBlockParameters: false
80
+
81
+ # I would enable this if it were a bit better at handling alignment.
82
+ Layout/ExtraSpacing:
83
+ Enabled: false
84
+ AllowForAlignment: true
85
+ AllowBeforeTrailingComments: true
86
+
87
+ ###########################################################################
88
+ # Layout: Misc
89
+
90
+ Layout/LineLength:
91
+ Max: 90 # should stay under 80, but we'll allow a little wiggle-room
92
+
93
+ Layout/MultilineOperationIndentation: { Enabled: false }
94
+
95
+ Layout/MultilineMethodCallIndentation:
96
+ EnforcedStyle: indented
97
+
98
+ ###########################################################################
99
+ # Lint and Naming: rubocop defaults are mostly good, but...
100
+
101
+ Lint/UnusedMethodArgument: { Enabled: false }
102
+ Naming/BinaryOperatorParameterName: { Enabled: false } # def /(denominator)
103
+ Naming/RescuedExceptionsVariableName: { Enabled: false }
104
+
105
+ ###########################################################################
106
+ # Metrics:
107
+
108
+ # Although it may be better to split specs into multiple files...?
109
+ Metrics/BlockLength:
110
+ Exclude:
111
+ - "spec/**/*_spec.rb"
112
+
113
+ ###########################################################################
114
+ # Style...
115
+
116
+ Style/AccessorGrouping: { Enabled: false }
117
+ Style/AsciiComments: { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
118
+ Style/EachWithObject: { Enabled: false }
119
+ Style/FormatStringToken: { Enabled: false }
120
+ Style/FloatDivision: { Enabled: false }
121
+ Style/Lambda: { Enabled: false }
122
+ Style/LineEndConcatenation: { Enabled: false }
123
+ Style/MixinGrouping: { Enabled: false }
124
+ Style/PerlBackrefs: { Enabled: false } # use occasionally/sparingly
125
+ Style/RescueStandardError: { Enabled: false }
126
+ Style/SingleLineMethods: { Enabled: false }
127
+ Style/StabbyLambdaParentheses: { Enabled: false }
128
+
129
+ # If rubocop had an option to only enforce this on constants and literals (e.g.
130
+ # strings, regexp, range), I'd agree.
131
+ #
132
+ # But if you are using it e.g. on method arguments of unknown type, in the same
133
+ # style that ruby uses it with grep, then you are doing exactly the right thing.
134
+ Style/CaseEquality: { Enabled: false }
135
+
136
+ # I'd enable if "require_parentheses_when_complex" considered unary '!' simple.
137
+ Style/TernaryParentheses:
138
+ EnforcedStyle: require_parentheses_when_complex
139
+ Enabled: false
140
+
141
+ Style/BlockDelimiters:
142
+ EnforcedStyle: semantic
143
+ AllowBracesOnProceduralOneLiners: true
144
+
145
+ Style/FormatString:
146
+ EnforcedStyle: percent
147
+
148
+ Style/StringLiterals:
149
+ Enabled: true
150
+ EnforcedStyle: double_quotes
151
+
152
+ Style/StringLiteralsInInterpolation:
153
+ Enabled: true
154
+ EnforcedStyle: double_quotes
155
+
156
+ Style/TrailingCommaInHashLiteral:
157
+ EnforcedStyleForMultiline: consistent_comma
158
+
159
+ Style/TrailingCommaInArrayLiteral:
160
+ EnforcedStyleForMultiline: consistent_comma
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  # Specify your gem's dependencies in d_heap.gemspec
@@ -6,3 +8,4 @@ gemspec
6
8
  gem "rake", "~> 13.0"
7
9
  gem "rake-compiler"
8
10
  gem "rspec", "~> 3.10"
11
+ gem "rubocop", "~> 1.0"
@@ -1,15 +1,22 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- d_heap (0.2.1)
4
+ d_heap (0.2.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ ast (2.4.1)
9
10
  diff-lcs (1.4.4)
11
+ parallel (1.19.2)
12
+ parser (2.7.2.0)
13
+ ast (~> 2.4.1)
14
+ rainbow (3.0.0)
10
15
  rake (13.0.3)
11
16
  rake-compiler (1.1.1)
12
17
  rake
18
+ regexp_parser (1.8.2)
19
+ rexml (3.2.3)
13
20
  rspec (3.10.0)
14
21
  rspec-core (~> 3.10.0)
15
22
  rspec-expectations (~> 3.10.0)
@@ -23,6 +30,19 @@ GEM
23
30
  diff-lcs (>= 1.2.0, < 2.0)
24
31
  rspec-support (~> 3.10.0)
25
32
  rspec-support (3.10.0)
33
+ rubocop (1.2.0)
34
+ parallel (~> 1.10)
35
+ parser (>= 2.7.1.5)
36
+ rainbow (>= 2.2.2, < 4.0)
37
+ regexp_parser (>= 1.8)
38
+ rexml
39
+ rubocop-ast (>= 1.0.1)
40
+ ruby-progressbar (~> 1.7)
41
+ unicode-display_width (>= 1.4.0, < 2.0)
42
+ rubocop-ast (1.1.1)
43
+ parser (>= 2.7.1.5)
44
+ ruby-progressbar (1.10.1)
45
+ unicode-display_width (1.7.0)
26
46
 
27
47
  PLATFORMS
28
48
  ruby
@@ -32,6 +52,7 @@ DEPENDENCIES
32
52
  rake (~> 13.0)
33
53
  rake-compiler
34
54
  rspec (~> 3.10)
55
+ rubocop (~> 1.0)
35
56
 
36
57
  BUNDLED WITH
37
58
  2.2.3
data/README.md CHANGED
@@ -8,46 +8,46 @@ the nodes have _d_ children instead of 2. This allows for "decrease priority"
8
8
  operations to be performed more quickly with the tradeoff of slower delete
9
9
  minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
10
10
  binary heaps, allowing them to run more quickly in practice despite slower
11
- worst-case time complexity.
11
+ worst-case time complexity. In the worst case, a _d_-ary heap requires only
12
+ `O(log n / log d)` to push, with the tradeoff that pop is `O(d log n / log d)`.
12
13
 
13
- _TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
14
- ~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
15
- directly handled as a priority queue. These extension methods are meant to be
16
- used similarly to how `#bsearch` and `#bsearch_index` might be used.
17
-
18
- _TODO:_ Also included is `DHeap::Set`, which augments the basic heap with an
19
- internal `Hash`, which maps a set of values to scores.
20
- loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
21
- and adjustments, to achieve faster average runtime for *add* and *cancel*
22
- operations.
23
-
24
- _TODO:_ Also included is `DHeap::Timers`, which contains some features that are
25
- loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
26
- and adjustments, to achieve faster average runtime for *add* and *cancel*
27
- operations.
14
+ Although you should probably just stick with the default _d_ value of `4`, it
15
+ may be worthwhile to benchmark your specific scenario.
28
16
 
29
17
  ## Motivation
30
18
 
31
- Ruby's Array class comes with some helpful methods for maintaining a sorted
32
- array, by combining `#bsearch_index` with `#insert`. With certain insert/remove
33
- workloads that can perform very well, but in the worst-case an insert or delete
34
- can result in O(n), since it may need to memcopy a significant portion of the
35
- array. Knowing that priority queues are usually implemented with a heap, and
36
- that the heap is a relatively simple data structure, I set out to replace my
37
- `#bsearch_index` and `#insert` code with a one. I was surprised to find that,
38
- at least under certain benchmarks, my ruby Heap implementation was tied with or
39
- slower than inserting into a fully sorted array. On the one hand, this is a
40
- testament to ruby's fine-tuned Array implementation. On the other hand, it
41
- seemed like a heap implementated in C should easily match the speed of ruby's
42
- bsearch + insert.
43
-
44
- Additionally, I was inspired by reading go's "timer.go" implementation to
45
- experiment with a 4-ary heap, instead of the traditional binary heap. In the
46
- case of timers, new timers are usually scheduled to run after most of the
47
- existing timers and timers are usually canceled before they have a chance to
48
- run. While a binary heap holds 50% of its elements in its last layer, 75% of a
49
- 4-ary heap will have no children. That diminishes the extra comparison
50
- overhead during sift-down.
19
+ Sometimes you just need a priority queue, right? With a regular queue, you
20
+ expect "FIFO" behavior: first in, first out. With a priority queue, you push
21
+ with a score (or your elements are comparable), and you want to be able to
22
+ efficiently pop off the minimum (or maximum) element.
23
+
24
+ One obvious approach is to simply maintain an array in sorted order. And
25
+ ruby's Array class makes it simple to maintain a sorted array by combining
26
+ `#bsearch_index` with `#insert`. With certain insert/remove workloads that can
27
+ perform very well, but in the worst-case an insert or delete can result in O(n),
28
+ since `#insert` may need to `memcpy` or `memmove` a significant portion of the
29
+ array.
30
+
31
+ But the standard way to efficiently and simply solve this problem is using a
32
+ binary heap. Although it increases the time for `pop`, it converts the
33
+ amortized time per push + pop from `O(n)` to `O(d log n / log d)`.
34
+
35
+ I was surprised to find that, at least under certain benchmarks, my pure ruby
36
+ heap implementation was usually slower than inserting into a fully sorted
37
+ array. While this is a testament to ruby's fine-tuned Array implementation, a
38
+ heap implemented in C should easily perform faster than `Array#insert`.
39
+
40
+ The biggest issue is that it just takes far too much time to call `<=>` from
41
+ ruby code: A sorted array only requires `log n / log 2` comparisons to insert
42
+ and no comparisons to pop. However a _d_-ary heap requires `log n / log d` to
43
+ insert plus an additional `d log n / log d` to pop. If your queue contains only
44
+ a few hundred items at once, the overhead of those extra calls to `<=>` is far
45
+ more than occasionally calling `memcpy`.
46
+
47
+ It's likely that MJIT will eventually make the C-extension completely
48
+ unnecessary. This is definitely hotspot code, and the basic ruby implementation
49
+ would work fine, if not for that `<=>` overhead. Until then... this gem gets
50
+ the job done.
51
51
 
52
52
  ## Installation
53
53
 
@@ -90,6 +90,32 @@ heap.pop.last # => Task[1]
90
90
 
91
91
  Read the `rdoc` for more detailed documentation and examples.
92
92
 
93
+ ## TODOs...
94
+
95
+ _TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
96
+ ~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
97
+ directly handled as a priority queue. These extension methods are meant to be
98
+ used similarly to how `#bsearch` and `#bsearch_index` might be used.
99
+
100
+ _TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
101
+ basic heap with an internal `Hash`, which maps a set of values to scores.
102
+ loosely inspired by go's timers. e.g: It lazily sifts its heap after deletion
103
+ and adjustments, to achieve faster average runtime for *add* and *cancel*
104
+ operations.
105
+
106
+ _TODO:_ Also ~~included is~~ _will include_ `DHeap::Timers`, which contains some
107
+ features that are loosely inspired by go's timers. e.g: It lazily sifts its
108
+ heap after deletion and adjustments, to achieve faster average runtime for *add*
109
+ and *cancel* operations.
110
+
111
+ Additionally, I was inspired by reading go's "timer.go" implementation to
112
+ experiment with a 4-ary heap instead of the traditional binary heap. In the
113
+ case of timers, new timers are usually scheduled to run after most of the
114
+ existing timers. And timers are usually canceled before they have a chance to
115
+ run. While a binary heap holds 50% of its elements in its last layer, 75% of a
116
+ 4-ary heap will have no children. That diminishes the extra comparison overhead
117
+ during sift-down.
118
+
93
119
  ## Benchmarks
94
120
 
95
121
  _TODO: put benchmarks here._
data/Rakefile CHANGED
@@ -1,14 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rspec/core/rake_task"
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
6
12
  require "rake/extensiontask"
7
13
 
8
- task :build => :compile
14
+ task build: :compile
9
15
 
10
16
  Rake::ExtensionTask.new("d_heap") do |ext|
11
17
  ext.lib_dir = "lib/d_heap"
12
18
  end
13
19
 
14
- task :default => [:clobber, :compile, :spec]
20
+ task default: %i[clobber compile spec rubocop]
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require "bundler/setup"
4
5
  require "d_heap"
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rubocop' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rubocop", "rubocop")
@@ -1,4 +1,6 @@
1
- require_relative 'lib/d_heap/version'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/d_heap/version"
2
4
 
3
5
  Gem::Specification.new do |spec|
4
6
  spec.name = "d_heap"
@@ -6,7 +8,7 @@ Gem::Specification.new do |spec|
6
8
  spec.authors = ["nicholas a. evans"]
7
9
  spec.email = ["nicholas.evans@gmail.com"]
8
10
 
9
- spec.summary = %q{A d-ary heap implementation, for priority queues}
11
+ spec.summary = "A d-ary heap implementation, for priority queues"
10
12
  spec.description = <<~DESC
11
13
  A C extension implementation of a d-ary heap data structure, suitable for
12
14
  use in e.g. priority queues or Djikstra's algorithm.
@@ -21,11 +23,11 @@ Gem::Specification.new do |spec|
21
23
 
22
24
  # Specify which files should be added to the gem when it is released.
23
25
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
24
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
25
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
26
- end
26
+ spec.files = Dir.chdir(File.expand_path(__dir__)) {
27
+ `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
28
+ }
27
29
  spec.bindir = "exe"
28
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
+ spec.executables = spec.files.grep(%r{^exe/}) {|f| File.basename(f) }
29
31
  spec.require_paths = ["lib"]
30
32
  spec.extensions = ["ext/d_heap/extconf.rb"]
31
33
  end
@@ -25,10 +25,10 @@ ID id_ivar_d;
25
25
 
26
26
  #define DHEAP_Check_d_size(d) \
27
27
  if (d < 2) { \
28
- rb_raise(rb_eIndexError, "DHeap d=%d is too small", d); \
28
+ rb_raise(rb_eArgError, "DHeap d=%d is too small", d); \
29
29
  } \
30
30
  if (d > DHEAP_MAX_D) { \
31
- rb_raise(rb_eIndexError, "DHeap d=%d is too large", d); \
31
+ rb_raise(rb_eArgError, "DHeap d=%d is too large", d); \
32
32
  }
33
33
 
34
34
  #define DHEAP_Check_Sift_Idx(sift_index, last_index) \
@@ -51,33 +51,31 @@ ID id_ivar_d;
51
51
  VALUE
52
52
  dheap_ary_sift_up(VALUE heap_array, int d, long sift_index) {
53
53
  DHEAP_Check_Sift_Args(heap_array, d, sift_index);
54
- struct cmp_opt_data cmp_opt = { 0, 0 };
55
54
  // sift it up to where it belongs
56
55
  for (long parent_index; 0 < sift_index; sift_index = parent_index) {
57
- // puts(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
56
+ debug(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
58
57
  parent_index = IDX_PARENT(sift_index);
59
58
  VALUE parent_score = DHEAP_SCORE(heap_array, parent_index);
60
59
 
61
60
  // parent is smaller: heap is restored
62
- if (CMP_LTE(parent_score, sift_score, cmp_opt)) break;
61
+ if (CMP_LTE(parent_score, sift_score)) break;
63
62
 
64
63
  // parent is larger: swap and continue sifting up
65
64
  VALUE parent_value = DHEAP_VALUE(heap_array, parent_index);
66
65
  DHEAP_ASSIGN(heap_array, sift_index, parent_score, parent_value);
67
66
  DHEAP_ASSIGN(heap_array, parent_index, sift_score, sift_value);
68
67
  }
69
- // puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
68
+ debug(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
70
69
  return LONG2NUM(sift_index);
71
70
  }
72
71
 
73
72
  VALUE
74
73
  dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
75
74
  DHEAP_Check_Sift_Args(heap_array, d, sift_index);
76
- struct cmp_opt_data cmp_opt = { 0, 0 };
77
75
 
78
76
  // iteratively sift it down to where it belongs
79
77
  for (long child_index; sift_index < last_index; sift_index = child_index) {
80
- // puts(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
78
+ debug(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
81
79
  // find first child index, and break if we've reached the last layer
82
80
  long child_idx0 = child_index = IDX_CHILD0(sift_index);
83
81
  if (last_index < child_idx0) break;
@@ -92,63 +90,81 @@ dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
92
90
 
93
91
  VALUE sibling_score = DHEAP_SCORE(heap_array, sibling_index);
94
92
 
95
- if (CMP_LT(sibling_score, child_score, cmp_opt)) {
93
+ if (CMP_LT(sibling_score, child_score)) {
96
94
  child_score = sibling_score;
97
95
  child_index = sibling_index;
98
96
  }
99
97
  }
100
98
 
101
99
  // child is larger: heap is restored
102
- if (CMP_LTE(sift_score, child_score, cmp_opt)) break;
100
+ if (CMP_LTE(sift_score, child_score)) break;
103
101
 
104
102
  // child is smaller: swap and continue sifting down
105
103
  VALUE child_value = DHEAP_VALUE(heap_array, child_index);
106
104
  DHEAP_ASSIGN(heap_array, sift_index, child_score, child_value);
107
105
  DHEAP_ASSIGN(heap_array, child_index, sift_score, sift_value);
108
106
  }
109
- // puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
107
+ debug(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
110
108
  return LONG2NUM(sift_index);
111
109
  }
112
110
 
113
111
  #define DHEAP_Load_Sift_Vals(heap_array, dval, idxval) \
114
112
  Check_Type(dval, T_FIXNUM); \
115
- int d = FIX2INT(dval); \
116
- long sift_index = NUM2LONG(idxval);
113
+ int dint = FIX2INT(dval); \
114
+ long idx = NUM2LONG(idxval);
117
115
 
118
116
  /*
119
- * call-seq:
120
- * DHeap.array_sift_up(heap_array, d, sift_index)
117
+ * Treats a +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to
118
+ * restore the heap property for all nodes between it and the root of the tree.
121
119
  *
122
- * Treats +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to restore
123
- * the heap property.
120
+ * The array is interpreted as holding two entries for each node, a score and a
121
+ * value. The scores will be held in every even-numbered array index and the
122
+ * values in every odd numbered index. The array is flat, not an array of
123
+ * length=2 arrays.
124
124
  *
125
- * Time complexity: O(d log n / log d). If the average up shifted element sorts
126
- * into the bottom layer (e.g. new timers), this can avg O(1).
125
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
127
126
  *
127
+ * @param heap_array [Array] the array which is treated as a heap.
128
+ * @param d [Integer] the maximum number of children per parent
129
+ * @param sift_index [Integer] the index to start sifting from
130
+ * @return [Integer] the new index for the object that starts at +sift_index+.
128
131
  */
129
132
  static VALUE
130
- dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
131
- DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
132
- return dheap_ary_sift_up(heap_array, d, sift_index);
133
+ dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE d, VALUE sift_index) {
134
+ DHEAP_Load_Sift_Vals(heap_array, d, sift_index);
135
+ return dheap_ary_sift_up(heap_array, dint, idx);
133
136
  }
134
137
 
135
138
  /*
136
- * call-seq:
137
- * DHeap.array_sift_down(heap_array, d, sift_index)
138
- *
139
139
  * Treats +heap_array+ as a +d+-ary heap and sifts down from +sift_index+ to
140
- * restore the heap property.
140
+ * restore the heap property. If all _d_ subtrees below +sift_index+ are already
141
+ * heaps, this method ensures the entire subtree rooted at +sift_index+ will be
142
+ * a heap.
143
+ *
144
+ * The array is interpreted as holding two entries for each node, a score and a
145
+ * value. The scores will be held in every even-numbered array index and the
146
+ * values in every odd numbered index. The array is flat, not an array of
147
+ * length=2 arrays.
141
148
  *
142
- * Time complexity: O(d log n / log d). If the average down shifted element
143
- * sorts into the bottom layer (e.g. canceled timers), this can avg O(1).
149
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
144
150
  *
151
+ * @param heap_array [Array] the array which is treated as a heap.
152
+ * @param d [Integer] the maximum number of children per parent
153
+ * @param sift_index [Integer] the index to start sifting down from
154
+ * @return [Integer] the new index for the object that starts at +sift_index+.
145
155
  */
146
156
  static VALUE
147
- dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
148
- DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
149
- return dheap_ary_sift_down(heap_array, d, sift_index);
157
+ dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE d, VALUE sift_index) {
158
+ DHEAP_Load_Sift_Vals(heap_array, d, sift_index);
159
+ return dheap_ary_sift_down(heap_array, dint, idx);
150
160
  }
151
161
 
162
+ /*
163
+ * @overload initialize(d = DHeap::DEFAULT_D)
164
+ * Initialize a _d_-ary min-heap.
165
+ *
166
+ * @param d [Integer] maximum number of children per parent
167
+ */
152
168
  static VALUE
153
169
  dheap_initialize(int argc, VALUE *argv, VALUE self) {
154
170
  rb_check_arity(argc, 0, 1);
@@ -162,20 +178,35 @@ dheap_initialize(int argc, VALUE *argv, VALUE self) {
162
178
  return self;
163
179
  }
164
180
 
181
+ /*
182
+ * @return [Integer] the number of elements in the heap
183
+ */
165
184
  static VALUE dheap_size(VALUE self) {
166
185
  VALUE ary = DHEAP_GET_A(self);
167
186
  long size = DHEAP_SIZE(ary);
168
187
  return LONG2NUM(size);
169
188
  }
170
189
 
190
+ /*
191
+ * @return [Boolean] is the heap empty?
192
+ */
171
193
  static VALUE dheap_empty_p(VALUE self) {
172
194
  VALUE ary = DHEAP_GET_A(self);
173
195
  long size = DHEAP_SIZE(ary);
174
196
  return size == 0 ? Qtrue : Qfalse;
175
197
  }
176
198
 
199
+ /*
200
+ * @return [Integer] the maximum number of children per parent
201
+ */
177
202
  static VALUE dheap_attr_d(VALUE self) { return DHEAP_GET_D(self); }
178
203
 
204
+ /*
205
+ * Freezes the heap as well as its underlying array, but does <i>not</i>
206
+ * deep-freeze the elements in the heap.
207
+ *
208
+ * @return [self]
209
+ */
179
210
  static VALUE
180
211
  dheap_freeze(VALUE self) {
181
212
  VALUE ary = DHEAP_GET_A(self);
@@ -193,10 +224,19 @@ dheap_ary_push(VALUE ary, int d, VALUE val, VALUE scr)
193
224
  }
194
225
 
195
226
  /*
196
- * Push val onto the end of the heap, then sift up to maintain heap property.
227
+ * @overload push(score, value = score)
228
+ *
229
+ * Push a value onto the heap, using a score to determine sort-order.
230
+ *
231
+ * Ideally, the score should be a frozen value that can be efficiently compared
232
+ * to other scores, e.g. an Integer or Float or (maybe) a String
233
+ *
234
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
197
235
  *
198
- * Returns the index of the value's final position.
236
+ * @param score [#<=>] a value that can be compared to other scores.
237
+ * @param value [Object] an object that is associated with the score.
199
238
  *
239
+ * @return [Integer] the index of the value's final position.
200
240
  */
201
241
  static VALUE
202
242
  dheap_push(int argc, VALUE *argv, VALUE self) {
@@ -212,16 +252,18 @@ dheap_push(int argc, VALUE *argv, VALUE self) {
212
252
  }
213
253
 
214
254
  /*
215
- * Push val onto the end of the heap, then sift up to maintain heap property.
255
+ * Pushes a comparable value onto the heap.
216
256
  *
217
- * Time complexity: O(d log n / log d).
257
+ * The value will be its own score.
218
258
  *
219
- * Returns +self+.
259
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
220
260
  *
261
+ * @param value [#<=>] a value that can be compared to other heap members.
262
+ * @return [self]
221
263
  */
222
264
  static VALUE
223
- dheap_left_shift(VALUE self, VALUE val) {
224
- dheap_push(1, &val, self);
265
+ dheap_left_shift(VALUE self, VALUE value) {
266
+ dheap_push(1, &value, self);
225
267
  return self;
226
268
  }
227
269
 
@@ -238,6 +280,12 @@ dheap_left_shift(VALUE self, VALUE val) {
238
280
  DHEAP_DROP_LAST(ary); \
239
281
  dheap_ary_sift_down(ary, FIX2INT(dval), 0);
240
282
 
283
+ /*
284
+ * Returns the next value on the heap to be popped without popping it.
285
+ *
286
+ * Time complexity: <b>O(1)</b> <i>(worst-case)</i>
287
+ * @return [Object] the next value to be popped without popping it.
288
+ */
241
289
  static VALUE
242
290
  dheap_peek(VALUE self) {
243
291
  VALUE ary = DHEAP_GET_A(self);
@@ -245,11 +293,9 @@ dheap_peek(VALUE self) {
245
293
  }
246
294
 
247
295
  /*
248
- * Pops the minimum value from the top of the heap, sifting down to maintain
249
- * heap property.
250
- *
251
- * Time complexity: O(d log n / log d).
296
+ * Pops the minimum value from the top of the heap
252
297
  *
298
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
253
299
  */
254
300
  static VALUE
255
301
  dheap_pop(VALUE self) {
@@ -262,42 +308,40 @@ dheap_pop(VALUE self) {
262
308
  }
263
309
 
264
310
  /*
265
- * Pops the minimum value from the top of the heap, sifting down to maintain
266
- * heap property.
311
+ * Pops the minimum value only if it is less than or equal to a max score.
267
312
  *
268
- * Time complexity: O(d log n / log d).
313
+ * @param max_score [#<=>] the maximum score to be popped
269
314
  *
315
+ * @see #pop
270
316
  */
271
317
  static VALUE
272
- dheap_pop_lte(VALUE self, VALUE below_score) {
318
+ dheap_pop_lte(VALUE self, VALUE max_score) {
273
319
  DHEAP_Pop_Init(self);
274
320
  if (last_index < 0) return Qnil;
275
321
  VALUE pop_value = DHEAP_VALUE(ary, 0);
276
322
 
277
323
  VALUE pop_score = DHEAP_SCORE(ary, 0);
278
- struct cmp_opt_data cmp_opt = { 0, 0 };
279
- if (below_score && !CMP_LTE(pop_score, below_score, cmp_opt)) return Qnil;
324
+ if (max_score && !CMP_LTE(pop_score, max_score)) return Qnil;
280
325
 
281
326
  DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
282
327
  return pop_value;
283
328
  }
284
329
 
285
330
  /*
286
- * Pops the minimum value from the top of the heap, sifting down to maintain
287
- * heap property.
331
+ * Pops the minimum value only if it is less than a max score.
288
332
  *
289
- * Time complexity: O(d log n / log d).
333
+ * @param max_score [#<=>] the maximum score to be popped
290
334
  *
335
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
291
336
  */
292
337
  static VALUE
293
- dheap_pop_lt(VALUE self, VALUE below_score) {
338
+ dheap_pop_lt(VALUE self, VALUE max_score) {
294
339
  DHEAP_Pop_Init(self);
295
340
  if (last_index < 0) return Qnil;
296
341
  VALUE pop_value = DHEAP_VALUE(ary, 0);
297
342
 
298
343
  VALUE pop_score = DHEAP_SCORE(ary, 0);
299
- struct cmp_opt_data cmp_opt = { 0, 0 };
300
- if (below_score && !CMP_LT(pop_score, below_score, cmp_opt)) return Qnil;
344
+ if (max_score && !CMP_LT(pop_score, max_score)) return Qnil;
301
345
 
302
346
  DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
303
347
  return pop_value;
@@ -311,6 +355,9 @@ Init_d_heap(void)
311
355
  id_ivar_d = rb_intern_const("d");
312
356
 
313
357
  rb_cDHeap = rb_define_class("DHeap", rb_cObject);
358
+ rb_define_const(rb_cDHeap, "MAX_D", INT2NUM(DHEAP_MAX_D));
359
+ rb_define_const(rb_cDHeap, "DEFAULT_D", INT2NUM(DHEAP_DEFAULT_D));
360
+
314
361
  rb_define_singleton_method(rb_cDHeap, "heap_sift_down", dheap_sift_down_s, 3);
315
362
  rb_define_singleton_method(rb_cDHeap, "heap_sift_up", dheap_sift_up_s, 3);
316
363
 
@@ -11,64 +11,55 @@
11
11
  // comparisons as d gets further from 4.
12
12
  #define DHEAP_MAX_D 32
13
13
 
14
+ VALUE rb_cDHeap;
14
15
 
15
- #define CMP_LT(a, b, cmp_opt) \
16
- (OPTIMIZED_CMP(a, b, cmp_opt) < 0)
17
- #define CMP_LTE(a, b, cmp_opt) \
18
- (OPTIMIZED_CMP(a, b, cmp_opt) <= 0)
19
- #define CMP_GT(a, b, cmp_opt) \
20
- (OPTIMIZED_CMP(a, b, cmp_opt) > 0)
21
- #define CMP_GTE(a, b, cmp_opt) \
22
- (OPTIMIZED_CMP(a, b, cmp_opt) >= 0)
16
+ #define CMP_LT(a, b) (optimized_cmp(a, b) < 0)
17
+ #define CMP_LTE(a, b) (optimized_cmp(a, b) <= 0)
18
+ #define CMP_GT(a, b) (optimized_cmp(a, b) > 0)
19
+ #define CMP_GTE(a, b) (optimized_cmp(a, b) >= 0)
23
20
 
24
- VALUE rb_cDHeap;
21
+ // <=>
25
22
  ID id_cmp;
26
23
 
27
- // from internal/numeric.h
28
- #ifndef INTERNAL_NUMERIC_H
29
- int rb_float_cmp(VALUE x, VALUE y);
30
- #endif /* INTERNAL_NUMERIC_H */
31
-
32
24
  // from internal/compar.h
33
- #ifndef INTERNAL_COMPAR_H
34
25
  #define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
35
26
 
36
- enum {
37
- cmp_opt_Integer,
38
- cmp_opt_String,
39
- cmp_opt_Float,
40
- cmp_optimizable_count
41
- };
42
-
43
- struct cmp_opt_data {
44
- unsigned int opt_methods;
45
- unsigned int opt_inited;
46
- };
27
+ /*
28
+ * short-circuit evaluation for a few basic types.
29
+ *
30
+ * Only Integer, Float, and String are optimized,
31
+ * and only when both arguments are the same type.
32
+ */
33
+ static inline int
34
+ optimized_cmp(VALUE a, VALUE b) {
35
+ if (a == b) // Fixnum equality and object equality
36
+ return 0;
37
+ if (FIXNUM_P(a) && FIXNUM_P(b))
38
+ return (FIX2LONG(a) < FIX2LONG(b)) ? -1 : 1;
39
+ if (RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b))
40
+ {
41
+ double x, y;
42
+ x = RFLOAT_VALUE(a);
43
+ y = RFLOAT_VALUE(b);
44
+ if (isnan(x) || isnan(y)) rb_cmperr(a, b); // raise ArgumentError
45
+ return (x < y) ? -1 : ((x == y) ? 0 : 1);
46
+ }
47
+ if (RB_TYPE_P(a, T_BIGNUM) && RB_TYPE_P(b, T_BIGNUM))
48
+ return FIX2INT(rb_big_cmp(a, b));
49
+ if (STRING_P(a) && STRING_P(b))
50
+ return rb_str_cmp(a, b);
47
51
 
48
- #define NEW_CMP_OPT_MEMO(type, value) \
49
- NEW_PARTIAL_MEMO_FOR(type, value, cmp_opt)
50
- #define CMP_OPTIMIZABLE_BIT(type) (1U << TOKEN_PASTE(cmp_opt_,type))
51
- #define CMP_OPTIMIZABLE(data, type) \
52
- (((data).opt_inited & CMP_OPTIMIZABLE_BIT(type)) ? \
53
- ((data).opt_methods & CMP_OPTIMIZABLE_BIT(type)) : \
54
- (((data).opt_inited |= CMP_OPTIMIZABLE_BIT(type)), \
55
- rb_method_basic_definition_p(TOKEN_PASTE(rb_c,type), id_cmp) && \
56
- ((data).opt_methods |= CMP_OPTIMIZABLE_BIT(type))))
57
-
58
- #define OPTIMIZED_CMP(a, b, data) \
59
- ((FIXNUM_P(a) && FIXNUM_P(b) && CMP_OPTIMIZABLE(data, Integer)) ? \
60
- (((long)a > (long)b) ? 1 : ((long)a < (long)b) ? -1 : 0) : \
61
- (STRING_P(a) && STRING_P(b) && CMP_OPTIMIZABLE(data, String)) ? \
62
- rb_str_cmp(a, b) : \
63
- (RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b) && CMP_OPTIMIZABLE(data, Float)) ? \
64
- rb_float_cmp(a, b) : \
65
- rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b))
52
+ // give up on an optimized version and just call (a <=> b)
53
+ return rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b);
54
+ }
66
55
 
67
- #define puts(v) { \
56
+ #ifdef __D_HEAP_DEBUG
57
+ #define debug(v) { \
68
58
  ID sym_puts = rb_intern("puts"); \
69
59
  rb_funcall(rb_mKernel, sym_puts, 1, v); \
70
60
  }
71
-
72
- #endif /* INTERNAL_COMPAR_H */
61
+ #else
62
+ #define debug(v)
63
+ #endif
73
64
 
74
65
  #endif /* D_HEAP_H */
@@ -1,3 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "mkmf"
2
4
 
5
+ # if /darwin/ =~ RUBY_PLATFORM
6
+ # $CFLAGS << " -D__D_HEAP_DEBUG"
7
+ # end
8
+
3
9
  create_makefile("d_heap/d_heap")
@@ -1,6 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "d_heap/d_heap"
2
4
  require "d_heap/version"
3
5
 
6
+ # A fast _d_-ary heap implementation for ruby, useful in priority queues and graph
7
+ # algorithms.
8
+ #
9
+ # The _d_-ary heap data structure is a generalization of the binary heap, in which
10
+ # the nodes have _d_ children instead of 2. This allows for "decrease priority"
11
+ # operations to be performed more quickly with the tradeoff of slower delete
12
+ # minimum. Additionally, _d_-ary heaps can have better memory cache behavior than
13
+ # binary heaps, allowing them to run more quickly in practice despite slower
14
+ # worst-case time complexity.
15
+ #
4
16
  class DHeap
5
17
 
6
18
  def initialize_copy(other)
@@ -1,3 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DHeap
2
- VERSION = "0.2.1"
4
+ VERSION = "0.2.2"
5
+
3
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: d_heap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - nicholas a. evans
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-12-26 00:00:00.000000000 Z
11
+ date: 2020-12-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  A C extension implementation of a d-ary heap data structure, suitable for
@@ -20,8 +20,10 @@ extensions:
20
20
  - ext/d_heap/extconf.rb
21
21
  extra_rdoc_files: []
22
22
  files:
23
+ - ".github/workflows/main.yml"
23
24
  - ".gitignore"
24
25
  - ".rspec"
26
+ - ".rubocop.yml"
25
27
  - ".travis.yml"
26
28
  - CODE_OF_CONDUCT.md
27
29
  - Gemfile
@@ -32,6 +34,7 @@ files:
32
34
  - bin/console
33
35
  - bin/rake
34
36
  - bin/rspec
37
+ - bin/rubocop
35
38
  - bin/setup
36
39
  - d_heap.gemspec
37
40
  - ext/d_heap/d_heap.c
@@ -61,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
64
  - !ruby/object:Gem::Version
62
65
  version: '0'
63
66
  requirements: []
64
- rubygems_version: 3.2.3
67
+ rubygems_version: 3.1.4
65
68
  signing_key:
66
69
  specification_version: 4
67
70
  summary: A d-ary heap implementation, for priority queues