RubyGems - d_heap - Versions diffs - 0.2.1 → 0.2.2 - Mend

d_heap 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 89808cd668688e16b5dd1e7e6b1ce9e57651217089b0686b4aeb83d41c565020
-  data.tar.gz: 43e11d72c7143061d0b424f8290e4ef7f66dfaedc8df2f5eff11e20cce2f7796
+  metadata.gz: f549e01dd83eb6b48c1190443495a628ed1dd64ead7eb94851e661aef2607e14
+  data.tar.gz: 492ce5c17ace9ecc9deaccf8fcd47883835da28d4e5f32328aef60989186b2ff
 SHA512:
-  metadata.gz: 7ce7b4a755692a99fdee3d30e27f7dd02f6f90c12fe478a5d20f5e224183f82c5d7f82141c80d229edee48a1dcf6a90878c5648b3bb2107d093dcef5884abf59
-  data.tar.gz: 03daf597a17aee15f67f2bf3aa37625e84c9d2f759c22313a9e1010b9f3878c1e8d2dc005b0f82fca5f1792ee57abf7f93d3ab176edad5e29686ca7dedaae905
+  metadata.gz: 85521dee7f2a9992980935756571e87afbe8ae13347b5b3fbad17b501b5709111972b98ad0c9e1fca6d318c4be20ce2983086dfd84f7c0e73636ac9e4f11f253
+  data.tar.gz: e1daac5b02fcc817b3c6c6a99395e3ca0b92f42bb14bd813fedbb3037eed698bda335c2898d5b0131b48fecd73e4ccf1615943a52287c36f73764b08bf8b1969

data/.github/workflows/main.yml ADDED

@@ -0,0 +1,26 @@
+name: Ruby
+on: [push, pull_request]
+jobs:
+  build:
+    strategy:
+      fail-fast: false  # keep running the other matrix cells when one fails
+      matrix:
+        ruby: ["2.5", "2.6", "2.7", "3.0"]  # quoted: a bare 3.0 loads as the YAML float 3
+        os: [ubuntu, macos]
+        experimental: [false]
+    runs-on: ${{ matrix.os }}-latest
+    continue-on-error: ${{ matrix.experimental }}
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Ruby
+      uses: ruby/setup-ruby@v1
+      with:
+        ruby-version: ${{ matrix.ruby }}
+        bundler-cache: true
+    - name: Run the default task
+      run: |
+        gem install bundler -v 2.2.3
+        bundle install
+        bundle exec rake

data/.rubocop.yml ADDED

@@ -0,0 +1,160 @@
+inherit_mode:
+  merge:
+    - Exclude
+AllCops:
+  TargetRubyVersion: 2.5
+  NewCops: disable
+  Exclude:
+    - bin/rake
+    - bin/rspec
+    - bin/rubocop
+###########################################################################
+# rubocop defaults are simply WRONG about many rules... Sorry. It's true.
+###########################################################################
+# Layout: Alignment.  I want these to work, I really do...
+# I wish this worked with "table". but that goes wrong sometimes.
+Layout/HashAlignment: { Enabled: false }
+# This needs to be configurable so parenthesis calls are aligned with first
+# parameter, and non-parenthesis calls are aligned with fixed indentation.
+Layout/ParameterAlignment: { Enabled: false }
+###########################################################################
+# Layout: Empty lines
+Layout/EmptyLineAfterGuardClause:                 { Enabled: false }
+Layout/EmptyLineAfterMagicComment:                { Enabled: true }
+Layout/EmptyLineAfterMultilineCondition:          { Enabled: false }
+Layout/EmptyLines:                                { Enabled: true }
+Layout/EmptyLinesAroundAccessModifier:            { Enabled: true }
+Layout/EmptyLinesAroundArguments:                 { Enabled: true }
+Layout/EmptyLinesAroundBeginBody:                 { Enabled: true }
+Layout/EmptyLinesAroundBlockBody:                 { Enabled: false }
+Layout/EmptyLinesAroundExceptionHandlingKeywords: { Enabled: true }
+Layout/EmptyLinesAroundMethodBody:                { Enabled: true }
+Layout/EmptyLineBetweenDefs:
+  Enabled: true
+  AllowAdjacentOneLineDefs: true
+Layout/EmptyLinesAroundAttributeAccessor:
+  inherit_mode:
+    merge:
+      - AllowedMethods
+  Enabled: true
+  AllowedMethods:
+    - delegate
+    - def_delegator
+    - def_delegators
+    - def_instance_delegators
+# "empty_lines_special" sometimes does the wrong thing and annoys me.
+# But I've mostly learned to live with it... mostly. 🙁
+Layout/EmptyLinesAroundClassBody:
+  Enabled: true
+  EnforcedStyle: empty_lines_special
+Layout/EmptyLinesAroundModuleBody:
+  Enabled: true
+  EnforcedStyle: empty_lines_special
+###########################################################################
+# Layout: Space around, before, inside, etc
+Layout/SpaceAroundEqualsInParameterDefault: { Enabled: false }
+Layout/SpaceBeforeBlockBraces:              { Enabled: false }
+Layout/SpaceBeforeFirstArg:                 { Enabled: false }
+Layout/SpaceInLambdaLiteral:                { Enabled: false }
+Layout/SpaceInsideArrayLiteralBrackets:     { Enabled: false }
+Layout/SpaceInsideHashLiteralBraces:        { Enabled: false }
+Layout/SpaceInsideBlockBraces:
+  EnforcedStyle: space
+  EnforcedStyleForEmptyBraces: space
+  SpaceBeforeBlockParameters: false
+# I would enable this if it were a bit better at handling alignment.
+Layout/ExtraSpacing:
+  Enabled: false
+  AllowForAlignment: true
+  AllowBeforeTrailingComments: true
+###########################################################################
+# Layout: Misc
+Layout/LineLength:
+  Max: 90 # should stay under 80, but we'll allow a little wiggle-room
+Layout/MultilineOperationIndentation: { Enabled: false }
+Layout/MultilineMethodCallIndentation:
+  EnforcedStyle: indented
+###########################################################################
+# Lint and Naming: rubocop defaults are mostly good, but...
+Lint/UnusedMethodArgument: { Enabled: false }
+Naming/BinaryOperatorParameterName: { Enabled: false } # def /(denominator)
+Naming/RescuedExceptionsVariableName: { Enabled: false }
+###########################################################################
+# Metrics:
+# Although it may be better to split specs into multiple files...?
+Metrics/BlockLength:
+  Exclude:
+    - "spec/**/*_spec.rb"
+###########################################################################
+# Style...
+Style/AccessorGrouping:        { Enabled: false }
+Style/AsciiComments:           { Enabled: false } # 👮 can't stop our 🎉🥳🎊🥳!
+Style/EachWithObject:          { Enabled: false }
+Style/FormatStringToken:       { Enabled: false }
+Style/FloatDivision:           { Enabled: false }
+Style/Lambda:                  { Enabled: false }
+Style/LineEndConcatenation:    { Enabled: false }
+Style/MixinGrouping:           { Enabled: false }
+Style/PerlBackrefs:            { Enabled: false } # use occasionally/sparingly
+Style/RescueStandardError:     { Enabled: false }
+Style/SingleLineMethods:       { Enabled: false }
+Style/StabbyLambdaParentheses: { Enabled: false }
+# If rubocop had an option to only enforce this on constants and literals (e.g.
+# strings, regexp, range), I'd agree.
+#
+# But if you are using it e.g. on method arguments of unknown type, in the same
+# style that ruby uses it with grep, then you are doing exactly the right thing.
+Style/CaseEquality: { Enabled: false }
+# I'd enable if "require_parentheses_when_complex" considered unary '!' simple.
+Style/TernaryParentheses:
+  EnforcedStyle: require_parentheses_when_complex
+  Enabled: false
+Style/BlockDelimiters:
+  EnforcedStyle: semantic
+  AllowBracesOnProceduralOneLiners: true
+Style/FormatString:
+  EnforcedStyle: percent
+Style/StringLiterals:
+  Enabled: true
+  EnforcedStyle: double_quotes
+Style/StringLiteralsInInterpolation:
+  Enabled: true
+  EnforcedStyle: double_quotes
+Style/TrailingCommaInHashLiteral:
+  EnforcedStyleForMultiline: consistent_comma
+Style/TrailingCommaInArrayLiteral:
+  EnforcedStyleForMultiline: consistent_comma

data/Gemfile CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 source "https://rubygems.org"
 # Specify your gem's dependencies in d_heap.gemspec
@@ -6,3 +8,4 @@ gemspec
 gem "rake", "~> 13.0"
 gem "rake-compiler"
 gem "rspec", "~> 3.10"
+gem "rubocop", "~> 1.0"

data/Gemfile.lock CHANGED

@@ -1,15 +1,22 @@
 PATH
   remote: .
   specs:
-    d_heap (0.2.1)
+    d_heap (0.2.2)
 GEM
   remote: https://rubygems.org/
   specs:
+    ast (2.4.1)
     diff-lcs (1.4.4)
+    parallel (1.19.2)
+    parser (2.7.2.0)
+      ast (~> 2.4.1)
+    rainbow (3.0.0)
     rake (13.0.3)
     rake-compiler (1.1.1)
       rake
+    regexp_parser (1.8.2)
+    rexml (3.2.3)
     rspec (3.10.0)
       rspec-core (~> 3.10.0)
       rspec-expectations (~> 3.10.0)
@@ -23,6 +30,19 @@ GEM
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.10.0)
     rspec-support (3.10.0)
+    rubocop (1.2.0)
+      parallel (~> 1.10)
+      parser (>= 2.7.1.5)
+      rainbow (>= 2.2.2, < 4.0)
+      regexp_parser (>= 1.8)
+      rexml
+      rubocop-ast (>= 1.0.1)
+      ruby-progressbar (~> 1.7)
+      unicode-display_width (>= 1.4.0, < 2.0)
+    rubocop-ast (1.1.1)
+      parser (>= 2.7.1.5)
+    ruby-progressbar (1.10.1)
+    unicode-display_width (1.7.0)
 PLATFORMS
   ruby
@@ -32,6 +52,7 @@ DEPENDENCIES
   rake (~> 13.0)
   rake-compiler
   rspec (~> 3.10)
+  rubocop (~> 1.0)
 BUNDLED WITH
    2.2.3

data/README.md CHANGED

@@ -8,46 +8,46 @@ the nodes have _d_ children instead of 2.  This allows for "decrease priority"
 operations to be performed more quickly with the tradeoff of slower delete
 minimum.  Additionally, _d_-ary heaps can have better memory cache behavior than
 binary heaps, allowing them to run more quickly in practice despite slower
-worst-case time complexity.
+worst-case time complexity. In the worst case, a _d_-ary heap requires only
+`O(log n / log d)` to push, with the tradeoff that pop is `O(d log n / log d)`.
-_TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
-~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
-directly handled as a priority queue.  These extension methods are meant to be
-used similarly to how `#bsearch` and `#bsearch_index` might be used.
-_TODO:_ Also included is `DHeap::Set`, which augments the basic heap with an
-internal `Hash`, which maps a set of values to scores.
-loosely inspired by go's timers.  e.g: It lazily sifts its heap after deletion
-and adjustments, to achieve faster average runtime for *add* and *cancel*
-operations.
-_TODO:_ Also included is `DHeap::Timers`, which contains some features that are
-loosely inspired by go's timers.  e.g: It lazily sifts its heap after deletion
-and adjustments, to achieve faster average runtime for *add* and *cancel*
-operations.
+Although you should probably just stick with the default _d_ value of `4`, it
+may be worthwhile to benchmark your specific scenario.
 ## Motivation
-Ruby's Array class comes with some helpful methods for maintaining a sorted
-array, by combining `#bsearch_index` with `#insert`.  With certain insert/remove
-workloads that can perform very well, but in the worst-case an insert or delete
-can result in O(n), since it may need to memcopy a significant portion of the
-array.  Knowing that priority queues are usually implemented with a heap, and
-that the heap is a relatively simple data structure, I set out to replace my
-`#bsearch_index` and `#insert` code with a one.  I was surprised to find that,
-at least under certain benchmarks, my ruby Heap implementation was tied with or
-slower than inserting into a fully sorted array.  On the one hand, this is a
-testament to ruby's fine-tuned Array implementation.  On the other hand, it
-seemed like a heap implementated in C should easily match the speed of ruby's
-bsearch + insert.
-Additionally, I was inspired by reading go's "timer.go" implementation to
-experiment with a 4-ary heap, instead of the traditional binary heap.  In the
-case of timers, new timers are usually scheduled to run after most of the
-existing timers and timers are usually canceled before they have a chance to
-run. While a binary heap holds 50% of its elements in its last layer, 75% of a
-4-ary heap will have no children.  That diminishes the extra comparison
-overhead during sift-down.
+Sometimes you just need a priority queue, right?  With a regular queue, you
+expect "FIFO" behavior: first in, first out.  With a priority queue, you push
+with a score (or your elements are comparable), and you want to be able to
+efficiently pop off the minimum (or maximum) element.
+One obvious approach is to simply maintain an array in sorted order.  And
+ruby's Array class makes it simple to maintain a sorted array by combining
+`#bsearch_index` with `#insert`.  With certain insert/remove workloads that can
+perform very well, but in the worst-case an insert or delete can result in O(n),
+since `#insert` may need to `memcpy` or `memmove` a significant portion of the
+array.
+But the standard way to efficiently and simply solve this problem is using a
+binary heap.  Although it increases the time for `pop`, it converts the
+amortized time per push + pop from `O(n)` to `O(d log n / log d)`.
+I was surprised to find that, at least under certain benchmarks, my pure ruby
+heap implementation was usually slower than inserting into a fully sorted
+array.  While this is a testament to ruby's fine-tuned Array implementation, a
+heap implemented in C should easily perform faster than `Array#insert`.
+The biggest issue is that it just takes far too much time to call `<=>` from
+ruby code: A sorted array only requires `log n / log 2` comparisons to insert
+and no comparisons to pop.  However a _d_-ary heap requires `log n / log d` to
+insert plus an additional `d log n / log d` to pop.  If your queue contains only
+a few hundred items at once, the overhead of those extra calls to `<=>` is far
+more than occasionally calling `memcpy`.
+It's likely that MJIT will eventually make the C-extension completely
+unnecessary.  This is definitely hotspot code, and the basic ruby implementation
+would work fine, if not for that `<=>` overhead.  Until then... this gem gets
+the job done.
 ## Installation
@@ -90,6 +90,32 @@ heap.pop.last # => Task[1]
 Read the `rdoc` for more detailed documentation and examples.
+## TODOs...
+_TODO:_ In addition to a basic _d_-ary heap class (`DHeap`), this library
+~~includes~~ _will include_ extensions to `Array`, allowing an Array to be
+directly handled as a priority queue.  These extension methods are meant to be
+used similarly to how `#bsearch` and `#bsearch_index` might be used.
+_TODO:_ Also ~~included is~~ _will include_ `DHeap::Set`, which augments the
+basic heap with an internal `Hash`, which maps a set of values to scores.
+loosely inspired by go's timers.  e.g: It lazily sifts its heap after deletion
+and adjustments, to achieve faster average runtime for *add* and *cancel*
+operations.
+_TODO:_ Also ~~included is~~ _will include_ `DHeap::Timers`, which contains some
+features that are loosely inspired by go's timers.  e.g: It lazily sifts its
+heap after deletion and adjustments, to achieve faster average runtime for *add*
+and *cancel* operations.
+Additionally, I was inspired by reading go's "timer.go" implementation to
+experiment with a 4-ary heap instead of the traditional binary heap.  In the
+case of timers, new timers are usually scheduled to run after most of the
+existing timers.  And timers are usually canceled before they have a chance to
+run. While a binary heap holds 50% of its elements in its last layer, 75% of a
+4-ary heap will have no children.  That diminishes the extra comparison overhead
+during sift-down.
 ## Benchmarks
 _TODO: put benchmarks here._

data/Rakefile CHANGED

@@ -1,14 +1,20 @@
+# frozen_string_literal: true
 require "bundler/gem_tasks"
 require "rspec/core/rake_task"
 RSpec::Core::RakeTask.new(:spec)
+require "rubocop/rake_task"
+RuboCop::RakeTask.new
 require "rake/extensiontask"
-task :build => :compile
+task build: :compile
 Rake::ExtensionTask.new("d_heap") do |ext|
   ext.lib_dir = "lib/d_heap"
 end
-task :default => [:clobber, :compile, :spec]
+task default: %i[clobber compile spec rubocop]

data/bin/console CHANGED

@@ -1,4 +1,5 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 require "bundler/setup"
 require "d_heap"

data/bin/rubocop ADDED

@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+#
+# This file was generated by Bundler.
+#
+# The application 'rubocop' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+require "rubygems"
+require "bundler/setup"
+load Gem.bin_path("rubocop", "rubocop")

data/d_heap.gemspec CHANGED

@@ -1,4 +1,6 @@
-require_relative 'lib/d_heap/version'
+# frozen_string_literal: true
+require_relative "lib/d_heap/version"
 Gem::Specification.new do |spec|
   spec.name          = "d_heap"
@@ -6,7 +8,7 @@ Gem::Specification.new do |spec|
   spec.authors       = ["nicholas a. evans"]
   spec.email         = ["nicholas.evans@gmail.com"]
-  spec.summary       = %q{A d-ary heap implementation, for priority queues}
+  spec.summary       = "A d-ary heap implementation, for priority queues"
   spec.description   = <<~DESC
     A C extension implementation of a d-ary heap data structure, suitable for
     use in e.g. priority queues or Djikstra's algorithm.
@@ -21,11 +23,11 @@ Gem::Specification.new do |spec|
   # Specify which files should be added to the gem when it is released.
   # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
-  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
-    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
-  end
+  spec.files = Dir.chdir(File.expand_path(__dir__)) {
+    `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
+  }
   spec.bindir        = "exe"
-  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.executables   = spec.files.grep(%r{^exe/}) {|f| File.basename(f) }
   spec.require_paths = ["lib"]
   spec.extensions    = ["ext/d_heap/extconf.rb"]
 end

data/ext/d_heap/d_heap.c CHANGED

@@ -25,10 +25,10 @@ ID id_ivar_d;
 #define DHEAP_Check_d_size(d) \
     if (d < 2) { \
-        rb_raise(rb_eIndexError, "DHeap d=%d is too small", d); \
+        rb_raise(rb_eArgError, "DHeap d=%d is too small", d); \
     } \
     if (d > DHEAP_MAX_D) { \
-        rb_raise(rb_eIndexError, "DHeap d=%d is too large", d); \
+        rb_raise(rb_eArgError, "DHeap d=%d is too large", d); \
     }
 #define DHEAP_Check_Sift_Idx(sift_index, last_index) \
@@ -51,33 +51,31 @@ ID id_ivar_d;
 VALUE
 dheap_ary_sift_up(VALUE heap_array, int d, long sift_index) {
     DHEAP_Check_Sift_Args(heap_array, d, sift_index);
-    struct cmp_opt_data cmp_opt = { 0, 0 };
     // sift it up to where it belongs
     for (long parent_index; 0 < sift_index; sift_index = parent_index) {
-        // puts(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
+        debug(rb_sprintf("sift up(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
         parent_index = IDX_PARENT(sift_index);
         VALUE parent_score = DHEAP_SCORE(heap_array, parent_index);
         // parent is smaller: heap is restored
-        if (CMP_LTE(parent_score, sift_score, cmp_opt)) break;
+        if (CMP_LTE(parent_score, sift_score)) break;
         // parent is larger: swap and continue sifting up
         VALUE parent_value = DHEAP_VALUE(heap_array, parent_index);
         DHEAP_ASSIGN(heap_array, sift_index, parent_score, parent_value);
         DHEAP_ASSIGN(heap_array, parent_index, sift_score, sift_value);
     }
-    // puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
+    debug(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
     return LONG2NUM(sift_index);
 }
 VALUE
 dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
     DHEAP_Check_Sift_Args(heap_array, d, sift_index);
-    struct cmp_opt_data cmp_opt = { 0, 0 };
      // iteratively sift it down to where it belongs
     for (long child_index; sift_index < last_index; sift_index = child_index) {
-        // puts(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
+        debug(rb_sprintf("sift dn(%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
         // find first child index, and break if we've reached the last layer
         long child_idx0 = child_index = IDX_CHILD0(sift_index);
         if (last_index < child_idx0) break;
@@ -92,63 +90,81 @@ dheap_ary_sift_down(VALUE heap_array, int d, long sift_index) {
             VALUE sibling_score = DHEAP_SCORE(heap_array, sibling_index);
-            if (CMP_LT(sibling_score, child_score, cmp_opt)) {
+            if (CMP_LT(sibling_score, child_score)) {
                 child_score = sibling_score;
                 child_index = sibling_index;
             }
         }
         // child is larger: heap is restored
-        if (CMP_LTE(sift_score, child_score, cmp_opt)) break;
+        if (CMP_LTE(sift_score, child_score)) break;
         // child is smaller: swap and continue sifting down
         VALUE child_value = DHEAP_VALUE(heap_array, child_index);
         DHEAP_ASSIGN(heap_array, sift_index, child_score, child_value);
         DHEAP_ASSIGN(heap_array, child_index, sift_score, sift_value);
     }
-    // puts(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
+    debug(rb_sprintf("sifted (%"PRIsVALUE", %d, %ld)", heap_array, d, sift_index));
     return LONG2NUM(sift_index);
 }
 #define DHEAP_Load_Sift_Vals(heap_array, dval, idxval) \
     Check_Type(dval, T_FIXNUM); \
-    int d = FIX2INT(dval); \
-    long sift_index = NUM2LONG(idxval);
+    int dint = FIX2INT(dval); \
+    long idx = NUM2LONG(idxval);
 /*
- * call-seq:
- *    DHeap.array_sift_up(heap_array, d, sift_index)
+ * Treats a +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to
+ * restore the heap property for all nodes between it and the root of the tree.
  *
- * Treats +heap_array+ as a +d+-ary heap and sifts up from +sift_index+ to restore
- * the heap property.
+ * The array is interpreted as holding two entries for each node, a score and a
+ * value.  The scores will be held in every even-numbered array index and the
+ * values in every odd-numbered index.  The array is flat, not an array of
+ * length=2 arrays.
  *
- * Time complexity: O(d log n / log d).  If the average up shifted element sorts
- * into the bottom layer (e.g. new timers), this can avg O(1).
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
  *
+ * @param heap_array [Array] the array which is treated as a heap.
+ * @param d [Integer] the maximum number of children per parent
+ * @param sift_index [Integer] the index to start sifting from
+ * @return [Integer] the new index for the object that starts at +sift_index+.
  */
 static VALUE
-dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
-    DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
-    return dheap_ary_sift_up(heap_array, d, sift_index);
+dheap_sift_up_s(VALUE unused, VALUE heap_array, VALUE d, VALUE sift_index) {
+    DHEAP_Load_Sift_Vals(heap_array, d, sift_index);
+    return dheap_ary_sift_up(heap_array, dint, idx);
 }
 /*
- * call-seq:
- *    DHeap.array_sift_down(heap_array, d, sift_index)
- *
  * Treats +heap_array+ as a +d+-ary heap and sifts down from +sift_index+ to
- * restore the heap property.
+ * restore the heap property. If all _d_ subtrees below +sift_index+ are already
+ * heaps, this method ensures the entire subtree rooted at +sift_index+ will be
+ * a heap.
+ *
+ * The array is interpreted as holding two entries for each node, a score and a
+ * value.  The scores will be held in every even-numbered array index and the
+ * values in every odd-numbered index.  The array is flat, not an array of
+ * length=2 arrays.
  *
- * Time complexity: O(d log n / log d).  If the average down shifted element
- * sorts into the bottom layer (e.g. canceled timers), this can avg O(1).
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
  *
+ * @param heap_array [Array] the array which is treated as a heap.
+ * @param d [Integer] the maximum number of children per parent
+ * @param sift_index [Integer] the index to start sifting down from
+ * @return [Integer] the new index for the object that starts at +sift_index+.
  */
 static VALUE
-dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE dval, VALUE idxval) {
-    DHEAP_Load_Sift_Vals(heap_array, dval, idxval);
-    return dheap_ary_sift_down(heap_array, d, sift_index);
+dheap_sift_down_s(VALUE unused, VALUE heap_array, VALUE d, VALUE sift_index) {
+    DHEAP_Load_Sift_Vals(heap_array, d, sift_index);
+    return dheap_ary_sift_down(heap_array, dint, idx);
 }
+/*
+ * @overload initialize(d = DHeap::DEFAULT_D)
+ *   Initialize a _d_-ary min-heap.
+ *
+ *   @param d [Integer] maximum number of children per parent
+ */
 static VALUE
 dheap_initialize(int argc, VALUE *argv, VALUE self) {
     rb_check_arity(argc, 0, 1);
@@ -162,20 +178,35 @@ dheap_initialize(int argc, VALUE *argv, VALUE self) {
     return self;
 }
+/*
+ * @return [Integer] the number of elements in the heap
+ */
 static VALUE dheap_size(VALUE self) {
     VALUE ary = DHEAP_GET_A(self);
     long size = DHEAP_SIZE(ary);
     return LONG2NUM(size);
 }
+/*
+ * @return [Boolean] is the heap empty?
+ */
 static VALUE dheap_empty_p(VALUE self) {
     VALUE ary = DHEAP_GET_A(self);
     long size = DHEAP_SIZE(ary);
     return size == 0 ? Qtrue : Qfalse;
 }
+/*
+ * @return [Integer] the maximum number of children per parent
+ */
 static VALUE dheap_attr_d(VALUE self) { return DHEAP_GET_D(self); }
+/*
+ * Freezes the heap as well as its underlying array, but does <i>not</i>
+ * deep-freeze the elements in the heap.
+ *
+ * @return [self]
+ */
 static VALUE
 dheap_freeze(VALUE self) {
     VALUE ary = DHEAP_GET_A(self);
@@ -193,10 +224,19 @@ dheap_ary_push(VALUE ary, int d, VALUE val, VALUE scr)
 }
 /*
- * Push val onto the end of the heap, then sift up to maintain heap property.
+ * @overload push(score, value = score)
+ *
+ * Push a value onto heap, using a score to determine sort-order.
+ *
+ * Ideally, the score should be a frozen value that can be efficiently compared
+ * to other scores, e.g. an Integer or Float or (maybe) a String
+ *
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
  *
- * Returns the index of the value's final position.
+ * @param score [#<=>] a value that can be compared to other scores.
+ * @param value [Object] an object that is associated with the score.
  *
+ * @return [Integer] the index of the value's final position.
  */
 static VALUE
 dheap_push(int argc, VALUE *argv, VALUE self) {
@@ -212,16 +252,18 @@ dheap_push(int argc, VALUE *argv, VALUE self) {
 }
 /*
- * Push val onto the end of the heap, then sift up to maintain heap property.
+ * Pushes a comparable value onto the heap.
  *
- * Time complexity: O(d log n / log d).
+ * The value will be its own score.
  *
- * Returns +self+.
+ * Time complexity: <b>O(log n / log d)</b> <i>(worst-case)</i>
  *
+ * @param value [#<=>] a value that can be compared to other heap members.
+ * @return [self]
  */
 static VALUE
-dheap_left_shift(VALUE self, VALUE val) {
-    dheap_push(1, &val, self);
+dheap_left_shift(VALUE self, VALUE value) {
+    dheap_push(1, &value, self);
     return self;
 }
@@ -238,6 +280,12 @@ dheap_left_shift(VALUE self, VALUE val) {
     DHEAP_DROP_LAST(ary); \
     dheap_ary_sift_down(ary, FIX2INT(dval), 0);
+/*
+ * Returns the next value on the heap to be popped without popping it.
+ *
+ * Time complexity: <b>O(1)</b> <i>(worst-case)</i>
+ * @return [Object] the next value to be popped without popping it.
+ */
 static VALUE
 dheap_peek(VALUE self) {
     VALUE ary = DHEAP_GET_A(self);
@@ -245,11 +293,9 @@ dheap_peek(VALUE self) {
 }
 /*
- * Pops the minimum value from the top of the heap, sifting down to maintain
- * heap property.
- *
- * Time complexity: O(d log n / log d).
+ * Pops the minimum value from the top of the heap
  *
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
  */
 static VALUE
 dheap_pop(VALUE self) {
@@ -262,42 +308,40 @@ dheap_pop(VALUE self) {
 }
 /*
- * Pops the minimum value from the top of the heap, sifting down to maintain
- * heap property.
+ * Pops the minimum value only if it is less than or equal to a max score.
  *
- * Time complexity: O(d log n / log d).
+ * @param max_score [#<=>] the maximum score to be popped
  *
+ * @see #pop
  */
 static VALUE
-dheap_pop_lte(VALUE self, VALUE below_score) {
+dheap_pop_lte(VALUE self, VALUE max_score) {
     DHEAP_Pop_Init(self);
     if (last_index <  0) return Qnil;
     VALUE pop_value = DHEAP_VALUE(ary, 0);
     VALUE pop_score = DHEAP_SCORE(ary, 0);
-    struct cmp_opt_data cmp_opt = { 0, 0 };
-    if (below_score && !CMP_LTE(pop_score, below_score, cmp_opt)) return Qnil;
+    if (max_score && !CMP_LTE(pop_score, max_score)) return Qnil;
     DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
     return pop_value;
 }
 /*
- * Pops the minimum value from the top of the heap, sifting down to maintain
- * heap property.
+ * Pops the minimum value only if it is less than a max score.
  *
- * Time complexity: O(d log n / log d).
+ * @param max_score [#<=>] the maximum score to be popped
  *
+ * Time complexity: <b>O(d log n / log d)</b> <i>(worst-case)</i>
  */
 static VALUE
-dheap_pop_lt(VALUE self, VALUE below_score) {
+dheap_pop_lt(VALUE self, VALUE max_score) {
     DHEAP_Pop_Init(self);
     if (last_index <  0) return Qnil;
     VALUE pop_value = DHEAP_VALUE(ary, 0);
     VALUE pop_score = DHEAP_SCORE(ary, 0);
-    struct cmp_opt_data cmp_opt = { 0, 0 };
-    if (below_score && !CMP_LT(pop_score, below_score, cmp_opt)) return Qnil;
+    if (max_score && !CMP_LT(pop_score, max_score)) return Qnil;
     DHEAP_Pop_SwapLastAndSiftDown(ary, dval, last_index, sift_value);
     return pop_value;
@@ -311,6 +355,9 @@ Init_d_heap(void)
     id_ivar_d = rb_intern_const("d");
     rb_cDHeap = rb_define_class("DHeap", rb_cObject);
+    rb_define_const(rb_cDHeap, "MAX_D", INT2NUM(DHEAP_MAX_D));
+    rb_define_const(rb_cDHeap, "DEFAULT_D", INT2NUM(DHEAP_DEFAULT_D));
     rb_define_singleton_method(rb_cDHeap, "heap_sift_down", dheap_sift_down_s, 3);
     rb_define_singleton_method(rb_cDHeap, "heap_sift_up",   dheap_sift_up_s, 3);

data/ext/d_heap/d_heap.h CHANGED

@@ -11,64 +11,55 @@
 // comparisons as d gets further from 4.
 #define DHEAP_MAX_D 32
+VALUE rb_cDHeap;
-#define CMP_LT(a, b, cmp_opt) \
-    (OPTIMIZED_CMP(a, b, cmp_opt) < 0)
-#define CMP_LTE(a, b, cmp_opt) \
-    (OPTIMIZED_CMP(a, b, cmp_opt) <= 0)
-#define CMP_GT(a, b, cmp_opt) \
-    (OPTIMIZED_CMP(a, b, cmp_opt) > 0)
-#define CMP_GTE(a, b, cmp_opt) \
-    (OPTIMIZED_CMP(a, b, cmp_opt) >= 0)
+#define CMP_LT(a, b)  (optimized_cmp(a, b) <  0)
+#define CMP_LTE(a, b) (optimized_cmp(a, b) <= 0)
+#define CMP_GT(a, b)  (optimized_cmp(a, b) >  0)
+#define CMP_GTE(a, b) (optimized_cmp(a, b) >= 0)
-VALUE rb_cDHeap;
+// <=>
 ID id_cmp;
-// from internal/numeric.h
-#ifndef INTERNAL_NUMERIC_H
-int rb_float_cmp(VALUE x, VALUE y);
-#endif /* INTERNAL_NUMERIC_H */
 // from internal/compar.h
-#ifndef INTERNAL_COMPAR_H
 #define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
-enum {
-    cmp_opt_Integer,
-    cmp_opt_String,
-    cmp_opt_Float,
-    cmp_optimizable_count
-};
-struct cmp_opt_data {
-    unsigned int opt_methods;
-    unsigned int opt_inited;
-};
+/*
+ * short-circuit evaluation for a few basic types.
+ *
+ * Only Integer (Fixnum pairs or Bignum pairs), Float, and String are optimized,
+ * and only when both arguments are of the same kind; anything else calls <=>.
+ */
+static inline int
+optimized_cmp(VALUE a, VALUE b) {
+    if (a == b) // Fixnum equality and object equality
+        return 0;
+    if (FIXNUM_P(a) && FIXNUM_P(b))
+        return (FIX2LONG(a) < FIX2LONG(b)) ? -1 : 1;
+    if (RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b))
+    {
+        double x, y;
+        x = RFLOAT_VALUE(a);
+        y = RFLOAT_VALUE(b);
+        if (isnan(x) || isnan(y)) rb_cmperr(a, b); // raise ArgumentError
+        return (x < y) ? -1 : ((x == y) ? 0 : 1);
+    }
+    if (RB_TYPE_P(a, T_BIGNUM) && RB_TYPE_P(b, T_BIGNUM))
+        return FIX2INT(rb_big_cmp(a, b));
+    if (STRING_P(a) && STRING_P(b))
+        return rb_str_cmp(a, b);
-#define NEW_CMP_OPT_MEMO(type, value) \
-    NEW_PARTIAL_MEMO_FOR(type, value, cmp_opt)
-#define CMP_OPTIMIZABLE_BIT(type) (1U << TOKEN_PASTE(cmp_opt_,type))
-#define CMP_OPTIMIZABLE(data, type) \
-    (((data).opt_inited & CMP_OPTIMIZABLE_BIT(type)) ? \
-     ((data).opt_methods & CMP_OPTIMIZABLE_BIT(type)) : \
-     (((data).opt_inited |= CMP_OPTIMIZABLE_BIT(type)), \
-      rb_method_basic_definition_p(TOKEN_PASTE(rb_c,type), id_cmp) && \
-      ((data).opt_methods |= CMP_OPTIMIZABLE_BIT(type))))
-#define OPTIMIZED_CMP(a, b, data) \
-    ((FIXNUM_P(a) && FIXNUM_P(b) && CMP_OPTIMIZABLE(data, Integer)) ? \
-     (((long)a > (long)b) ? 1 : ((long)a < (long)b) ? -1 : 0) : \
-     (STRING_P(a) && STRING_P(b) && CMP_OPTIMIZABLE(data, String)) ? \
-     rb_str_cmp(a, b) : \
-     (RB_FLOAT_TYPE_P(a) && RB_FLOAT_TYPE_P(b) && CMP_OPTIMIZABLE(data, Float)) ? \
-     rb_float_cmp(a, b) : \
-     rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b))
+    // give up on an optimized version and just call (a <=> b)
+    return rb_cmpint(rb_funcallv(a, id_cmp, 1, &b), a, b);
+}
-#define puts(v) { \
+#ifdef __D_HEAP_DEBUG
+#define debug(v) { \
     ID sym_puts = rb_intern("puts"); \
     rb_funcall(rb_mKernel, sym_puts, 1, v); \
 }
-#endif /* INTERNAL_COMPAR_H */
+#else
+#define debug(v)
+#endif
 #endif /* D_HEAP_H */

data/ext/d_heap/extconf.rb CHANGED

@@ -1,3 +1,9 @@
+# frozen_string_literal: true
 require "mkmf"
+# if /darwin/ =~ RUBY_PLATFORM
+#   $CFLAGS << " -D__D_HEAP_DEBUG"
+# end
 create_makefile("d_heap/d_heap")

data/lib/d_heap.rb CHANGED

@@ -1,6 +1,18 @@
+# frozen_string_literal: true
 require "d_heap/d_heap"
 require "d_heap/version"
+# A fast _d_-ary heap implementation for ruby, useful in priority queues and graph
+# algorithms.
+#
+# The _d_-ary heap data structure is a generalization of the binary heap, in which
+# the nodes have _d_ children instead of 2.  This allows for "decrease priority"
+# operations to be performed more quickly with the tradeoff of slower delete
+# minimum.  Additionally, _d_-ary heaps can have better memory cache behavior than
+# binary heaps, allowing them to run more quickly in practice despite slower
+# worst-case time complexity.
+#
 class DHeap
   def initialize_copy(other)

data/lib/d_heap/version.rb CHANGED

@@ -1,3 +1,6 @@
+# frozen_string_literal: true
 class DHeap
-  VERSION = "0.2.1"
+  VERSION = "0.2.2"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: d_heap
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.2.2
 platform: ruby
 authors:
 - nicholas a. evans
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-12-26 00:00:00.000000000 Z
+date: 2020-12-27 00:00:00.000000000 Z
 dependencies: []
 description: |
   A C extension implementation of a d-ary heap data structure, suitable for
@@ -20,8 +20,10 @@ extensions:
 - ext/d_heap/extconf.rb
 extra_rdoc_files: []
 files:
+- ".github/workflows/main.yml"
 - ".gitignore"
 - ".rspec"
+- ".rubocop.yml"
 - ".travis.yml"
 - CODE_OF_CONDUCT.md
 - Gemfile
@@ -32,6 +34,7 @@ files:
 - bin/console
 - bin/rake
 - bin/rspec
+- bin/rubocop
 - bin/setup
 - d_heap.gemspec
 - ext/d_heap/d_heap.c
@@ -61,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.3
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: A d-ary heap implementation, for priority queues