RubyGems - d_heap - Versions diffs - 0.5.0 → 0.6.0 - Mend

d_heap 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

checksums.yaml +4 -4
data/.github/workflows/main.yml +2 -2
data/.gitignore +1 -0
data/.rubocop.yml +1 -1
data/.yardopts +10 -0
data/CHANGELOG.md +19 -6
data/Gemfile +4 -0
data/Gemfile.lock +10 -1
data/N +7 -0
data/README.md +185 -231
data/benchmarks/push_n.yml +10 -6
data/benchmarks/push_n_pop_n.yml +27 -10
data/benchmarks/push_pop.yml +5 -0
data/bin/bench_charts +13 -0
data/d_heap.gemspec +1 -1
data/ext/d_heap/d_heap.c +435 -140
data/ext/d_heap/extconf.rb +3 -4
data/images/push_n.png +0 -0
data/images/push_n_pop_n.png +0 -0
data/images/push_pop.png +0 -0
data/images/wikipedia-min-heap.png +0 -0
data/lib/benchmark_driver/runner/ips_zero_fail.rb +89 -51
data/lib/d_heap.rb +81 -18
data/lib/d_heap/benchmarks/implementations.rb +30 -28
data/lib/d_heap/benchmarks/rspec_matchers.rb +29 -51
data/lib/d_heap/version.rb +1 -1
metadata +10 -4
data/ext/d_heap/d_heap.h +0 -50

data/ext/d_heap/extconf.rb CHANGED

@@ -8,10 +8,9 @@ require "mkmf"
 # $CFLAGS << " -g -ginline-points "
 # $CFLAGS << " -fno-omit-frame-pointer "
-# CONFIG["debugflags"] << " -ggdb3 -gstatement-frontiers -ginline-points "
-CONFIG["optflags"]  << " -O3 "
-CONFIG["optflags"]  << " -fno-omit-frame-pointer "
-CONFIG["warnflags"] << " -Werror"
+if enable_config("debug")
+  CONFIG["warnflags"] << " -Werror -Wpedantic "
+end
 have_func "rb_gc_mark_movable" # since ruby-2.7

data/images/push_n.png ADDED

Binary file

data/images/push_n_pop_n.png ADDED

Binary file

data/images/push_pop.png ADDED

Binary file

data/images/wikipedia-min-heap.png ADDED

Binary file

data/lib/benchmark_driver/runner/ips_zero_fail.rb CHANGED

@@ -29,77 +29,115 @@ class BenchmarkDriver::Runner::IpsZeroFail < BenchmarkDriver::Runner::Ips
   class Job < BenchmarkDriver::DefaultJob
     attr_accessor :warmup_value, :warmup_duration, :warmup_loop_count
+    def add_warmup_attrs(value, duration, loop_count)
+      self.warmup_value = value
+      self.warmup_duration = duration
+      self.warmup_loop_count = loop_count
+    end
   end
   # BenchmarkDriver::Runner looks for this class
   JobParser = BenchmarkDriver::DefaultJobParser.for(klass: Job, metrics: [METRIC])
-  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/BlockLength, Layout/LineLength, Layout/SpaceInsideBlockBraces, Style/BlockDelimiters
   # This method is dynamically called by `BenchmarkDriver::JobRunner.run`
   # @param [Array<BenchmarkDriver::Default::Job>] jobs
   def run(jobs)
-    if jobs.any? { |job| job.loop_count.nil? }
-      @output.with_warmup do
-        jobs = jobs.map do |job|
-          next job if job.loop_count # skip warmup if loop_count is set
-          @output.with_job(name: job.name) do
-            context = job.runnable_contexts(@contexts).first
-            duration, loop_count = run_warmup(job, context: context)
-            value, duration = value_duration(duration: duration, loop_count: loop_count)
-            @output.with_context(name: context.name, executable: context.executable, gems: context.gems, prelude: context.prelude) do
-              @output.report(values: { metric => value }, duration: duration, loop_count: loop_count)
-            end
-            warmup_loop_count = loop_count
-            loop_count = (loop_count.to_f * @config.run_duration / duration).floor
-            Job.new(**job.to_h.merge(loop_count: loop_count))
-              .tap {|j| j.warmup_value      = value }
-              .tap {|j| j.warmup_duration   = duration }
-              .tap {|j| j.warmup_loop_count = warmup_loop_count }
-          end
-        end
-          .compact
-      end
+    jobs = run_all_jobs_warmup(jobs)
+    run_all_jobs_benchmarks(jobs)
+  end
+  def run_all_jobs_warmup(jobs)
+    return jobs if jobs.all?(&:loop_count)
+    @output.with_warmup do
+      jobs.map! {|job|
+        # skip warmup if loop_count is set
+        job.loop_count ? job : output_warmup_and_config_job(job)
+      }
     end
+  end
+  def run_all_jobs_benchmarks(jobs)
     @output.with_benchmark do
       jobs.each do |job|
         @output.with_job(name: job.name) do
           job.runnable_contexts(@contexts).each do |context|
-            repeat_params = { config: @config, larger_better: true, rest_on_average: :average }
-            result =
-              if job.loop_count&.positive?
-                loop_count = job.loop_count
-                BenchmarkDriver::Repeater.with_repeat(**repeat_params) do
-                  run_benchmark(job, context: context)
-                end
-              else
-                loop_count = job.warmup_loop_count
-                repeater_value = [job.warmup_value, job.warmup_duration]
-                BenchmarkDriver::Repeater::RepeatResult.new(
-                  value: repeater_value, all_values: [repeater_value]
-                )
-              end
-            value, duration = result.value
-            @output.with_context(name: context.name, executable: context.executable, gems: context.gems, prelude: context.prelude) do
-              @output.report(
-                values: { metric => value },
-                all_values: { metric => result.all_values },
-                duration: duration,
-                loop_count: loop_count,
-              )
-            end
+            run_and_report_job(job, context)
           end
         end
       end
     end
   end
-  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/BlockLength, Layout/LineLength, Layout/SpaceInsideBlockBraces, Style/BlockDelimiters
+  def output_warmup_and_config_job(job)
+    @output.with_job(name: job.name) do
+      context = job.runnable_contexts(@contexts).first
+      value, duration, warmup_loop_count = run_and_report_warmup_job(job, context)
+      loop_count = (warmup_loop_count.to_f * @config.run_duration / duration).floor
+      Job.new(**job.to_h.merge(loop_count: loop_count))
+        .tap {|j| j.add_warmup_attrs(value, duration, warmup_loop_count) }
+    end
+  end
+  def run_and_report_warmup_job(job, context)
+    duration, loop_count = run_warmup(job, context: context)
+    value, duration = value_duration(duration: duration, loop_count: loop_count)
+    output_with_context(context) do
+      @output.report(
+        values: {metric => value}, duration: duration, loop_count: loop_count
+      )
+    end
+    [value, duration, loop_count]
+  end
+  def run_and_report_job(job, context)
+    result, loop_count = run_job_with_repeater(job, context)
+    value, duration = result.value
+    output_with_context(context) do
+      @output.report(
+        values: { metric => value },
+        all_values: { metric => result.all_values },
+        duration: duration,
+        loop_count: loop_count,
+      )
+    end
+  end
+  def output_with_context(context, &block)
+    @output.with_context(
+      name: context.name,
+      executable: context.executable,
+      gems: context.gems,
+      prelude: context.prelude,
+      &block
+    )
+  end
+  def run_job_with_repeater(job, context)
+    repeat_params = { config: @config, larger_better: true, rest_on_average: :average }
+    if job.loop_count&.positive?
+      run_job_with_own_loop_count(job, context, repeat_params)
+    else
+      run_job_with_warmup_loop_count(job, context, repeat_params)
+    end
+  end
+  def run_job_with_own_loop_count(job, context, repeat_params)
+    loop_count = job.loop_count
+    result = BenchmarkDriver::Repeater.with_repeat(**repeat_params) {
+      run_benchmark(job, context: context)
+    }
+    [result, loop_count]
+  end
+  def run_job_with_warmup_loop_count(job, context, repeat_params)
+    loop_count = job.warmup_loop_count
+    repeater_value = [job.warmup_value, job.warmup_duration]
+    result = BenchmarkDriver::Repeater::RepeatResult.new(
+      value: repeater_value, all_values: [repeater_value]
+    )
+    [result, loop_count]
+  end
   def run_warmup(job, context:)
     start = Time.now

data/lib/d_heap.rb CHANGED

@@ -10,24 +10,80 @@ require "d_heap/version"
 # the nodes have _d_ children instead of 2.  This allows for "decrease priority"
 # operations to be performed more quickly with the tradeoff of slower delete
 # minimum.  Additionally, _d_-ary heaps can have better memory cache behavior than
-# binary heaps, allowing them to run more quickly in practice despite slower
+# binary heaps, allowing them to pop more quickly in practice despite slower
 # worst-case time complexity.
 #
+# Although _d_ can be configured when creating the heap, it's usually best to
+# keep the default value of 4, because d=4 gives the smallest coefficient for
+# <tt>(d + 1) log n / log d</tt> result.  As always, use benchmarks for your
+# particular use-case.
+#
+# @example Basic push, peek, and pop
+#     # create some example objects to place in our heap
+#     Task = Struct.new(:id, :time) do
+#       def to_f; time.to_f end
+#     end
+#     t1 = Task.new(1, Time.now + 5*60)
+#     t2 = Task.new(2, Time.now + 50)
+#     t3 = Task.new(3, Time.now + 60)
+#     t4 = Task.new(4, Time.now +  5)
+#
+#     # create the heap
+#     require "d_heap"
+#     heap = DHeap.new
+#
+#     # push with an explicit score (which might be extrinsic to the value)
+#     heap.push t1, t1.to_f
+#
+#     # the score will be implicitly cast with Float, so any object with #to_f works
+#     heap.push t2, t2
+#
+#     # if the object has an intrinsic score via #to_f, "<<" is the simplest API
+#     heap << t3 << t4
+#
+#     # pop returns the lowest scored item, and removes it from the heap
+#     heap.pop    # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
+#     heap.pop    # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
+#
+#     # peek returns the lowest scored item, without removing it from the heap
+#     heap.peek   # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
+#     heap.pop    # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
+#
+#     # pop_lte handles the common "h.pop if h.peek_score < max" pattern
+#     heap.pop_lte(Time.now + 65) # => nil
+#
+#     # the heap size can be inspected with size and empty?
+#     heap.empty? # => false
+#     heap.size   # => 1
+#     heap.pop    # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
+#     heap.empty? # => true
+#     heap.size   # => 0
+#
+#     # popping from an empty heap returns nil
+#     heap.pop    # => nil
+#
 class DHeap
-  alias deq       pop
-  alias enq       push
-  alias first     peek
-  alias pop_below pop_lt
-  alias length    size
-  alias count     size
-  # ruby 3.0+ (2.x can just use inherited initialize_clone)
-  if Object.instance_method(:initialize_clone).arity == -1
-    # @!visibility private
-    def initialize_clone(other, freeze: nil)
-      __init_clone__(other, freeze ? true : freeze)
-    end
+  alias deq        pop
+  alias shift      pop
+  alias next       pop
+  alias pop_all_lt pop_all_below
+  alias pop_below  pop_lt
+  alias enq        push
+  alias first      peek
+  alias length     size
+  alias count      size
+  # Initialize a _d_-ary min-heap.
+  #
+  # @param d [Integer] Number of children for each parent node.
+  #          Higher values generally speed up push but slow down pop.
+  #          If all pushes are popped, the default is probably best.
+  # @param capacity [Integer] initial capacity of the heap.
+  def initialize(d: DEFAULT_D, capacity: DEFAULT_CAPA) # rubocop:disable Naming/MethodParameterName
+    __init_without_kw__(d, capacity)
   end
   # Consumes the heap by popping each minimum value until it is empty.
@@ -35,13 +91,20 @@ class DHeap
   # If you want to iterate over the heap without consuming it, you will need to
   # first call +#dup+
   #
+  # @param with_scores [Boolean] if scores should also be yielded
+  #
   # @yieldparam value [Object] each value that would be popped
+  # @yieldparam score [Numeric] each value's score, if +with_scores+ is true
   #
   # @return [Enumerator] if no block is given
   # @return [nil] if a block is given
-  def each_pop
-    return to_enum(__method__) unless block_given?
-    yield pop until empty?
+  def each_pop(with_scores: false)
+    return to_enum(__method__, with_scores: with_scores) unless block_given?
+    if with_scores
+      yield(*pop_with_score) until empty?
+    else
+      yield pop until empty?
+    end
     nil
   end

data/lib/d_heap/benchmarks/implementations.rb CHANGED

@@ -101,14 +101,28 @@ module DHeap::Benchmarks
   end
   # a very simple pure ruby binary heap
-  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
   class RbHeap < ExamplePriorityQueue
     def <<(value)
       raise ArgumentError unless value
       @a.push(value)
-      # shift up
-      index = @a.size - 1
+      sift_up(@a.size - 1, value)
+    end
+    def pop
+      return if @a.empty?
+      popped = @a.first
+      value = @a.pop
+      last_index = @a.size - 1
+      return popped unless 0 <= last_index
+      sift_down(0, last_index, value)
+      popped
+    end
+    private
+    def sift_up(index, value = @a[index])
       while 0 < index # rubocop:disable Style/NumericPredicate
         parent_index = (index - 1) / 2
         parent_value = @a[parent_index]
@@ -117,43 +131,28 @@ module DHeap::Benchmarks
         index = parent_index
       end
       @a[index] = value
-      # dbg "__push__(%p)" % [value]
       # check_heap!(index)
     end
-    def pop
-      return if @a.empty?
-      popped = @a.first
-      value = @a.pop
-      last_index = @a.size - 1
+    def sift_down(index, last_index = @a.size - 1, value = @a[index])
       last_parent = (last_index - 1) / 2
-      return popped unless 0 <= last_index
-      # sift down from 0
-      index = 0
-      child_index = 1
       while index <= last_parent
-        child_value = @a[child_index]
-        # select min child
-        if child_index < last_index
-          other_child_index = child_index + 1
-          other_child_value = @a[other_child_index]
-          if other_child_value < child_value
-            child_value = other_child_value
-            child_index = other_child_index
-          end
-        end
+        child_index, child_value = select_min_child(index, last_index)
         break if value <= child_value
         @a[index] = child_value
         index = child_index
         child_index = index * 2 + 1
       end
       @a[index] = value
-      popped
     end
-    private
+    def select_min_child(index, last_index = @a.size - 1)
+      child_index = index * 2 + 1
+      if child_index < last_index && a[child_index + 1] < @a[child_index]
+        child_index += 1
+      end
+      [child_index, @a[child_index]]
+    end
     def check_heap!(idx)
       check_heap_up!(idx)
@@ -186,7 +185,6 @@ module DHeap::Benchmarks
     end
   end
-  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
   # minor adjustments to the "priority_queue_cxx" gem, to match the API
   class CppSTL
@@ -201,6 +199,10 @@ module DHeap::Benchmarks
       @q = FastContainers::PriorityQueue.new(:min)
     end
+    def empty?
+      @q.empty?
+    end
     def pop
       @q.pop
     rescue RuntimeError

data/lib/d_heap/benchmarks/rspec_matchers.rb CHANGED

@@ -39,30 +39,6 @@ module DHeap::Benchmarks
     matcher :perform_at_least do |expected|
       supports_block_expectations
-      def __debug__(name, caller_binding)
-        lvars = __debug_lvars__(caller_binding)
-        ivars = __debug_ivars__(caller_binding)
-        puts "%s, locals => %p, ivars => %p" % [name, lvars, ivars]
-      end
-      def __debug_lvars__(caller_binding)
-        caller_binding.local_variables.map {|lvar|
-          next if %i[type unit].include?(lvar)
-          next if (val = caller_binding.local_variable_get(lvar)).nil?
-          [lvar, val]
-        }.compact.to_h
-      end
-      def __debug_ivars__(caller_binding)
-        instance_variables.map {|ivar|
-          next if %i[@name @actual @expected_as_array @matcher_execution_context
-                     @chained_method_clauses @block_arg]
-            .include?(ivar)
-          next if (val = instance_variable_get(ivar)).nil?
-          [ivar, val]
-        }.compact.to_h
-      end
       %i[
         is_at_least
         running_at_most
@@ -70,7 +46,6 @@ module DHeap::Benchmarks
         warmup_at_most
       ].each do |type|
         chain type do |number|
-          # __debug__ "%s(%p)" % [type, number], binding
           reason, value = ___number_reason_and_value___
           if reason || value
             raise "Need to handle unit-less number first: %s(%p)" % [reason, value]
@@ -88,22 +63,15 @@ module DHeap::Benchmarks
         milliseconds
       ].each do |unit|
         chain unit do
-          # __debug__ unit, binding
           reason, value = ___number_reason_and_value___
           raise "No number was specified" unless reason && value
-          case reason
-          when :running_at_most;  apply_max_run unit
-          when :running_at_least; apply_min_run unit
-          when :warmup_at_most;   apply_warmup  unit
-          else raise "%s is incompatible with %s(%p)" % [unit, reason, value]
-          end
+          apply_number_to_reason(reason, value, unit)
           @number_for = @number_val = nil
         end
       end
       # TODO: let IPS set time to run instead of iterations to run
       chain :ips do
-        # __debug__ "ips", binding
         reason, value = ___number_reason_and_value___
         raise "'ips' unit is only for assertions" unless reason == :is_at_least
         raise "Already asserting %s ips" % [@expect_ips] if @expect_ips
@@ -115,7 +83,6 @@ module DHeap::Benchmarks
       # need to use method because "chain" can't take a block
       def times_faster_than(&other)
-        # __debug__ "times_faster_than"
         reason, value = ___number_reason_and_value___
         raise "'times_faster_than' is only for assertions" unless reason == :is_at_least
         raise "Already asserting %sx comparison" % [@expect_cmp] if @expect_cmp
@@ -174,7 +141,6 @@ module DHeap::Benchmarks
       chain :__convert_expected_to_ivars__ do
         @number_val ||= expected
         @number_for ||= :is_at_least if @number_val
-        # __debug__ "__convert_expected_to_ivars__", binding
         expected = nil
       end
       private :__convert_expected_to_ivars__
@@ -184,30 +150,43 @@ module DHeap::Benchmarks
         [@number_for, @number_val]
       end
-      def apply_min_run(unit)
+      def apply_number_to_reason(reason, value, unit)
+        normalized_value, normalized_unit = normalize_unit(unit)
+        case reason
+        when :running_at_most;  apply_max_run normalized_value, normalized_unit
+        when :running_at_least; apply_min_run normalized_value, normalized_unit
+        when :warmup_at_most;   apply_warmup  normalized_value, normalized_unit
+        else raise "%s is incompatible with %s(%p)" % [unit, reason, value]
+        end
+      end
+      def normalize_unit(unit)
+        case unit
+        when :seconds;      [Float(@number_val),          :seconds]
+        when :milliseconds; [Float(@number_val) / 1000.0, :seconds]
+        when :times;        [Integer(@number_val),        :times]
+        else raise "Invalid unit %s for %s(%p)" % [unit, reason, value]
+        end
+      end
+      def apply_min_run(value, unit)
         case unit
-        when :seconds;      @min_time = Float(@number_val)
-        when :milliseconds; @min_time = Float(@number_val) / 1000.0
-        when :times;        @min_iter = Integer(@number_val)
-        else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
+        when :seconds; @min_time = value
+        when :times;   @min_iter = value
         end
       end
-      def apply_max_run(unit)
+      def apply_max_run(value, unit)
         case unit
-        when :seconds;      @max_time = Float(@number_val)
-        when :milliseconds; @max_time = Float(@number_val) / 1000.0
-        when :times;        @max_iter = Integer(@number_val)
-        else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
+        when :seconds; @max_time = value
+        when :times;   @max_iter = value
         end
       end
-      def apply_warmup(unit)
+      def apply_warmup(value, unit)
         case unit
-        when :seconds;      @warmup_time = Float(@number_val)
-        when :milliseconds; @warmup_time = Float(@number_val) / 1000.0
-        when :times;        @warmup_iter = Integer(@number_val)
-        else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
+        when :seconds; @warmup_time = value
+        when :times;   @warmup_iter = value
         end
       end
@@ -224,7 +203,6 @@ module DHeap::Benchmarks
       def run_measurements
         puts header if loud?
-        # __debug__ "run_measurements", binding
         warmup
         take_measurements
       end