RubyGems - d_heap - Versions diffs - 0.2.1 → 0.6.0 - Mend

d_heap 0.2.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

checksums.yaml +4 -4
data/.github/workflows/main.yml +26 -0
data/.gitignore +1 -0
data/.rubocop.yml +199 -0
data/.yardopts +10 -0
data/CHANGELOG.md +72 -0
data/Gemfile +12 -0
data/Gemfile.lock +47 -1
data/N +7 -0
data/README.md +362 -125
data/Rakefile +8 -2
data/benchmarks/perf.rb +29 -0
data/benchmarks/push_n.yml +35 -0
data/benchmarks/push_n_pop_n.yml +52 -0
data/benchmarks/push_pop.yml +32 -0
data/benchmarks/stackprof.rb +31 -0
data/bin/bench_charts +13 -0
data/bin/bench_n +7 -0
data/bin/benchmark-driver +29 -0
data/bin/benchmarks +10 -0
data/bin/console +1 -0
data/bin/profile +10 -0
data/bin/rubocop +29 -0
data/d_heap.gemspec +12 -7
data/docs/benchmarks-2.txt +75 -0
data/docs/benchmarks-mem.txt +39 -0
data/docs/benchmarks.txt +515 -0
data/docs/profile.txt +392 -0
data/ext/d_heap/d_heap.c +851 -226
data/ext/d_heap/extconf.rb +19 -0
data/images/push_n.png +0 -0
data/images/push_n_pop_n.png +0 -0
data/images/push_pop.png +0 -0
data/images/wikipedia-min-heap.png +0 -0
data/lib/benchmark_driver/runner/ips_zero_fail.rb +158 -0
data/lib/d_heap.rb +103 -2
data/lib/d_heap/benchmarks.rb +112 -0
data/lib/d_heap/benchmarks/benchmarker.rb +116 -0
data/lib/d_heap/benchmarks/implementations.rb +224 -0
data/lib/d_heap/benchmarks/profiler.rb +71 -0
data/lib/d_heap/benchmarks/rspec_matchers.rb +352 -0
data/lib/d_heap/version.rb +4 -1
metadata +63 -6
data/ext/d_heap/d_heap.h +0 -74

data/ext/d_heap/extconf.rb CHANGED

@@ -1,3 +1,22 @@
+# frozen_string_literal: true
 require "mkmf"
+# For testing in CI (because I don't otherwise have easy access to Mac OS):
+# $CFLAGS << " -D__D_HEAP_DEBUG" if /darwin/ =~ RUBY_PLATFORM
+# $CFLAGS << " -debug inline-debug-info "
+# $CFLAGS << " -g -ginline-points "
+# $CFLAGS << " -fno-omit-frame-pointer "
+if enable_config("debug")
+  CONFIG["warnflags"] << " -Werror -Wpedantic "
+end
+have_func "rb_gc_mark_movable" # since ruby-2.7
+check_sizeof("long")
+check_sizeof("unsigned long long")
+check_sizeof("long double")
+have_macro("LDBL_MANT_DIG", "float.h")
 create_makefile("d_heap/d_heap")

data/images/push_n.png ADDED

Binary file

data/images/push_n_pop_n.png ADDED

Binary file

data/images/push_pop.png ADDED

Binary file

data/images/wikipedia-min-heap.png ADDED

Binary file

data/lib/benchmark_driver/runner/ips_zero_fail.rb ADDED

@@ -0,0 +1,158 @@
+# frozen_string_literal: true
+require "English" # $CHILD_STATUS
+require "timeout" # Timeout::Error
+require "benchmark_driver"
+# monkey-patch to convert minuscule values to 0.0
+class BenchmarkDriver::Output::Compare
+  # monkey-patch to convert minuscule values to 0.0
+  module MinisculeToZero
+    def humanize(value, width = 10)
+      value <= 0.0.next_float.next_float ? 0.0 : super(value, width)
+    end
+  end
+  prepend MinisculeToZero
+end
+# A simple patch to let slow benchmarks time out during warmup without raising:
+# the timeout is rescued and recorded as a near-zero result instead.
+class BenchmarkDriver::Runner::IpsZeroFail < BenchmarkDriver::Runner::Ips
+  METRIC = BenchmarkDriver::Runner::Ips::METRIC
+  # always run at least once
+  class Job < BenchmarkDriver::DefaultJob
+    attr_accessor :warmup_value, :warmup_duration, :warmup_loop_count
+    def add_warmup_attrs(value, duration, loop_count)
+      self.warmup_value = value
+      self.warmup_duration = duration
+      self.warmup_loop_count = loop_count
+    end
+  end
+  # BenchmarkDriver::Runner looks for this class
+  JobParser = BenchmarkDriver::DefaultJobParser.for(klass: Job, metrics: [METRIC])
+  # This method is dynamically called by `BenchmarkDriver::JobRunner.run`
+  # @param [Array<BenchmarkDriver::DefaultJob>] jobs
+  def run(jobs)
+    jobs = run_all_jobs_warmup(jobs)
+    run_all_jobs_benchmarks(jobs)
+  end
+  def run_all_jobs_warmup(jobs)
+    return jobs if jobs.all?(&:loop_count)
+    @output.with_warmup do
+      jobs.map! {|job|
+        # skip warmup if loop_count is set
+        job.loop_count ? job : output_warmup_and_config_job(job)
+      }
+    end
+  end
+  def run_all_jobs_benchmarks(jobs)
+    @output.with_benchmark do
+      jobs.each do |job|
+        @output.with_job(name: job.name) do
+          job.runnable_contexts(@contexts).each do |context|
+            run_and_report_job(job, context)
+          end
+        end
+      end
+    end
+  end
+  def output_warmup_and_config_job(job)
+    @output.with_job(name: job.name) do
+      context = job.runnable_contexts(@contexts).first
+      value, duration, warmup_loop_count = run_and_report_warmup_job(job, context)
+      loop_count = (warmup_loop_count.to_f * @config.run_duration / duration).floor
+      Job.new(**job.to_h.merge(loop_count: loop_count))
+        .tap {|j| j.add_warmup_attrs(value, duration, warmup_loop_count) }
+    end
+  end
+  def run_and_report_warmup_job(job, context)
+    duration, loop_count = run_warmup(job, context: context)
+    value, duration = value_duration(duration: duration, loop_count: loop_count)
+    output_with_context(context) do
+      @output.report(
+        values: {metric => value}, duration: duration, loop_count: loop_count
+      )
+    end
+    [value, duration, loop_count]
+  end
+  def run_and_report_job(job, context)
+    result, loop_count = run_job_with_repeater(job, context)
+    value, duration = result.value
+    output_with_context(context) do
+      @output.report(
+        values: { metric => value },
+        all_values: { metric => result.all_values },
+        duration: duration,
+        loop_count: loop_count,
+      )
+    end
+  end
+  def output_with_context(context, &block)
+    @output.with_context(
+      name: context.name,
+      executable: context.executable,
+      gems: context.gems,
+      prelude: context.prelude,
+      &block
+    )
+  end
+  def run_job_with_repeater(job, context)
+    repeat_params = { config: @config, larger_better: true, rest_on_average: :average }
+    if job.loop_count&.positive?
+      run_job_with_own_loop_count(job, context, repeat_params)
+    else
+      run_job_with_warmup_loop_count(job, context, repeat_params)
+    end
+  end
+  def run_job_with_own_loop_count(job, context, repeat_params)
+    loop_count = job.loop_count
+    result = BenchmarkDriver::Repeater.with_repeat(**repeat_params) {
+      run_benchmark(job, context: context)
+    }
+    [result, loop_count]
+  end
+  def run_job_with_warmup_loop_count(job, context, repeat_params)
+    loop_count = job.warmup_loop_count
+    repeater_value = [job.warmup_value, job.warmup_duration]
+    result = BenchmarkDriver::Repeater::RepeatResult.new(
+      value: repeater_value, all_values: [repeater_value]
+    )
+    [result, loop_count]
+  end
+  def run_warmup(job, context:)
+    start = Time.now
+    super(job, context: context)
+  rescue Timeout::Error
+    [Time.now - start, 0.0.next_float]
+  end
+  def execute(*args, exception: true)
+    super
+  rescue RuntimeError => ex
+    if args.include?("timeout") && $CHILD_STATUS&.exitstatus == 124
+      raise Timeout::Error, ex.message
+    end
+    raise ex
+  end
+end

data/lib/d_heap.rb CHANGED

@@ -1,10 +1,111 @@
+# frozen_string_literal: true
 require "d_heap/d_heap"
 require "d_heap/version"
+# A fast _d_-ary heap implementation for ruby, useful in priority queues and graph
+# algorithms.
+#
+# The _d_-ary heap data structure is a generalization of the binary heap, in which
+# the nodes have _d_ children instead of 2.  This allows for "decrease priority"
+# operations to be performed more quickly with the tradeoff of slower delete
+# minimum.  Additionally, _d_-ary heaps can have better memory cache behavior than
+# binary heaps, allowing them to pop more quickly in practice despite slower
+# worst-case time complexity.
+#
+# Although _d_ can be configured when creating the heap, it's usually best to
+# keep the default value of 4, because d=4 gives the smallest value of
+# <tt>(d + 1) log n / log d</tt>.  As always, use benchmarks for your
+# particular use-case.
+#
+# @example Basic push, peek, and pop
+#     # create some example objects to place in our heap
+#     Task = Struct.new(:id, :time) do
+#       def to_f; time.to_f end
+#     end
+#     t1 = Task.new(1, Time.now + 5*60)
+#     t2 = Task.new(2, Time.now + 50)
+#     t3 = Task.new(3, Time.now + 60)
+#     t4 = Task.new(4, Time.now +  5)
+#
+#     # create the heap
+#     require "d_heap"
+#     heap = DHeap.new
+#
+#     # push with an explicit score (which might be extrinsic to the value)
+#     heap.push t1, t1.to_f
+#
+#     # the score will be implicitly cast with Float, so any object with #to_f works
+#     heap.push t2, t2
+#
+#     # if the object has an intrinsic score via #to_f, "<<" is the simplest API
+#     heap << t3 << t4
+#
+#     # pop returns the lowest scored item, and removes it from the heap
+#     heap.pop    # => #<struct Task id=4, time=2021-01-17 17:02:22.5574 -0500>
+#     heap.pop    # => #<struct Task id=2, time=2021-01-17 17:03:07.5574 -0500>
+#
+#     # peek returns the lowest scored item, without removing it from the heap
+#     heap.peek   # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
+#     heap.pop    # => #<struct Task id=3, time=2021-01-17 17:03:17.5574 -0500>
+#
+#     # pop_lte handles the common "h.pop if h.peek_score <= max" pattern
+#     heap.pop_lte(Time.now + 65) # => nil
+#
+#     # the heap size can be inspected with size and empty?
+#     heap.empty? # => false
+#     heap.size   # => 1
+#     heap.pop    # => #<struct Task id=1, time=2021-01-17 17:07:17.5574 -0500>
+#     heap.empty? # => true
+#     heap.size   # => 0
+#
+#     # popping from an empty heap returns nil
+#     heap.pop    # => nil
+#
 class DHeap
+  alias deq        pop
+  alias shift      pop
+  alias next       pop
+  alias pop_all_lt pop_all_below
+  alias pop_below  pop_lt
+  alias enq        push
+  alias first      peek
+  alias length     size
+  alias count      size
+  # Initialize a _d_-ary min-heap.
+  #
+  # @param d [Integer] Number of children for each parent node.
+  #          Higher values generally speed up push but slow down pop.
+  #          If all pushes are popped, the default is probably best.
+  # @param capacity [Integer] initial capacity of the heap.
+  def initialize(d: DEFAULT_D, capacity: DEFAULT_CAPA) # rubocop:disable Naming/MethodParameterName
+    __init_without_kw__(d, capacity)
+  end
-  def initialize_copy(other)
-    raise NotImplementedError, "initialize_copy should deep copy array"
+  # Consumes the heap by popping each minimum value until it is empty.
+  #
+  # If you want to iterate over the heap without consuming it, you will need to
+  # first call +#dup+
+  #
+  # @param with_scores [Boolean] if scores should also be yielded
+  #
+  # @yieldparam value [Object] each value that would be popped
+  # @yieldparam score [Numeric] each value's score, if +with_scores+ is true
+  #
+  # @return [Enumerator] if no block is given
+  # @return [nil] if a block is given
+  def each_pop(with_scores: false)
+    return to_enum(__method__, with_scores: with_scores) unless block_given?
+    if with_scores
+      yield(*pop_with_score) until empty?
+    else
+      yield pop until empty?
+    end
+    nil
   end
 end

data/lib/d_heap/benchmarks.rb ADDED

@@ -0,0 +1,112 @@
+# frozen_string_literal: true
+require "d_heap"
+require "ostruct"
+# Different benchmark scenarios and implementations to benchmark
+module DHeap::Benchmarks
+  def self.puts_version_info(type = "Benchmark", io = $stdout)
+    io.puts "#{type} run at %s" % [Time.now]
+    io.puts "ruby v%s, DHeap v%s" % [RUBY_VERSION, DHeap::VERSION]
+    io.puts
+  end
+  # rubocop:disable Style/NumericPredicate
+  # moves "rand" outside the benchmarked code, to avoid measuring that too.
+  module Randomness
+    def default_randomness_size; 1_000_000 end
+    def fill_random_vals(target_size = default_randomness_size, io: $stdout)
+      @dheap_bm_random_vals ||= []
+      count = target_size - @dheap_bm_random_vals.length
+      return 0 if count <= 0
+      millions = (count / 1_000_000.0).round(3)
+      io&.puts "~~~~~~ filling @dheap_bm_random_vals with #{millions}M ~~~~~~"
+      io&.flush
+      count.times do @dheap_bm_random_vals << rand(0..10_000) end
+      @dheap_bm_random_len = @dheap_bm_random_vals.length
+      @dheap_bm_random_idx = (((@dheap_bm_random_idx || -1) + 1) % @dheap_bm_random_len)
+      nil
+    end
+    def random_val
+      @dheap_bm_random_vals.fetch(
+        @dheap_bm_random_idx = ((@dheap_bm_random_idx + 1) % @dheap_bm_random_len)
+      )
+    end
+  end
+  # different scenarios to be benchmarked or profiled
+  module Scenarios
+    def push_n_multiple_queues(count, *queues)
+      while 0 < count
+        value = @dheap_bm_random_vals.fetch(
+          @dheap_bm_random_idx = ((@dheap_bm_random_idx + 1) % @dheap_bm_random_len)
+        )
+        queues.each do |queue|
+          queue << value
+        end
+        count -= 1
+      end
+    end
+    def push_n(queue, count)
+      while 0 < count
+        queue << @dheap_bm_random_vals.fetch(
+          @dheap_bm_random_idx = ((@dheap_bm_random_idx + 1) % @dheap_bm_random_len)
+        )
+        count -= 1
+      end
+    end
+    def push_n_then_pop_n(queue, count) # rubocop:disable Metrics/MethodLength
+      i = 0
+      while i < count
+        queue << @dheap_bm_random_vals.fetch(
+          @dheap_bm_random_idx = ((@dheap_bm_random_idx + 1) % @dheap_bm_random_len)
+        )
+        i += 1
+      end
+      while 0 < i
+        queue.pop
+        i -= 1
+      end
+    end
+    def repeated_push_pop(queue, count)
+      while 0 < count
+        queue << @dheap_bm_random_vals.fetch(
+          @dheap_bm_random_idx = ((@dheap_bm_random_idx + 1) % @dheap_bm_random_len)
+        )
+        queue.pop
+        count -= 1
+      end
+    end
+  end
+  include Randomness
+  include Scenarios
+  def initq(klass, count = 0, clear: false)
+    queue = klass.new
+    while 0 < count
+      queue << @dheap_bm_random_vals.fetch(
+        @dheap_bm_random_idx = ((@dheap_bm_random_idx + 1) % @dheap_bm_random_len)
+      )
+      count -= 1
+    end
+    queue.clear if clear
+    queue
+  end
+  # rubocop:enable Style/NumericPredicate
+  require "d_heap/benchmarks/implementations"
+end

data/lib/d_heap/benchmarks/benchmarker.rb ADDED

@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+require "d_heap/benchmarks"
+require "benchmark_driver"
+require "shellwords"
+require "English"
+module DHeap::Benchmarks
+  # Benchmarks different implementations with different sizes
+  class Benchmarker
+    include Randomness
+    include Scenarios
+    N_COUNTS = [
+      5,      # 1 + 4
+      21,     # 1 + 4 + 16
+      85,     # 1 + 4 + 16 + 64
+      341,    # 1 + 4 + 16 + 64 + 256
+      1365,   # 1 + 4 + 16 + 64 + 256 + 1024
+      5461,   # 1 + 4 + 16 + 64 + 256 + 1024 + 4096
+      21_845, # 1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384
+      87_381, # 1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536
+    ].freeze
+    attr_reader :time
+    attr_reader :iterations_for_push_pop
+    attr_reader :io
+    def initialize(
+      time: Integer(ENV.fetch("BENCHMARK_TIME", 10)),
+      iterations_for_push_pop: 10_000,
+      io: $stdout
+    )
+      @time = time
+      @iterations_for_push_pop = Integer(iterations_for_push_pop)
+      @io = io
+    end
+    def call(queue_size: ENV.fetch("BENCHMARK_QUEUE_SIZE", :unset))
+      DHeap::Benchmarks.puts_version_info("Benchmarking")
+      sizes = (queue_size == :unset) ? N_COUNTS : [Integer(queue_size)]
+      sizes.each do |size|
+        benchmark_size(size)
+      end
+    end
+    def benchmark_size(size)
+      sep "#", "Benchmarks with N=#{size} (t=#{time}sec/benchmark)", big: true
+      io.puts
+      benchmark_push_n            size
+      benchmark_push_n_then_pop_n size
+      benchmark_repeated_push_pop size
+    end
+    def benchmark_push_n(queue_size)
+      benchmarking("push N", "push_n", queue_size)
+    end
+    def benchmark_push_n_then_pop_n(queue_size)
+      benchmarking("push N then pop N", "push_n_pop_n", queue_size)
+    end
+    def benchmark_repeated_push_pop(queue_size)
+      benchmarking(
+        "Push/pop with pre-filled queue (size=N)", "push_pop", queue_size
+      )
+    end
+    private
+    # TODO: move somewhere else...
+    def skip_profiling?(queue_size, impl)
+      impl.klass == DHeap::Benchmarks::PushAndResort && 10_000 < queue_size
+    end
+    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
+    def benchmarking(name, file, size)
+      Bundler.with_unbundled_env do
+        sep "==", "#{name} (N=#{size})"
+        cmd = %W[
+          bin/benchmark-driver
+          --bundler
+          --run-duration 6
+          --timeout 15
+          --runner ips_zero_fail
+          benchmarks/#{file}.yml
+        ]
+        if file == "push_n"
+          cmd << "--filter" << /dheap|\bstl\b|\bbsearch\b|\brb_heap\b/.to_s
+        end
+        env = ENV.to_h.merge(
+          "BENCH_N" => size.to_s,
+          "RUBYLIB" => File.expand_path("../..", __dir__),
+        )
+        system(env, *cmd)
+      end
+    end
+    def sep(sep, msg = "", width: 80, big: false)
+      txt = String.new
+      txt += "#{sep * (width / sep.length)}\n" if big
+      txt += sep
+      txt += " #{msg}" if msg && !msg.empty?
+      txt += " " unless big
+      txt += sep * ((width - txt.length) / sep.length) unless big
+      txt += "\n"
+      txt += "#{sep * (width / sep.length)}\n" if big
+      io.print txt
+    end
+    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
+  end
+end