RubyGems - d_heap - Versions diffs - 0.2.0 → 0.5.0 - Mend

d_heap 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

checksums.yaml +4 -4
data/.github/workflows/main.yml +26 -0
data/.rubocop.yml +199 -0
data/CHANGELOG.md +59 -0
data/Gemfile +10 -2
data/Gemfile.lock +42 -5
data/README.md +392 -109
data/Rakefile +8 -2
data/benchmarks/perf.rb +29 -0
data/benchmarks/push_n.yml +31 -0
data/benchmarks/push_n_pop_n.yml +35 -0
data/benchmarks/push_pop.yml +27 -0
data/benchmarks/stackprof.rb +31 -0
data/bin/bench_n +7 -0
data/bin/benchmark-driver +29 -0
data/bin/benchmarks +10 -0
data/bin/console +1 -0
data/bin/profile +10 -0
data/bin/rubocop +29 -0
data/d_heap.gemspec +11 -6
data/docs/benchmarks-2.txt +75 -0
data/docs/benchmarks-mem.txt +39 -0
data/docs/benchmarks.txt +515 -0
data/docs/profile.txt +392 -0
data/ext/d_heap/d_heap.c +555 -225
data/ext/d_heap/d_heap.h +24 -48
data/ext/d_heap/extconf.rb +20 -0
data/lib/benchmark_driver/runner/ips_zero_fail.rb +120 -0
data/lib/d_heap.rb +40 -2
data/lib/d_heap/benchmarks.rb +112 -0
data/lib/d_heap/benchmarks/benchmarker.rb +116 -0
data/lib/d_heap/benchmarks/implementations.rb +222 -0
data/lib/d_heap/benchmarks/profiler.rb +71 -0
data/lib/d_heap/benchmarks/rspec_matchers.rb +374 -0
data/lib/d_heap/version.rb +4 -1
metadata +54 -3

data/lib/d_heap/benchmarks/implementations.rb ADDED

@@ -0,0 +1,222 @@
+# frozen_string_literal: true
+require "fc"
+module DHeap::Benchmarks
+  # Common base for the example priority queues: wraps a plain Array (@a) and
+  # supplies the bookkeeping methods that every implementation shares.
+  class ExamplePriorityQueue
+    attr_reader :a
+    # Optionally pre-fills the queue with +count+ values, each produced by
+    # calling the block with its index, and sorts them once (ascending).
+    def initialize(count = nil, &block)
+      @a = []
+      @a = count.times.map {|i| block.call(i) }.sort if count
+    end
+    # Removes every element.
+    def clear
+      @a.clear
+    end
+    # True when the queue holds no elements.
+    def empty?
+      @a.empty?
+    end
+    # Debug helper: prints the first element followed by the remaining elements
+    # grouped in pairs. Only active when LOG_LEVEL=debug; otherwise a no-op.
+    if ENV["LOG_LEVEL"] == "debug"
+      def dbg(msg)
+        head, *rest = @a
+        puts format("%20s: %p, %p", msg, head, rest.each_slice(2).to_a)
+      end
+    else
+      def dbg(msg); end
+    end
+  end
+  # The most naive approach: keep the array completely unsorted and scan for the
+  # minimum on every pop. Ironically, this is not the worst performer.
+  class FindMin < ExamplePriorityQueue
+    # O(1): append without doing any ordering work.
+    def <<(score)
+      raise ArgumentError unless score
+      @a << score
+    end
+    # O(n): find the minimum, then delete its last occurrence.
+    def pop
+      smallest = @a.min
+      return unless smallest
+      @a.delete_at(@a.rindex(smallest))
+      smallest
+    end
+  end
+  # Re-sorting after each insert: this is both naive and performs the worst.
+  class Sorting < ExamplePriorityQueue
+    # O(n log n): append the value, then re-sort the whole array.
+    def <<(score)
+      raise ArgumentError unless score
+      @a.push(score).sort!
+    end
+    # O(1): the array is ascending, so the minimum is at the front.
+    def pop
+      @a.shift
+    end
+  end
+  # A very simple example priority queue that is implemented with a sorted array.
+  #
+  # The array is kept in *descending* order, so the min value is always the last
+  # element. It uses Array#bsearch_index + Array#insert to push new values, and
+  # Array#pop to pop the min value.
+  class BSearch < ExamplePriorityQueue
+    # The base class sorts the initial values ascending; flip them so that the
+    # minimum sits at the tail, which is what #<< and #pop both rely on.
+    def initialize(count = nil, &block)
+      super
+      @a.reverse!
+    end
+    # Array#bsearch_index is O(log n)
+    # Array#insert        is O(n)
+    #
+    # So this should be O(n).
+    #
+    # In practice though, memcpy has a *very* small constant factor.
+    # And bsearch_index uses *exactly* (log n / log 2) comparisons.
+    def <<(score)
+      raise ArgumentError unless score
+      # first position whose element is smaller than score (descending order);
+      # when there is none, score is the new minimum and goes on the end.
+      index = @a.bsearch_index {|other| score > other } || @a.length
+      @a.insert(index, score)
+    end
+    # Array#pop is O(1). It updates length without changing capacity or contents.
+    #
+    # No comparisons are necessary.
+    #
+    # shift is usually also O(1) and could be used if it were sorted normally.
+    def pop
+      @a.pop
+    end
+  end
+  # A very simple pure ruby binary min-heap, stored in a flat array where the
+  # children of index i live at 2i + 1 and 2i + 2.
+  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
+  class RbHeap < ExamplePriorityQueue
+    # Appends value, then sifts it up toward the root until its parent is not
+    # larger. Raises ArgumentError for nil/false.
+    def <<(value)
+      raise ArgumentError unless value
+      @a.push(value)
+      # sift up
+      index = @a.size - 1
+      while 0 < index # rubocop:disable Style/NumericPredicate
+        parent_index = (index - 1) / 2
+        parent_value = @a[parent_index]
+        break if parent_value <= value
+        # move the larger parent down into the hole
+        @a[index] = parent_value
+        index = parent_index
+      end
+      @a[index] = value
+      # dbg "__push__(%p)" % [value]
+      # check_heap!(index)
+    end
+    # Removes and returns the minimum (the root), or nil when empty. The last
+    # element is taken off the array and sifted down from the root.
+    def pop
+      return if @a.empty?
+      popped = @a.first
+      value = @a.pop
+      last_index = @a.size - 1
+      last_parent = (last_index - 1) / 2
+      # the popped root was the only element: nothing left to re-heapify
+      return popped unless 0 <= last_index
+      # sift down from 0
+      index = 0
+      child_index = 1
+      while index <= last_parent
+        child_value = @a[child_index]
+        # select min child
+        if child_index < last_index
+          other_child_index = child_index + 1
+          other_child_value = @a[other_child_index]
+          if other_child_value < child_value
+            child_value = other_child_value
+            child_index = other_child_index
+          end
+        end
+        break if value <= child_value
+        @a[index] = child_value
+        index = child_index
+        child_index = index * 2 + 1
+      end
+      @a[index] = value
+      popped
+    end
+    private
+    # Debug helper: verifies the heap property from idx up to the root and
+    # through the whole subtree below idx. Raises on the first violation.
+    def check_heap!(idx)
+      check_heap_up!(idx)
+      check_heap_dn!(idx)
+    end
+    # compares index to its parent
+    def check_heap_at!(idx)
+      value = @a[idx]
+      unless idx <= 0
+        pidx = (idx - 1) / 2
+        pval = @a[pidx]
+        raise "@a[#{idx}] == #{value}, #{pval} > #{value}" if pval > value
+      end
+      value
+    end
+    # checks every ancestor of idx against its own parent
+    def check_heap_up!(idx)
+      return if idx <= 0
+      pidx = (idx - 1) / 2
+      check_heap_at!(pidx)
+      check_heap_up!(pidx)
+    end
+    # checks idx and, recursively, both of its children; stops past the end
+    def check_heap_dn!(idx)
+      return if @a.size <= idx
+      check_heap_at!(idx)
+      check_heap_dn!(idx * 2 + 1)
+      check_heap_dn!(idx * 2 + 2)
+    end
+  end
+  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
+  # Thin adapter around the "priority_queue_cxx" gem so that it offers the same
+  # duck-typed API (<<, pop, clear) as the other example queues.
+  class CppSTL
+    def initialize
+      clear
+    end
+    # The value doubles as its own priority.
+    def <<(value)
+      @q.push(value, value)
+    end
+    # Replaces the wrapped queue with a fresh, empty min-queue.
+    def clear
+      @q = FastContainers::PriorityQueue.new(:min)
+    end
+    # Pops the next value; a RuntimeError from the wrapped queue becomes nil.
+    def pop
+      begin
+        @q.pop
+      rescue RuntimeError
+        nil
+      end
+    end
+  end
+  # Different duck-typed priority queue implementations, each paired with the
+  # fixed-width label that is used when reporting on it.
+  IMPLEMENTATIONS = [
+    [" push and resort", Sorting],
+    ["  find min + del", FindMin],
+    ["bsearch + insert", BSearch],
+    ["ruby binary heap", RbHeap],
+    ["C++STL PriorityQ", CppSTL],
+    ["quaternary DHeap", DHeap],
+  ].map {|label, impl_class|
+    OpenStruct.new(name: label, klass: impl_class).freeze
+  }.freeze
+end

data/lib/d_heap/benchmarks/profiler.rb ADDED

@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+require "d_heap/benchmarks"
+require "ruby-prof"
+module DHeap::Benchmarks
+  # Profiles different implementations with different sizes
+  class Profiler
+    include Randomness
+    include Scenarios
+    # Default queue sizes to profile.
+    N_COUNTS = [
+      5,      # 1 + 4
+      1365,   # 1 + 4 + 16 + 64 + 256 + 1024
+      87_381, # 1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536
+    ].freeze
+    # Profiles every implementation at each queue size.
+    #
+    # queue_size:: a single size to profile (PROFILE_QUEUE_SIZE); when unset,
+    #              every size in N_COUNTS is profiled.
+    # iterations:: push/pop repetitions per profile (PROFILE_ITERATIONS).
+    #
+    # Both values may arrive as Strings from ENV, so both are coerced with
+    # Integer(), which raises ArgumentError on malformed input.
+    def call(
+      queue_size: ENV.fetch("PROFILE_QUEUE_SIZE", :unset),
+      iterations: ENV.fetch("PROFILE_ITERATIONS", 1_000_000)
+    )
+      DHeap::Benchmarks.puts_version_info("Profiling")
+      fill_random_vals
+      sizes = queue_size == :unset ? N_COUNTS : [Integer(queue_size)]
+      iterations = Integer(iterations)
+      sizes.each do |size|
+        profile_all(size, iterations)
+      end
+    end
+    # Prints a banner for this queue size, then profiles each implementation.
+    def profile_all(queue_size, iterations, io: $stdout)
+      io.puts <<~TEXT
+        ########################################################################
+        # Profile w/ N=#{queue_size} (i=#{iterations})
+        # (n.b. RubyProf & tracepoint can change relative performance.
+        #       A sampling profiler can provide more accurate relative metrics.
+        ########################################################################
+      TEXT
+      DHeap::Benchmarks::IMPLEMENTATIONS.each do |impl|
+        profile_one(impl, queue_size, iterations, io: io)
+      end
+    end
+    # TODO: move somewhere else...
+    # The re-sorting implementation is skipped for queues above 10k elements.
+    def skip_profiling?(queue_size, impl)
+      impl.klass == DHeap::Benchmarks::Sorting && 10_000 < queue_size
+    end
+    # Fills a fresh queue to queue_size, then profiles repeated push+pop on it.
+    def profile_one(impl, queue_size, iterations, io: $stdout)
+      return if skip_profiling?(queue_size, impl)
+      io.puts "Filling   #{impl.name} ---------------------------"
+      queue = impl.klass.new
+      push_n(queue, queue_size)
+      io.puts "Profiling #{impl.name} ---------------------------"
+      # forward io so that the report lands next to the headers printed above
+      profiling(io: io) do
+        repeated_push_pop(queue, iterations)
+      end
+    end
+    # Runs the block under RubyProf and writes a flat report to io.
+    def profiling(io: $stdout, &block)
+      # do the thing
+      result = RubyProf.profile(&block)
+      # report_the_thing
+      printer = RubyProf::FlatPrinter.new(result)
+      printer.print(io, min_percent: 1.0)
+      io.puts
+    end
+  end
+end

data/lib/d_heap/benchmarks/rspec_matchers.rb ADDED

@@ -0,0 +1,374 @@
+# frozen_string_literal: true
+require "d_heap/benchmarks"
+module DHeap::Benchmarks
+  # RSpec matchers for asserting benchmark performance: absolute throughput
+  # (ips) and/or relative speed against a comparison block.
+  module RSpecMatchers # rubocop:disable Metrics/ModuleLength
+    extend RSpec::Matchers::DSL
+    # Assert ips (iterations per second):
+    #
+    #     expect { ... }.to perform_at_least(1_000_000).ips
+    #        .running_at_least(10).times             # optional, defaults to  1
+    #        .running_at_least(10).seconds           # optional, defaults to  1s
+    #        .running_at_most(10_000_000).times      # optional, defaults to nil
+    #        .running_at_most(2).seconds             # optional, defaults to  2s
+    #        .warmup_at_most(1000).times             # optional, defaults to  1k
+    #        .warmup_at_most(0.100).seconds          # optional, defaults to  0.1s
+    #        .iterations_per_round(100)              # optional, defaults to  1
+    #        .and_at_least(1.1).times_faster_than { ... } # can also compare
+    #
+    # Assert comparison (and optionally runtime or ips):
+    #
+    #     expect { ... }.to perform_at_least(2.5).times_faster_than { ... }
+    #        .running_at_least(10).times             # optional, defaults to  1
+    #        .running_at_least(10).seconds           # optional, defaults to  1s
+    #        .running_at_most(10_000_000).times      # optional, defaults to nil
+    #        .running_at_most(2).seconds             # optional, defaults to  2s
+    #        .warmup_at_most(1000).times             # optional, defaults to  1k
+    #        .warmup_at_most(0.100).seconds          # optional, defaults to  0.1s
+    #        .iterations_per_round(100)              # optional, defaults to  1
+    #        .and_at_least(100).ips                  # can also assert ips
+    #
+    # n.b: Given a known constant number of iterations, run time and ips are both
+    # measuring the same underlying metric.
+    #
+    # rubocop:disable Metrics/BlockLength, Layout/SpaceAroundOperators
+    matcher :perform_at_least do |expected|
+      supports_block_expectations
+      # Debug helper: prints the caller's non-nil locals and this matcher's
+      # non-nil instance variables.
+      def __debug__(name, caller_binding)
+        lvars = __debug_lvars__(caller_binding)
+        ivars = __debug_ivars__(caller_binding)
+        puts "%s, locals => %p, ivars => %p" % [name, lvars, ivars]
+      end
+      def __debug_lvars__(caller_binding)
+        caller_binding.local_variables.map {|lvar|
+          next if %i[type unit].include?(lvar)
+          next if (val = caller_binding.local_variable_get(lvar)).nil?
+          [lvar, val]
+        }.compact.to_h
+      end
+      def __debug_ivars__(caller_binding)
+        instance_variables.map {|ivar|
+          next if %i[@name @actual @expected_as_array @matcher_execution_context
+                     @chained_method_clauses @block_arg]
+            .include?(ivar)
+          next if (val = instance_variable_get(ivar)).nil?
+          [ivar, val]
+        }.compact.to_h
+      end
+      # Number-taking chains: each stores a pending number together with the
+      # reason it was given. A following unit chain (times, seconds,
+      # milliseconds, ips) or times_faster_than then consumes it.
+      %i[
+        is_at_least
+        running_at_most
+        running_at_least
+        warmup_at_most
+      ].each do |type|
+        chain type do |number|
+          # __debug__ "%s(%p)" % [type, number], binding
+          reason, value = ___number_reason_and_value___
+          if reason || value
+            raise "Need to handle unit-less number first: %s(%p)" % [reason, value]
+          end
+          @number_for = type
+          @number_val = number
+        end
+      end
+      alias_method :and_at_least, :is_at_least
+      # Unit chains: apply the pending number to the matching run setting.
+      %i[
+        times
+        seconds
+        milliseconds
+      ].each do |unit|
+        chain unit do
+          # __debug__ unit, binding
+          reason, value = ___number_reason_and_value___
+          raise "No number was specified" unless reason && value
+          case reason
+          when :running_at_most;  apply_max_run unit
+          when :running_at_least; apply_min_run unit
+          when :warmup_at_most;   apply_warmup  unit
+          else raise "%s is incompatible with %s(%p)" % [unit, reason, value]
+          end
+          @number_for = @number_val = nil
+        end
+      end
+      # TODO: let IPS set time to run instead of iterations to run
+      chain :ips do
+        # __debug__ "ips", binding
+        reason, value = ___number_reason_and_value___
+        raise "'ips' unit is only for assertions" unless reason == :is_at_least
+        raise "Already asserting %s ips" % [@expect_ips] if @expect_ips
+        raise "Unknown assertion count" unless value
+        @expect_ips = Integer(value)
+        @number_for = @number_val = nil
+      end
+      # need to use method because "chain" can't take a block
+      def times_faster_than(&other)
+        # __debug__ "times_faster_than"
+        reason, value = ___number_reason_and_value___
+        raise "'times_faster_than' is only for assertions" unless reason == :is_at_least
+        raise "Already asserting %sx comparison" % [@expect_cmp] if @expect_cmp
+        raise ArgumentError, "must provide a proc" unless other
+        @expect_cmp = Float(value)
+        @cmp_proc = other
+        @number_for = @number_val = nil
+        self
+      end
+      chain :loudly  do @volume = :loud  end
+      chain :quietly do @volume = :quiet end
+      chain :volume do |volume|
+        raise "Invalid volume" unless %i[loud quiet].include?(volume)
+        @volume = volume
+      end
+      chain :iterations_per_round do |iterations|
+        if @iterations_per_round
+          raise "Already set iterations per round (%p)" % [@iterations_per_round]
+        end
+        @iterations_per_round = Integer(iterations)
+      end
+      match do |actual|
+        require "benchmark"
+        raise "Need to expect a proc or block" unless actual.respond_to?(:to_proc)
+        raise "Need a performance assertion" unless assertion?
+        @actual_proc = actual
+        prepare_for_measurement
+        if @max_iter && (@max_iter % @iterations_per_round) != 0
+          raise "Iterations per round (%p) must divide evenly by max iterations (%p)" % [
+            @iterations_per_round, @max_iter,
+          ]
+        end
+        run_measurements
+        cmp_okay? && ips_okay?
+      end
+      description do
+        [
+          @expect_cmp && cmp_okay_msg,
+          @expect_ips && ips_okay_msg,
+        ].join(", and ")
+      end
+      failure_message do
+        [
+          cmp_okay? ? nil : "expected to #{cmp_okay_msg} but #{cmp_fail_msg}",
+          ips_okay? ? nil : "expected to #{ips_okay_msg} but #{ips_fail_msg}",
+        ].compact.join(", and ")
+      end
+      private
+      # Seeds the pending number from the matcher's own argument, as an
+      # :is_at_least assertion, the first time any chain looks for one.
+      chain :__convert_expected_to_ivars__ do
+        @number_val ||= expected
+        @number_for ||= :is_at_least if @number_val
+        # __debug__ "__convert_expected_to_ivars__", binding
+        # clear the captured argument so that later calls cannot re-seed it
+        expected = nil
+      end
+      private :__convert_expected_to_ivars__
+      # Returns the pending [reason, number] pair (both nil when none pending).
+      def ___number_reason_and_value___
+        __convert_expected_to_ivars__
+        [@number_for, @number_val]
+      end
+      def apply_min_run(unit)
+        case unit
+        when :seconds;      @min_time = Float(@number_val)
+        when :milliseconds; @min_time = Float(@number_val) / 1000.0
+        when :times;        @min_iter = Integer(@number_val)
+        else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
+        end
+      end
+      def apply_max_run(unit)
+        case unit
+        when :seconds;      @max_time = Float(@number_val)
+        when :milliseconds; @max_time = Float(@number_val) / 1000.0
+        when :times;        @max_iter = Integer(@number_val)
+        else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
+        end
+      end
+      def apply_warmup(unit)
+        case unit
+        when :seconds;      @warmup_time = Float(@number_val)
+        when :milliseconds; @warmup_time = Float(@number_val) / 1000.0
+        when :times;        @warmup_iter = Integer(@number_val)
+        else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
+        end
+      end
+      # Fills in defaults for every setting that was not chained explicitly.
+      # @max_iter deliberately stays nil (no iteration cap) unless it was set.
+      def prepare_for_measurement
+        @volume               ||= ENV.fetch("RSPEC_BENCHMARK_VOLUME", :quiet).to_sym
+        @max_time             ||= 2
+        @min_time             ||= 1
+        @min_iter             ||= 1
+        @warmup_time          ||= 0.100
+        @warmup_iter          ||= 1000
+        @iterations_per_round ||= 1
+        nil
+      end
+      def run_measurements
+        puts header if loud?
+        # __debug__ "run_measurements", binding
+        warmup
+        take_measurements
+      end
+      def header
+        max_rounds = @max_iter && @max_iter / @iterations_per_round
+        [
+          "Warmup time %s, or iterations: %s" % [@warmup_time, @warmup_iter],
+          "Benchmark time (%s..%s) or iterations (%s..%s), max rounds: %p" % [
+            @min_time, @max_time, @min_iter, @max_iter, max_rounds,
+          ],
+          "%-10s %s" % ["", Benchmark::CAPTION],
+        ].join("\n")
+      end
+      # Runs a single warmup round of @warmup_iter iterations for each proc.
+      def warmup
+        return unless 0 < @warmup_time && 0 < @warmup_iter # rubocop:disable Style/NumericPredicate
+        args = [@warmup_iter, 0, @warmup_time, 1, @warmup_iter]
+        measure("warmup",     *args, &@actual_proc)
+        measure("warmup cmp", *args, &@cmp_proc) if @cmp_proc
+      end
+      def take_measurements
+        args = [@iterations_per_round, @min_time, @max_time, @min_iter, @max_iter]
+        @actual_tms = measure("actual", *args, &@actual_proc)
+        @cmp_tms    = measure("cmp",    *args, &@cmp_proc) if @cmp_proc
+        return unless @cmp_proc
+        # how many times faster?
+        @actual_cmp = @actual_tms.ips_real / @cmp_tms.ips_real
+        puts "Ran %0.3fx as fast as comparison" % [@actual_cmp] if loud?
+      end
+      def loud?; @volume == :loud end
+      def assertion?; !!(@expect_cmp || @expect_ips) end
+      def cmp_okay?; !@expect_cmp || @expect_cmp < @actual_cmp end
+      def ips_okay?; !@expect_ips || @expect_ips < @actual_tms.ips_real end
+      # Yields ipr to the block once per round, timing every round, until either
+      # the round cap (when there is one) or max_time has been reached.
+      def measure(name, ipr, *args)
+        measurements = TmsMeasurements.new(name, ipr, *args)
+        max_rounds = measurements.max_rounds
+        loop do
+          # without an iteration cap (nil), only max_time ends the run
+          break if max_rounds && max_rounds <= measurements.size
+          # GC.start(full_mark: true, immediate_sweep: true)
+          # GC.compact
+          measurements << Benchmark.measure { yield ipr }
+          # p measurements.real
+          break if measurements.max_time < measurements.real
+        end
+        log_measurement(name, measurements)
+        measurements
+      end
+      # rubocop:disable Metrics/AbcSize
+      def units_str(num)
+        if    num >= 10**12; "%7.3fT" % [num.to_f / 10**12]
+        elsif num >= 10** 9; "%7.3fB" % [num.to_f / 10** 9]
+        elsif num >= 10** 6; "%7.3fM" % [num.to_f / 10** 6]
+        elsif num >= 10** 3; "%7.3fk" % [num.to_f / 10** 3]
+        else                 "%7.3f" % [num.to_f]
+        end
+      end
+      # rubocop:enable Metrics/AbcSize
+      def log_measurement(name, measurements)
+        return unless loud?
+        puts "%-10s %s => %s ips (%d rounds)" % [
+          name,
+          measurements.tms.to_s.rstrip,
+          units_str(measurements.ips_real),
+          measurements.size,
+        ]
+      end
+      def cmp_okay_msg; "run %0.2fx faster"          % [@expect_cmp] end
+      def cmp_fail_msg; "was only %0.2fx as fast"    % [@actual_cmp] end
+      def ips_okay_msg; "run with %s ips"            % [units_str(@expect_ips)] end
+      def ips_fail_msg; "was only %s ips"            % [units_str(@actual_tms.ips_real)] end
+    end
+    # rubocop:enable Metrics/BlockLength, Layout/SpaceAroundOperators
+    # NOTE(review): no matcher named :perform is defined in this file, so this
+    # alias looks broken unless :perform comes from elsewhere -- confirm whether
+    # :perform_at_least was intended.
+    alias_matcher :perform_with, :perform
+  end
+  # Replicates a subset of the functionality in benchmark-ips: accumulates
+  # Benchmark::Tms entries (one per round) together with the running total of
+  # iterations, and derives iterations-per-second figures from them.
+  #
+  # TODO: merge this with benchmark-ips
+  # TODO: implement (or remove) min_time, min_iter
+  class TmsMeasurements
+    attr_reader :iterations_per_entry
+    attr_reader :iterations
+    attr_reader :min_time
+    attr_reader :max_time
+    attr_reader :min_iter
+    attr_reader :max_iter
+    # name::     label for this set of measurements
+    # ipe::      iterations performed by each recorded entry (round)
+    # min_time:: stored as a Float; not otherwise used by this class
+    # max_time:: stored as a Float; not otherwise used by this class
+    # min_iter:: stored as an Integer; not otherwise used by this class
+    # max_iter:: cap on total iterations, or nil for no cap
+    def initialize(name, ipe, min_time, max_time, min_iter, max_iter) # rubocop:disable Metrics/ParameterLists
+      @name = name
+      @iterations_per_entry = Integer(ipe)
+      @min_time = Float(min_time)
+      @max_time = Float(max_time)
+      @min_iter = Integer(min_iter)
+      # nil means "no iteration cap"; Integer(nil) would raise TypeError
+      @max_iter = max_iter && Integer(max_iter)
+      @entries = []
+      @sum = Benchmark::Tms.new
+      @iterations = 0
+    end
+    # number of recorded rounds (avoids copying the entries array)
+    def size; @entries.size end
+    # Records one round. Raises TypeError unless given a Benchmark::Tms, and
+    # IndexError once the cap of max_rounds entries has been reached.
+    def <<(tms)
+      raise TypeError, "not a #{Benchmark::Tms}" unless tms.is_a?(Benchmark::Tms)
+      raise IndexError, "full" if max_rounds && max_rounds <= size
+      @sum += tms
+      @iterations += @iterations_per_entry
+      @entries << tms
+      self
+    end
+    # a copy of the accumulated totals
+    def sum; @sum.dup end
+    alias tms sum
+    # a copy of the recorded rounds
+    def entries; @entries.dup end
+    def cstime; @sum.cstime end
+    def cutime; @sum.cutime end
+    def real;   @sum.real   end
+    def stime;  @sum.stime  end
+    def total;  @sum.total  end
+    def utime;  @sum.utime  end
+    # iterations per second, by wall-clock, total CPU, and user CPU time
+    def ips_real;  @iterations / real  end
+    def ips_total; @iterations / total end
+    def ips_utime; @iterations / utime end
+    # how many entries fit under max_iter, or nil when there is no cap
+    def max_rounds
+      @max_iter && @max_iter / @iterations_per_entry
+    end
+  end
+end