RubyGems - rperf - Versions diffs - 0.6.0 → 0.8.0 - Mend

rperf 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

data/lib/rperf.rb CHANGED Viewed

@@ -19,15 +19,19 @@ module Rperf
   @stat_start_mono = nil
   # Starts profiling.
-  # format: :pprof, :collapsed, or :text. nil = auto-detect from output extension
+  # format: :json, :pprof, :collapsed, or :text. nil = auto-detect from output extension
+  #   .json.gz   → json (rperf native, default)
   #   .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
   #   .txt       → text report (human/AI readable flat + cumulative table)
-  #   otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
-  def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true)
+  #   .pb.gz     → pprof protobuf (gzip compressed)
+  def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false)
     raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
     raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
     raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
     c_mode = mode == :cpu ? 0 : 1
+    unless signal.nil? || signal == false || signal.is_a?(Integer)
+      raise ArgumentError, "signal must be nil, false, or an Integer, got #{signal.inspect}"
+    end
     c_signal = signal.nil? ? -1 : (signal ? signal.to_i : 0)
     if c_signal > 0
       raise ArgumentError, "signal mode is only supported on Linux" unless RUBY_PLATFORM =~ /linux/
@@ -41,20 +45,33 @@ module Rperf
     @output = output
     @format = format
     @stat = stat
-    @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
+    if @stat
+      @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      @stat_start_times = Process.times
+    end
     @label_set_table = nil
     @label_set_index = nil
-    _c_start(frequency, c_mode, aggregate, c_signal)
+    _c_start(frequency, c_mode, aggregate, c_signal, defer)
     if block_given?
       begin
         yield
       ensure
-        return stop
+        result = stop
       end
+      result
     end
   end
+  # VM state integer → label value mapping.
+  # These values appear in the "Ruby" label key.
+  VM_STATE_LABELS = {
+    1 => ["%GVL", "blocked"],
+    2 => ["%GVL", "wait"],
+    3 => ["%GC",  "mark"],
+    4 => ["%GC",  "sweep"],
+  }.freeze
   def self.stop
     data = _c_stop
     return unless data
@@ -63,17 +80,19 @@ module Rperf
     # :aggregated_samples.  Build aggregated view so encoders always work.
     if data[:raw_samples] && !data[:aggregated_samples]
       merged = {}
-      data[:raw_samples].each do |frames, weight, thread_seq, label_set_id|
-        key = [frames, thread_seq || 0, label_set_id || 0]
+      data[:raw_samples].each do |frames, weight, thread_seq, label_set_id, vm_state|
+        key = [frames, thread_seq || 0, label_set_id || 0, vm_state || 0]
         if merged.key?(key)
           merged[key] += weight
         else
           merged[key] = weight
         end
       end
-      data[:aggregated_samples] = merged.map { |(frames, ts, lsi), w| [frames, w, ts, lsi] }
+      data[:aggregated_samples] = merged.map { |(frames, ts, lsi, vs), w| [frames, w, ts, lsi, vs] }
     end
+    merge_vm_state_labels!(data)
     print_stats(data) if @verbose
     print_stat(data) if @stat
@@ -95,7 +114,10 @@ module Rperf
   # This allows interval-based profiling where each snapshot covers only
   # the period since the last clear.
   def self.snapshot(clear: false)
-    _c_snapshot(clear)
+    data = _c_snapshot(clear)
+    return unless data
+    merge_vm_state_labels!(data)
+    data
   end
   # Label set management for per-context profiling.
@@ -130,6 +152,9 @@ module Rperf
   #
   # Values of nil remove that key. Existing labels are merged.
   def self.label(**kw, &block)
+    return yield if block && !_c_running?
+    return unless _c_running?
     _init_label_sets unless @label_set_table
     cur_id = _c_get_label
@@ -148,6 +173,38 @@ module Rperf
     end
   end
+  # Profiles the given block: activates timer sampling for the duration
+  # and optionally applies labels. Use with start(defer: true) to profile
+  # only specific sections of code.
+  #
+  #   Rperf.start(defer: true, mode: :wall)
+  #   Rperf.profile(endpoint: "/users") { handle_request }
+  #   data = Rperf.stop
+  #
+  # Nesting is supported: timer stays active until the outermost profile exits.
+  # Requires a block. Raises if profiling is not started.
+  def self.profile(**kw, &block)
+    raise ArgumentError, "Rperf.profile requires a block" unless block
+    raise RuntimeError, "Rperf is not started" unless _c_running?
+    _init_label_sets unless @label_set_table
+    cur_id = _c_get_label
+    cur_labels = @label_set_table[cur_id] || {}
+    new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
+    new_id = _intern_label_set(new_labels)
+    _c_set_label(new_id)
+    _c_profile_inc
+    begin
+      yield
+    ensure
+      _c_profile_dec
+      _c_set_label(cur_id)
+    end
+  end
   # Returns the current thread's labels as a Hash.
   # Returns an empty Hash if no labels are set or profiling is not running.
   def self.labels
@@ -157,11 +214,53 @@ module Rperf
   end
+  # Merge vm_state from C samples into label_sets as a "Ruby" label key.
+  # Mutates data in place: updates label_set_id on each sample, strips vm_state,
+  # and extends label_sets with new entries as needed.
+  def self.merge_vm_state_labels!(data)
+    samples_key = data[:aggregated_samples] ? :aggregated_samples : :raw_samples
+    samples = data[samples_key]
+    return unless samples
+    orig_label_sets = data[:label_sets]
+    label_sets = (orig_label_sets || [{}]).dup
+    mapping = {}  # [original_label_set_id, vm_state] => new_label_set_id
+    modified = false
+    samples.each do |sample|
+      vm_state = sample[4] || 0
+      next if vm_state == 0
+      next unless VM_STATE_LABELS.key?(vm_state)
+      label_set_id = sample[3] || 0
+      cache_key = [label_set_id, vm_state]
+      new_id = mapping[cache_key]
+      unless new_id
+        base = label_sets[label_set_id] || {}
+        key, value = VM_STATE_LABELS[vm_state]
+        new_ls = base.merge(key => value).freeze
+        new_id = label_sets.size
+        label_sets << new_ls
+        mapping[cache_key] = new_id
+      end
+      sample[3] = new_id
+      modified = true
+    end
+    # Strip vm_state (5th element) from all samples
+    samples.each { |s| s.pop if s.size > 4 }
+    # Only set label_sets if they were already present or we added vm_state labels
+    data[:label_sets] = label_sets if orig_label_sets || modified
+  end
+  private_class_method :merge_vm_state_labels!
   # Saves profiling data to a file.
-  # format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
+  # format: :json, :pprof, :collapsed, or :text. nil = auto-detect from path extension
+  #   .json.gz   → json (rperf native, default)
   #   .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
   #   .txt       → text report (human/AI readable flat + cumulative table)
-  #   otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
+  #   .pb.gz     → pprof protobuf (gzip compressed)
   def self.save(path, data, format: nil)
     write_data(path, data, format)
   end
@@ -173,17 +272,38 @@ module Rperf
       File.write(path, Collapsed.encode(data))
     when :text
       File.write(path, Text.encode(data))
+    when :json
+      require "json"
+      File.binwrite(path, gzip(JSON.generate(data.merge(rperf_version: VERSION))))
     else
       File.binwrite(path, gzip(PProf.encode(data)))
     end
   end
   private_class_method :write_data
+  # Load a profile saved by rperf record (.json.gz).
+  # Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
+  # Warns to stderr if the file was saved by a different rperf version.
+  def self.load(path)
+    compressed = File.binread(path)
+    raw = Zlib::GzipReader.new(StringIO.new(compressed)).read
+    require "json"
+    data = JSON.parse(raw, symbolize_names: true)
+    saved_version = data.delete(:rperf_version)
+    if saved_version && saved_version != VERSION
+      $stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
+    elsif saved_version.nil?
+      $stderr.puts "rperf: warning: file has no version info (may be from an older rperf)"
+    end
+    data
+  end
   def self.detect_format(path, format)
     return format.to_sym if format
     case path.to_s
-    when /\.collapsed\z/ then :collapsed
-    when /\.txt\z/       then :text
+    when /\.collapsed\z/   then :collapsed
+    when /\.txt\z/         then :text
+    when /\.json(\.gz)?\z/ then :json
     else :pprof
     end
   end
@@ -201,16 +321,15 @@ module Rperf
   def self.print_stats(data)
     count = data[:sampling_count] || 0
     total_ns = data[:sampling_time_ns] || 0
-    sample_count = data[:sampling_count] || 0
     mode = data[:mode] || :cpu
     frequency = data[:frequency] || 0
     total_ms = total_ns / 1_000_000.0
     avg_us = count > 0 ? total_ns / count / 1000.0 : 0.0
-    $stderr.puts "[rperf] mode=#{mode} frequency=#{frequency}Hz"
-    $stderr.puts "[rperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
-    $stderr.puts "[rperf] samples recorded: #{sample_count}"
+    $stderr.puts "[Rperf] mode=#{mode} frequency=#{frequency}Hz"
+    $stderr.puts "[Rperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
+    $stderr.puts "[Rperf] samples recorded: #{count}"
     print_top(data)
   end
@@ -259,13 +378,13 @@ module Rperf
   def self.print_top_table(kind, table, total_weight)
     top = table.sort_by { |_, w| -w }.first(TOP_N)
-    $stderr.puts "[rperf] top #{top.size} by #{kind}:"
+    $stderr.puts "[Rperf] top #{top.size} by #{kind}:"
     top.each do |key, weight|
       label, path = key
       ms = weight / 1_000_000.0
       pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
       loc = path.empty? ? "" : " (#{path})"
-      $stderr.puts format("[rperf]   %8.1fms %5.1f%%  %s%s", ms, pct, label, loc)
+      $stderr.puts format("[Rperf]   %8.1fms %5.1f%%  %s%s", ms, pct, label, loc)
     end
   end
@@ -282,8 +401,9 @@ module Rperf
     samples_raw = data[:aggregated_samples] || []
     real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
     times = Process.times
-    user_ns = (times.utime * 1_000_000_000).to_i
-    sys_ns = (times.stime * 1_000_000_000).to_i
+    start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
+    user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
+    sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
     command = ENV["RPERF_STAT_COMMAND"] || "(unknown)"
@@ -295,7 +415,7 @@ module Rperf
     $stderr.puts format("  %14s ms   real", format_ms(real_ns))
     if samples_raw.size > 0
-      breakdown, total_weight = compute_stat_breakdown(samples_raw)
+      breakdown, total_weight = compute_stat_breakdown(samples_raw, data[:label_sets])
       print_stat_breakdown(breakdown, total_weight)
       print_stat_runtime_info(data)
       print_stat_system_info
@@ -306,20 +426,25 @@ module Rperf
     $stderr.puts
   end
-  def self.compute_stat_breakdown(samples_raw)
+  def self.compute_stat_breakdown(samples_raw, label_sets)
     breakdown = Hash.new(0)
     total_weight = 0
-    samples_raw.each do |frames, weight|
+    samples_raw.each do |frames, weight, _thread_seq, label_set_id|
       total_weight += weight
-      leaf_label = frames.first&.last || ""
-      category = case leaf_label
-                 when "[GVL blocked]" then :gvl_blocked
-                 when "[GVL wait]"    then :gvl_wait
-                 when "[GC marking]"  then :gc_marking
-                 when "[GC sweeping]" then :gc_sweeping
-                 else :cpu_execution
-                 end
+      category = :cpu_execution
+      if label_sets && label_set_id && label_set_id > 0
+        ls = label_sets[label_set_id]
+        if ls
+          gvl = ls["%GVL"]
+          gc  = ls["%GC"]
+          if gvl == "blocked"    then category = :gvl_blocked
+          elsif gvl == "wait"    then category = :gvl_wait
+          elsif gc  == "mark"    then category = :gc_marking
+          elsif gc  == "sweep"   then category = :gc_sweeping
+          end
+        end
+      end
       breakdown[category] += weight
     end
@@ -331,11 +456,11 @@ module Rperf
     $stderr.puts
     [
-      [:cpu_execution, "CPU execution"],
-      [:gvl_blocked,   "[Ruby] GVL blocked (I/O, sleep)"],
-      [:gvl_wait,      "[Ruby] GVL wait (contention)"],
-      [:gc_marking,    "[Ruby] GC marking"],
-      [:gc_sweeping,   "[Ruby] GC sweeping"],
+      [:cpu_execution, "[Rperf] CPU execution"],
+      [:gvl_blocked,   "[Rperf] GVL blocked (I/O, sleep)"],
+      [:gvl_wait,      "[Rperf] GVL wait (contention)"],
+      [:gc_marking,    "[Rperf] GC marking"],
+      [:gc_sweeping,   "[Rperf] GC sweeping"],
     ].each do |key, label|
       w = breakdown[key]
       next if w == 0
@@ -346,20 +471,20 @@ module Rperf
   private_class_method :print_stat_breakdown
   def self.print_stat_runtime_info(data)
-    thread_count = data[:detected_thread_count] || 0
-    $stderr.puts STAT_LINE.call(format_integer(thread_count), "  ", "[Ruby] detected threads") if thread_count > 0
     gc = GC.stat
     $stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
-                                "[Ruby] GC time (%s count: %s minor, %s major)" % [
+                                "[Ruby ] GC time (%s count: %s minor, %s major)" % [
                                   format_integer(gc[:count]),
                                   format_integer(gc[:minor_gc_count]),
                                   format_integer(gc[:major_gc_count])])
-    $stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), "  ", "[Ruby] allocated objects")
-    $stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), "  ", "[Ruby] freed objects")
+    $stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), "  ", "[Ruby ] allocated objects")
+    $stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), "  ", "[Ruby ] freed objects")
+    thread_count = data[:detected_thread_count] || 0
+    $stderr.puts STAT_LINE.call(format_integer(thread_count), "  ", "[Ruby ] detected threads") if thread_count > 0
     if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
       yjit = RubyVM::YJIT.runtime_stats
       if yjit[:ratio_in_yjit]
-        $stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), "  ", "[Ruby] YJIT code execution ratio")
+        $stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), "  ", "[Ruby ] YJIT code execution ratio")
       end
     end
   end
@@ -369,12 +494,20 @@ module Rperf
     sys_stats = get_system_stats
     maxrss_kb = sys_stats[:maxrss_kb]
     if maxrss_kb
-      $stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
+      $stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS   ] peak memory (maxrss)")
+    end
+    if sys_stats[:page_faults_minor]
+      minor = sys_stats[:page_faults_minor]
+      major = sys_stats[:page_faults_major]
+      $stderr.puts STAT_LINE.call(
+        format_integer(minor + major), "  ",
+        "[OS   ] page faults (%s minor, %s major)" % [
+          format_integer(minor), format_integer(major)])
     end
     if sys_stats[:ctx_voluntary]
       $stderr.puts STAT_LINE.call(
         format_integer(sys_stats[:ctx_voluntary] + sys_stats[:ctx_involuntary]), "  ",
-        "[OS] context switches (%s voluntary, %s involuntary)" % [
+        "[OS   ] context switches (%s voluntary, %s involuntary)" % [
           format_integer(sys_stats[:ctx_voluntary]),
           format_integer(sys_stats[:ctx_involuntary])])
     end
@@ -383,7 +516,7 @@ module Rperf
       w = sys_stats[:io_write_bytes]
       $stderr.puts STAT_LINE.call(
         format_integer(((r + w) / 1024.0 / 1024.0).round), "MB",
-        "[OS] disk I/O (%s MB read, %s MB write)" % [
+        "[OS   ] disk I/O (%s MB read, %s MB write)" % [
           format_integer((r / 1024.0 / 1024.0).round),
           format_integer((w / 1024.0 / 1024.0).round)])
     end
@@ -404,6 +537,10 @@ module Rperf
     samples = data[:sampling_count] || samples_raw.size
     $stderr.puts format("  %d samples / %d triggers, %.1f%% profiler overhead",
                         samples, triggers, overhead_pct)
+    dropped = data[:dropped_samples] || 0
+    if dropped > 0
+      $stderr.puts format("  WARNING: %d samples dropped due to memory allocation failure", dropped)
+    end
   end
   private_class_method :print_stat_footer
@@ -445,6 +582,12 @@ module Rperf
       stats[:maxrss_kb] = rss if rss && rss > 0
     end
+    if File.readable?("/proc/self/stat")
+      fields = File.read("/proc/self/stat").split
+      stats[:page_faults_minor] = fields[9].to_i
+      stats[:page_faults_major] = fields[11].to_i
+    end
     if File.readable?("/proc/self/io")
       # Linux: parse /proc/self/io
       File.read("/proc/self/io").each_line do |line|
@@ -468,16 +611,22 @@ module Rperf
       raise ArgumentError, "RPERF_MODE must be 'cpu' or 'wall', got: #{_rperf_mode_str.inspect}"
     end
     _rperf_mode = _rperf_mode_str == "wall" ? :wall : :cpu
-    _rperf_format = ENV["RPERF_FORMAT"] ? ENV["RPERF_FORMAT"].to_sym : nil
+    _rperf_format = if ENV["RPERF_FORMAT"]
+                      unless %w[pprof collapsed text json].include?(ENV["RPERF_FORMAT"])
+                        raise ArgumentError, "RPERF_FORMAT must be one of pprof, collapsed, text, json, got: #{ENV["RPERF_FORMAT"].inspect}"
+                      end
+                      ENV["RPERF_FORMAT"].to_sym
+                    end
     _rperf_stat = ENV["RPERF_STAT"] == "1"
     _rperf_signal = case ENV["RPERF_SIGNAL"]
                     when nil then nil
                     when "false" then false
-                    else ENV["RPERF_SIGNAL"].to_i
+                    when /\A\d+\z/ then ENV["RPERF_SIGNAL"].to_i
+                    else raise ArgumentError, "RPERF_SIGNAL must be a signal number or 'false', got: #{ENV["RPERF_SIGNAL"].inspect}"
                     end
     _rperf_aggregate = ENV["RPERF_AGGREGATE"] != "0"
     _rperf_start_opts = { frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i, mode: _rperf_mode,
-                          output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.data"),
+                          output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz"),
                           verbose: ENV["RPERF_VERBOSE"] == "1",
                           format: _rperf_format,
                           stat: _rperf_stat,
@@ -660,7 +809,7 @@ module Rperf
         intern.("frequency: #{frequency}Hz"),
         intern.("ruby: #{RUBY_DESCRIPTION}"),
       ]
-      doc_url_idx = intern.("https://ko1.github.io/rperf/help.html")
+      doc_url_idx = intern.("https://ko1.github.io/rperf/docs/help.html")
       # field 6: string_table (repeated string)
       string_table.each do |s|

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rperf
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 0.8.0
 platform: ruby
 authors:
 - Koichi Sasada
@@ -38,8 +38,8 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '3.6'
 description: A safepoint-based sampling performance profiler that uses thread CPU
-  time deltas as weights to correct safepoint bias. Outputs pprof, collapsed stacks,
-  or text report.
+  time deltas as weights to correct safepoint bias. Outputs JSON, pprof, collapsed
+  stacks, or text report.
 executables:
 - rperf
 extensions:
@@ -48,14 +48,16 @@ extra_rdoc_files: []
 files:
 - README.md
 - docs/help.md
+- docs/logo.svg
 - exe/rperf
 - ext/rperf/extconf.rb
 - ext/rperf/rperf.c
 - lib/rperf.rb
 - lib/rperf/active_job.rb
-- lib/rperf/middleware.rb
+- lib/rperf/rack.rb
 - lib/rperf/sidekiq.rb
 - lib/rperf/version.rb
+- lib/rperf/viewer.rb
 homepage: https://github.com/ko1/rperf
 licenses:
 - MIT
@@ -74,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 4.0.3
+rubygems_version: 4.0.6
 specification_version: 4
 summary: Safepoint-based sampling performance profiler for Ruby
 test_files: []

data/lib/rperf/middleware.rb DELETED Viewed

@@ -1,15 +0,0 @@
-require "rperf"
-class Rperf::Middleware
-  def initialize(app, label_key: :endpoint)
-    @app = app
-    @label_key = label_key
-  end
-  def call(env)
-    endpoint = "#{env["REQUEST_METHOD"]} #{env["PATH_INFO"]}"
-    Rperf.label(@label_key => endpoint) do
-      @app.call(env)
-    end
-  end
-end