sperf 0.1.0 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aecc23432b8dba72018c524b1fb6653a0c83a44e9e8b2c9af17c87cde20b648b
4
- data.tar.gz: fbdf1dddcb5b8fef86e358f315a801478bfcad7768d8f28cbcf82726c67d529c
3
+ metadata.gz: a2d6adf217328a161001097ef85fd62f9716a70089c254f6a831f2d8135d7a52
4
+ data.tar.gz: 92b0aefb1cd263fb86c84553b76cbf1f3124eff5c24e2ceb7d8e15e36f7d5012
5
5
  SHA512:
6
- metadata.gz: ff265b78dd2e237dac4d07a32b7459e07e73d77504347556945878e4addc1ccf89c47c409824591ff800cbe5605a863ae536b200c7f7c5eca5d06bbbd4ca6c05
7
- data.tar.gz: 950c3860737f37a9d57d15dae9db259635ebe94494096ad06c0489ab6c028827ea2b24d7e0046462e3b57ea546173993b2ed3e0c910bc677828b23b01ed718e9
6
+ metadata.gz: ee0ba192454f4794b1f9c064ea743978dde3f8eb5588c2ed590b0cb90604e26aa373223e4d7eaf3b8f3ed56b7a8564a79aef7cc40b9fd806b528ac8482a22dc2
7
+ data.tar.gz: e3519944bab031f83418893bdbf122ab0e308f1d72246dfca5315128365749c085fa3e1aa99f1fb75edd6d2fd2e7c3014fc86c996348420f69b32c80fb1a4b22
data/README.md CHANGED
@@ -60,6 +60,8 @@ Run `sperf help` for full documentation (all options, output interpretation, dia
60
60
 
61
61
  ## Subcommands
62
62
 
63
+ Inspired by Linux `perf` — familiar subcommand interface for profiling workflows.
64
+
63
65
  | Command | Description |
64
66
  |---------|-------------|
65
67
  | `sperf record` | Profile a command and save to file |
@@ -79,8 +81,8 @@ Ruby's sampling profilers collect stack traces at **safepoints**, not at the exa
79
81
  sperf uses **time deltas as sample weights**:
80
82
 
81
83
  ```
82
- Timer thread (pthread) VM thread (postponed job)
83
- ───────────────────── ────────────────────────
84
+ Timer (signal or thread) VM thread (postponed job)
85
+ ──────────────────────── ────────────────────────
84
86
  every 1/frequency sec: at next safepoint:
85
87
  rb_postponed_job_trigger() → sperf_sample_job()
86
88
  time_now = read_clock()
@@ -88,6 +90,9 @@ Timer thread (pthread) VM thread (postponed job)
88
90
  record(backtrace, weight)
89
91
  ```
90
92
 
93
+ On Linux, the timer uses `timer_create` + signal delivery (no extra thread).
94
+ On other platforms, a dedicated pthread with `nanosleep` is used.
95
+
91
96
  If a safepoint is delayed, the sample carries proportionally more weight. The total weight equals the total time, accurately distributed across call stacks.
92
97
 
93
98
  ### Modes
@@ -110,6 +115,24 @@ sperf hooks GVL and GC events to attribute non-CPU time:
110
115
  | `[GC marking]` | Time in GC mark phase |
111
116
  | `[GC sweeping]` | Time in GC sweep phase |
112
117
 
118
+ ## Pros & Cons
119
+
120
+ ### Pros
121
+
122
+ - **Safepoint-based, but accurate**: Unlike signal-based profilers (e.g., stackprof), sperf samples at safepoints. Safepoint sampling is safer — no async-signal-safety constraints, so backtraces and VM state (GC phase, GVL ownership) can be inspected reliably. The downside is less precise sampling timing, but sperf compensates by using actual time deltas as sample weights — so the profiling results faithfully reflect where time is actually spent.
123
+ - **GVL & GC visibility** (wall mode): Attributes off-GVL time, GVL contention, and GC phases to the responsible call stacks with synthetic frames.
124
+ - **Low overhead**: No extra thread on Linux (the timer tick is delivered by a signal; stacks are still captured at safepoints). Sampling overhead is ~1–5 µs per sample.
125
+ - **pprof compatible**: Output works with `go tool pprof`, speedscope, and other standard tools.
126
+ - **No code changes required**: Profile any Ruby program via CLI (`sperf stat ruby app.rb`) or environment variables (`SPERF_ENABLED=1`).
127
+ - **perf-like CLI**: Familiar subcommand interface — `record`, `stat`, `report`, `diff` — inspired by Linux perf.
128
+
129
+ ### Cons
130
+
131
+ - **Method-level only**: Profiles at the method level, not the line level. You can see which method is slow, but not which line within it.
132
+ - **Ruby >= 3.4.0**: Requires recent Ruby for the internal APIs used (postponed jobs, thread event hooks).
133
+ - **POSIX only**: Linux, macOS, etc. No Windows support.
134
+ - **Safepoint sampling**: Cannot sample inside C extensions or during long-running C calls that don't reach a safepoint. Time spent there is attributed to the next sample.
135
+
113
136
  ## Output Formats
114
137
 
115
138
  | Format | Extension | Use case |
data/ext/sperf/sperf.c CHANGED
@@ -6,6 +6,14 @@
6
6
  #include <string.h>
7
7
  #include <stdlib.h>
8
8
  #include <unistd.h>
9
+ #include <signal.h>
10
+
11
+ #ifdef __linux__
12
+ #define SPERF_USE_TIMER_SIGNAL 1
13
+ #define SPERF_TIMER_SIGNAL_DEFAULT (SIGRTMIN + 8)
14
+ #else
15
+ #define SPERF_USE_TIMER_SIGNAL 0
16
+ #endif
9
17
 
10
18
  #define SPERF_MAX_STACK_DEPTH 512
11
19
  #define SPERF_INITIAL_SAMPLES 1024
@@ -49,6 +57,10 @@ typedef struct sperf_profiler {
49
57
  int mode; /* 0 = cpu, 1 = wall */
50
58
  volatile int running;
51
59
  pthread_t timer_thread;
60
+ #if SPERF_USE_TIMER_SIGNAL
61
+ timer_t timer_id;
62
+ int timer_signal; /* >0: use timer signal, 0: use nanosleep thread */
63
+ #endif
52
64
  rb_postponed_job_handle_t pj_handle;
53
65
  sperf_sample_t *samples;
54
66
  size_t sample_count;
@@ -407,7 +419,15 @@ sperf_sample_job(void *arg)
407
419
  (ts_end.tv_nsec - ts_start.tv_nsec);
408
420
  }
409
421
 
410
- /* ---- Timer thread ---- */
422
+ /* ---- Timer ---- */
423
+
424
+ #if SPERF_USE_TIMER_SIGNAL
425
+ static void
426
+ sperf_signal_handler(int sig)
427
+ {
428
+ rb_postponed_job_trigger(g_profiler.pj_handle);
429
+ }
430
+ #endif
411
431
 
412
432
  static void *
413
433
  sperf_timer_func(void *arg)
@@ -448,6 +468,9 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
448
468
  VALUE opts;
449
469
  int frequency = 1000;
450
470
  int mode = 0; /* 0 = cpu, 1 = wall */
471
+ #if SPERF_USE_TIMER_SIGNAL
472
+ int timer_signal = SPERF_TIMER_SIGNAL_DEFAULT;
473
+ #endif
451
474
 
452
475
  rb_scan_args(argc, argv, ":", &opts);
453
476
  if (!NIL_P(opts)) {
@@ -469,6 +492,21 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
469
492
  rb_raise(rb_eArgError, "mode must be :cpu or :wall");
470
493
  }
471
494
  }
495
+ #if SPERF_USE_TIMER_SIGNAL
496
+ VALUE vsig = rb_hash_aref(opts, ID2SYM(rb_intern("signal")));
497
+ if (!NIL_P(vsig)) {
498
+ if (RTEST(vsig)) {
499
+ timer_signal = NUM2INT(vsig);
500
+ if (timer_signal < SIGRTMIN || timer_signal > SIGRTMAX) {
501
+ rb_raise(rb_eArgError, "signal must be between SIGRTMIN(%d) and SIGRTMAX(%d)",
502
+ SIGRTMIN, SIGRTMAX);
503
+ }
504
+ } else {
505
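+ /* signal: false → use nanosleep thread (note: signal: 0 is truthy in Ruby, so it takes the branch above and is rejected by the SIGRTMIN..SIGRTMAX range check) */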
506
+ timer_signal = 0;
507
+ }
508
+ }
509
+ #endif
472
510
  }
473
511
 
474
512
  if (g_profiler.running) {
@@ -534,8 +572,43 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
534
572
 
535
573
  g_profiler.running = 1;
536
574
 
537
- if (pthread_create(&g_profiler.timer_thread, NULL, sperf_timer_func, &g_profiler) != 0) {
538
- g_profiler.running = 0;
575
+ #if SPERF_USE_TIMER_SIGNAL
576
+ g_profiler.timer_signal = timer_signal;
577
+
578
+ if (timer_signal > 0) {
579
+ struct sigaction sa;
580
+ struct sigevent sev;
581
+ struct itimerspec its;
582
+
583
+ memset(&sa, 0, sizeof(sa));
584
+ sa.sa_handler = sperf_signal_handler;
585
+ sa.sa_flags = SA_RESTART;
586
+ sigaction(g_profiler.timer_signal, &sa, NULL);
587
+
588
+ memset(&sev, 0, sizeof(sev));
589
+ sev.sigev_notify = SIGEV_SIGNAL;
590
+ sev.sigev_signo = g_profiler.timer_signal;
591
+ if (timer_create(CLOCK_MONOTONIC, &sev, &g_profiler.timer_id) != 0) {
592
+ g_profiler.running = 0;
593
+ signal(g_profiler.timer_signal, SIG_DFL);
594
+ goto timer_fail;
595
+ }
596
+
597
+ its.it_value.tv_sec = 0;
598
+ its.it_value.tv_nsec = 1000000000L / g_profiler.frequency;
599
+ its.it_interval = its.it_value;
600
+ timer_settime(g_profiler.timer_id, 0, &its, NULL);
601
+ } else
602
+ #endif
603
+ {
604
+ if (pthread_create(&g_profiler.timer_thread, NULL, sperf_timer_func, &g_profiler) != 0) {
605
+ g_profiler.running = 0;
606
+ goto timer_fail;
607
+ }
608
+ }
609
+
610
+ if (0) {
611
+ timer_fail:
539
612
  {
540
613
  VALUE cur = rb_thread_current();
541
614
  sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(cur, g_profiler.ts_key);
@@ -550,7 +623,7 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
550
623
  g_profiler.samples = NULL;
551
624
  free(g_profiler.frame_pool);
552
625
  g_profiler.frame_pool = NULL;
553
- rb_raise(rb_eRuntimeError, "sperf: failed to create timer thread");
626
+ rb_raise(rb_eRuntimeError, "sperf: failed to create timer");
554
627
  }
555
628
 
556
629
  return Qtrue;
@@ -568,7 +641,15 @@ rb_sperf_stop(VALUE self)
568
641
  }
569
642
 
570
643
  g_profiler.running = 0;
571
- pthread_join(g_profiler.timer_thread, NULL);
644
+ #if SPERF_USE_TIMER_SIGNAL
645
+ if (g_profiler.timer_signal > 0) {
646
+ timer_delete(g_profiler.timer_id);
647
+ signal(g_profiler.timer_signal, SIG_DFL);
648
+ } else
649
+ #endif
650
+ {
651
+ pthread_join(g_profiler.timer_thread, NULL);
652
+ }
572
653
 
573
654
  if (g_profiler.thread_hook) {
574
655
  rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
@@ -657,9 +738,16 @@ sperf_after_fork_child(void)
657
738
  {
658
739
  if (!g_profiler.running) return;
659
740
 
660
- /* Mark as not running — timer thread doesn't exist in child */
741
+ /* Mark as not running — timer doesn't exist in child */
661
742
  g_profiler.running = 0;
662
743
 
744
+ #if SPERF_USE_TIMER_SIGNAL
745
+ /* timer_create timers are not inherited across fork; reset signal handler */
746
+ if (g_profiler.timer_signal > 0) {
747
+ signal(g_profiler.timer_signal, SIG_DFL);
748
+ }
749
+ #endif
750
+
663
751
  /* Remove hooks so they don't fire with stale state */
664
752
  if (g_profiler.thread_hook) {
665
753
  rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
@@ -0,0 +1,3 @@
1
+ module Sperf
2
+ VERSION = "0.2.0"
3
+ end
data/lib/sperf.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  require "sperf.so"
2
+ require "sperf/version"
2
3
  require "zlib"
3
4
  require "stringio"
4
5
 
5
6
  module Sperf
6
- VERSION = "0.1.0"
7
7
 
8
8
  @verbose = false
9
9
  @output = nil
@@ -17,13 +17,15 @@ module Sperf
17
17
  # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
18
18
  # .txt → text report (human/AI readable flat + cumulative table)
19
19
  # otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
20
- def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false)
20
+ def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil)
21
21
  @verbose = verbose || ENV["SPERF_VERBOSE"] == "1"
22
22
  @output = output
23
23
  @format = format
24
24
  @stat = stat
25
25
  @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
26
- _c_start(frequency: frequency, mode: mode)
26
+ c_opts = { frequency: frequency, mode: mode }
27
+ c_opts[:signal] = signal unless signal.nil?
28
+ _c_start(**c_opts)
27
29
 
28
30
  if block_given?
29
31
  begin
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sperf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Koichi Sasada
@@ -53,6 +53,7 @@ files:
53
53
  - ext/sperf/extconf.rb
54
54
  - ext/sperf/sperf.c
55
55
  - lib/sperf.rb
56
+ - lib/sperf/version.rb
56
57
  homepage: "https://github.com/ko1/sperf"
57
58
  licenses:
58
59
  - MIT