sperf 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -2
- data/ext/sperf/sperf.c +94 -6
- data/lib/sperf/version.rb +3 -0
- data/lib/sperf.rb +5 -3
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a2d6adf217328a161001097ef85fd62f9716a70089c254f6a831f2d8135d7a52
|
|
4
|
+
data.tar.gz: 92b0aefb1cd263fb86c84553b76cbf1f3124eff5c24e2ceb7d8e15e36f7d5012
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ee0ba192454f4794b1f9c064ea743978dde3f8eb5588c2ed590b0cb90604e26aa373223e4d7eaf3b8f3ed56b7a8564a79aef7cc40b9fd806b528ac8482a22dc2
|
|
7
|
+
data.tar.gz: e3519944bab031f83418893bdbf122ab0e308f1d72246dfca5315128365749c085fa3e1aa99f1fb75edd6d2fd2e7c3014fc86c996348420f69b32c80fb1a4b22
|
data/README.md
CHANGED
|
@@ -60,6 +60,8 @@ Run `sperf help` for full documentation (all options, output interpretation, dia
|
|
|
60
60
|
|
|
61
61
|
## Subcommands
|
|
62
62
|
|
|
63
|
+
Inspired by Linux `perf` — familiar subcommand interface for profiling workflows.
|
|
64
|
+
|
|
63
65
|
| Command | Description |
|
|
64
66
|
|---------|-------------|
|
|
65
67
|
| `sperf record` | Profile a command and save to file |
|
|
@@ -79,8 +81,8 @@ Ruby's sampling profilers collect stack traces at **safepoints**, not at the exa
|
|
|
79
81
|
sperf uses **time deltas as sample weights**:
|
|
80
82
|
|
|
81
83
|
```
|
|
82
|
-
Timer thread (pthread) VM thread (postponed job)
|
|
83
|
-
|
|
84
|
+
Timer (signal or thread) VM thread (postponed job)
|
|
85
|
+
──────────────────────── ────────────────────────
|
|
84
86
|
every 1/frequency sec: at next safepoint:
|
|
85
87
|
rb_postponed_job_trigger() → sperf_sample_job()
|
|
86
88
|
time_now = read_clock()
|
|
@@ -88,6 +90,9 @@ Timer thread (pthread) VM thread (postponed job)
|
|
|
88
90
|
record(backtrace, weight)
|
|
89
91
|
```
|
|
90
92
|
|
|
93
|
+
On Linux, the timer uses `timer_create` + signal delivery (no extra thread).
|
|
94
|
+
On other platforms, a dedicated pthread with `nanosleep` is used.
|
|
95
|
+
|
|
91
96
|
If a safepoint is delayed, the sample carries proportionally more weight. The total weight equals the total time, accurately distributed across call stacks.
|
|
92
97
|
|
|
93
98
|
### Modes
|
|
@@ -110,6 +115,24 @@ sperf hooks GVL and GC events to attribute non-CPU time:
|
|
|
110
115
|
| `[GC marking]` | Time in GC mark phase |
|
|
111
116
|
| `[GC sweeping]` | Time in GC sweep phase |
|
|
112
117
|
|
|
118
|
+
## Pros & Cons
|
|
119
|
+
|
|
120
|
+
### Pros
|
|
121
|
+
|
|
122
|
+
- **Safepoint-based, but accurate**: Unlike signal-based profilers (e.g., stackprof), sperf samples at safepoints. Safepoint sampling is safer — no async-signal-safety constraints, so backtraces and VM state (GC phase, GVL ownership) can be inspected reliably. The downside is less precise sampling timing, but sperf compensates by using actual time deltas as sample weights — so the profiling results faithfully reflect where time is actually spent.
|
|
123
|
+
- **GVL & GC visibility** (wall mode): Attributes off-GVL time, GVL contention, and GC phases to the responsible call stacks with synthetic frames.
|
|
124
|
+
- **Low overhead**: No extra thread on Linux (signal-based timer). Sampling overhead is ~1-5 us per sample.
|
|
125
|
+
- **pprof compatible**: Output works with `go tool pprof`, speedscope, and other standard tools.
|
|
126
|
+
- **No code changes required**: Profile any Ruby program via CLI (`sperf stat ruby app.rb`) or environment variables (`SPERF_ENABLED=1`).
|
|
127
|
+
- **perf-like CLI**: Familiar subcommand interface — `record`, `stat`, `report`, `diff` — inspired by Linux perf.
|
|
128
|
+
|
|
129
|
+
### Cons
|
|
130
|
+
|
|
131
|
+
- **Method-level only**: Profiles at the method level, not the line level. You can see which method is slow, but not which line within it.
|
|
132
|
+
- **Ruby >= 3.4.0**: Requires recent Ruby for the internal APIs used (postponed jobs, thread event hooks).
|
|
133
|
+
- **POSIX only**: Linux, macOS, etc. No Windows support.
|
|
134
|
+
- **Safepoint sampling**: Cannot sample inside C extensions or during long-running C calls that don't reach a safepoint. Time spent there is attributed to the next sample.
|
|
135
|
+
|
|
113
136
|
## Output Formats
|
|
114
137
|
|
|
115
138
|
| Format | Extension | Use case |
|
data/ext/sperf/sperf.c
CHANGED
|
@@ -6,6 +6,14 @@
|
|
|
6
6
|
#include <string.h>
|
|
7
7
|
#include <stdlib.h>
|
|
8
8
|
#include <unistd.h>
|
|
9
|
+
#include <signal.h>
|
|
10
|
+
|
|
11
|
+
#ifdef __linux__
|
|
12
|
+
#define SPERF_USE_TIMER_SIGNAL 1
|
|
13
|
+
#define SPERF_TIMER_SIGNAL_DEFAULT (SIGRTMIN + 8)
|
|
14
|
+
#else
|
|
15
|
+
#define SPERF_USE_TIMER_SIGNAL 0
|
|
16
|
+
#endif
|
|
9
17
|
|
|
10
18
|
#define SPERF_MAX_STACK_DEPTH 512
|
|
11
19
|
#define SPERF_INITIAL_SAMPLES 1024
|
|
@@ -49,6 +57,10 @@ typedef struct sperf_profiler {
|
|
|
49
57
|
int mode; /* 0 = cpu, 1 = wall */
|
|
50
58
|
volatile int running;
|
|
51
59
|
pthread_t timer_thread;
|
|
60
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
61
|
+
timer_t timer_id;
|
|
62
|
+
int timer_signal; /* >0: use timer signal, 0: use nanosleep thread */
|
|
63
|
+
#endif
|
|
52
64
|
rb_postponed_job_handle_t pj_handle;
|
|
53
65
|
sperf_sample_t *samples;
|
|
54
66
|
size_t sample_count;
|
|
@@ -407,7 +419,15 @@ sperf_sample_job(void *arg)
|
|
|
407
419
|
(ts_end.tv_nsec - ts_start.tv_nsec);
|
|
408
420
|
}
|
|
409
421
|
|
|
410
|
-
/* ---- Timer
|
|
422
|
+
/* ---- Timer ---- */
|
|
423
|
+
|
|
424
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
425
|
+
static void
|
|
426
|
+
sperf_signal_handler(int sig)
|
|
427
|
+
{
|
|
428
|
+
rb_postponed_job_trigger(g_profiler.pj_handle);
|
|
429
|
+
}
|
|
430
|
+
#endif
|
|
411
431
|
|
|
412
432
|
static void *
|
|
413
433
|
sperf_timer_func(void *arg)
|
|
@@ -448,6 +468,9 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
|
|
|
448
468
|
VALUE opts;
|
|
449
469
|
int frequency = 1000;
|
|
450
470
|
int mode = 0; /* 0 = cpu, 1 = wall */
|
|
471
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
472
|
+
int timer_signal = SPERF_TIMER_SIGNAL_DEFAULT;
|
|
473
|
+
#endif
|
|
451
474
|
|
|
452
475
|
rb_scan_args(argc, argv, ":", &opts);
|
|
453
476
|
if (!NIL_P(opts)) {
|
|
@@ -469,6 +492,21 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
|
|
|
469
492
|
rb_raise(rb_eArgError, "mode must be :cpu or :wall");
|
|
470
493
|
}
|
|
471
494
|
}
|
|
495
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
496
|
+
VALUE vsig = rb_hash_aref(opts, ID2SYM(rb_intern("signal")));
|
|
497
|
+
if (!NIL_P(vsig)) {
|
|
498
|
+
if (RTEST(vsig)) {
|
|
499
|
+
timer_signal = NUM2INT(vsig);
|
|
500
|
+
if (timer_signal < SIGRTMIN || timer_signal > SIGRTMAX) {
|
|
501
|
+
rb_raise(rb_eArgError, "signal must be between SIGRTMIN(%d) and SIGRTMAX(%d)",
|
|
502
|
+
SIGRTMIN, SIGRTMAX);
|
|
503
|
+
}
|
|
504
|
+
} else {
|
|
505
|
+
/* signal: false or signal: 0 → use nanosleep thread */
|
|
506
|
+
timer_signal = 0;
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
#endif
|
|
472
510
|
}
|
|
473
511
|
|
|
474
512
|
if (g_profiler.running) {
|
|
@@ -534,8 +572,43 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
|
|
|
534
572
|
|
|
535
573
|
g_profiler.running = 1;
|
|
536
574
|
|
|
537
|
-
|
|
538
|
-
|
|
575
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
576
|
+
g_profiler.timer_signal = timer_signal;
|
|
577
|
+
|
|
578
|
+
if (timer_signal > 0) {
|
|
579
|
+
struct sigaction sa;
|
|
580
|
+
struct sigevent sev;
|
|
581
|
+
struct itimerspec its;
|
|
582
|
+
|
|
583
|
+
memset(&sa, 0, sizeof(sa));
|
|
584
|
+
sa.sa_handler = sperf_signal_handler;
|
|
585
|
+
sa.sa_flags = SA_RESTART;
|
|
586
|
+
sigaction(g_profiler.timer_signal, &sa, NULL);
|
|
587
|
+
|
|
588
|
+
memset(&sev, 0, sizeof(sev));
|
|
589
|
+
sev.sigev_notify = SIGEV_SIGNAL;
|
|
590
|
+
sev.sigev_signo = g_profiler.timer_signal;
|
|
591
|
+
if (timer_create(CLOCK_MONOTONIC, &sev, &g_profiler.timer_id) != 0) {
|
|
592
|
+
g_profiler.running = 0;
|
|
593
|
+
signal(g_profiler.timer_signal, SIG_DFL);
|
|
594
|
+
goto timer_fail;
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
its.it_value.tv_sec = 0;
|
|
598
|
+
its.it_value.tv_nsec = 1000000000L / g_profiler.frequency;
|
|
599
|
+
its.it_interval = its.it_value;
|
|
600
|
+
timer_settime(g_profiler.timer_id, 0, &its, NULL);
|
|
601
|
+
} else
|
|
602
|
+
#endif
|
|
603
|
+
{
|
|
604
|
+
if (pthread_create(&g_profiler.timer_thread, NULL, sperf_timer_func, &g_profiler) != 0) {
|
|
605
|
+
g_profiler.running = 0;
|
|
606
|
+
goto timer_fail;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
if (0) {
|
|
611
|
+
timer_fail:
|
|
539
612
|
{
|
|
540
613
|
VALUE cur = rb_thread_current();
|
|
541
614
|
sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(cur, g_profiler.ts_key);
|
|
@@ -550,7 +623,7 @@ rb_sperf_start(int argc, VALUE *argv, VALUE self)
|
|
|
550
623
|
g_profiler.samples = NULL;
|
|
551
624
|
free(g_profiler.frame_pool);
|
|
552
625
|
g_profiler.frame_pool = NULL;
|
|
553
|
-
rb_raise(rb_eRuntimeError, "sperf: failed to create timer
|
|
626
|
+
rb_raise(rb_eRuntimeError, "sperf: failed to create timer");
|
|
554
627
|
}
|
|
555
628
|
|
|
556
629
|
return Qtrue;
|
|
@@ -568,7 +641,15 @@ rb_sperf_stop(VALUE self)
|
|
|
568
641
|
}
|
|
569
642
|
|
|
570
643
|
g_profiler.running = 0;
|
|
571
|
-
|
|
644
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
645
|
+
if (g_profiler.timer_signal > 0) {
|
|
646
|
+
timer_delete(g_profiler.timer_id);
|
|
647
|
+
signal(g_profiler.timer_signal, SIG_DFL);
|
|
648
|
+
} else
|
|
649
|
+
#endif
|
|
650
|
+
{
|
|
651
|
+
pthread_join(g_profiler.timer_thread, NULL);
|
|
652
|
+
}
|
|
572
653
|
|
|
573
654
|
if (g_profiler.thread_hook) {
|
|
574
655
|
rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
|
|
@@ -657,9 +738,16 @@ sperf_after_fork_child(void)
|
|
|
657
738
|
{
|
|
658
739
|
if (!g_profiler.running) return;
|
|
659
740
|
|
|
660
|
-
/* Mark as not running — timer
|
|
741
|
+
/* Mark as not running — timer doesn't exist in child */
|
|
661
742
|
g_profiler.running = 0;
|
|
662
743
|
|
|
744
|
+
#if SPERF_USE_TIMER_SIGNAL
|
|
745
|
+
/* timer_create timers are not inherited across fork; reset signal handler */
|
|
746
|
+
if (g_profiler.timer_signal > 0) {
|
|
747
|
+
signal(g_profiler.timer_signal, SIG_DFL);
|
|
748
|
+
}
|
|
749
|
+
#endif
|
|
750
|
+
|
|
663
751
|
/* Remove hooks so they don't fire with stale state */
|
|
664
752
|
if (g_profiler.thread_hook) {
|
|
665
753
|
rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
|
data/lib/sperf.rb
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
require "sperf.so"
|
|
2
|
+
require "sperf/version"
|
|
2
3
|
require "zlib"
|
|
3
4
|
require "stringio"
|
|
4
5
|
|
|
5
6
|
module Sperf
|
|
6
|
-
VERSION = "0.1.0"
|
|
7
7
|
|
|
8
8
|
@verbose = false
|
|
9
9
|
@output = nil
|
|
@@ -17,13 +17,15 @@ module Sperf
|
|
|
17
17
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
18
18
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
19
19
|
# otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
|
|
20
|
-
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false)
|
|
20
|
+
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil)
|
|
21
21
|
@verbose = verbose || ENV["SPERF_VERBOSE"] == "1"
|
|
22
22
|
@output = output
|
|
23
23
|
@format = format
|
|
24
24
|
@stat = stat
|
|
25
25
|
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
|
|
26
|
-
|
|
26
|
+
c_opts = { frequency: frequency, mode: mode }
|
|
27
|
+
c_opts[:signal] = signal unless signal.nil?
|
|
28
|
+
_c_start(**c_opts)
|
|
27
29
|
|
|
28
30
|
if block_given?
|
|
29
31
|
begin
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sperf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Koichi Sasada
|
|
@@ -53,6 +53,7 @@ files:
|
|
|
53
53
|
- ext/sperf/extconf.rb
|
|
54
54
|
- ext/sperf/sperf.c
|
|
55
55
|
- lib/sperf.rb
|
|
56
|
+
- lib/sperf/version.rb
|
|
56
57
|
homepage: "https://github.com/ko1/sperf"
|
|
57
58
|
licenses:
|
|
58
59
|
- MIT
|