rperf 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -6
- data/docs/help.md +179 -10
- data/exe/rperf +247 -53
- data/ext/rperf/rperf.c +96 -43
- data/lib/rperf/meta.rb +343 -0
- data/lib/rperf/rack.rb +7 -2
- data/lib/rperf/table.rb +156 -0
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf/viewer/viewer.html +1148 -0
- data/lib/rperf/viewer.rb +101 -653
- data/lib/rperf.rb +208 -69
- metadata +4 -1
data/ext/rperf/rperf.c
CHANGED
|
@@ -80,7 +80,11 @@ typedef struct rperf_sample_buffer {
|
|
|
80
80
|
size_t sample_count;
|
|
81
81
|
size_t sample_capacity;
|
|
82
82
|
VALUE *frame_pool;
|
|
83
|
-
|
|
83
|
+
/* _Atomic: read by GC dmark concurrently with the aggregator's clear.
|
|
84
|
+
* Seq-cst accesses pair with the frame_table count release-stores so
|
|
85
|
+
* dmark never observes the cleared pool together with a stale
|
|
86
|
+
* frame_table count (which would leave frames unmarked). */
|
|
87
|
+
_Atomic size_t frame_pool_count;
|
|
84
88
|
size_t frame_pool_capacity;
|
|
85
89
|
} rperf_sample_buffer_t;
|
|
86
90
|
|
|
@@ -102,8 +106,6 @@ typedef struct rperf_frame_table {
|
|
|
102
106
|
|
|
103
107
|
/* ---- Aggregation table: stack → weight ---- */
|
|
104
108
|
|
|
105
|
-
#define RPERF_AGG_ENTRY_EMPTY 0
|
|
106
|
-
|
|
107
109
|
typedef struct rperf_agg_entry {
|
|
108
110
|
uint32_t frame_start; /* offset into stack_pool */
|
|
109
111
|
int depth;
|
|
@@ -145,7 +147,10 @@ typedef struct rperf_gc_state {
|
|
|
145
147
|
/* ---- Sampling overhead stats ---- */
|
|
146
148
|
|
|
147
149
|
typedef struct rperf_stats {
|
|
148
|
-
|
|
150
|
+
/* _Atomic: incremented by the signal handler / nanosleep worker, read and
|
|
151
|
+
* cleared by snapshot while running (atomic size_t is async-signal-safe
|
|
152
|
+
* when lock-free, which it is on all supported platforms). */
|
|
153
|
+
_Atomic size_t trigger_count;
|
|
149
154
|
size_t sampling_count;
|
|
150
155
|
int64_t sampling_total_ns;
|
|
151
156
|
size_t dropped_samples; /* samples lost due to allocation failure */
|
|
@@ -206,12 +211,17 @@ rperf_profiler_mark(void *ptr)
|
|
|
206
211
|
{
|
|
207
212
|
rperf_profiler_t *prof = (rperf_profiler_t *)ptr;
|
|
208
213
|
int i;
|
|
209
|
-
/* Mark both sample buffers' frame_pools */
|
|
214
|
+
/* Mark both sample buffers' frame_pools.
|
|
215
|
+
* Load the count once: the aggregator may clear it concurrently, and the
|
|
216
|
+
* pools must be read BEFORE frame_table.count below — seeing the cleared
|
|
217
|
+
* count (seq-cst) guarantees the corresponding frame_table inserts are
|
|
218
|
+
* visible, so every frame is covered by at least one mark source. */
|
|
210
219
|
for (i = 0; i < 2; i++) {
|
|
211
220
|
rperf_sample_buffer_t *buf = &prof->buffers[i];
|
|
212
|
-
|
|
221
|
+
size_t fp_count = buf->frame_pool_count;
|
|
222
|
+
if (buf->frame_pool && fp_count > 0) {
|
|
213
223
|
rb_gc_mark_locations(buf->frame_pool,
|
|
214
|
-
buf->frame_pool +
|
|
224
|
+
buf->frame_pool + fp_count);
|
|
215
225
|
}
|
|
216
226
|
}
|
|
217
227
|
/* Mark label_sets array */
|
|
@@ -249,10 +259,8 @@ rperf_profiler_memsize(const void *ptr)
|
|
|
249
259
|
/* Frame table */
|
|
250
260
|
size += prof->frame_table.capacity * sizeof(VALUE); /* keys */
|
|
251
261
|
size += prof->frame_table.bucket_capacity * sizeof(uint32_t); /* buckets */
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
* but the pointer array itself is accounted for below. */
|
|
255
|
-
}
|
|
262
|
+
/* old_keys entries are previous keys arrays; exact sizes unknown,
|
|
263
|
+
* only the pointer array itself is accounted for. */
|
|
256
264
|
size += prof->frame_table.old_keys_capacity * sizeof(VALUE *); /* old_keys */
|
|
257
265
|
|
|
258
266
|
/* Aggregation table */
|
|
@@ -637,7 +645,12 @@ rperf_aggregate_buffer(rperf_profiler_t *prof, rperf_sample_buffer_t *buf)
|
|
|
637
645
|
/* Convert VALUE frames to frame_ids */
|
|
638
646
|
int overflow = 0;
|
|
639
647
|
for (j = 0; j < s->depth; j++) {
|
|
640
|
-
if (s->frame_start + j >= buf->frame_pool_count)
|
|
648
|
+
if (s->frame_start + j >= buf->frame_pool_count) {
|
|
649
|
+
/* Defensive: sample points past the pool — truncate the
|
|
650
|
+
* sample so we never hash/insert uninitialized temp_ids */
|
|
651
|
+
s->depth = j;
|
|
652
|
+
break;
|
|
653
|
+
}
|
|
641
654
|
VALUE fval = buf->frame_pool[s->frame_start + j];
|
|
642
655
|
uint32_t fid = rperf_frame_table_insert(&prof->frame_table, fval);
|
|
643
656
|
if (fid == RPERF_FRAME_TABLE_EMPTY) { overflow = 1; break; }
|
|
@@ -648,6 +661,7 @@ rperf_aggregate_buffer(rperf_profiler_t *prof, rperf_sample_buffer_t *buf)
|
|
|
648
661
|
prof->stats.dropped_aggregation += buf->sample_count - i;
|
|
649
662
|
break;
|
|
650
663
|
}
|
|
664
|
+
if (s->depth <= 0) continue;
|
|
651
665
|
|
|
652
666
|
hash = rperf_fnv1a_u32(temp_ids, s->depth, s->thread_seq, s->label_set_id, s->vm_state);
|
|
653
667
|
|
|
@@ -753,7 +767,8 @@ static void
|
|
|
753
767
|
rperf_handle_suspended(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t *td)
|
|
754
768
|
{
|
|
755
769
|
/* Has GVL — safe to call Ruby APIs */
|
|
756
|
-
|
|
770
|
+
/* suspended_at_ns is only consumed by RESUMED in wall mode */
|
|
771
|
+
int64_t wall_now = (prof->mode == RPERF_MODE_WALL) ? rperf_wall_time_ns() : 0;
|
|
757
772
|
|
|
758
773
|
int is_first = 0;
|
|
759
774
|
|
|
@@ -766,19 +781,22 @@ rperf_handle_suspended(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t
|
|
|
766
781
|
int64_t time_now = rperf_current_time_ns(prof);
|
|
767
782
|
if (time_now < 0) return;
|
|
768
783
|
|
|
769
|
-
/*
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
int depth = rb_profile_frames(0, RPERF_MAX_STACK_DEPTH,
|
|
774
|
-
&buf->frame_pool[frame_start], NULL);
|
|
775
|
-
if (depth <= 0) return;
|
|
776
|
-
buf->frame_pool_count += depth;
|
|
777
|
-
|
|
778
|
-
/* Record normal sample (skip if first time — no prev_time, or if paused) */
|
|
784
|
+
/* Record normal sample (skip if first time — no prev_time, or if paused).
|
|
785
|
+
* The backtrace is captured only when a sample is actually recorded:
|
|
786
|
+
* committing frames to the pool while paused would grow it without bound,
|
|
787
|
+
* because no aggregation runs until samples accumulate. */
|
|
779
788
|
if (!is_first && !RPERF_PAUSED(prof)) {
|
|
780
|
-
|
|
781
|
-
|
|
789
|
+
rperf_sample_buffer_t *buf = &prof->buffers[atomic_load_explicit(&prof->active_idx, memory_order_relaxed)];
|
|
790
|
+
if (rperf_ensure_frame_pool_capacity(buf, RPERF_MAX_STACK_DEPTH) >= 0) {
|
|
791
|
+
size_t frame_start = buf->frame_pool_count;
|
|
792
|
+
int depth = rb_profile_frames(0, RPERF_MAX_STACK_DEPTH,
|
|
793
|
+
&buf->frame_pool[frame_start], NULL);
|
|
794
|
+
if (depth > 0) {
|
|
795
|
+
buf->frame_pool_count += depth;
|
|
796
|
+
int64_t weight = time_now - td->prev_time_ns;
|
|
797
|
+
rperf_record_sample(prof, frame_start, depth, weight, RPERF_VM_STATE_NORMAL, td->thread_seq, td->label_set_id);
|
|
798
|
+
}
|
|
799
|
+
}
|
|
782
800
|
}
|
|
783
801
|
|
|
784
802
|
/* Save timestamp for READY/RESUMED */
|
|
@@ -863,19 +881,26 @@ static void
|
|
|
863
881
|
rperf_thread_event_hook(rb_event_flag_t event, const rb_internal_thread_event_data_t *data, void *user_data)
|
|
864
882
|
{
|
|
865
883
|
rperf_profiler_t *prof = (rperf_profiler_t *)user_data;
|
|
866
|
-
if (!prof->running) return;
|
|
867
884
|
|
|
868
885
|
VALUE thread = data->thread;
|
|
869
886
|
rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
|
|
870
887
|
|
|
888
|
+
/* EXITED frees the thread's data even when running == 0: a thread can
|
|
889
|
+
* exit between stop setting running = 0 and the hook removal, and its td
|
|
890
|
+
* would otherwise leak (stop's Thread.list cleanup no longer sees it). */
|
|
891
|
+
if (event & RUBY_INTERNAL_THREAD_EVENT_EXITED) {
|
|
892
|
+
rperf_handle_exited(prof, thread, td);
|
|
893
|
+
return;
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
if (!prof->running) return;
|
|
897
|
+
|
|
871
898
|
if (event & RUBY_INTERNAL_THREAD_EVENT_SUSPENDED)
|
|
872
899
|
rperf_handle_suspended(prof, thread, td);
|
|
873
900
|
else if (event & RUBY_INTERNAL_THREAD_EVENT_READY)
|
|
874
901
|
rperf_handle_ready(td);
|
|
875
902
|
else if (event & RUBY_INTERNAL_THREAD_EVENT_RESUMED)
|
|
876
903
|
rperf_handle_resumed(prof, thread, td);
|
|
877
|
-
else if (event & RUBY_INTERNAL_THREAD_EVENT_EXITED)
|
|
878
|
-
rperf_handle_exited(prof, thread, td);
|
|
879
904
|
}
|
|
880
905
|
|
|
881
906
|
/* ---- GC event hook ---- */
|
|
@@ -1142,7 +1167,7 @@ rperf_build_aggregated_result(rperf_profiler_t *prof)
|
|
|
1142
1167
|
rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(prof->frequency));
|
|
1143
1168
|
rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(prof->stats.trigger_count));
|
|
1144
1169
|
rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(prof->stats.sampling_count));
|
|
1145
|
-
rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")),
|
|
1170
|
+
rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LL2NUM(prof->stats.sampling_total_ns));
|
|
1146
1171
|
if (prof->stats.dropped_samples > 0)
|
|
1147
1172
|
rb_hash_aset(result, ID2SYM(rb_intern("dropped_samples")), SIZET2NUM(prof->stats.dropped_samples));
|
|
1148
1173
|
if (prof->stats.dropped_aggregation > 0)
|
|
@@ -1161,8 +1186,8 @@ rperf_build_aggregated_result(rperf_profiler_t *prof)
|
|
|
1161
1186
|
+ (int64_t)prof->start_realtime.tv_nsec;
|
|
1162
1187
|
duration_ns = ((int64_t)now_monotonic.tv_sec - (int64_t)prof->start_monotonic.tv_sec) * 1000000000LL
|
|
1163
1188
|
+ ((int64_t)now_monotonic.tv_nsec - (int64_t)prof->start_monotonic.tv_nsec);
|
|
1164
|
-
rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")),
|
|
1165
|
-
rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")),
|
|
1189
|
+
rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LL2NUM(start_ns));
|
|
1190
|
+
rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LL2NUM(duration_ns));
|
|
1166
1191
|
}
|
|
1167
1192
|
|
|
1168
1193
|
{
|
|
@@ -1188,7 +1213,7 @@ rperf_build_aggregated_result(rperf_profiler_t *prof)
|
|
|
1188
1213
|
|
|
1189
1214
|
VALUE sample = rb_ary_new_capa(5);
|
|
1190
1215
|
rb_ary_push(sample, frames);
|
|
1191
|
-
rb_ary_push(sample,
|
|
1216
|
+
rb_ary_push(sample, LL2NUM(e->weight));
|
|
1192
1217
|
rb_ary_push(sample, INT2NUM(e->thread_seq));
|
|
1193
1218
|
rb_ary_push(sample, INT2NUM(e->label_set_id));
|
|
1194
1219
|
rb_ary_push(sample, INT2NUM(e->vm_state));
|
|
@@ -1313,6 +1338,14 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig, VAL
|
|
|
1313
1338
|
/* Pre-initialize current thread's time so the first sample is not skipped */
|
|
1314
1339
|
{
|
|
1315
1340
|
VALUE cur_thread = rb_thread_current();
|
|
1341
|
+
/* A stale td can survive a fork (the atfork child handler does not
|
|
1342
|
+
* free the forking thread's data) — free it before creating a fresh
|
|
1343
|
+
* one, or it would leak on every fork + restart cycle. */
|
|
1344
|
+
rperf_thread_data_t *stale = (rperf_thread_data_t *)rb_internal_thread_specific_get(cur_thread, g_profiler.ts_key);
|
|
1345
|
+
if (stale) {
|
|
1346
|
+
free(stale);
|
|
1347
|
+
rb_internal_thread_specific_set(cur_thread, g_profiler.ts_key, NULL);
|
|
1348
|
+
}
|
|
1316
1349
|
rperf_thread_data_t *td = rperf_thread_data_create(&g_profiler, cur_thread);
|
|
1317
1350
|
if (!td) {
|
|
1318
1351
|
rb_remove_event_hook(rperf_gc_event_hook);
|
|
@@ -1377,24 +1410,34 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig, VAL
|
|
|
1377
1410
|
if (timer_create(CLOCK_MONOTONIC, &sev, &g_profiler.timer_id) != 0) {
|
|
1378
1411
|
g_profiler.running = 0;
|
|
1379
1412
|
sigaction(g_profiler.timer_signal, &g_profiler.old_sigaction, NULL);
|
|
1413
|
+
/* Signal under the mutex — see rb_rperf_stop for the rationale */
|
|
1414
|
+
CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
|
|
1380
1415
|
CHECKED(pthread_cond_signal(&g_profiler.worker_cond));
|
|
1416
|
+
CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
|
|
1381
1417
|
CHECKED(pthread_join(g_profiler.worker_thread, NULL));
|
|
1382
1418
|
goto timer_fail;
|
|
1383
1419
|
}
|
|
1384
1420
|
|
|
1385
|
-
its.it_value.tv_sec = 0;
|
|
1386
1421
|
if (RPERF_PAUSED(&g_profiler)) {
|
|
1387
1422
|
/* defer mode: create timer but don't arm it */
|
|
1423
|
+
its.it_value.tv_sec = 0;
|
|
1388
1424
|
its.it_value.tv_nsec = 0;
|
|
1389
1425
|
} else {
|
|
1390
|
-
|
|
1426
|
+
/* Split into sec/nsec: frequency 1 gives a 1s interval, and
|
|
1427
|
+
* tv_nsec must be < 1e9 or timer_settime fails with EINVAL */
|
|
1428
|
+
long interval_ns = 1000000000L / g_profiler.frequency;
|
|
1429
|
+
its.it_value.tv_sec = interval_ns / 1000000000L;
|
|
1430
|
+
its.it_value.tv_nsec = interval_ns % 1000000000L;
|
|
1391
1431
|
}
|
|
1392
1432
|
its.it_interval = its.it_value;
|
|
1393
1433
|
if (timer_settime(g_profiler.timer_id, 0, &its, NULL) != 0) {
|
|
1394
1434
|
timer_delete(g_profiler.timer_id);
|
|
1395
1435
|
g_profiler.running = 0;
|
|
1396
1436
|
sigaction(g_profiler.timer_signal, &g_profiler.old_sigaction, NULL);
|
|
1437
|
+
/* Signal under the mutex — see rb_rperf_stop for the rationale */
|
|
1438
|
+
CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
|
|
1397
1439
|
CHECKED(pthread_cond_signal(&g_profiler.worker_cond));
|
|
1440
|
+
CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
|
|
1398
1441
|
CHECKED(pthread_join(g_profiler.worker_thread, NULL));
|
|
1399
1442
|
goto timer_fail;
|
|
1400
1443
|
}
|
|
@@ -1455,10 +1498,15 @@ rb_rperf_stop(VALUE self)
|
|
|
1455
1498
|
}
|
|
1456
1499
|
#endif
|
|
1457
1500
|
|
|
1458
|
-
/* Wake and join worker thread.
|
|
1501
|
+
/* Wake and join worker thread. Signal while holding worker_mutex:
|
|
1502
|
+
* the worker re-checks its predicate (running) with the mutex held, so
|
|
1503
|
+
* signaling under the mutex guarantees it either sees running == 0 or is
|
|
1504
|
+
* already inside cond_wait when the signal fires — no lost wakeup.
|
|
1459
1505
|
* Any pending timer signals are still handled by rperf_signal_handler
|
|
1460
1506
|
* (just increments trigger_count + calls rb_postponed_job_trigger). */
|
|
1507
|
+
CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
|
|
1461
1508
|
CHECKED(pthread_cond_signal(&g_profiler.worker_cond));
|
|
1509
|
+
CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
|
|
1462
1510
|
CHECKED(pthread_join(g_profiler.worker_thread, NULL));
|
|
1463
1511
|
CHECKED(pthread_mutex_destroy(&g_profiler.worker_mutex));
|
|
1464
1512
|
CHECKED(pthread_cond_destroy(&g_profiler.worker_cond));
|
|
@@ -1517,7 +1565,7 @@ rb_rperf_stop(VALUE self)
|
|
|
1517
1565
|
rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(g_profiler.frequency));
|
|
1518
1566
|
rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(g_profiler.stats.trigger_count));
|
|
1519
1567
|
rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(g_profiler.stats.sampling_count));
|
|
1520
|
-
rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")),
|
|
1568
|
+
rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LL2NUM(g_profiler.stats.sampling_total_ns));
|
|
1521
1569
|
if (g_profiler.stats.dropped_samples > 0)
|
|
1522
1570
|
rb_hash_aset(result, ID2SYM(rb_intern("dropped_samples")), SIZET2NUM(g_profiler.stats.dropped_samples));
|
|
1523
1571
|
if (g_profiler.stats.dropped_aggregation > 0)
|
|
@@ -1531,8 +1579,8 @@ rb_rperf_stop(VALUE self)
|
|
|
1531
1579
|
+ (int64_t)g_profiler.start_realtime.tv_nsec;
|
|
1532
1580
|
duration_ns = ((int64_t)stop_monotonic.tv_sec - (int64_t)g_profiler.start_monotonic.tv_sec) * 1000000000LL
|
|
1533
1581
|
+ ((int64_t)stop_monotonic.tv_nsec - (int64_t)g_profiler.start_monotonic.tv_nsec);
|
|
1534
|
-
rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")),
|
|
1535
|
-
rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")),
|
|
1582
|
+
rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LL2NUM(start_ns));
|
|
1583
|
+
rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LL2NUM(duration_ns));
|
|
1536
1584
|
}
|
|
1537
1585
|
|
|
1538
1586
|
samples_ary = rb_ary_new_capa((long)buf->sample_count);
|
|
@@ -1548,7 +1596,7 @@ rb_rperf_stop(VALUE self)
|
|
|
1548
1596
|
|
|
1549
1597
|
VALUE sample = rb_ary_new_capa(5);
|
|
1550
1598
|
rb_ary_push(sample, frames);
|
|
1551
|
-
rb_ary_push(sample,
|
|
1599
|
+
rb_ary_push(sample, LL2NUM(s->weight));
|
|
1552
1600
|
rb_ary_push(sample, INT2NUM(s->thread_seq));
|
|
1553
1601
|
rb_ary_push(sample, INT2NUM(s->label_set_id));
|
|
1554
1602
|
rb_ary_push(sample, INT2NUM(s->vm_state));
|
|
@@ -1684,10 +1732,13 @@ rperf_arm_timer(rperf_profiler_t *prof)
|
|
|
1684
1732
|
#if RPERF_USE_TIMER_SIGNAL
|
|
1685
1733
|
if (prof->timer_signal > 0) {
|
|
1686
1734
|
struct itimerspec its;
|
|
1687
|
-
|
|
1688
|
-
its.it_value.
|
|
1735
|
+
long interval_ns = 1000000000L / prof->frequency;
|
|
1736
|
+
its.it_value.tv_sec = interval_ns / 1000000000L;
|
|
1737
|
+
its.it_value.tv_nsec = interval_ns % 1000000000L;
|
|
1689
1738
|
its.it_interval = its.it_value;
|
|
1690
|
-
timer_settime(prof->timer_id, 0, &its, NULL);
|
|
1739
|
+
if (timer_settime(prof->timer_id, 0, &its, NULL) != 0) {
|
|
1740
|
+
fprintf(stderr, "rperf: timer_settime (arm) failed: %s\n", strerror(errno));
|
|
1741
|
+
}
|
|
1691
1742
|
return;
|
|
1692
1743
|
}
|
|
1693
1744
|
#endif
|
|
@@ -1705,7 +1756,9 @@ rperf_disarm_timer(rperf_profiler_t *prof)
|
|
|
1705
1756
|
if (prof->timer_signal > 0) {
|
|
1706
1757
|
struct itimerspec its;
|
|
1707
1758
|
memset(&its, 0, sizeof(its));
|
|
1708
|
-
timer_settime(prof->timer_id, 0, &its, NULL);
|
|
1759
|
+
if (timer_settime(prof->timer_id, 0, &its, NULL) != 0) {
|
|
1760
|
+
fprintf(stderr, "rperf: timer_settime (disarm) failed: %s\n", strerror(errno));
|
|
1761
|
+
}
|
|
1709
1762
|
return;
|
|
1710
1763
|
}
|
|
1711
1764
|
#endif
|
data/lib/rperf/meta.rb
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
# Profile metadata support: git/host info collection, summary statistics,
|
|
2
|
+
# snapshot file naming, and a meta/summary prefix reader that lists profiles
|
|
3
|
+
# without loading the sample body.
|
|
4
|
+
#
|
|
5
|
+
# JSON profiles written by rperf >= 0.10 place "meta" and "summary" as the
|
|
6
|
+
# first two top-level keys, so Meta.read can decompress only the head of the
|
|
7
|
+
# file and stop as soon as both are extracted.
|
|
8
|
+
|
|
9
|
+
require "json"
|
|
10
|
+
require "time"
|
|
11
|
+
require "zlib"
|
|
12
|
+
|
|
13
|
+
module Rperf
|
|
14
|
+
module Meta
|
|
15
|
+
FORMAT_VERSION = 1
|
|
16
|
+
TOP_METHODS_LIMIT = 50
|
|
17
|
+
|
|
18
|
+
module_function
|
|
19
|
+
|
|
20
|
+
# Collect git information for the profiled working directory.
|
|
21
|
+
# GitHub Actions environment variables take priority over git commands
|
|
22
|
+
# (CI checkouts may be detached or shallow). Returns a Hash with
|
|
23
|
+
# sha/branch/subject/committed_at/dirty, or nil when not in a git
|
|
24
|
+
# repository or git is unavailable.
|
|
25
|
+
def collect_git(dir = Dir.pwd)
|
|
26
|
+
gh_sha = ENV["GITHUB_SHA"]
|
|
27
|
+
# Validate the sha shape: the value is passed to git as a positional
|
|
28
|
+
# argument, and a crafted value starting with "-" would be parsed as
|
|
29
|
+
# a git option
|
|
30
|
+
if gh_sha && gh_sha.match?(/\A\h{7,64}\z/)
|
|
31
|
+
git = { sha: gh_sha, dirty: false }
|
|
32
|
+
branch = ENV["GITHUB_HEAD_REF"]
|
|
33
|
+
branch = ENV["GITHUB_REF_NAME"] if branch.nil? || branch.empty?
|
|
34
|
+
git[:branch] = branch if branch && !branch.empty?
|
|
35
|
+
# Enrich from the local checkout when possible (may fail on shallow clones)
|
|
36
|
+
subject = git_capture(dir, "log", "-1", "--format=%s", gh_sha)
|
|
37
|
+
committed_at = git_capture(dir, "log", "-1", "--format=%cI", gh_sha)
|
|
38
|
+
git[:subject] = subject if subject && !subject.empty?
|
|
39
|
+
git[:committed_at] = committed_at if committed_at && !committed_at.empty?
|
|
40
|
+
return git
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
sha = git_capture(dir, "rev-parse", "HEAD")
|
|
44
|
+
return nil if sha.nil? || sha.empty?
|
|
45
|
+
|
|
46
|
+
git = { sha: sha }
|
|
47
|
+
branch = git_capture(dir, "rev-parse", "--abbrev-ref", "HEAD")
|
|
48
|
+
git[:branch] = branch if branch && !branch.empty? && branch != "HEAD"
|
|
49
|
+
subject = git_capture(dir, "log", "-1", "--format=%s")
|
|
50
|
+
git[:subject] = subject if subject && !subject.empty?
|
|
51
|
+
committed_at = git_capture(dir, "log", "-1", "--format=%cI")
|
|
52
|
+
git[:committed_at] = committed_at if committed_at && !committed_at.empty?
|
|
53
|
+
status = git_capture(dir, "status", "--porcelain")
|
|
54
|
+
git[:dirty] = !status.empty? if status
|
|
55
|
+
git
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Run a git command, returning stripped stdout or nil on failure
|
|
59
|
+
# (no git binary, not a repository, etc.).
|
|
60
|
+
def git_capture(dir, *args)
|
|
61
|
+
out = IO.popen(["git", "-C", dir, *args], err: File::NULL, &:read)
|
|
62
|
+
$?.success? ? out.strip : nil
|
|
63
|
+
rescue SystemCallError
|
|
64
|
+
nil
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# File name used by `rperf record --snapshot-dir`.
|
|
68
|
+
# In a git repository: rperf-<sha7>-<timestamp>.json.gz
|
|
69
|
+
# Outside: rperf-nogit-<timestamp>-<pid>.json.gz
|
|
70
|
+
def snapshot_filename(git, time: Time.now.utc, pid: Process.pid)
|
|
71
|
+
ts = time.utc.strftime("%Y%m%dT%H%M%SZ")
|
|
72
|
+
if git && git[:sha]
|
|
73
|
+
"rperf-#{git[:sha][0, 7]}-#{ts}.json.gz"
|
|
74
|
+
else
|
|
75
|
+
"rperf-nogit-#{ts}-#{pid}.json.gz"
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Build the meta hash for a profile about to be written.
|
|
80
|
+
# Git info comes from RPERF_META_GIT (set by the CLI, which collects it
|
|
81
|
+
# before exec so a chdir in the profiled app cannot point at the wrong
|
|
82
|
+
# repository); when unset (direct API usage) it is collected here.
|
|
83
|
+
# RPERF_META_GIT="null" means "already checked, not a repository".
|
|
84
|
+
def build_meta(data)
|
|
85
|
+
meta = {
|
|
86
|
+
format_version: FORMAT_VERSION,
|
|
87
|
+
created_at: Time.now.utc.iso8601,
|
|
88
|
+
ruby_version: RUBY_VERSION,
|
|
89
|
+
rperf_version: Rperf::VERSION,
|
|
90
|
+
mode: (data[:mode] || :cpu).to_s,
|
|
91
|
+
}
|
|
92
|
+
hostname = safe_hostname
|
|
93
|
+
meta[:hostname] = hostname if hostname
|
|
94
|
+
git = git_from_env_or_collect
|
|
95
|
+
meta[:git] = git if git
|
|
96
|
+
labels = labels_from_env
|
|
97
|
+
meta[:labels] = labels if labels && !labels.empty?
|
|
98
|
+
meta
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def git_from_env_or_collect
|
|
102
|
+
if ENV.key?("RPERF_META_GIT")
|
|
103
|
+
v = ENV["RPERF_META_GIT"].to_s
|
|
104
|
+
return nil if v.empty? || v == "null"
|
|
105
|
+
begin
|
|
106
|
+
JSON.parse(v, symbolize_names: true)
|
|
107
|
+
rescue JSON::ParserError
|
|
108
|
+
nil
|
|
109
|
+
end
|
|
110
|
+
else
|
|
111
|
+
# Memoized (array wraps a legitimate nil): periodic viewer snapshots
|
|
112
|
+
# must not spawn git subprocesses on every take_snapshot!
|
|
113
|
+
@collect_git_memo ||= [collect_git]
|
|
114
|
+
@collect_git_memo[0]
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def labels_from_env
|
|
119
|
+
v = ENV["RPERF_META_LABELS"]
|
|
120
|
+
return nil unless v
|
|
121
|
+
begin
|
|
122
|
+
labels = JSON.parse(v)
|
|
123
|
+
labels.is_a?(Hash) ? labels : nil
|
|
124
|
+
rescue JSON::ParserError
|
|
125
|
+
nil
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def safe_hostname
|
|
130
|
+
require "socket"
|
|
131
|
+
Socket.gethostname
|
|
132
|
+
rescue StandardError
|
|
133
|
+
nil
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Build the summary hash from profile data (as returned by Rperf.stop).
|
|
137
|
+
# Fields whose source data is missing are omitted.
|
|
138
|
+
def build_summary(data)
|
|
139
|
+
s = {}
|
|
140
|
+
s[:total_ms] = (data[:duration_ns] / 1e6).round(1) if data[:duration_ns]
|
|
141
|
+
if data[:user_ns] || data[:sys_ns]
|
|
142
|
+
s[:cpu_ms] = (((data[:user_ns] || 0) + (data[:sys_ns] || 0)) / 1e6).round(1)
|
|
143
|
+
end
|
|
144
|
+
if (gc = data[:gc_stats])
|
|
145
|
+
s[:gc_count_minor] = gc[:minor_count] if gc[:minor_count]
|
|
146
|
+
s[:gc_count_major] = gc[:major_count] if gc[:major_count]
|
|
147
|
+
s[:gc_ms] = gc[:time_ms].to_f.round(1) if gc[:time_ms]
|
|
148
|
+
s[:allocated_objects] = gc[:allocated_objects] if gc[:allocated_objects]
|
|
149
|
+
s[:freed_objects] = gc[:freed_objects] if gc[:freed_objects]
|
|
150
|
+
end
|
|
151
|
+
s[:maxrss_mb] = data[:maxrss_mb] if data[:maxrss_mb]
|
|
152
|
+
s[:samples] = data[:sampling_count] if data[:sampling_count]
|
|
153
|
+
s[:top_methods] = top_methods(data)
|
|
154
|
+
s
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# Top methods by self time, merged by method name (shares the by-name
|
|
158
|
+
# fold with Table so report/summary numbers can never diverge).
|
|
159
|
+
def top_methods(data, limit: TOP_METHODS_LIMIT)
|
|
160
|
+
samples = data[:aggregated_samples]
|
|
161
|
+
return [] if !samples || samples.empty?
|
|
162
|
+
|
|
163
|
+
flat_by_name, cum_by_name, total = Table.flat_cum_by_name(data)
|
|
164
|
+
return [] if total <= 0
|
|
165
|
+
|
|
166
|
+
flat_by_name.sort_by { |_, w| -w }.first(limit).map do |name, w|
|
|
167
|
+
{
|
|
168
|
+
name: name,
|
|
169
|
+
self_pct: (w * 100.0 / total).round(1),
|
|
170
|
+
total_pct: (cum_by_name[name] * 100.0 / total).round(1),
|
|
171
|
+
}
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
# --- meta/summary prefix reader ---
|
|
176
|
+
|
|
177
|
+
READ_CHUNK = 64 * 1024
|
|
178
|
+
READ_LIMIT = 8 * 1024 * 1024
|
|
179
|
+
|
|
180
|
+
# Read meta/summary from a .json(.gz) profile without loading the body.
|
|
181
|
+
# Returns { meta: Hash|nil, summary: Hash|nil } or nil for files without
|
|
182
|
+
# leading meta/summary keys (old format) and unreadable/corrupt files.
|
|
183
|
+
def read(path)
|
|
184
|
+
File.open(path, "rb") do |f|
|
|
185
|
+
magic = f.read(2)
|
|
186
|
+
f.rewind
|
|
187
|
+
io = (magic == "\x1f\x8b".b) ? Zlib::GzipReader.new(f) : f
|
|
188
|
+
begin
|
|
189
|
+
buf = "".b
|
|
190
|
+
loop do
|
|
191
|
+
chunk = io.read(READ_CHUNK)
|
|
192
|
+
buf << chunk if chunk
|
|
193
|
+
result = scan_prefix(buf)
|
|
194
|
+
return result unless result == :incomplete
|
|
195
|
+
return nil if chunk.nil? || buf.bytesize > READ_LIMIT
|
|
196
|
+
end
|
|
197
|
+
ensure
|
|
198
|
+
# Free the inflate zstream now — directory listings open many files
|
|
199
|
+
# and the buffers would otherwise linger until GC
|
|
200
|
+
io.close if io.is_a?(Zlib::GzipReader)
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
rescue Zlib::Error, SystemCallError, JSON::ParserError
|
|
204
|
+
# Zlib::Error covers GzipFile::Error (truncated) and also DataError /
|
|
205
|
+
# BufError (valid gzip header, corrupt deflate body) — one corrupt
|
|
206
|
+
# snapshot must not break listing an entire directory
|
|
207
|
+
nil
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# Byte codes used by the scanner. Byte-wise scanning is safe in UTF-8:
|
|
211
|
+
# continuation bytes are >= 0x80 and never collide with ASCII syntax.
|
|
212
|
+
DQUOTE = 0x22
|
|
213
|
+
BSLASH = 0x5c
|
|
214
|
+
LBRACE = 0x7b
|
|
215
|
+
RBRACE = 0x7d
|
|
216
|
+
LBRACKET = 0x5b
|
|
217
|
+
RBRACKET = 0x5d
|
|
218
|
+
COMMA = 0x2c
|
|
219
|
+
COLON = 0x3a
|
|
220
|
+
|
|
221
|
+
# Scan the head of a JSON object for top-level "meta" / "summary" keys.
|
|
222
|
+
# rperf writes them first, so scanning stops at the first other key —
|
|
223
|
+
# large sample arrays are never traversed.
|
|
224
|
+
# Returns { meta:, summary: }, nil (old format / malformed),
|
|
225
|
+
# or :incomplete (need more input).
|
|
226
|
+
def scan_prefix(buf)
|
|
227
|
+
n = buf.bytesize
|
|
228
|
+
i = skip_ws(buf, 0, n)
|
|
229
|
+
return :incomplete if i >= n
|
|
230
|
+
return nil unless buf.getbyte(i) == LBRACE
|
|
231
|
+
i += 1
|
|
232
|
+
found = {}
|
|
233
|
+
|
|
234
|
+
loop do
|
|
235
|
+
i = skip_ws(buf, i, n)
|
|
236
|
+
return :incomplete if i >= n
|
|
237
|
+
return finalize_scan(found) if buf.getbyte(i) == RBRACE
|
|
238
|
+
return nil unless buf.getbyte(i) == DQUOTE
|
|
239
|
+
|
|
240
|
+
key_start = i
|
|
241
|
+
i = scan_string(buf, i, n)
|
|
242
|
+
return :incomplete unless i
|
|
243
|
+
key = buf.byteslice(key_start + 1, i - key_start - 2)
|
|
244
|
+
|
|
245
|
+
# First key that is not meta/summary ends the scan (old format or body)
|
|
246
|
+
return finalize_scan(found) unless key == "meta" || key == "summary"
|
|
247
|
+
|
|
248
|
+
i = skip_ws(buf, i, n)
|
|
249
|
+
return :incomplete if i >= n
|
|
250
|
+
return nil unless buf.getbyte(i) == COLON
|
|
251
|
+
i += 1
|
|
252
|
+
i = skip_ws(buf, i, n)
|
|
253
|
+
return :incomplete if i >= n
|
|
254
|
+
|
|
255
|
+
vstart = i
|
|
256
|
+
i = scan_value(buf, i, n)
|
|
257
|
+
return :incomplete unless i
|
|
258
|
+
fragment = buf.byteslice(vstart, i - vstart).force_encoding(Encoding::UTF_8)
|
|
259
|
+
found[key.to_sym] = JSON.parse(fragment, symbolize_names: true)
|
|
260
|
+
return finalize_scan(found) if found.key?(:meta) && found.key?(:summary)
|
|
261
|
+
|
|
262
|
+
i = skip_ws(buf, i, n)
|
|
263
|
+
return :incomplete if i >= n
|
|
264
|
+
case buf.getbyte(i)
|
|
265
|
+
when COMMA then i += 1
|
|
266
|
+
when RBRACE then return finalize_scan(found)
|
|
267
|
+
else return nil
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
rescue JSON::ParserError
|
|
271
|
+
nil
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
def finalize_scan(found)
|
|
275
|
+
found.empty? ? nil : { meta: found[:meta], summary: found[:summary] }
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
def skip_ws(buf, i, n)
|
|
279
|
+
while i < n
|
|
280
|
+
b = buf.getbyte(i)
|
|
281
|
+
break unless b == 0x20 || b == 0x09 || b == 0x0a || b == 0x0d
|
|
282
|
+
i += 1
|
|
283
|
+
end
|
|
284
|
+
i
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
# Scan a JSON string starting at the opening quote.
|
|
288
|
+
# Returns the index just past the closing quote, or nil if truncated.
|
|
289
|
+
def scan_string(buf, i, n)
|
|
290
|
+
j = i + 1
|
|
291
|
+
while j < n
|
|
292
|
+
b = buf.getbyte(j)
|
|
293
|
+
if b == BSLASH
|
|
294
|
+
j += 2
|
|
295
|
+
elsif b == DQUOTE
|
|
296
|
+
return j + 1
|
|
297
|
+
else
|
|
298
|
+
j += 1
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
nil
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
# Scan a JSON value (string, container, or scalar) starting at i.
|
|
305
|
+
# Returns the index just past the value, or nil if truncated.
|
|
306
|
+
def scan_value(buf, i, n)
|
|
307
|
+
case buf.getbyte(i)
|
|
308
|
+
when DQUOTE
|
|
309
|
+
scan_string(buf, i, n)
|
|
310
|
+
when LBRACE, LBRACKET
|
|
311
|
+
depth = 0
|
|
312
|
+
j = i
|
|
313
|
+
while j < n
|
|
314
|
+
b = buf.getbyte(j)
|
|
315
|
+
if b == DQUOTE
|
|
316
|
+
j = scan_string(buf, j, n)
|
|
317
|
+
return nil unless j
|
|
318
|
+
elsif b == LBRACE || b == LBRACKET
|
|
319
|
+
depth += 1
|
|
320
|
+
j += 1
|
|
321
|
+
elsif b == RBRACE || b == RBRACKET
|
|
322
|
+
depth -= 1
|
|
323
|
+
j += 1
|
|
324
|
+
return j if depth == 0
|
|
325
|
+
else
|
|
326
|
+
j += 1
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
nil
|
|
330
|
+
else
|
|
331
|
+
# scalar: number, true, false, null
|
|
332
|
+
j = i
|
|
333
|
+
while j < n
|
|
334
|
+
b = buf.getbyte(j)
|
|
335
|
+
break if b == COMMA || b == RBRACE || b == RBRACKET ||
|
|
336
|
+
b == 0x20 || b == 0x09 || b == 0x0a || b == 0x0d
|
|
337
|
+
j += 1
|
|
338
|
+
end
|
|
339
|
+
j < n ? j : nil
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
end
|
data/lib/rperf/rack.rb
CHANGED
|
@@ -16,10 +16,15 @@ class Rperf::RackMiddleware
|
|
|
16
16
|
@label_proc = label
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
UUID_RE = %r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}i
|
|
20
|
-
NUMERIC_RE = %r{/\d+}
|
|
19
|
+
UUID_RE = %r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(?=/|\z)}i
|
|
20
|
+
NUMERIC_RE = %r{/\d+(?=/|\z)}
|
|
21
21
|
|
|
22
22
|
def call(env)
|
|
23
|
+
# No-op when the profiler is not running (Rperf.profile would raise):
|
|
24
|
+
# the app may boot without Rperf.start, stop mid-run, or run in a forked
|
|
25
|
+
# worker where the atfork handler silently stopped profiling.
|
|
26
|
+
return @app.call(env) unless Rperf.running?
|
|
27
|
+
|
|
23
28
|
endpoint = if @label_proc == :raw
|
|
24
29
|
"#{env["REQUEST_METHOD"]} #{env["PATH_INFO"]}"
|
|
25
30
|
elsif @label_proc
|