rperf 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/rperf/rperf.c CHANGED
@@ -80,7 +80,11 @@ typedef struct rperf_sample_buffer {
80
80
  size_t sample_count;
81
81
  size_t sample_capacity;
82
82
  VALUE *frame_pool;
83
- size_t frame_pool_count;
83
+ /* _Atomic: read by GC dmark concurrently with the aggregator's clear.
84
+ * Seq-cst accesses pair with the frame_table count release-stores so
85
+ * dmark never observes the cleared pool together with a stale
86
+ * frame_table count (which would leave frames unmarked). */
87
+ _Atomic size_t frame_pool_count;
84
88
  size_t frame_pool_capacity;
85
89
  } rperf_sample_buffer_t;
86
90
 
@@ -102,8 +106,6 @@ typedef struct rperf_frame_table {
102
106
 
103
107
  /* ---- Aggregation table: stack → weight ---- */
104
108
 
105
- #define RPERF_AGG_ENTRY_EMPTY 0
106
-
107
109
  typedef struct rperf_agg_entry {
108
110
  uint32_t frame_start; /* offset into stack_pool */
109
111
  int depth;
@@ -145,7 +147,10 @@ typedef struct rperf_gc_state {
145
147
  /* ---- Sampling overhead stats ---- */
146
148
 
147
149
  typedef struct rperf_stats {
148
- size_t trigger_count;
150
+ /* _Atomic: incremented by the signal handler / nanosleep worker, read and
151
+ * cleared by snapshot while running (atomic size_t is async-signal-safe
152
+ * when lock-free, which it is on all supported platforms). */
153
+ _Atomic size_t trigger_count;
149
154
  size_t sampling_count;
150
155
  int64_t sampling_total_ns;
151
156
  size_t dropped_samples; /* samples lost due to allocation failure */
@@ -206,12 +211,17 @@ rperf_profiler_mark(void *ptr)
206
211
  {
207
212
  rperf_profiler_t *prof = (rperf_profiler_t *)ptr;
208
213
  int i;
209
- /* Mark both sample buffers' frame_pools */
214
+ /* Mark both sample buffers' frame_pools.
215
+ * Load the count once: the aggregator may clear it concurrently, and the
216
+ * pools must be read BEFORE frame_table.count below — seeing the cleared
217
+ * count (seq-cst) guarantees the corresponding frame_table inserts are
218
+ * visible, so every frame is covered by at least one mark source. */
210
219
  for (i = 0; i < 2; i++) {
211
220
  rperf_sample_buffer_t *buf = &prof->buffers[i];
212
- if (buf->frame_pool && buf->frame_pool_count > 0) {
221
+ size_t fp_count = buf->frame_pool_count;
222
+ if (buf->frame_pool && fp_count > 0) {
213
223
  rb_gc_mark_locations(buf->frame_pool,
214
- buf->frame_pool + buf->frame_pool_count);
224
+ buf->frame_pool + fp_count);
215
225
  }
216
226
  }
217
227
  /* Mark label_sets array */
@@ -249,10 +259,8 @@ rperf_profiler_memsize(const void *ptr)
249
259
  /* Frame table */
250
260
  size += prof->frame_table.capacity * sizeof(VALUE); /* keys */
251
261
  size += prof->frame_table.bucket_capacity * sizeof(uint32_t); /* buckets */
252
- for (i = 0; i < prof->frame_table.old_keys_count; i++) {
253
- /* old_keys entries are previous keys arrays; exact sizes unknown,
254
- * but the pointer array itself is accounted for below. */
255
- }
262
+ /* old_keys entries are previous keys arrays; exact sizes unknown,
263
+ * only the pointer array itself is accounted for. */
256
264
  size += prof->frame_table.old_keys_capacity * sizeof(VALUE *); /* old_keys */
257
265
 
258
266
  /* Aggregation table */
@@ -637,7 +645,12 @@ rperf_aggregate_buffer(rperf_profiler_t *prof, rperf_sample_buffer_t *buf)
637
645
  /* Convert VALUE frames to frame_ids */
638
646
  int overflow = 0;
639
647
  for (j = 0; j < s->depth; j++) {
640
- if (s->frame_start + j >= buf->frame_pool_count) break;
648
+ if (s->frame_start + j >= buf->frame_pool_count) {
649
+ /* Defensive: sample points past the pool — truncate the
650
+ * sample so we never hash/insert uninitialized temp_ids */
651
+ s->depth = j;
652
+ break;
653
+ }
641
654
  VALUE fval = buf->frame_pool[s->frame_start + j];
642
655
  uint32_t fid = rperf_frame_table_insert(&prof->frame_table, fval);
643
656
  if (fid == RPERF_FRAME_TABLE_EMPTY) { overflow = 1; break; }
@@ -648,6 +661,7 @@ rperf_aggregate_buffer(rperf_profiler_t *prof, rperf_sample_buffer_t *buf)
648
661
  prof->stats.dropped_aggregation += buf->sample_count - i;
649
662
  break;
650
663
  }
664
+ if (s->depth <= 0) continue;
651
665
 
652
666
  hash = rperf_fnv1a_u32(temp_ids, s->depth, s->thread_seq, s->label_set_id, s->vm_state);
653
667
 
@@ -753,7 +767,8 @@ static void
753
767
  rperf_handle_suspended(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t *td)
754
768
  {
755
769
  /* Has GVL — safe to call Ruby APIs */
756
- int64_t wall_now = rperf_wall_time_ns();
770
+ /* suspended_at_ns is only consumed by RESUMED in wall mode */
771
+ int64_t wall_now = (prof->mode == RPERF_MODE_WALL) ? rperf_wall_time_ns() : 0;
757
772
 
758
773
  int is_first = 0;
759
774
 
@@ -766,19 +781,22 @@ rperf_handle_suspended(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t
766
781
  int64_t time_now = rperf_current_time_ns(prof);
767
782
  if (time_now < 0) return;
768
783
 
769
- /* Capture backtrace into active buffer's frame_pool */
770
- rperf_sample_buffer_t *buf = &prof->buffers[atomic_load_explicit(&prof->active_idx, memory_order_relaxed)];
771
- if (rperf_ensure_frame_pool_capacity(buf, RPERF_MAX_STACK_DEPTH) < 0) return;
772
- size_t frame_start = buf->frame_pool_count;
773
- int depth = rb_profile_frames(0, RPERF_MAX_STACK_DEPTH,
774
- &buf->frame_pool[frame_start], NULL);
775
- if (depth <= 0) return;
776
- buf->frame_pool_count += depth;
777
-
778
- /* Record normal sample (skip if first time — no prev_time, or if paused) */
784
+ /* Record normal sample (skip if first time — no prev_time, or if paused).
785
+ * The backtrace is captured only when a sample is actually recorded:
786
+ * committing frames to the pool while paused would grow it without bound,
787
+ * because no aggregation runs until samples accumulate. */
779
788
  if (!is_first && !RPERF_PAUSED(prof)) {
780
- int64_t weight = time_now - td->prev_time_ns;
781
- rperf_record_sample(prof, frame_start, depth, weight, RPERF_VM_STATE_NORMAL, td->thread_seq, td->label_set_id);
789
+ rperf_sample_buffer_t *buf = &prof->buffers[atomic_load_explicit(&prof->active_idx, memory_order_relaxed)];
790
+ if (rperf_ensure_frame_pool_capacity(buf, RPERF_MAX_STACK_DEPTH) >= 0) {
791
+ size_t frame_start = buf->frame_pool_count;
792
+ int depth = rb_profile_frames(0, RPERF_MAX_STACK_DEPTH,
793
+ &buf->frame_pool[frame_start], NULL);
794
+ if (depth > 0) {
795
+ buf->frame_pool_count += depth;
796
+ int64_t weight = time_now - td->prev_time_ns;
797
+ rperf_record_sample(prof, frame_start, depth, weight, RPERF_VM_STATE_NORMAL, td->thread_seq, td->label_set_id);
798
+ }
799
+ }
782
800
  }
783
801
 
784
802
  /* Save timestamp for READY/RESUMED */
@@ -863,19 +881,26 @@ static void
863
881
  rperf_thread_event_hook(rb_event_flag_t event, const rb_internal_thread_event_data_t *data, void *user_data)
864
882
  {
865
883
  rperf_profiler_t *prof = (rperf_profiler_t *)user_data;
866
- if (!prof->running) return;
867
884
 
868
885
  VALUE thread = data->thread;
869
886
  rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
870
887
 
888
+ /* EXITED frees the thread's data even when running == 0: a thread can
889
+ * exit between stop setting running = 0 and the hook removal, and its td
890
+ * would otherwise leak (stop's Thread.list cleanup no longer sees it). */
891
+ if (event & RUBY_INTERNAL_THREAD_EVENT_EXITED) {
892
+ rperf_handle_exited(prof, thread, td);
893
+ return;
894
+ }
895
+
896
+ if (!prof->running) return;
897
+
871
898
  if (event & RUBY_INTERNAL_THREAD_EVENT_SUSPENDED)
872
899
  rperf_handle_suspended(prof, thread, td);
873
900
  else if (event & RUBY_INTERNAL_THREAD_EVENT_READY)
874
901
  rperf_handle_ready(td);
875
902
  else if (event & RUBY_INTERNAL_THREAD_EVENT_RESUMED)
876
903
  rperf_handle_resumed(prof, thread, td);
877
- else if (event & RUBY_INTERNAL_THREAD_EVENT_EXITED)
878
- rperf_handle_exited(prof, thread, td);
879
904
  }
880
905
 
881
906
  /* ---- GC event hook ---- */
@@ -1142,7 +1167,7 @@ rperf_build_aggregated_result(rperf_profiler_t *prof)
1142
1167
  rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(prof->frequency));
1143
1168
  rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(prof->stats.trigger_count));
1144
1169
  rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(prof->stats.sampling_count));
1145
- rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LONG2NUM(prof->stats.sampling_total_ns));
1170
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LL2NUM(prof->stats.sampling_total_ns));
1146
1171
  if (prof->stats.dropped_samples > 0)
1147
1172
  rb_hash_aset(result, ID2SYM(rb_intern("dropped_samples")), SIZET2NUM(prof->stats.dropped_samples));
1148
1173
  if (prof->stats.dropped_aggregation > 0)
@@ -1161,8 +1186,8 @@ rperf_build_aggregated_result(rperf_profiler_t *prof)
1161
1186
  + (int64_t)prof->start_realtime.tv_nsec;
1162
1187
  duration_ns = ((int64_t)now_monotonic.tv_sec - (int64_t)prof->start_monotonic.tv_sec) * 1000000000LL
1163
1188
  + ((int64_t)now_monotonic.tv_nsec - (int64_t)prof->start_monotonic.tv_nsec);
1164
- rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LONG2NUM(start_ns));
1165
- rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LONG2NUM(duration_ns));
1189
+ rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LL2NUM(start_ns));
1190
+ rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LL2NUM(duration_ns));
1166
1191
  }
1167
1192
 
1168
1193
  {
@@ -1188,7 +1213,7 @@ rperf_build_aggregated_result(rperf_profiler_t *prof)
1188
1213
 
1189
1214
  VALUE sample = rb_ary_new_capa(5);
1190
1215
  rb_ary_push(sample, frames);
1191
- rb_ary_push(sample, LONG2NUM(e->weight));
1216
+ rb_ary_push(sample, LL2NUM(e->weight));
1192
1217
  rb_ary_push(sample, INT2NUM(e->thread_seq));
1193
1218
  rb_ary_push(sample, INT2NUM(e->label_set_id));
1194
1219
  rb_ary_push(sample, INT2NUM(e->vm_state));
@@ -1313,6 +1338,14 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig, VAL
1313
1338
  /* Pre-initialize current thread's time so the first sample is not skipped */
1314
1339
  {
1315
1340
  VALUE cur_thread = rb_thread_current();
1341
+ /* A stale td can survive a fork (the atfork child handler does not
1342
+ * free the forking thread's data) — free it before creating a fresh
1343
+ * one, or it would leak on every fork + restart cycle. */
1344
+ rperf_thread_data_t *stale = (rperf_thread_data_t *)rb_internal_thread_specific_get(cur_thread, g_profiler.ts_key);
1345
+ if (stale) {
1346
+ free(stale);
1347
+ rb_internal_thread_specific_set(cur_thread, g_profiler.ts_key, NULL);
1348
+ }
1316
1349
  rperf_thread_data_t *td = rperf_thread_data_create(&g_profiler, cur_thread);
1317
1350
  if (!td) {
1318
1351
  rb_remove_event_hook(rperf_gc_event_hook);
@@ -1377,24 +1410,34 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig, VAL
1377
1410
  if (timer_create(CLOCK_MONOTONIC, &sev, &g_profiler.timer_id) != 0) {
1378
1411
  g_profiler.running = 0;
1379
1412
  sigaction(g_profiler.timer_signal, &g_profiler.old_sigaction, NULL);
1413
+ /* Signal under the mutex — see rb_rperf_stop for the rationale */
1414
+ CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
1380
1415
  CHECKED(pthread_cond_signal(&g_profiler.worker_cond));
1416
+ CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
1381
1417
  CHECKED(pthread_join(g_profiler.worker_thread, NULL));
1382
1418
  goto timer_fail;
1383
1419
  }
1384
1420
 
1385
- its.it_value.tv_sec = 0;
1386
1421
  if (RPERF_PAUSED(&g_profiler)) {
1387
1422
  /* defer mode: create timer but don't arm it */
1423
+ its.it_value.tv_sec = 0;
1388
1424
  its.it_value.tv_nsec = 0;
1389
1425
  } else {
1390
- its.it_value.tv_nsec = 1000000000L / g_profiler.frequency;
1426
+ /* Split into sec/nsec: frequency 1 gives a 1s interval, and
1427
+ * tv_nsec must be < 1e9 or timer_settime fails with EINVAL */
1428
+ long interval_ns = 1000000000L / g_profiler.frequency;
1429
+ its.it_value.tv_sec = interval_ns / 1000000000L;
1430
+ its.it_value.tv_nsec = interval_ns % 1000000000L;
1391
1431
  }
1392
1432
  its.it_interval = its.it_value;
1393
1433
  if (timer_settime(g_profiler.timer_id, 0, &its, NULL) != 0) {
1394
1434
  timer_delete(g_profiler.timer_id);
1395
1435
  g_profiler.running = 0;
1396
1436
  sigaction(g_profiler.timer_signal, &g_profiler.old_sigaction, NULL);
1437
+ /* Signal under the mutex — see rb_rperf_stop for the rationale */
1438
+ CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
1397
1439
  CHECKED(pthread_cond_signal(&g_profiler.worker_cond));
1440
+ CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
1398
1441
  CHECKED(pthread_join(g_profiler.worker_thread, NULL));
1399
1442
  goto timer_fail;
1400
1443
  }
@@ -1455,10 +1498,15 @@ rb_rperf_stop(VALUE self)
1455
1498
  }
1456
1499
  #endif
1457
1500
 
1458
- /* Wake and join worker thread.
1501
+ /* Wake and join worker thread. Signal while holding worker_mutex:
1502
+ * the worker re-checks its predicate (running) with the mutex held, so
1503
+ * signaling under the mutex guarantees it either sees running == 0 or is
1504
+ * already inside cond_wait when the signal fires — no lost wakeup.
1459
1505
  * Any pending timer signals are still handled by rperf_signal_handler
1460
1506
  * (just increments trigger_count + calls rb_postponed_job_trigger). */
1507
+ CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
1461
1508
  CHECKED(pthread_cond_signal(&g_profiler.worker_cond));
1509
+ CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
1462
1510
  CHECKED(pthread_join(g_profiler.worker_thread, NULL));
1463
1511
  CHECKED(pthread_mutex_destroy(&g_profiler.worker_mutex));
1464
1512
  CHECKED(pthread_cond_destroy(&g_profiler.worker_cond));
@@ -1517,7 +1565,7 @@ rb_rperf_stop(VALUE self)
1517
1565
  rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(g_profiler.frequency));
1518
1566
  rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(g_profiler.stats.trigger_count));
1519
1567
  rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(g_profiler.stats.sampling_count));
1520
- rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LONG2NUM(g_profiler.stats.sampling_total_ns));
1568
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LL2NUM(g_profiler.stats.sampling_total_ns));
1521
1569
  if (g_profiler.stats.dropped_samples > 0)
1522
1570
  rb_hash_aset(result, ID2SYM(rb_intern("dropped_samples")), SIZET2NUM(g_profiler.stats.dropped_samples));
1523
1571
  if (g_profiler.stats.dropped_aggregation > 0)
@@ -1531,8 +1579,8 @@ rb_rperf_stop(VALUE self)
1531
1579
  + (int64_t)g_profiler.start_realtime.tv_nsec;
1532
1580
  duration_ns = ((int64_t)stop_monotonic.tv_sec - (int64_t)g_profiler.start_monotonic.tv_sec) * 1000000000LL
1533
1581
  + ((int64_t)stop_monotonic.tv_nsec - (int64_t)g_profiler.start_monotonic.tv_nsec);
1534
- rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LONG2NUM(start_ns));
1535
- rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LONG2NUM(duration_ns));
1582
+ rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LL2NUM(start_ns));
1583
+ rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LL2NUM(duration_ns));
1536
1584
  }
1537
1585
 
1538
1586
  samples_ary = rb_ary_new_capa((long)buf->sample_count);
@@ -1548,7 +1596,7 @@ rb_rperf_stop(VALUE self)
1548
1596
 
1549
1597
  VALUE sample = rb_ary_new_capa(5);
1550
1598
  rb_ary_push(sample, frames);
1551
- rb_ary_push(sample, LONG2NUM(s->weight));
1599
+ rb_ary_push(sample, LL2NUM(s->weight));
1552
1600
  rb_ary_push(sample, INT2NUM(s->thread_seq));
1553
1601
  rb_ary_push(sample, INT2NUM(s->label_set_id));
1554
1602
  rb_ary_push(sample, INT2NUM(s->vm_state));
@@ -1684,10 +1732,13 @@ rperf_arm_timer(rperf_profiler_t *prof)
1684
1732
  #if RPERF_USE_TIMER_SIGNAL
1685
1733
  if (prof->timer_signal > 0) {
1686
1734
  struct itimerspec its;
1687
- its.it_value.tv_sec = 0;
1688
- its.it_value.tv_nsec = 1000000000L / prof->frequency;
1735
+ long interval_ns = 1000000000L / prof->frequency;
1736
+ its.it_value.tv_sec = interval_ns / 1000000000L;
1737
+ its.it_value.tv_nsec = interval_ns % 1000000000L;
1689
1738
  its.it_interval = its.it_value;
1690
- timer_settime(prof->timer_id, 0, &its, NULL);
1739
+ if (timer_settime(prof->timer_id, 0, &its, NULL) != 0) {
1740
+ fprintf(stderr, "rperf: timer_settime (arm) failed: %s\n", strerror(errno));
1741
+ }
1691
1742
  return;
1692
1743
  }
1693
1744
  #endif
@@ -1705,7 +1756,9 @@ rperf_disarm_timer(rperf_profiler_t *prof)
1705
1756
  if (prof->timer_signal > 0) {
1706
1757
  struct itimerspec its;
1707
1758
  memset(&its, 0, sizeof(its));
1708
- timer_settime(prof->timer_id, 0, &its, NULL);
1759
+ if (timer_settime(prof->timer_id, 0, &its, NULL) != 0) {
1760
+ fprintf(stderr, "rperf: timer_settime (disarm) failed: %s\n", strerror(errno));
1761
+ }
1709
1762
  return;
1710
1763
  }
1711
1764
  #endif
data/lib/rperf/meta.rb ADDED
@@ -0,0 +1,343 @@
1
+ # Profile metadata support: git/host info collection, summary statistics,
2
+ # snapshot file naming, and a meta/summary prefix reader that lists profiles
3
+ # without loading the sample body.
4
+ #
5
+ # JSON profiles written by rperf >= 0.10 place "meta" and "summary" as the
6
+ # first two top-level keys, so Meta.read can decompress only the head of the
7
+ # file and stop as soon as both are extracted.
8
+
9
+ require "json"
10
+ require "time"
11
+ require "zlib"
12
+
13
+ module Rperf
14
+ module Meta
15
+ FORMAT_VERSION = 1
16
+ TOP_METHODS_LIMIT = 50
17
+
18
+ module_function
19
+
20
+ # Collect git information for the profiled working directory.
21
+ # GitHub Actions environment variables take priority over git commands
22
+ # (CI checkouts may be detached or shallow). Returns a Hash with
23
+ # sha/branch/subject/committed_at/dirty, or nil when not in a git
24
+ # repository or git is unavailable.
25
+ def collect_git(dir = Dir.pwd)
26
+ gh_sha = ENV["GITHUB_SHA"]
27
+ # Validate the sha shape: the value is passed to git as a positional
28
+ # argument, and a crafted value starting with "-" would be parsed as
29
+ # a git option
30
+ if gh_sha && gh_sha.match?(/\A\h{7,64}\z/)
31
+ git = { sha: gh_sha, dirty: false }
32
+ branch = ENV["GITHUB_HEAD_REF"]
33
+ branch = ENV["GITHUB_REF_NAME"] if branch.nil? || branch.empty?
34
+ git[:branch] = branch if branch && !branch.empty?
35
+ # Enrich from the local checkout when possible (may fail on shallow clones)
36
+ subject = git_capture(dir, "log", "-1", "--format=%s", gh_sha)
37
+ committed_at = git_capture(dir, "log", "-1", "--format=%cI", gh_sha)
38
+ git[:subject] = subject if subject && !subject.empty?
39
+ git[:committed_at] = committed_at if committed_at && !committed_at.empty?
40
+ return git
41
+ end
42
+
43
+ sha = git_capture(dir, "rev-parse", "HEAD")
44
+ return nil if sha.nil? || sha.empty?
45
+
46
+ git = { sha: sha }
47
+ branch = git_capture(dir, "rev-parse", "--abbrev-ref", "HEAD")
48
+ git[:branch] = branch if branch && !branch.empty? && branch != "HEAD"
49
+ subject = git_capture(dir, "log", "-1", "--format=%s")
50
+ git[:subject] = subject if subject && !subject.empty?
51
+ committed_at = git_capture(dir, "log", "-1", "--format=%cI")
52
+ git[:committed_at] = committed_at if committed_at && !committed_at.empty?
53
+ status = git_capture(dir, "status", "--porcelain")
54
+ git[:dirty] = !status.empty? if status
55
+ git
56
+ end
57
+
58
+ # Run a git command, returning stripped stdout or nil on failure
59
+ # (no git binary, not a repository, etc.).
60
+ def git_capture(dir, *args)
61
+ out = IO.popen(["git", "-C", dir, *args], err: File::NULL, &:read)
62
+ $?.success? ? out.strip : nil
63
+ rescue SystemCallError
64
+ nil
65
+ end
66
+
67
+ # File name used by `rperf record --snapshot-dir`.
68
+ # In a git repository: rperf-<sha7>-<timestamp>.json.gz
69
+ # Outside: rperf-nogit-<timestamp>-<pid>.json.gz
70
+ def snapshot_filename(git, time: Time.now.utc, pid: Process.pid)
71
+ ts = time.utc.strftime("%Y%m%dT%H%M%SZ")
72
+ if git && git[:sha]
73
+ "rperf-#{git[:sha][0, 7]}-#{ts}.json.gz"
74
+ else
75
+ "rperf-nogit-#{ts}-#{pid}.json.gz"
76
+ end
77
+ end
78
+
79
+ # Build the meta hash for a profile about to be written.
80
+ # Git info comes from RPERF_META_GIT (set by the CLI, which collects it
81
+ # before exec so a chdir in the profiled app cannot point at the wrong
82
+ # repository); when unset (direct API usage) it is collected here.
83
+ # RPERF_META_GIT="null" means "already checked, not a repository".
84
+ def build_meta(data)
85
+ meta = {
86
+ format_version: FORMAT_VERSION,
87
+ created_at: Time.now.utc.iso8601,
88
+ ruby_version: RUBY_VERSION,
89
+ rperf_version: Rperf::VERSION,
90
+ mode: (data[:mode] || :cpu).to_s,
91
+ }
92
+ hostname = safe_hostname
93
+ meta[:hostname] = hostname if hostname
94
+ git = git_from_env_or_collect
95
+ meta[:git] = git if git
96
+ labels = labels_from_env
97
+ meta[:labels] = labels if labels && !labels.empty?
98
+ meta
99
+ end
100
+
101
+ def git_from_env_or_collect
102
+ if ENV.key?("RPERF_META_GIT")
103
+ v = ENV["RPERF_META_GIT"].to_s
104
+ return nil if v.empty? || v == "null"
105
+ begin
106
+ JSON.parse(v, symbolize_names: true)
107
+ rescue JSON::ParserError
108
+ nil
109
+ end
110
+ else
111
+ # Memoized (array wraps a legitimate nil): periodic viewer snapshots
112
+ # must not spawn git subprocesses on every take_snapshot!
113
+ @collect_git_memo ||= [collect_git]
114
+ @collect_git_memo[0]
115
+ end
116
+ end
117
+
118
+ def labels_from_env
119
+ v = ENV["RPERF_META_LABELS"]
120
+ return nil unless v
121
+ begin
122
+ labels = JSON.parse(v)
123
+ labels.is_a?(Hash) ? labels : nil
124
+ rescue JSON::ParserError
125
+ nil
126
+ end
127
+ end
128
+
129
+ def safe_hostname
130
+ require "socket"
131
+ Socket.gethostname
132
+ rescue StandardError
133
+ nil
134
+ end
135
+
136
+ # Build the summary hash from profile data (as returned by Rperf.stop).
137
+ # Fields whose source data is missing are omitted.
138
+ def build_summary(data)
139
+ s = {}
140
+ s[:total_ms] = (data[:duration_ns] / 1e6).round(1) if data[:duration_ns]
141
+ if data[:user_ns] || data[:sys_ns]
142
+ s[:cpu_ms] = (((data[:user_ns] || 0) + (data[:sys_ns] || 0)) / 1e6).round(1)
143
+ end
144
+ if (gc = data[:gc_stats])
145
+ s[:gc_count_minor] = gc[:minor_count] if gc[:minor_count]
146
+ s[:gc_count_major] = gc[:major_count] if gc[:major_count]
147
+ s[:gc_ms] = gc[:time_ms].to_f.round(1) if gc[:time_ms]
148
+ s[:allocated_objects] = gc[:allocated_objects] if gc[:allocated_objects]
149
+ s[:freed_objects] = gc[:freed_objects] if gc[:freed_objects]
150
+ end
151
+ s[:maxrss_mb] = data[:maxrss_mb] if data[:maxrss_mb]
152
+ s[:samples] = data[:sampling_count] if data[:sampling_count]
153
+ s[:top_methods] = top_methods(data)
154
+ s
155
+ end
156
+
157
+ # Top methods by self time, merged by method name (shares the by-name
158
+ # fold with Table so report/summary numbers can never diverge).
159
+ def top_methods(data, limit: TOP_METHODS_LIMIT)
160
+ samples = data[:aggregated_samples]
161
+ return [] if !samples || samples.empty?
162
+
163
+ flat_by_name, cum_by_name, total = Table.flat_cum_by_name(data)
164
+ return [] if total <= 0
165
+
166
+ flat_by_name.sort_by { |_, w| -w }.first(limit).map do |name, w|
167
+ {
168
+ name: name,
169
+ self_pct: (w * 100.0 / total).round(1),
170
+ total_pct: (cum_by_name[name] * 100.0 / total).round(1),
171
+ }
172
+ end
173
+ end
174
+
175
+ # --- meta/summary prefix reader ---
176
+
177
+ READ_CHUNK = 64 * 1024
178
+ READ_LIMIT = 8 * 1024 * 1024
179
+
180
+ # Read meta/summary from a .json(.gz) profile without loading the body.
181
+ # Returns { meta: Hash|nil, summary: Hash|nil } or nil for files without
182
+ # leading meta/summary keys (old format) and unreadable/corrupt files.
183
+ def read(path)
184
+ File.open(path, "rb") do |f|
185
+ magic = f.read(2)
186
+ f.rewind
187
+ io = (magic == "\x1f\x8b".b) ? Zlib::GzipReader.new(f) : f
188
+ begin
189
+ buf = "".b
190
+ loop do
191
+ chunk = io.read(READ_CHUNK)
192
+ buf << chunk if chunk
193
+ result = scan_prefix(buf)
194
+ return result unless result == :incomplete
195
+ return nil if chunk.nil? || buf.bytesize > READ_LIMIT
196
+ end
197
+ ensure
198
+ # Free the inflate zstream now — directory listings open many files
199
+ # and the buffers would otherwise linger until GC
200
+ io.close if io.is_a?(Zlib::GzipReader)
201
+ end
202
+ end
203
+ rescue Zlib::Error, SystemCallError, JSON::ParserError
204
+ # Zlib::Error covers GzipFile::Error (truncated) and also DataError /
205
+ # BufError (valid gzip header, corrupt deflate body) — one corrupt
206
+ # snapshot must not break listing an entire directory
207
+ nil
208
+ end
209
+
210
+ # Byte codes used by the scanner. Byte-wise scanning is safe in UTF-8:
211
+ # continuation bytes are >= 0x80 and never collide with ASCII syntax.
212
+ DQUOTE = 0x22
213
+ BSLASH = 0x5c
214
+ LBRACE = 0x7b
215
+ RBRACE = 0x7d
216
+ LBRACKET = 0x5b
217
+ RBRACKET = 0x5d
218
+ COMMA = 0x2c
219
+ COLON = 0x3a
220
+
221
+ # Scan the head of a JSON object for top-level "meta" / "summary" keys.
222
+ # rperf writes them first, so scanning stops at the first other key —
223
+ # large sample arrays are never traversed.
224
+ # Returns { meta:, summary: }, nil (old format / malformed),
225
+ # or :incomplete (need more input).
226
+ def scan_prefix(buf)
227
+ n = buf.bytesize
228
+ i = skip_ws(buf, 0, n)
229
+ return :incomplete if i >= n
230
+ return nil unless buf.getbyte(i) == LBRACE
231
+ i += 1
232
+ found = {}
233
+
234
+ loop do
235
+ i = skip_ws(buf, i, n)
236
+ return :incomplete if i >= n
237
+ return finalize_scan(found) if buf.getbyte(i) == RBRACE
238
+ return nil unless buf.getbyte(i) == DQUOTE
239
+
240
+ key_start = i
241
+ i = scan_string(buf, i, n)
242
+ return :incomplete unless i
243
+ key = buf.byteslice(key_start + 1, i - key_start - 2)
244
+
245
+ # First key that is not meta/summary ends the scan (old format or body)
246
+ return finalize_scan(found) unless key == "meta" || key == "summary"
247
+
248
+ i = skip_ws(buf, i, n)
249
+ return :incomplete if i >= n
250
+ return nil unless buf.getbyte(i) == COLON
251
+ i += 1
252
+ i = skip_ws(buf, i, n)
253
+ return :incomplete if i >= n
254
+
255
+ vstart = i
256
+ i = scan_value(buf, i, n)
257
+ return :incomplete unless i
258
+ fragment = buf.byteslice(vstart, i - vstart).force_encoding(Encoding::UTF_8)
259
+ found[key.to_sym] = JSON.parse(fragment, symbolize_names: true)
260
+ return finalize_scan(found) if found.key?(:meta) && found.key?(:summary)
261
+
262
+ i = skip_ws(buf, i, n)
263
+ return :incomplete if i >= n
264
+ case buf.getbyte(i)
265
+ when COMMA then i += 1
266
+ when RBRACE then return finalize_scan(found)
267
+ else return nil
268
+ end
269
+ end
270
+ rescue JSON::ParserError
271
+ nil
272
+ end
273
+
274
+ def finalize_scan(found)
275
+ found.empty? ? nil : { meta: found[:meta], summary: found[:summary] }
276
+ end
277
+
278
+ def skip_ws(buf, i, n)
279
+ while i < n
280
+ b = buf.getbyte(i)
281
+ break unless b == 0x20 || b == 0x09 || b == 0x0a || b == 0x0d
282
+ i += 1
283
+ end
284
+ i
285
+ end
286
+
287
+ # Scan a JSON string starting at the opening quote.
288
+ # Returns the index just past the closing quote, or nil if truncated.
289
+ def scan_string(buf, i, n)
290
+ j = i + 1
291
+ while j < n
292
+ b = buf.getbyte(j)
293
+ if b == BSLASH
294
+ j += 2
295
+ elsif b == DQUOTE
296
+ return j + 1
297
+ else
298
+ j += 1
299
+ end
300
+ end
301
+ nil
302
+ end
303
+
304
+ # Scan a JSON value (string, container, or scalar) starting at i.
305
+ # Returns the index just past the value, or nil if truncated.
306
+ def scan_value(buf, i, n)
307
+ case buf.getbyte(i)
308
+ when DQUOTE
309
+ scan_string(buf, i, n)
310
+ when LBRACE, LBRACKET
311
+ depth = 0
312
+ j = i
313
+ while j < n
314
+ b = buf.getbyte(j)
315
+ if b == DQUOTE
316
+ j = scan_string(buf, j, n)
317
+ return nil unless j
318
+ elsif b == LBRACE || b == LBRACKET
319
+ depth += 1
320
+ j += 1
321
+ elsif b == RBRACE || b == RBRACKET
322
+ depth -= 1
323
+ j += 1
324
+ return j if depth == 0
325
+ else
326
+ j += 1
327
+ end
328
+ end
329
+ nil
330
+ else
331
+ # scalar: number, true, false, null
332
+ j = i
333
+ while j < n
334
+ b = buf.getbyte(j)
335
+ break if b == COMMA || b == RBRACE || b == RBRACKET ||
336
+ b == 0x20 || b == 0x09 || b == 0x0a || b == 0x0d
337
+ j += 1
338
+ end
339
+ j < n ? j : nil
340
+ end
341
+ end
342
+ end
343
+ end
data/lib/rperf/rack.rb CHANGED
@@ -16,10 +16,15 @@ class Rperf::RackMiddleware
16
16
  @label_proc = label
17
17
  end
18
18
 
19
- UUID_RE = %r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}i
20
- NUMERIC_RE = %r{/\d+}
19
+ UUID_RE = %r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(?=/|\z)}i
20
+ NUMERIC_RE = %r{/\d+(?=/|\z)}
21
21
 
22
22
  def call(env)
23
+ # No-op when the profiler is not running (Rperf.profile would raise):
24
+ # the app may boot without Rperf.start, stop mid-run, or run in a forked
25
+ # worker where the atfork handler silently stopped profiling.
26
+ return @app.call(env) unless Rperf.running?
27
+
23
28
  endpoint = if @label_proc == :raw
24
29
  "#{env["REQUEST_METHOD"]} #{env["PATH_INFO"]}"
25
30
  elsif @label_proc