@bookedsolid/rea 0.44.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,767 @@
1
+ #!/usr/bin/env node
2
+ // 0.45.0 charter item 1 — Hook hot-path profiling harness.
3
+ //
4
+ // # What this measures
5
+ //
6
+ // Every Bash / Edit / Write / MultiEdit / NotebookEdit tool call in
7
+ // Claude Code fires one or more `.claude/hooks/*.sh` shims. 14 shims
8
+ // are registered by default. Cumulative latency matters: 14 × 50ms is
9
+ // 700ms added to every tool call, which the operator FEELS. This
10
+ // harness measures per-shim wall-clock latency under a synthetic
11
+ // payload and writes a baseline so regressions are visible.
12
+ //
13
+ // # Methodology
14
+ //
15
+ // For each shim:
16
+ // 1. Build a representative stdin JSON payload (Claude Code shape)
17
+ // tuned to be "irrelevant" — i.e. the shim runs through its
18
+ // full HALT → stdin-capture → resolve → sandbox → policy
19
+ // short-circuit / version-probe path but does NOT trigger a
20
+ // block. This is the steady-state hot path.
21
+ // 2. Warm up: 2 invocations (discarded). The first invocation has
22
+ // cold filesystem caches + Node startup costs that don't
23
+ // reflect steady-state.
24
+ // 3. Measure: 10 invocations. Capture wall-clock latency + exit code.
25
+ // 4. Compute median / p95 / max from the 10 samples.
26
+ //
27
+ // The shim is invoked via `bash <hook-path>` with stdin piped in, the
28
+ // same way Claude Code invokes them. Environment is preserved so the
29
+ // real-world resolution path runs (node_modules / dist / PATH).
30
+ //
31
+ // # Output
32
+ //
33
+ // Writes `docs/hook-perf-baseline.json` sorted by p95 descending.
34
+ // Shape:
35
+ //
36
+ // {
37
+ // "version": "0.45.0",
38
+ // "measured_at": "2026-05-17T...",
39
+ // "platform": "darwin",
40
+ // "node_version": "v22.x.x",
41
+ // "iterations": 10,
42
+ // "warmup": 2,
43
+ // "hooks": [
44
+ // {
45
+ // "name": "local-review-gate.sh",
46
+ // "median_ms": 123.4,
47
+ // "p95_ms": 145.6,
48
+ // "max_ms": 158.9,
49
+ // "samples_ms": [...],
50
+ // "exit_codes": [0,0,0,0,0,0,0,0,0,0]
51
+ // },
52
+ // ...
53
+ // ]
54
+ // }
55
+ //
56
+ // # Threshold
57
+ //
58
+ // The harness DOES NOT enforce thresholds itself — it's a measurement
59
+ // tool. The regression test at `__tests__/scripts/profile-hooks.test.ts`
60
+ // asserts a permissive ceiling so absolute regressions get caught.
61
+ // Tighten the ceiling over time as the baseline stabilizes.
62
+ //
63
+ // # Wiring
64
+ //
65
+ // `pnpm perf:hooks` runs this script. Not part of the default
66
+ // `pnpm test` chain — it's heavy (160+ subprocess spawns) and timing
67
+ // is sensitive to system load. CI calls it explicitly when the perf
68
+ // guard is active.
69
+
70
+ import { spawnSync } from 'node:child_process';
71
+ import {
72
+ readdirSync,
73
+ readFileSync,
74
+ writeFileSync,
75
+ statSync,
76
+ existsSync,
77
+ mkdirSync,
78
+ } from 'node:fs';
79
+ import path from 'node:path';
80
+ import { fileURLToPath } from 'node:url';
81
+ import { performance } from 'node:perf_hooks';
82
+
83
// ES modules have no built-in __filename / __dirname, so derive both
// from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The repository root is the parent of the directory holding this script.
const REPO_ROOT = path.resolve(__dirname, '..');

// Directory scanned for `*.sh` shims, and the location of the baseline
// report written by the CLI entry point.
const HOOKS_DIR = path.join(REPO_ROOT, 'hooks');
const DOCS_DIR = path.join(REPO_ROOT, 'docs');
const BASELINE_PATH = path.join(REPO_ROOT, 'docs', 'hook-perf-baseline.json');
90
+
91
+ // Permissive default per-shim p95 ceilings. The regression test in
92
+ // `__tests__/scripts/profile-hooks.test.ts` enforces these. Start
93
+ // loose to avoid CI flakes from cold caches / shared runners; tighten
94
+ // in future releases as the baseline stabilizes.
95
+ //
96
+ // `local-review-gate.sh` is a documented outlier — it does its own
97
+ // early sandbox check (round-5 P1) + subtree policy reads + a git
98
+ // stash-create on the forward path. ~1800ms is its current healthy
99
+ // p95 on the rea repo; the ceiling (4500ms) sits 2.5x above for CI headroom.
100
+ // See `docs/hook-perf-baseline.md` for the breakdown.
101
// p95 ceiling (ms) applied to every shim without a dedicated entry below.
const DEFAULT_P95_CEILING_MS = 2000;
// Per-shim p95 ceiling overrides (ms), keyed by shim file name.
const PER_SHIM_P95_CEILING_MS = {
  'local-review-gate.sh': 4500,
};

/**
 * Resolve the p95 ceiling for a given shim. Falls back to the default
 * when no per-shim entry exists.
 *
 * Only OWN properties of the override table count as entries: a bare
 * `table[name]` lookup would also find inherited members such as
 * `toString`, `constructor` or `__proto__` and return a function /
 * object instead of a number.
 *
 * @param {string} name - Shim file name, e.g. `local-review-gate.sh`.
 * @returns {number} The p95 ceiling in milliseconds.
 */
export function ceilingForShim(name) {
  const hasOverride = Object.prototype.hasOwnProperty.call(PER_SHIM_P95_CEILING_MS, name);
  const override = hasOverride ? PER_SHIM_P95_CEILING_MS[name] : undefined;
  return override ?? DEFAULT_P95_CEILING_MS;
}

// Measured invocations per payload variant when the caller gives no count.
const DEFAULT_ITERATIONS = 10;
// Discarded warm-up invocations per payload variant when the caller gives no count.
const DEFAULT_WARMUP = 2;
116
+
117
/**
 * 0.46.0 charter item 3 — per-hook payload variants.
 *
 * Pre-0.46.0 the harness used generic Bash/Write/Edit payloads for
 * EVERY shim. That undercounted latency for the shims that carry a
 * `shim_is_relevant` short-circuit (`attribution-advisory.sh`,
 * `security-disclosure-gate.sh`, `env-file-protection.sh`,
 * `dependency-audit-gate.sh`, `changeset-security-gate.sh`): the
 * generic `ls -la` payload exited at the relevance pre-gate, so the
 * measured latency reflected the short-circuit, not the hot path.
 *
 * Every shim is therefore described by TWO payloads:
 *
 *   - `match`    — crafted to PASS the relevance pre-gate so the shim
 *                  runs its full hot path. This is the latency the
 *                  operator pays when a relevant command lands.
 *   - `no_match` — crafted to FAIL the pre-gate so the shim
 *                  short-circuits. This is the latency paid on every
 *                  irrelevant command, i.e. the dominant cumulative
 *                  cost.
 *
 * Shims without a relevance short-circuit get the SAME string for both
 * fields; `profileHook` detects that by string equality and reports
 * `same_as_match: true`.
 *
 * MATCH payloads are meant to be RELEVANT but NOT REFUSED — they pass
 * the pre-gate but the full CLI body is expected to exit 0. The goal
 * is to measure latency, not to exercise the refusal path.
 *
 * Every event is serialized with the key order `tool_name`,
 * `tool_input`, (`tool_response`,) `hook_event_name`.
 *
 * @param {string} name - Shim file name, e.g. `secret-scanner.sh`.
 *   Unknown names fall back to the generic Bash payload for both
 *   variants.
 * @returns {{ match: string, no_match: string }} Two non-null JSON
 *   event strings.
 */
export function payloadVariantsForHook(name) {
  // Event builders. Parameter names mirror the JSON keys so property
  // shorthand reproduces the exact key order of the serialized event.
  const preToolUse = (tool_name, tool_input) =>
    JSON.stringify({ tool_name, tool_input, hook_event_name: 'PreToolUse' });
  const postToolUse = (tool_name, tool_input) =>
    JSON.stringify({
      tool_name,
      tool_input,
      tool_response: { success: true },
      hook_event_name: 'PostToolUse',
    });
  const bashEvent = (command, description) => preToolUse('Bash', { command, description });
  const writeEvent = (file_path, content) => preToolUse('Write', { file_path, content });
  // One event reused for both variants (shims with no relevance pre-gate).
  const sameForBoth = (event) => ({ match: event, no_match: event });

  // Reusable generic events.
  const benignBashEvent = bashEvent('ls -la', 'list current directory');
  const benignWriteEvent = writeEvent('/tmp/rea-profile-scratch.ts', 'export const x = 1;\n');

  switch (name) {
    case 'architecture-review-gate.sh':
      // PostToolUse on every Edit — no relevance pre-gate at the shim
      // tier; the CLI body decides. Both variants exercise the same
      // path.
      return sameForBoth(
        postToolUse('Edit', { file_path: '/tmp/scratch.ts', old_string: 'a', new_string: 'b' }),
      );

    case 'attribution-advisory.sh':
      // Pre-gate: substring match for `git commit` OR `gh pr (create|edit)`.
      // MATCH: `git commit -m "feat: noop"` (no AI attribution markers,
      //   so the CLI body is expected to exit 0 after its full check).
      // NO_MATCH: `git status` (no commit/pr-create substring).
      return {
        match: bashEvent('git commit -m "feat: noop"', 'noop commit'),
        no_match: bashEvent('git status', 'check status'),
      };

    case 'blocked-paths-bash-gate.sh':
      // Shim has only `shim_cli_missing_relevant` (CLI-missing only).
      // Under normal CLI-reachable steady state, both variants run
      // the full CLI body. Same payload for both.
      return sameForBoth(benignBashEvent);

    case 'blocked-paths-enforcer.sh':
      // Same as above — CLI-missing-only relevance gate. Both variants
      // hit the full CLI body when CLI is reachable.
      return sameForBoth(benignWriteEvent);

    case 'changeset-security-gate.sh':
      // Pre-gate: file_path / notebook_path contains `.changeset/`.
      // MATCH: a benign changeset frontmatter (no GHSA reference so
      //   the CLI body's disclosure scan exits 0).
      // NO_MATCH: the generic Write payload (its path has no
      //   `.changeset/` substring).
      return {
        match: writeEvent(
          '/tmp/changeset-profile/.changeset/perf-noop.md',
          '---\n"@scope/pkg": patch\n---\n\nperf noop\n',
        ),
        no_match: benignWriteEvent,
      };

    case 'dangerous-bash-interceptor.sh':
      // No `shim_is_relevant` — every Bash event goes through the
      // full CLI body, so both variants use the generic `ls -la`
      // payload. (The CLI-missing path uses `shim_cli_missing_relevant`,
      // a different branch that only fires when dist/cli is missing.)
      return sameForBoth(benignBashEvent);

    case 'delegation-advisory.sh':
      // PostToolUse on Bash|Edit|Write|MultiEdit|NotebookEdit. No
      // relevance pre-gate; CLI body decides. Both same.
      return sameForBoth(postToolUse('Write', { file_path: '/tmp/scratch.ts', content: 'x' }));

    case 'delegation-capture.sh':
      // PreToolUse on Agent|Skill matcher — every Agent/Skill event
      // goes through the CLI body. Both variants are the same.
      return sameForBoth(
        preToolUse('Agent', { subagent_type: 'general-purpose', prompt: 'noop' }),
      );

    case 'dependency-audit-gate.sh':
      // Pre-gate: substring match for `(npm|pnpm|yarn) (install|i|add) `.
      // MATCH: `pnpm add ./local-pkg` — passes the segment-anchored
      //   install matcher, but the package-name extractor skips
      //   `./` / `/` / `../` tokens as path installs, so the hook
      //   returns exit 0 WITHOUT an `npm view` network call.
      //   Codex round-1 P2 (0.46.0): the earlier `pnpm add lodash`
      //   payload triggered the real registry probe and made the
      //   harness unusable on offline / firewalled machines.
      // NO_MATCH: `ls -la` (no install verb → segment matcher misses).
      return {
        match: bashEvent('pnpm add ./local-pkg', 'install a local path package'),
        no_match: benignBashEvent,
      };

    case 'env-file-protection.sh':
      // Pre-gate: `.env` substring in tool_input.command.
      // MATCH: `cat .env.example` — relevant (`.env` substring) but
      //   benign (`.env.example` is excluded by the CLI body's
      //   co-occurrence + suffix logic).
      // NO_MATCH: `ls -la` (no `.env`).
      return {
        match: bashEvent('cat .env.example', 'check example env'),
        no_match: benignBashEvent,
      };

    case 'local-review-gate.sh':
      // Pre-gate is policy-driven on `review.local_review.refuse_at`.
      // Default `refuse_at: push` triggers on `git push`, but the
      // body fails CLOSED when the policy is enforced — a `git push`
      // payload would be refused and exit non-zero, breaking the
      // round-1 P2 #2 "every shim exits 0" contract. The only bypass
      // (`REA_SKIP_LOCAL_REVIEW`) is read from the environment, not
      // from the payload, so it cannot be driven via JSON and is not
      // set globally by the harness.
      //
      // Net: the generic `ls -la` payload is used for BOTH variants.
      // A non-`git push` command exits before the heavy forward
      // path, so the genuine `git push` hot path is NOT measured
      // here. The baseline doc notes this limitation explicitly.
      return sameForBoth(benignBashEvent);

    case 'pr-issue-link-gate.sh':
      // No `shim_is_relevant`. Advisory-tier; CLI body decides.
      // Both variants are the same (`same_as_match: true` in the
      // baseline) — the CLI body's `gh pr create` matcher fires only
      // on that exact prefix, but the shim-tier latency is identical
      // either way.
      return sameForBoth(benignBashEvent);

    case 'protected-paths-bash-gate.sh':
      // CLI-missing-only relevance gate. Under normal CLI-reachable
      // steady state both variants run the full CLI body.
      return sameForBoth(benignBashEvent);

    case 'secret-scanner.sh':
      // Pre-gate short-circuits on empty content or `.env.example` /
      // `.env.sample` suffix.
      // MATCH: the generic `.ts` Write with non-credential content —
      //   the CLI body runs the full pattern catalog and exits 0.
      // NO_MATCH: a Write to `/tmp/scratch.env.example` — pre-gate
      //   suffix short-circuit fires.
      return {
        match: benignWriteEvent,
        no_match: writeEvent('/tmp/scratch.env.example', 'EXAMPLE_VAR=changeme\n'),
      };

    case 'security-disclosure-gate.sh':
      // Pre-gate: substring match for `gh issue create`.
      // MATCH: `gh issue create --title "docs: noop"` — relevant,
      //   but no security keywords so the CLI body exits 0.
      // NO_MATCH: `gh issue list` (no `create`).
      return {
        match: bashEvent('gh issue create --title "docs: noop"', 'create a docs issue'),
        no_match: bashEvent('gh issue list', 'list issues'),
      };

    case 'settings-protection.sh':
      // CLI-missing-only relevance gate. Under normal CLI-reachable
      // steady state both variants run the full CLI body.
      return sameForBoth(benignWriteEvent);

    default:
      // Conservative fallback: a benign Bash payload for both.
      return sameForBoth(benignBashEvent);
  }
}
423
+
424
/**
 * Legacy single-payload accessor, kept exported for callers that
 * predate the 0.46.0 `match` / `no_match` split (e.g. the existing
 * regression test).
 *
 * Returns the `match` variant produced by `payloadVariantsForHook`.
 * New callers should use `payloadVariantsForHook(name)` directly.
 *
 * @param {string} name - Shim file name.
 * @returns {string} The `match` JSON event string for that shim.
 */
export function payloadForHook(name) {
  const { match } = payloadVariantsForHook(name);
  return match;
}
437
+
438
/**
 * Enumerate the shims to profile: every entry directly under
 * `hooksDir` whose name ends in `.sh` and which stats as a regular
 * file. Directories (such as `_lib/`) and entries that cannot be
 * stat'ed are left out.
 *
 * @param {string} [hooksDir] - Directory to scan; defaults to the
 *   repository's hooks directory.
 * @returns {string[]} Matching file names in default sort order.
 */
export function listShims(hooksDir = HOOKS_DIR) {
  const shimNames = [];
  for (const entry of readdirSync(hooksDir)) {
    if (!entry.endsWith('.sh')) {
      continue;
    }
    let isRegularFile;
    try {
      isRegularFile = statSync(path.join(hooksDir, entry)).isFile();
    } catch {
      // Unreadable / vanished entry — treat it as "not a shim".
      isRegularFile = false;
    }
    if (isRegularFile) {
      shimNames.push(entry);
    }
  }
  return shimNames.sort();
}
454
+
455
/**
 * Invoke one shim via `bash <hookPath>` with `payload` on stdin and
 * time the whole synchronous spawn.
 *
 * The child inherits the current environment plus
 * `CLAUDE_PROJECT_DIR` pointing at the repository root, and is given a
 * 30 second timeout.
 *
 * @param {string} hookPath - Path of the shim script.
 * @param {string} payload - JSON event string piped to stdin.
 * @returns {{ ms: number, status: number }} Elapsed wall-clock
 *   milliseconds and the exit code (`-1` when there is no exit code).
 */
function runOnce(hookPath, payload) {
  const startedAt = performance.now();
  const child = spawnSync('bash', [hookPath], {
    input: payload,
    encoding: 'utf8',
    timeout: 30000,
    env: { ...process.env, CLAUDE_PROJECT_DIR: REPO_ROOT },
  });
  const ms = performance.now() - startedAt;

  // A null status means the child produced no exit code (timeout /
  // killed by a signal); report -1 so the caller can flag the sample.
  let status = child.status;
  if (status === null) {
    status = -1;
  }
  return { ms, status };
}
472
+
473
/**
 * Nearest-rank percentile of an ascending-sorted array of numbers.
 *
 * The rank is `ceil(p * n / 100)`, clamped to `[1, n]`. The
 * multiplication happens BEFORE the division on purpose: `(p / 100) * n`
 * picks up floating-point error (e.g. `0.07 * 100` is
 * `7.000000000000001`), which `Math.ceil` would then round up to the
 * next rank.
 *
 * @param {number[]} sorted - Samples sorted ascending.
 * @param {number} p - Percentile in the range 0–100.
 * @returns {number} The selected sample, or 0 for an empty array.
 */
function percentile(sorted, p) {
  if (sorted.length === 0) return 0;
  const rank = Math.ceil((p * sorted.length) / 100);
  const idx = Math.min(sorted.length - 1, Math.max(0, rank - 1));
  return sorted[idx];
}
481
+
482
/**
 * Run a measurement sweep for a single payload variant and return
 * the per-variant record. Helper for `profileHook`, which runs both
 * the `match` and `no_match` variants per shim (0.46.0 charter item 3).
 *
 * `warmup` invocations run first and are discarded; `iterations`
 * invocations are then timed. Any non-zero exit code among the timed
 * samples is reported through the `error` field, because the latency
 * of an error path is not representative of the hot path.
 *
 * With zero timed samples every statistic is reported as 0 (rather
 * than `max_ms` degrading to NaN, which serializes as `null`).
 *
 * @param {string} hookPath - Path of the shim script.
 * @param {string} payload - JSON event string piped to the shim.
 * @param {number} iterations - Number of timed invocations.
 * @param {number} warmup - Number of discarded warm-up invocations.
 * @returns {{ median_ms: number, p95_ms: number, max_ms: number,
 *   samples_ms: number[], exit_codes: number[], error: (string|null) }}
 */
function measureVariant(hookPath, payload, iterations, warmup) {
  for (let i = 0; i < warmup; i += 1) {
    runOnce(hookPath, payload);
  }

  const samples = [];
  const exitCodes = [];
  for (let i = 0; i < iterations; i += 1) {
    const r = runOnce(hookPath, payload);
    samples.push(r.ms);
    exitCodes.push(r.status);
  }

  // Sort a copy: `samples_ms` keeps the invocation order.
  const sorted = [...samples].sort((a, b) => a - b);
  const median = percentile(sorted, 50);
  const p95 = percentile(sorted, 95);
  // Mirror percentile()'s empty-input convention so the record stays numeric.
  const max = sorted.length > 0 ? sorted[sorted.length - 1] : 0;

  const nonZero = exitCodes.filter((c) => c !== 0);
  const error =
    nonZero.length > 0
      ? `${nonZero.length}/${exitCodes.length} samples exited non-zero ` +
        `(codes: ${exitCodes.join(',')}). Synthetic payload likely hit an ` +
        `error path; latency is NOT representative of the hot path. ` +
        `Tune the payload in payloadVariantsForHook() so this shim exits 0.`
      : null;

  return {
    median_ms: round(median),
    p95_ms: round(p95),
    max_ms: round(max),
    samples_ms: samples.map(round),
    exit_codes: exitCodes,
    error,
  };
}
519
+
520
/**
 * Profile one shim and return its measurement record.
 *
 * 0.45.0 codex round-1 P2 #2: every shim is expected to exit 0 under
 * its synthetic non-blocking payload. A non-zero exit means an ERROR
 * path was timed instead of the hot path; the record's `error` field
 * surfaces that so callers can fail loudly instead of shipping a
 * misleading baseline.
 *
 * 0.46.0 charter item 3: the shim is measured with its `match`
 * payload and, when the `no_match` payload is a different string,
 * measured a second time with that one. When both payloads are the
 * same string the second sweep is skipped, `same_as_match` is `true`
 * and `no_match` is `null`.
 *
 * The MATCH measurement is spread into the top level of the record
 * (`median_ms`, `p95_ms`, `max_ms`, `samples_ms`, `exit_codes`,
 * `error`), which preserves the pre-0.46.0 record shape; the NO_MATCH
 * measurement is nested under `no_match`.
 *
 * @param {string} name - Shim file name.
 * @param {{ iterations?: number, warmup?: number, hooksDir?: string }} [opts]
 *   Overrides; nullish fields fall back to the module defaults.
 * @returns {object} The per-shim record described above.
 */
export function profileHook(name, opts = {}) {
  const iterations = opts.iterations ?? DEFAULT_ITERATIONS;
  const warmup = opts.warmup ?? DEFAULT_WARMUP;
  const hookPath = path.join(opts.hooksDir ?? HOOKS_DIR, name);

  const { match, no_match: noMatch } = payloadVariantsForHook(name);
  const sameAsMatch = match === noMatch;

  const matchMeas = measureVariant(hookPath, match, iterations, warmup);
  let noMatchMeas = null;
  if (!sameAsMatch) {
    noMatchMeas = measureVariant(hookPath, noMatch, iterations, warmup);
  }

  return {
    name,
    // MATCH variant at the top level (backwards-compatible shape).
    ...matchMeas,
    // Per-variant breakout added in 0.46.0.
    same_as_match: sameAsMatch,
    no_match: noMatchMeas,
  };
}
574
+
575
/**
 * Round a number to two decimal places.
 *
 * @param {number} n - Value to round.
 * @returns {number} `n` rounded to the nearest hundredth.
 */
function round(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
578
+
579
/**
 * Profile every requested shim and assemble the report object.
 *
 * Shims come from `opts.shims` when given, otherwise from
 * `listShims(hooksDir)`; names that do not stat as a regular file
 * inside `hooksDir` are dropped. Records are ordered by match-variant
 * p95, slowest first, and each one is annotated with its resolved
 * ceiling (`p95_ceiling_ms`) and whether its top-level p95 exceeds it
 * (`over_budget`), so the baseline JSON documents the threshold inline.
 *
 * @param {{ hooksDir?: string, iterations?: number, warmup?: number,
 *   shims?: string[] }} [opts] - Overrides; nullish fields fall back
 *   to the module defaults.
 * @returns {object} The report: run metadata, the ceiling tables and
 *   the `hooks` record list.
 */
export function runProfile(opts = {}) {
  const hooksDir = opts.hooksDir ?? HOOKS_DIR;
  const iterations = opts.iterations ?? DEFAULT_ITERATIONS;
  const warmup = opts.warmup ?? DEFAULT_WARMUP;

  // Defensive: skip anything that is not a regular file.
  const isRegularFile = (shimName) => {
    try {
      return statSync(path.join(hooksDir, shimName)).isFile();
    } catch {
      return false;
    }
  };

  const candidates = opts.shims ?? listShims(hooksDir);
  const records = candidates
    .filter(isRegularFile)
    .map((shimName) => profileHook(shimName, { iterations, warmup, hooksDir }));

  // Slowest first, so the leaders are at the top of the report.
  records.sort((a, b) => b.p95_ms - a.p95_ms);

  const hooks = records.map((record) => {
    const ceiling = ceilingForShim(record.name);
    return {
      ...record,
      p95_ceiling_ms: ceiling,
      over_budget: record.p95_ms > ceiling,
    };
  });

  return {
    version: getPkgVersion(),
    measured_at: new Date().toISOString(),
    platform: process.platform,
    node_version: process.version,
    iterations,
    warmup,
    default_p95_ceiling_ms: DEFAULT_P95_CEILING_MS,
    per_shim_p95_ceiling_ms: PER_SHIM_P95_CEILING_MS,
    hooks,
  };
}
625
+
626
/**
 * Read the `version` field of the repository's `package.json`.
 *
 * Best-effort: a missing / unreadable / unparsable manifest, or one
 * without a version, yields `'0.0.0'` instead of an exception.
 *
 * @returns {string} The package version, or `'0.0.0'` as a fallback.
 */
function getPkgVersion() {
  const fallback = '0.0.0';
  try {
    const manifestPath = path.join(REPO_ROOT, 'package.json');
    const { version } = JSON.parse(readFileSync(manifestPath, 'utf8'));
    return version ?? fallback;
  } catch {
    return fallback;
  }
}
634
+
635
/**
 * Parse a `--flag=<non-negative integer>` CLI argument.
 *
 * @param {string[]} args - CLI arguments.
 * @param {string} flag - Flag name including the leading dashes.
 * @param {number} fallback - Value used when the flag is absent.
 * @param {number} min - Smallest accepted value.
 * @returns {number} The parsed value, or `fallback` when absent.
 * @throws {Error} When the value is not a plain integer >= `min`.
 */
function parseCountFlag(args, flag, fallback, min) {
  const prefix = `${flag}=`;
  const arg = args.find((a) => a.startsWith(prefix));
  if (arg === undefined) return fallback;

  const raw = arg.slice(prefix.length);
  const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(value) || value < min) {
    throw new Error(`${flag} must be an integer >= ${min} (got "${raw}")`);
  }
  return value;
}

/**
 * CLI entry. Profiles every shim, prints a summary to stderr, and
 * writes the report to disk when all checks pass.
 *
 * Flags:
 *   --dry-run          print the report JSON to stdout, never write it
 *   --iterations=<n>   timed invocations per variant (integer >= 1)
 *   --warmup=<n>       discarded invocations per variant (integer >= 0)
 *
 * Exit codes: 2 when any shim ran a non-zero error path, 1 when any
 * shim exceeded its p95 ceiling, 0 otherwise. Failure exits are
 * signalled through `process.exitCode` rather than `process.exit()` so
 * pending stdout writes (the dry-run JSON) are flushed before the
 * process ends.
 *
 * Invalid flag values are rejected up front: an unparsable or zero
 * iteration count would otherwise time nothing, pass both checks with
 * all-zero statistics, and overwrite the last-known-good baseline.
 *
 * @returns {Promise<void>}
 * @throws {Error} On an invalid `--iterations` / `--warmup` value.
 */
async function main() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const iterations = parseCountFlag(args, '--iterations', DEFAULT_ITERATIONS, 1);
  const warmup = parseCountFlag(args, '--warmup', DEFAULT_WARMUP, 0);

  const shims = listShims();
  process.stderr.write(
    `[profile-hooks] profiling ${shims.length} shims ` +
      `(${iterations} iterations + ${warmup} warmup each) — this takes ~30-60s\n`,
  );

  const report = runProfile({ iterations, warmup, shims });
  const json = JSON.stringify(report, null, 2) + '\n';

  // Human-readable summary on stderr (top 5 by MATCH p95).
  // 0.46.0 charter item 3: surface the relevance-MATCH p95 (hot path)
  // alongside the no-match p95 (short-circuit) so the operator sees
  // both at a glance. Shims without a separate no_match measurement
  // render that column as `—`, padded to the 9-character width of the
  // "<7 chars>ms" form so the columns stay aligned.
  process.stderr.write('\n[profile-hooks] p95 leaders (MATCH = hot path, NO_MATCH = short-circuit):\n');
  for (const r of report.hooks.slice(0, 5)) {
    const matchP95 = String(r.p95_ms).padStart(7);
    const noMatchP95 =
      r.no_match !== null ? `${String(r.no_match.p95_ms).padStart(7)}ms` : '—'.padStart(9);
    process.stderr.write(
      ` ${r.name.padEnd(32)} ` +
        `match.p95=${matchP95}ms ` +
        `no_match.p95=${noMatchP95} ` +
        `median=${String(r.median_ms).padStart(7)}ms ` +
        `max=${String(r.max_ms).padStart(7)}ms\n`,
    );
  }

  // 0.45.0 codex round-1 P2 #2: fail loudly if any shim ran a
  // non-zero-exit error path — the latency number is meaningless in
  // that case and the baseline would silently ship lies.
  //
  // 0.45.0 codex round-2 P2 #3: this AND the over-budget check below
  // run BEFORE the baseline write — a failed measurement run must
  // NOT clobber the checked-in last-known-good baseline. The dry-run
  // branch still emits JSON for inspection regardless.
  //
  // 0.46.0 charter item 3: check BOTH match and no_match variants.
  // Either error path means the synthetic payload is wrong.
  const errored = report.hooks.filter(
    (h) => h.error !== null || (h.no_match !== null && h.no_match.error !== null),
  );
  if (errored.length > 0) {
    process.stderr.write(
      `\n[profile-hooks] ${errored.length} shim(s) ran a non-zero error path:\n`,
    );
    for (const h of errored) {
      if (h.error !== null) {
        process.stderr.write(` ${h.name} [match]: ${h.error}\n`);
      }
      if (h.no_match !== null && h.no_match.error !== null) {
        process.stderr.write(` ${h.name} [no_match]: ${h.no_match.error}\n`);
      }
    }
    process.stderr.write(
      `[profile-hooks] NOT writing ${BASELINE_PATH} — last-known-good baseline preserved.\n`,
    );
    if (dryRun) process.stdout.write(json);
    process.exitCode = 2;
    return;
  }

  // 0.46.0 charter item 3: enforce the ceiling on both variants. The
  // no_match short-circuit should be much faster than the match hot
  // path; if it exceeds the same ceiling that's a sign of regression
  // in the pre-gate path itself (e.g. an inadvertent CLI spawn before
  // shim_is_relevant fires).
  const overBudget = report.hooks.filter(
    (h) =>
      h.p95_ms > ceilingForShim(h.name) ||
      (h.no_match !== null && h.no_match.p95_ms > ceilingForShim(h.name)),
  );
  if (overBudget.length > 0) {
    process.stderr.write(
      `\n[profile-hooks] ${overBudget.length} shim(s) exceeded the p95 ceiling:\n`,
    );
    for (const h of overBudget) {
      const ceiling = ceilingForShim(h.name);
      if (h.p95_ms > ceiling) {
        process.stderr.write(` ${h.name} [match] p95=${h.p95_ms}ms (ceiling=${ceiling}ms)\n`);
      }
      if (h.no_match !== null && h.no_match.p95_ms > ceiling) {
        process.stderr.write(
          ` ${h.name} [no_match] p95=${h.no_match.p95_ms}ms (ceiling=${ceiling}ms)\n`,
        );
      }
    }
    process.stderr.write(
      `[profile-hooks] NOT writing ${BASELINE_PATH} — last-known-good baseline preserved.\n`,
    );
    if (dryRun) process.stdout.write(json);
    process.exitCode = 1;
    return;
  }

  // All checks passed — safe to persist the baseline.
  if (dryRun) {
    process.stdout.write(json);
    return;
  }
  // Create the output directory only when something is actually
  // written, so dry runs and failed runs leave the filesystem untouched.
  if (!existsSync(DOCS_DIR)) {
    mkdirSync(DOCS_DIR, { recursive: true });
  }
  writeFileSync(BASELINE_PATH, json);
  process.stderr.write(`[profile-hooks] wrote ${BASELINE_PATH}\n`);
}
751
+
752
// Run main only when invoked directly (not when imported by tests):
// the script path Node was started with must resolve to this module.
const invokedDirectly = process.argv[1] && path.resolve(process.argv[1]) === __filename;
if (invokedDirectly) {
  main().catch((e) => {
    // A rejection is not guaranteed to be an Error instance; fall back
    // to its string form instead of printing "undefined".
    const reason = e instanceof Error ? e.message : String(e);
    process.stderr.write(`[profile-hooks] FAILED: ${reason}\n`);
    process.exit(1);
  });
}
760
+
761
+ export {
762
+ BASELINE_PATH,
763
+ DEFAULT_P95_CEILING_MS,
764
+ PER_SHIM_P95_CEILING_MS,
765
+ DEFAULT_ITERATIONS,
766
+ DEFAULT_WARMUP,
767
+ };