@bookedsolid/rea 0.44.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/audit-by-tool.d.ts +173 -0
- package/dist/cli/audit-by-tool.js +373 -0
- package/dist/cli/audit-timeline.d.ts +160 -0
- package/dist/cli/audit-timeline.js +481 -0
- package/dist/cli/index.js +10 -0
- package/dist/cli/init.d.ts +109 -27
- package/dist/cli/init.js +191 -34
- package/package.json +3 -1
- package/scripts/profile-hooks.mjs +767 -0
|
@@ -0,0 +1,767 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// 0.45.0 charter item 1 — Hook hot-path profiling harness.
|
|
3
|
+
//
|
|
4
|
+
// # What this measures
|
|
5
|
+
//
|
|
6
|
+
// Every Bash / Edit / Write / MultiEdit / NotebookEdit tool call in
|
|
7
|
+
// Claude Code fires one or more `.claude/hooks/*.sh` shims. 14 shims
|
|
8
|
+
// are registered by default. Cumulative latency matters: 14 × 50ms is
|
|
9
|
+
// 700ms added to every tool call, which the operator FEELS. This
|
|
10
|
+
// harness measures per-shim wall-clock latency under a synthetic
|
|
11
|
+
// payload and writes a baseline so regressions are visible.
|
|
12
|
+
//
|
|
13
|
+
// # Methodology
|
|
14
|
+
//
|
|
15
|
+
// For each shim:
|
|
16
|
+
// 1. Build a representative stdin JSON payload (Claude Code shape)
|
|
17
|
+
// tuned to be "irrelevant" — i.e. the shim runs through its
|
|
18
|
+
// full HALT → stdin-capture → resolve → sandbox → policy
|
|
19
|
+
// short-circuit / version-probe path but does NOT trigger a
|
|
20
|
+
// block. This is the steady-state hot path.
|
|
21
|
+
// 2. Warm up: 2 invocations (discarded). The first invocation has
|
|
22
|
+
// cold filesystem caches + Node startup costs that don't
|
|
23
|
+
// reflect steady-state.
|
|
24
|
+
// 3. Measure: 10 invocations. Capture wall-clock latency + exit code.
|
|
25
|
+
// 4. Compute median / p95 / max from the 10 samples.
|
|
26
|
+
//
|
|
27
|
+
// The shim is invoked via `bash <hook-path>` with stdin piped in, the
|
|
28
|
+
// same way Claude Code invokes them. Environment is preserved so the
|
|
29
|
+
// real-world resolution path runs (node_modules / dist / PATH).
|
|
30
|
+
//
|
|
31
|
+
// # Output
|
|
32
|
+
//
|
|
33
|
+
// Writes `docs/hook-perf-baseline.json` sorted by p95 descending.
|
|
34
|
+
// Shape:
|
|
35
|
+
//
|
|
36
|
+
// {
|
|
37
|
+
// "version": "0.45.0",
|
|
38
|
+
// "measured_at": "2026-05-17T...",
|
|
39
|
+
// "platform": "darwin",
|
|
40
|
+
// "node_version": "v22.x.x",
|
|
41
|
+
// "iterations": 10,
|
|
42
|
+
// "warmup": 2,
|
|
43
|
+
// "hooks": [
|
|
44
|
+
// {
|
|
45
|
+
// "name": "local-review-gate.sh",
|
|
46
|
+
// "median_ms": 123.4,
|
|
47
|
+
// "p95_ms": 145.6,
|
|
48
|
+
// "max_ms": 158.9,
|
|
49
|
+
// "samples_ms": [...],
|
|
50
|
+
// "exit_codes": [0,0,0,0,0,0,0,0,0,0]
|
|
51
|
+
// },
|
|
52
|
+
// ...
|
|
53
|
+
// ]
|
|
54
|
+
// }
|
|
55
|
+
//
|
|
56
|
+
// # Threshold
|
|
57
|
+
//
|
|
58
|
+
// The harness DOES NOT enforce thresholds itself — it's a measurement
|
|
59
|
+
// tool. The regression test at `__tests__/scripts/profile-hooks.test.ts`
|
|
60
|
+
// asserts a permissive ceiling so absolute regressions get caught.
|
|
61
|
+
// Tighten the ceiling over time as the baseline stabilizes.
|
|
62
|
+
//
|
|
63
|
+
// # Wiring
|
|
64
|
+
//
|
|
65
|
+
// `pnpm perf:hooks` runs this script. Not part of the default
|
|
66
|
+
// `pnpm test` chain — it's heavy (160+ subprocess spawns) and timing
|
|
67
|
+
// is sensitive to system load. CI calls it explicitly when the perf
|
|
68
|
+
// guard is active.
|
|
69
|
+
|
|
70
|
+
import { spawnSync } from 'node:child_process';
|
|
71
|
+
import {
|
|
72
|
+
readdirSync,
|
|
73
|
+
readFileSync,
|
|
74
|
+
writeFileSync,
|
|
75
|
+
statSync,
|
|
76
|
+
existsSync,
|
|
77
|
+
mkdirSync,
|
|
78
|
+
} from 'node:fs';
|
|
79
|
+
import path from 'node:path';
|
|
80
|
+
import { fileURLToPath } from 'node:url';
|
|
81
|
+
import { performance } from 'node:perf_hooks';
|
|
82
|
+
|
|
83
|
+
// Absolute locations derived from this script's own path: the script
// sits one directory below the repository root.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const REPO_ROOT = path.resolve(__dirname, '..');

// Shims to profile are read from `<repo>/hooks`; the baseline report
// path is `<repo>/docs/hook-perf-baseline.json`.
const HOOKS_DIR = path.join(REPO_ROOT, 'hooks');
const DOCS_DIR = path.join(REPO_ROOT, 'docs');
const BASELINE_PATH = path.join(DOCS_DIR, 'hook-perf-baseline.json');
|
|
90
|
+
|
|
91
|
+
// Permissive default per-shim p95 ceilings. The regression test in
// `__tests__/scripts/profile-hooks.test.ts` enforces these. Start
// loose to avoid CI flakes from cold caches / shared runners; tighten
// in future releases as the baseline stabilizes.
//
// `local-review-gate.sh` is a documented outlier — it does its own
// early sandbox check (round-5 P1) + subtree policy reads + a git
// stash-create on the forward path. ~1800ms is its current healthy
// p95 on the rea repo; the 4500ms ceiling sits ~2.5x above that for
// CI headroom. See `docs/hook-perf-baseline.md` for the breakdown.
const DEFAULT_P95_CEILING_MS = 2000;
const PER_SHIM_P95_CEILING_MS = {
  'local-review-gate.sh': 4500,
};

/**
 * Resolve the p95 ceiling (in milliseconds) for a given shim.
 *
 * @param {string} name - Shim file name, e.g. `local-review-gate.sh`.
 * @returns {number} The per-shim override when one is configured,
 *   otherwise `DEFAULT_P95_CEILING_MS`.
 */
export function ceilingForShim(name) {
  // Only OWN entries count as overrides. A bare `table[name]` lookup
  // would return inherited Object.prototype members (functions) for
  // names such as `constructor` or `toString` instead of the default.
  const override = Object.hasOwn(PER_SHIM_P95_CEILING_MS, name)
    ? PER_SHIM_P95_CEILING_MS[name]
    : undefined;
  return override ?? DEFAULT_P95_CEILING_MS;
}
|
|
113
|
+
|
|
114
|
+
// Sweep sizes used when the caller does not override them: the number
// of measured invocations per payload variant, and the number of
// discarded warm-up invocations that run before them.
const DEFAULT_ITERATIONS = 10;
const DEFAULT_WARMUP = 2;
|
|
116
|
+
|
|
117
|
+
/**
 * 0.46.0 charter item 3 — per-hook payload variants.
 *
 * Pre-0.46.0 the harness fed generic Bash/Write/Edit payloads to EVERY
 * shim. That undercounted latency for the gates that have a
 * `shim_is_relevant` short-circuit (`attribution-advisory.sh`,
 * `security-disclosure-gate.sh`, `env-file-protection.sh`,
 * `dependency-audit-gate.sh`, `changeset-security-gate.sh`,
 * `local-review-gate.sh`): the generic `ls -la` payload exited at the
 * relevance pre-gate, so the measured number reflected the
 * short-circuit, not the path the shim runs when a relevant command
 * actually comes through. `secret-scanner.sh` short-circuits on empty
 * content; the generic write payload had content, so it was already
 * measured on the real path — pinning a MATCH variant just makes that
 * contract explicit.
 *
 * Every shim is therefore described by TWO payloads:
 *
 *   - `match`    — crafted to PASS `shim_is_relevant` so the shim runs
 *                  its full hot path (sandbox check + version probe +
 *                  Node CLI forward + actual body work). This is the
 *                  latency the operator pays when a relevant command
 *                  lands.
 *   - `no_match` — crafted to FAIL `shim_is_relevant` so the shim
 *                  short-circuits at the pre-gate. This is the latency
 *                  paid on EVERY irrelevant command — the dominant
 *                  cumulative cost, since most commands are irrelevant
 *                  to most shims.
 *
 * Shims without a relevance short-circuit (the always-on tier:
 * dangerous-bash-interceptor, blocked-paths-*, settings-protection,
 * delegation-capture, delegation-advisory, architecture-review-gate,
 * pr-issue-link-gate) get the SAME string for `match` and `no_match`.
 * The `no_match` field is still returned so the shape is uniform;
 * `profileHook` detects the equality and reports `same_as_match: true`.
 *
 * MATCH payloads are crafted to be RELEVANT but NOT REFUSED — they pass
 * the substring pre-gate but the full CLI body is expected to exit 0.
 * The goal is to measure latency, not to exercise the refusal path.
 *
 * @param {string} name - Shim file name, e.g. `secret-scanner.sh`.
 *   Unrecognised names get the generic benign Bash event for both
 *   variants.
 * @returns {{ match: string, no_match: string }} Two non-null JSON
 *   event strings, ready to be piped to the shim's stdin.
 */
export function payloadVariantsForHook(name) {
  // Event serialisers. Key order is fixed (tool_name, tool_input,
  // [tool_response], hook_event_name) so the emitted JSON text is
  // stable from run to run.
  const preToolUse = (toolName, toolInput) =>
    JSON.stringify({
      tool_name: toolName,
      tool_input: toolInput,
      hook_event_name: 'PreToolUse',
    });
  const postToolUse = (toolName, toolInput) =>
    JSON.stringify({
      tool_name: toolName,
      tool_input: toolInput,
      tool_response: { success: true },
      hook_event_name: 'PostToolUse',
    });
  const bash = (command, description) => preToolUse('Bash', { command, description });
  const write = (filePath, content) => preToolUse('Write', { file_path: filePath, content });
  const sameForBoth = (event) => ({ match: event, no_match: event });

  // Reusable generic events.
  const benignBashEvent = bash('ls -la', 'list current directory');
  const benignWriteEvent = write('/tmp/rea-profile-scratch.ts', 'export const x = 1;\n');

  switch (name) {
    case 'architecture-review-gate.sh':
      // PostToolUse on every Edit — no relevance pre-gate at the shim
      // tier; the CLI body decides. Both variants exercise the same
      // path.
      return sameForBoth(
        postToolUse('Edit', { file_path: '/tmp/scratch.ts', old_string: 'a', new_string: 'b' }),
      );

    case 'attribution-advisory.sh':
      // Pre-gate: substring match for `git commit` OR `gh pr (create|edit)`.
      // MATCH: `git commit -m "feat: noop"` — carries no AI attribution
      //   markers (`Co-Authored-By:` with an AI name, "Generated with
      //   [Tool]" footers), so the CLI body exits 0 after its full check.
      // NO_MATCH: `git status` (no commit/pr-create substring).
      return {
        match: bash('git commit -m "feat: noop"', 'noop commit'),
        no_match: bash('git status', 'check status'),
      };

    case 'blocked-paths-bash-gate.sh':
      // Shim has only `shim_cli_missing_relevant` (CLI-missing only).
      // Under normal CLI-reachable steady state, both variants run
      // the full CLI body. Same payload for both.
      return sameForBoth(benignBashEvent);

    case 'blocked-paths-enforcer.sh':
      // Same as above — CLI-missing-only relevance gate. Both variants
      // hit the full CLI body when CLI is reachable.
      return sameForBoth(benignWriteEvent);

    case 'changeset-security-gate.sh':
      // Pre-gate: file_path / notebook_path contains `.changeset/`.
      // MATCH: a benign changeset frontmatter (no GHSA reference so
      //   the CLI body's disclosure scan exits 0).
      // NO_MATCH: the generic Write to `/tmp/rea-profile-scratch.ts`
      //   (no `.changeset/` substring).
      return {
        match: write(
          '/tmp/changeset-profile/.changeset/perf-noop.md',
          '---\n"@scope/pkg": patch\n---\n\nperf noop\n',
        ),
        no_match: benignWriteEvent,
      };

    case 'dangerous-bash-interceptor.sh':
      // No `shim_is_relevant` — every Bash event goes through the full
      // CLI body (`shim_cli_missing_relevant` is a DIFFERENT branch that
      // only fires when dist/cli is missing). Both variants therefore
      // send the generic `ls -la` event.
      // NOTE(review): earlier notes named `git status` as the known-safe
      // candidate (no rule head H1-H17 + M1 fires on it); the payload
      // actually sent is `ls -la` — presumably equally benign, confirm
      // against the CLI's bypass corpus.
      return sameForBoth(benignBashEvent);

    case 'delegation-advisory.sh':
      // PostToolUse on Bash|Edit|Write|MultiEdit|NotebookEdit. No
      // relevance pre-gate; CLI body decides. Both same.
      return sameForBoth(postToolUse('Write', { file_path: '/tmp/scratch.ts', content: 'x' }));

    case 'delegation-capture.sh':
      // PreToolUse on Agent|Skill matcher — every Agent/Skill event
      // goes through the CLI body. Both variants are the same.
      return sameForBoth(
        preToolUse('Agent', { subagent_type: 'general-purpose', prompt: 'noop' }),
      );

    case 'dependency-audit-gate.sh':
      // Pre-gate: substring match for `(npm|pnpm|yarn) (install|i|add) `.
      // MATCH: `pnpm add ./local-pkg` — passes the segment-anchored
      //   install matcher (full hot path through splitSegments + the
      //   env-prefix strip + the per-segment scan), but the
      //   package-name extractor in
      //   `src/hooks/dependency-audit-gate/index.ts` skips `./` / `/` /
      //   `../` tokens as path installs. After the scan,
      //   `packages.length === 0` → the hook returns exit 0 WITHOUT a
      //   `npm view` network call. Codex round-1 P2 (0.46.0): the
      //   earlier `pnpm add lodash` payload triggered the real registry
      //   probe and `runProfile()` exited 2 on any offline / firewalled
      //   / npm-outage machine, making the harness unusable without
      //   external network access. The path-install variant keeps the
      //   hot path measured without the network dependency.
      // NO_MATCH: `ls -la` (no install verb → segment matcher misses).
      return {
        match: bash('pnpm add ./local-pkg', 'install a local path package'),
        no_match: benignBashEvent,
      };

    case 'env-file-protection.sh':
      // Pre-gate: `.env` substring in tool_input.command.
      // MATCH: `cat .env.example` — relevant (`.env` substring) but
      //   benign (`.env.example` is excluded by the CLI body's
      //   co-occurrence + suffix logic).
      // NO_MATCH: `ls -la` (no `.env`).
      return {
        match: bash('cat .env.example', 'check example env'),
        no_match: benignBashEvent,
      };

    case 'local-review-gate.sh':
      // Pre-gate is policy-driven on `review.local_review.refuse_at`.
      // Default `refuse_at: push` triggers on `git push`, but the body
      // fails CLOSED when the policy is enforced — a synthetic
      // `git push` would be refused and exit non-zero, which breaks the
      // round-1 P2 #2 "every shim exits 0" contract.
      //
      // The only non-refusing way onto the forward path is the
      // `REA_SKIP_LOCAL_REVIEW=1` bypass, which the shim's step 2b
      // reads from the ENVIRONMENT, not from the payload — so it
      // cannot be driven via JSON, and bypassing globally through
      // `runOnce`'s env would invalidate the no-match variant too.
      //
      // Net: both variants send the same non-`git push` payload (the
      // generic `ls -la` event). The policy-driven scan still fires,
      // but the command exits before the heavy forward path. The
      // body's genuine hot path under a `git push` cannot be measured
      // in a non-refusing way without ambient env bypass; the baseline
      // doc notes this limitation explicitly.
      return sameForBoth(benignBashEvent);

    case 'pr-issue-link-gate.sh':
      // No `shim_is_relevant`. Advisory-tier; CLI body decides.
      // Both variants are the same (`same_as_match: true` in the
      // baseline) — the CLI body's `gh pr create` matcher fires only
      // on that exact prefix, but the shim-tier latency is identical
      // either way.
      return sameForBoth(benignBashEvent);

    case 'protected-paths-bash-gate.sh':
      // CLI-missing-only relevance gate. Under normal CLI-reachable
      // steady state both variants run the full CLI body.
      return sameForBoth(benignBashEvent);

    case 'secret-scanner.sh':
      // Pre-gate short-circuits on empty content or `.env.example` /
      // `.env.sample` suffix.
      // MATCH: a benign `.ts` Write with non-credential content — the
      //   CLI body runs the full 17-pattern catalog and exits 0.
      // NO_MATCH: a Write to `/tmp/scratch.env.example` — pre-gate
      //   suffix short-circuit fires.
      return {
        match: benignWriteEvent,
        no_match: write('/tmp/scratch.env.example', 'EXAMPLE_VAR=changeme\n'),
      };

    case 'security-disclosure-gate.sh':
      // Pre-gate: substring match for `gh issue create`.
      // MATCH: `gh issue create --title "docs: noop"` — relevant,
      //   but no security keywords so the CLI body exits 0.
      // NO_MATCH: `gh issue list` (no `create`).
      return {
        match: bash('gh issue create --title "docs: noop"', 'create a docs issue'),
        no_match: bash('gh issue list', 'list issues'),
      };

    case 'settings-protection.sh':
      // CLI-missing-only relevance gate. Under normal CLI-reachable
      // steady state both variants run the full CLI body.
      return sameForBoth(benignWriteEvent);

    default:
      // Conservative fallback: a benign Bash payload for both.
      return sameForBoth(benignBashEvent);
  }
}
|
|
423
|
+
|
|
424
|
+
/**
 * Per-hook stdin payload generator — BACKWARDS-COMPATIBLE wrapper.
 *
 * Pre-0.46.0 callers used `payloadForHook(name)`. The harness now
 * profiles each shim under two variants (`match` + `no_match`); this
 * wrapper hands legacy callers (e.g. the existing regression test) the
 * `match` variant. It stays exported so external scripts / tests that
 * imported `payloadForHook` keep working without churn.
 *
 * New callers should use `payloadVariantsForHook(name)` directly.
 *
 * @param {string} name - Shim file name.
 * @returns {string} The `match` JSON event string for that shim.
 */
export function payloadForHook(name) {
  const { match } = payloadVariantsForHook(name);
  return match;
}
|
|
437
|
+
|
|
438
|
+
/**
 * List the shims to profile — every regular file ending in `.sh`
 * directly under `hooksDir`. Directories (such as `_lib/`) and entries
 * that cannot be stat'ed are left out.
 *
 * @param {string} [hooksDir=HOOKS_DIR] - Directory to scan.
 * @returns {string[]} Matching file names (not paths), sorted ascending.
 */
export function listShims(hooksDir = HOOKS_DIR) {
  const isRegularFile = (entry) => {
    try {
      // statSync follows symlinks, so a symlinked shim still counts.
      return statSync(path.join(hooksDir, entry)).isFile();
    } catch {
      // Entry vanished or is unreadable — not profilable, skip it.
      return false;
    }
  };

  return readdirSync(hooksDir)
    .filter((entry) => entry.endsWith('.sh'))
    .filter(isRegularFile)
    .sort();
}
|
|
454
|
+
|
|
455
|
+
/**
 * Run a single shim invocation and time it.
 *
 * The shim is started synchronously as `bash <hookPath>` with `payload`
 * piped to stdin. The child inherits this process's environment, with
 * `CLAUDE_PROJECT_DIR` set to the repository root, and is given a
 * 30-second timeout.
 *
 * @param {string} hookPath - Path of the shim script to execute.
 * @param {string} payload - JSON event text written to the shim's stdin.
 * @returns {{ ms: number, status: number }} Wall-clock duration in
 *   milliseconds and the exit code (`-1` when no exit code is available).
 */
function runOnce(hookPath, payload) {
  const startedAt = performance.now();
  const { status } = spawnSync('bash', [hookPath], {
    input: payload,
    encoding: 'utf8',
    timeout: 30000,
    env: { ...process.env, CLAUDE_PROJECT_DIR: REPO_ROOT },
  });
  const ms = performance.now() - startedAt;
  // spawnSync returns a null status on timeout/signal — surface that
  // as -1 so the caller can flag it.
  return { ms, status: status ?? -1 };
}
|
|
472
|
+
|
|
473
|
+
/**
 * Nearest-rank percentile of an ascending-sorted array of numbers.
 *
 * The selected index is `ceil(p / 100 * length) - 1`, clamped into the
 * valid range, so `p <= 0` yields the first element and `p >= 100` the
 * last. No interpolation is performed.
 *
 * @param {number[]} sorted - Samples, already sorted ascending.
 * @param {number} p - Percentile to read, on a 0–100 scale.
 * @returns {number} The selected sample, or `0` for an empty array.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) return 0;
  const rank = Math.ceil((p / 100) * count) - 1;
  const idx = Math.min(count - 1, Math.max(0, rank));
  return sorted[idx];
}
|
|
481
|
+
|
|
482
|
+
/**
 * Run a measurement sweep for a single payload variant and return the
 * per-variant record. Helper for `profileHook`, which runs both the
 * `match` and `no_match` variants per shim (0.46.0 charter item 3).
 *
 * @param {string} hookPath - Path of the shim script to execute.
 * @param {string} payload - JSON event text piped to the shim's stdin.
 * @param {number} iterations - Number of measured invocations.
 * @param {number} warmup - Number of discarded invocations run first.
 * @returns {{ median_ms: number, p95_ms: number, max_ms: number,
 *   samples_ms: number[], exit_codes: number[], error: (string|null) }}
 *   Statistics rounded via `round`; `samples_ms` and `exit_codes` are in
 *   execution order; `error` is non-null when any sample exited non-zero.
 */
function measureVariant(hookPath, payload, iterations, warmup) {
  // Warm-up invocations are run and their results thrown away.
  for (let i = 0; i < warmup; i += 1) {
    runOnce(hookPath, payload);
  }

  const samples = [];
  const exitCodes = [];
  for (let i = 0; i < iterations; i += 1) {
    const { ms, status } = runOnce(hookPath, payload);
    samples.push(ms);
    exitCodes.push(status);
  }

  // Sort a copy so `samples_ms` keeps execution order.
  const sorted = [...samples].sort((a, b) => a - b);
  // With zero samples there is no last element; fall back to 0 (the
  // same value percentile() yields for empty input) rather than letting
  // `undefined` turn into NaN in the record.
  const max = sorted.length > 0 ? sorted[sorted.length - 1] : 0;

  const nonZero = exitCodes.filter((code) => code !== 0);
  let error = null;
  if (nonZero.length > 0) {
    error =
      `${nonZero.length}/${exitCodes.length} samples exited non-zero ` +
      `(codes: ${exitCodes.join(',')}). Synthetic payload likely hit an ` +
      `error path; latency is NOT representative of the hot path. ` +
      `Tune the payload in payloadVariantsForHook() so this shim exits 0.`;
  }

  return {
    median_ms: round(percentile(sorted, 50)),
    p95_ms: round(percentile(sorted, 95)),
    max_ms: round(max),
    samples_ms: samples.map(round),
    exit_codes: exitCodes,
    error,
  };
}
|
|
519
|
+
|
|
520
|
+
/**
 * Profile a single hook. Returns the measurement record.
 *
 * 0.45.0 codex round-1 P2 #2: every shim is expected to exit 0 under
 * its synthetic non-blocking payload — that's the steady-state hot
 * path we want to measure. A non-zero exit (refusal, malformed
 * payload, timeout, CLI-missing) means the shim ran an ERROR path
 * instead, and the resulting latency does NOT represent steady-state.
 * The record's `error` field surfaces any non-zero exit so callers can
 * fail loudly rather than silently shipping a "healthy" baseline that
 * timed nothing but error paths.
 *
 * 0.46.0 charter item 3: every shim is profiled under a `match` payload
 * (passes the shim_is_relevant pre-gate, exercises the full hot path)
 * and a `no_match` payload (fails the pre-gate, exercises the
 * short-circuit). When `payloadVariantsForHook` returns the same string
 * for both, only one sweep runs, `same_as_match` is `true` and
 * `no_match` is `null` (the numbers would be redundant).
 *
 * The top-level fields (`median_ms`, `p95_ms`, `max_ms`, `samples_ms`,
 * `exit_codes`, `error`) reflect the MATCH variant — the hot path the
 * ceiling enforcement budgets — which also preserves the pre-0.46.0
 * record shape for external consumers.
 *
 * @param {string} name - Shim file name inside `hooksDir`.
 * @param {{ iterations?: number, warmup?: number, hooksDir?: string }} [opts]
 *   Overrides; each falls back to the module default when nullish.
 * @returns {object} `{ name, ...matchMeasurement, same_as_match, no_match }`.
 */
export function profileHook(name, opts = {}) {
  const iterations = opts.iterations ?? DEFAULT_ITERATIONS;
  const warmup = opts.warmup ?? DEFAULT_WARMUP;
  const hookPath = path.join(opts.hooksDir ?? HOOKS_DIR, name);

  const { match, no_match: noMatch } = payloadVariantsForHook(name);
  // Identical payload text means both variants would time the same path.
  const sameAsMatch = match === noMatch;

  const matchMeas = measureVariant(hookPath, match, iterations, warmup);
  const noMatchMeas = sameAsMatch
    ? null
    : measureVariant(hookPath, noMatch, iterations, warmup);

  return {
    name,
    // MATCH variant — the hot path — spread at the top level.
    ...matchMeas,
    // 0.46.0 per-variant breakout.
    same_as_match: sameAsMatch,
    no_match: noMatchMeas,
  };
}
|
|
574
|
+
|
|
575
|
+
/**
 * Round a number to two decimal places (used for millisecond values in
 * the report).
 *
 * @param {number} n - Value to round.
 * @returns {number} `n` rounded to the nearest hundredth.
 */
function round(n) {
  return Math.round(n * 100) / 100;
}
|
|
578
|
+
|
|
579
|
+
/**
 * Run the full profile and return the report object.
 *
 * Each selected shim is profiled via `profileHook`; the records are
 * sorted by MATCH-variant p95 descending and decorated with the
 * resolved ceiling so the baseline JSON documents the per-shim
 * threshold inline (avoids drift between the doc and the regression
 * test).
 *
 * @param {{ hooksDir?: string, iterations?: number, warmup?: number,
 *   shims?: string[] }} [opts] Overrides. `shims` defaults to
 *   `listShims(hooksDir)`; names that are not regular files in
 *   `hooksDir` are dropped either way.
 * @returns {object} Report with run metadata (`version`, `measured_at`,
 *   `platform`, `node_version`, `iterations`, `warmup`), the ceiling
 *   configuration, and `hooks` — one decorated record per shim.
 */
export function runProfile(opts = {}) {
  const hooksDir = opts.hooksDir ?? HOOKS_DIR;
  const iterations = opts.iterations ?? DEFAULT_ITERATIONS;
  const warmup = opts.warmup ?? DEFAULT_WARMUP;

  // Skip non-file entries defensively.
  const isRegularFile = (shim) => {
    try {
      return statSync(path.join(hooksDir, shim)).isFile();
    } catch {
      return false;
    }
  };
  const shims = (opts.shims ?? listShims(hooksDir)).filter(isRegularFile);

  const hooks = shims
    .map((name) => profileHook(name, { iterations, warmup, hooksDir }))
    // Slowest first, so the operator's eye lands on the leaders.
    .sort((a, b) => b.p95_ms - a.p95_ms)
    .map((record) => {
      const ceiling = ceilingForShim(record.name);
      return {
        ...record,
        p95_ceiling_ms: ceiling,
        over_budget: record.p95_ms > ceiling,
      };
    });

  return {
    version: getPkgVersion(),
    measured_at: new Date().toISOString(),
    platform: process.platform,
    node_version: process.version,
    iterations,
    warmup,
    default_p95_ceiling_ms: DEFAULT_P95_CEILING_MS,
    // Copy, so a caller mutating the report cannot alter the table that
    // ceilingForShim() consults.
    per_shim_p95_ceiling_ms: { ...PER_SHIM_P95_CEILING_MS },
    hooks,
  };
}
|
|
625
|
+
|
|
626
|
+
/**
 * Read the package version recorded in `<repo>/package.json`.
 *
 * @returns {string} The manifest's `version`, or `'0.0.0'` when the
 *   field is absent or the file cannot be read or parsed.
 */
function getPkgVersion() {
  try {
    const manifest = readFileSync(path.join(REPO_ROOT, 'package.json'), 'utf8');
    return JSON.parse(manifest).version ?? '0.0.0';
  } catch {
    // Deliberately best-effort: the version only labels the report, so
    // a missing or malformed manifest must not abort a profiling run.
    return '0.0.0';
  }
}
|
|
634
|
+
|
|
635
|
+
/**
 * Read a `--<flag>=<n>` count option out of the CLI arguments.
 *
 * @param {string[]} args - CLI arguments (node binary and script path removed).
 * @param {string} flag - Option name without the leading dashes.
 * @param {number} fallback - Value returned when the option is not present.
 * @param {number} min - Smallest value that is accepted.
 * @returns {number} The parsed count, or `fallback` when the option is absent.
 * @throws {Error} When the option is present but its value is not a plain
 *   decimal integer that is at least `min`.
 */
function parseCountFlag(args, flag, fallback, min) {
  const prefix = `--${flag}=`;
  const arg = args.find((a) => a.startsWith(prefix));
  if (arg === undefined) return fallback;

  const raw = arg.slice(prefix.length);
  // Digits only: rejects '', 'abc', '-1', '1.5' and '10x', all of which a
  // bare parseInt would either turn into NaN or silently truncate.
  const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(value) || value < min) {
    throw new Error(`invalid ${prefix}${raw} (expected an integer >= ${min})`);
  }
  return value;
}

/**
 * Shared tail of both failure paths: announce that the baseline is left
 * untouched, still emit the JSON on stdout under --dry-run, and record the
 * exit status.
 *
 * `process.exitCode` is set instead of calling `process.exit()` so that a
 * pending stdout write (e.g. to a pipe) is not cut short by the exit.
 *
 * @param {string} json - Serialized report.
 * @param {boolean} dryRun - Whether --dry-run was requested.
 * @param {number} exitCode - Exit status to report for this run.
 */
function rejectRun(json, dryRun, exitCode) {
  process.stderr.write(
    `[profile-hooks] NOT writing ${BASELINE_PATH} — last-known-good baseline preserved.\n`,
  );
  if (dryRun) process.stdout.write(json);
  process.exitCode = exitCode;
}

/**
 * CLI entry.
 *
 * Recognised arguments: `--dry-run`, `--iterations=<n>` (n >= 1) and
 * `--warmup=<n>` (n >= 0). Runs the profile, prints a p95 summary to stderr,
 * then:
 *   - exit status 2 if any shim (match or no_match variant) reported an error;
 *   - exit status 1 if any shim exceeded its p95 ceiling;
 *   - otherwise writes the report to BASELINE_PATH, or to stdout when
 *     `--dry-run` is given.
 * On either failure the baseline file is not written; with `--dry-run` the
 * JSON is still emitted on stdout for inspection.
 *
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when a count option is malformed.
 */
async function main() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  // Validate up front so a typo fails immediately instead of after (or
  // instead of) a profiling run driven by NaN / zero iterations.
  const iterations = parseCountFlag(args, 'iterations', DEFAULT_ITERATIONS, 1);
  const warmup = parseCountFlag(args, 'warmup', DEFAULT_WARMUP, 0);

  process.stderr.write(
    `[profile-hooks] profiling ${listShims().length} shims ` +
      `(${iterations} iterations + ${warmup} warmup each) — this takes ~30-60s\n`,
  );

  const report = runProfile({ iterations, warmup });
  const json = `${JSON.stringify(report, null, 2)}\n`;

  // Human-readable summary on stderr: the first five entries of the report.
  // 0.46.0 charter item 3: show the relevance-MATCH p95 (hot path) next to
  // the no-match p95 (short-circuit). Entries whose `no_match` is null render
  // that column as `—`, padded to the width of a "<7 chars>ms" value.
  process.stderr.write('\n[profile-hooks] p95 leaders (MATCH = hot path, NO_MATCH = short-circuit):\n');
  for (const r of report.hooks.slice(0, 5)) {
    const matchP95 = String(r.p95_ms).padStart(7);
    const noMatchP95 =
      r.no_match !== null ? `${String(r.no_match.p95_ms).padStart(7)}ms` : '—'.padStart(9);
    process.stderr.write(
      ` ${r.name.padEnd(32)} ` +
        `match.p95=${matchP95}ms ` +
        `no_match.p95=${noMatchP95} ` +
        `median=${String(r.median_ms).padStart(7)}ms ` +
        `max=${String(r.max_ms).padStart(7)}ms\n`,
    );
  }

  // 0.45.0 codex round-1 P2 #2: fail loudly if any shim ran an error path —
  // the latency number is meaningless in that case.
  // 0.45.0 codex round-2 P2 #3: this and the over-budget check run BEFORE the
  // baseline write, so a failed run never clobbers the last-known-good file.
  // 0.46.0 charter item 3: both the match and the no_match variant count.
  const errored = report.hooks.filter(
    (h) => h.error !== null || (h.no_match !== null && h.no_match.error !== null),
  );
  if (errored.length > 0) {
    process.stderr.write(
      `\n[profile-hooks] ${errored.length} shim(s) ran a non-zero error path:\n`,
    );
    for (const h of errored) {
      if (h.error !== null) {
        process.stderr.write(` ${h.name} [match]: ${h.error}\n`);
      }
      if (h.no_match !== null && h.no_match.error !== null) {
        process.stderr.write(` ${h.name} [no_match]: ${h.no_match.error}\n`);
      }
    }
    rejectRun(json, dryRun, 2);
    return;
  }

  // 0.46.0 charter item 3: the ceiling applies to both variants. A no_match
  // short-circuit that exceeds the match ceiling signals a regression in the
  // pre-gate path itself.
  const overBudget = report.hooks.filter(
    (h) =>
      h.p95_ms > ceilingForShim(h.name) ||
      (h.no_match !== null && h.no_match.p95_ms > ceilingForShim(h.name)),
  );
  if (overBudget.length > 0) {
    process.stderr.write(
      `\n[profile-hooks] ${overBudget.length} shim(s) exceeded the p95 ceiling:\n`,
    );
    for (const h of overBudget) {
      const ceiling = ceilingForShim(h.name);
      if (h.p95_ms > ceiling) {
        process.stderr.write(` ${h.name} [match] p95=${h.p95_ms}ms (ceiling=${ceiling}ms)\n`);
      }
      if (h.no_match !== null && h.no_match.p95_ms > ceiling) {
        process.stderr.write(
          ` ${h.name} [no_match] p95=${h.no_match.p95_ms}ms (ceiling=${ceiling}ms)\n`,
        );
      }
    }
    rejectRun(json, dryRun, 1);
    return;
  }

  // All checks passed — safe to persist the baseline.
  if (dryRun) {
    process.stdout.write(json);
    return;
  }
  // Create the docs directory only when something is actually written, so
  // --dry-run and failed runs leave the filesystem untouched. `recursive`
  // makes this a no-op when the directory already exists.
  mkdirSync(DOCS_DIR, { recursive: true });
  writeFileSync(BASELINE_PATH, json);
  process.stderr.write(`[profile-hooks] wrote ${BASELINE_PATH}\n`);
}
|
|
751
|
+
|
|
752
|
+
// Run main only when invoked directly (not when imported by tests).
|
|
753
|
+
const invokedDirectly = process.argv[1] && path.resolve(process.argv[1]) === __filename;
|
|
754
|
+
if (invokedDirectly) {
|
|
755
|
+
main().catch((e) => {
|
|
756
|
+
process.stderr.write(`[profile-hooks] FAILED: ${e.message}\n`);
|
|
757
|
+
process.exit(1);
|
|
758
|
+
});
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
export {
|
|
762
|
+
BASELINE_PATH,
|
|
763
|
+
DEFAULT_P95_CEILING_MS,
|
|
764
|
+
PER_SHIM_P95_CEILING_MS,
|
|
765
|
+
DEFAULT_ITERATIONS,
|
|
766
|
+
DEFAULT_WARMUP,
|
|
767
|
+
};
|