@bookedsolid/rea 0.44.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,28 +60,65 @@ export interface ResolvedConfig {
60
60
  reagentNotices: string[];
61
61
  }
62
62
  /**
63
- * 0.44.0 charter item 1: derive the canonical hook filename set the
64
- * installer will lay down. Union of:
65
- *
66
- * - `EXPECTED_HOOKS` (the doctor's required-on-disk list — source of
67
- * truth for "what `.claude/hooks/` must contain after install").
68
- * - The `command` paths of every entry in `defaultDesiredHooks()`
69
- * (the source of truth for "what `.claude/settings.json` registers
70
- * with Claude Code"). Each command path ends in
71
- * `.claude/hooks/<name>.sh`; we extract `<name>.sh` so the result
72
- * joins cleanly with `EXPECTED_HOOKS`.
73
- *
74
- * Pre-0.44.0 `buildInstallSummary` hard-coded a hook count / list. If
75
- * a new hook was added to `EXPECTED_HOOKS` (e.g. `delegation-advisory`
76
- * was promoted in 0.36.0) or registered in `defaultDesiredHooks()`
77
- * without anyone touching the summary, the operator's confirm screen
78
- * silently lied about what was about to be installed. This helper
79
- * means the summary now tracks the real installer surface — adding
80
- * a hook to either canonical source automatically updates the screen.
63
+ * 0.45.0 charter item 2 — derive the canonical hook filename set
64
+ * PRIMARILY from the packaged `hooks/` filesystem tree (the literal
65
+ * shipped artifact), with the two source-code registries
66
+ * (`EXPECTED_HOOKS` and `defaultDesiredHooks()`) layered on top as
67
+ * defensive fallbacks.
68
+ *
69
+ * # Why filesystem-first
70
+ *
71
+ * 0.44.0 introduced this helper as the UNION of two source-code
72
+ * lists. Round-2 noticed a drift hazard: if either source-code list
73
+ * gets out of sync with the actual `hooks/` filesystem reality
74
+ * (e.g. a hook is added to `hooks/` but not to `EXPECTED_HOOKS`),
75
+ * the install-summary lies about what's about to land on disk.
76
+ * The filesystem is the source of truth — what the installer
77
+ * actually copies into `.claude/hooks/` is the contents of
78
+ * `hooks/`. Pinning the canonical set to the FS catches drift at
79
+ * runtime; the cross-check test in `init.test.ts` catches it at
80
+ * build time.
81
+ *
82
+ * # Strategy
83
+ *
84
+ * 1. Try to read `PKG_ROOT/hooks/*.sh` (filtered to exclude `_lib/`).
85
+ * This is the authoritative list — it's literally what the
86
+ * installer will copy into `.claude/hooks/`.
87
+ * 2. Union with `EXPECTED_HOOKS` (doctor's required list) — covers
88
+ * the future case where the FS read fails (e.g. an unusual
89
+ * install layout) but the source-code registry is intact.
90
+ * 3. Union with `defaultDesiredHooks()` basenames — covers the
91
+ * symmetric case where a hook is registered in settings.json
92
+ * but somehow absent from `EXPECTED_HOOKS`.
93
+ *
94
+ * Steps 2 and 3 are belt-and-suspenders. The cross-check test
95
+ * asserts all three sources agree; a drift between the FS and either
96
+ * source-code list fails the test loudly. In production the FS read
97
+ * (step 1) is the only one that contributes anything that wouldn't
98
+ * already be covered by steps 2+3 IF the test stays green.
81
99
  *
82
100
  * Sorted + deduped so the screen is stable across orderings.
101
+ *
102
+ * Exported for testability — the cross-check test imports it
103
+ * directly to compare against `canonicalHooksFromFilesystem()` and
104
+ * the two source-code registries.
83
105
  */
84
106
  export declare function canonicalInstalledHooks(): string[];
107
+ /**
108
+ * 0.45.0 charter item 2 — read the canonical hook filename set
109
+ * directly from the packaged `hooks/` filesystem tree. Returns
110
+ * basenames (e.g. `dangerous-bash-interceptor.sh`) sorted ascending.
111
+ * Excludes anything under `_lib/` (shared helpers, not installed
112
+ * shims).
113
+ *
114
+ * Returns `[]` if the directory can't be read — caller is expected
115
+ * to union with `EXPECTED_HOOKS` / `defaultDesiredHooks()` so a
116
+ * missing FS doesn't produce a zero-length canonical list.
117
+ *
118
+ * Exported so the cross-check test can compare it against the two
119
+ * source-code registries and fail loudly on drift.
120
+ */
121
+ export declare function canonicalHooksFromFilesystem(): string[];
85
122
  /**
86
123
  * 0.43.0 UX polish: build the human-readable install summary shown
87
124
  * BEFORE any files are written. Lists, in order: the policy file
@@ -140,26 +177,71 @@ export declare function detectTargetState(targetDir: string): TargetState;
140
177
  * filesystems and still verify the more meaningful invariant: the
141
178
  * files exist and have non-empty bytes.
142
179
  *
143
- * Detection strategy — two layers, either sufficient:
180
+ * Detection strategy — three layers, ordered cheapest-first.
144
181
  *
145
182
  * 1. Platform — `process.platform === 'win32'` always skips the
146
183
  * exec-bit check (native Windows has no POSIX mode bit; node's
147
184
  * `stat.mode` is a translation that may or may not preserve the
148
185
  * 0o111 bit depending on the source).
149
- * 2. Sample — even on Linux/macOS, when crossing into a Windows-
150
- * backed filesystem (WSL bind-mount onto `/mnt/c/`, an SMB
151
- * share, etc.), `stat.mode` returns a value whose `0o777`
152
- * portion is zero. We detect this by sampling the FIRST `.sh`
153
- * file in the hooks directory and checking whether ANY of the
154
- * `0o777` bits are set; if none are, treat as mode-less.
186
+ * 2. Unambiguous shapes via sample — sample the FIRST `.sh` file:
187
+ *
188
+ * - All 0o777 bits clear (`0o000`) — historical mode-less shape.
189
+ * On a genuine Unix install no shipped hook is ever 0o000,
190
+ * and a chmod-stripped install (the only innocuous source of
191
+ * 0o000) would already be unusable so a false skip there is
192
+ * harmless (the substitute presence + non-empty check still
193
+ * fires).
194
+ * - All 0o777 bits set (`0o777`) — "no info, everything exec";
195
+ * some SMB / NTFS-via-FUSE mounts surface this so file IO
196
+ * works regardless of source mode.
197
+ *
198
+ * 3. Active mode-bit probe (0.45.0 codex round-1 P1 fix) — for
199
+ * ambiguous shapes like `0o644` / `0o666` where the sample
200
+ * COULD be "mode-less mount surfacing as 0o644" OR "chmod-
201
+ * stripped genuine Unix install", do an active probe:
202
+ *
203
+ * a. Write a temporary file with mode `0o755`.
204
+ * b. Stat it back; if the kernel returned a value missing
205
+ * the exec bits we just set, the FS truly ignores mode
206
+ * bits — mode-less.
207
+ * c. If the kernel returned `0o755` (preserved the mode),
208
+ * the FS DOES respect mode bits — the sampled hook's
209
+ * lack of exec bits is a real install failure, NOT a
210
+ * mode-less mount. Return false so the caller emits the
211
+ * genuine "zero executable .sh files" error.
212
+ * d. If the probe itself fails (EROFS, EPERM, ENOSPC,
213
+ * anything), fall through to false — let the caller
214
+ * surface the real installation failure rather than
215
+ * hide it behind an advisory.
216
+ *
217
+ * Pre-fix the `0o644` branch suppressed the exec-bit check
218
+ * unconditionally, masking genuinely broken Unix installs.
155
219
  *
156
220
  * Returns true when the exec-bit check should be SKIPPED.
157
221
  *
158
222
  * Exported for testability — callers can stub the filesystem and
159
- * exercise both shapes (mode-aware vs mode-less) without spinning
160
- * up an actual Windows VM.
223
+ * exercise all three shapes without spinning up an actual Windows VM.
161
224
  */
162
225
  export declare function isModeLessFilesystem(hooksDir: string): boolean;
226
+ /**
227
+ * 0.45.0 codex round-1 P1 fix: active probe to disambiguate a
228
+ * mode-less filesystem from a chmod-stripped genuine Unix install.
229
+ *
230
+ * Writes a temporary file with mode `0o755` and stats it back. If
231
+ * the kernel returns a value that LACKS the exec bits we just set,
232
+ * the filesystem is ignoring mode bits — it's truly mode-less.
233
+ * Otherwise (kernel preserves the mode, OR the probe fails for any
234
+ * reason), return false so the caller surfaces the real install
235
+ * failure instead of hiding it behind an advisory.
236
+ *
237
+ * Probe file is written into `hooksDir` to match the exact mount
238
+ * the caller is checking — sampling a different directory could
239
+ * cross a mount boundary and lie about the target FS. The file is
240
+ * always unlinked, even on probe failure.
241
+ *
242
+ * Exported for testability.
243
+ */
244
+ export declare function filesystemIgnoresModeBits(hooksDir: string): boolean;
163
245
  /**
164
246
  * 0.43.0 UX polish: post-install sanity check. Runs synchronously
165
247
  * after the file-write phase to catch installs that completed
package/dist/cli/init.js CHANGED
@@ -18,7 +18,7 @@ import { CLAUDE_MD_MANIFEST_PATH, SETTINGS_MANIFEST_PATH, enumerateCanonicalFile
18
18
  import { writeManifestAtomic } from './install/manifest-io.js';
19
19
  import { sha256OfBuffer, sha256OfFile } from './install/sha.js';
20
20
  import { defaultReagentPath, ReagentDroppedFieldsError, translateReagentPolicy, } from './install/reagent.js';
21
- import { POLICY_FILE, REA_DIR, REGISTRY_FILE, err, getPkgVersion, log, warn } from './utils.js';
21
+ import { PKG_ROOT, POLICY_FILE, REA_DIR, REGISTRY_FILE, err, getPkgVersion, log, warn, } from './utils.js';
22
22
  const PROFILE_NAMES = [
23
23
  'minimal',
24
24
  'client-engagement',
@@ -815,29 +815,53 @@ function readExistingManifestInstalledAt(manifestPath) {
815
815
  return undefined;
816
816
  }
817
817
  /**
818
- * 0.44.0 charter item 1: derive the canonical hook filename set the
819
- * installer will lay down. Union of:
818
+ * 0.45.0 charter item 2 — derive the canonical hook filename set
819
+ * PRIMARILY from the packaged `hooks/` filesystem tree (the literal
820
+ * shipped artifact), with the two source-code registries
821
+ * (`EXPECTED_HOOKS` and `defaultDesiredHooks()`) layered on top as
822
+ * defensive fallbacks.
820
823
  *
821
- * - `EXPECTED_HOOKS` (the doctor's required-on-disk list — source of
822
- * truth for "what `.claude/hooks/` must contain after install").
823
- * - The `command` paths of every entry in `defaultDesiredHooks()`
824
- * (the source of truth for "what `.claude/settings.json` registers
825
- * with Claude Code"). Each command path ends in
826
- * `.claude/hooks/<name>.sh`; we extract `<name>.sh` so the result
827
- * joins cleanly with `EXPECTED_HOOKS`.
824
+ * # Why filesystem-first
828
825
  *
829
- * Pre-0.44.0 `buildInstallSummary` hard-coded a hook count / list. If
830
- * a new hook was added to `EXPECTED_HOOKS` (e.g. `delegation-advisory`
831
- * was promoted in 0.36.0) or registered in `defaultDesiredHooks()`
832
- * without anyone touching the summary, the operator's confirm screen
833
- * silently lied about what was about to be installed. This helper
834
- * means the summary now tracks the real installer surface — adding
835
- * a hook to either canonical source automatically updates the screen.
826
+ * 0.44.0 introduced this helper as the UNION of two source-code
827
+ * lists. Round-2 noticed a drift hazard: if either source-code list
828
+ * gets out of sync with the actual `hooks/` filesystem reality
829
+ * (e.g. a hook is added to `hooks/` but not to `EXPECTED_HOOKS`),
830
+ * the install-summary lies about what's about to land on disk.
831
+ * The filesystem is the source of truth — what the installer
832
+ * actually copies into `.claude/hooks/` is the contents of
833
+ * `hooks/`. Pinning the canonical set to the FS catches drift at
834
+ * runtime; the cross-check test in `init.test.ts` catches it at
835
+ * build time.
836
+ *
837
+ * # Strategy
838
+ *
839
+ * 1. Try to read `PKG_ROOT/hooks/*.sh` (filtered to exclude `_lib/`).
840
+ * This is the authoritative list — it's literally what the
841
+ * installer will copy into `.claude/hooks/`.
842
+ * 2. Union with `EXPECTED_HOOKS` (doctor's required list) — covers
843
+ * the future case where the FS read fails (e.g. an unusual
844
+ * install layout) but the source-code registry is intact.
845
+ * 3. Union with `defaultDesiredHooks()` basenames — covers the
846
+ * symmetric case where a hook is registered in settings.json
847
+ * but somehow absent from `EXPECTED_HOOKS`.
848
+ *
849
+ * Steps 2 and 3 are belt-and-suspenders. The cross-check test
850
+ * asserts all three sources agree; a drift between the FS and either
851
+ * source-code list fails the test loudly. In production the FS read
852
+ * (step 1) is the only one that contributes anything that wouldn't
853
+ * already be covered by steps 2+3 IF the test stays green.
836
854
  *
837
855
  * Sorted + deduped so the screen is stable across orderings.
856
+ *
857
+ * Exported for testability — the cross-check test imports it
858
+ * directly to compare against `canonicalHooksFromFilesystem()` and
859
+ * the two source-code registries.
838
860
  */
839
861
  export function canonicalInstalledHooks() {
840
- const fromExpected = new Set(EXPECTED_HOOKS);
862
+ const merged = new Set(canonicalHooksFromFilesystem());
863
+ for (const name of EXPECTED_HOOKS)
864
+ merged.add(name);
841
865
  for (const group of defaultDesiredHooks()) {
842
866
  for (const h of group.hooks) {
843
867
  const cmd = h.command;
@@ -847,10 +871,53 @@ export function canonicalInstalledHooks() {
847
871
  const slashIdx = cmd.lastIndexOf('/');
848
872
  const basename = slashIdx >= 0 ? cmd.slice(slashIdx + 1) : cmd;
849
873
  if (basename.endsWith('.sh'))
850
- fromExpected.add(basename);
874
+ merged.add(basename);
851
875
  }
852
876
  }
853
- return Array.from(fromExpected).sort();
877
+ return Array.from(merged).sort();
878
+ }
879
+ /**
880
+ * 0.45.0 charter item 2 — read the canonical hook filename set
881
+ * directly from the packaged `hooks/` filesystem tree. Returns
882
+ * basenames (e.g. `dangerous-bash-interceptor.sh`) sorted ascending.
883
+ * Excludes anything under `_lib/` (shared helpers, not installed
884
+ * shims).
885
+ *
886
+ * Returns `[]` if the directory can't be read — caller is expected
887
+ * to union with `EXPECTED_HOOKS` / `defaultDesiredHooks()` so a
888
+ * missing FS doesn't produce a zero-length canonical list.
889
+ *
890
+ * Exported so the cross-check test can compare it against the two
891
+ * source-code registries and fail loudly on drift.
892
+ */
893
+ export function canonicalHooksFromFilesystem() {
894
+ const dir = path.join(PKG_ROOT, 'hooks');
895
+ try {
896
+ return fs
897
+ .readdirSync(dir)
898
+ .filter((name) => name.endsWith('.sh'))
899
+ .filter((name) => {
900
+ try {
901
+ // Exclude subdirectories like `_lib/`; only top-level `.sh`
902
+ // files are shipped shims. `readdirSync` returns names from
903
+ // the directory itself, but a future `_lib/foo.sh` reachable
904
+ // via the root listing should still be excluded — hence the
905
+ // explicit isFile() check.
906
+ return fs.statSync(path.join(dir, name)).isFile();
907
+ }
908
+ catch {
909
+ return false;
910
+ }
911
+ })
912
+ .sort();
913
+ }
914
+ catch {
915
+ // PKG_ROOT/hooks/ unreadable — fall through to the caller's
916
+ // source-code union. This is a defensive branch; in practice the
917
+ // packaged tarball always ships hooks/, and source builds always
918
+ // have a hooks/ checked into the repo.
919
+ return [];
920
+ }
854
921
  }
855
922
  /**
856
923
  * 0.43.0 UX polish: build the human-readable install summary shown
@@ -974,24 +1041,50 @@ export function detectTargetState(targetDir) {
974
1041
  * filesystems and still verify the more meaningful invariant: the
975
1042
  * files exist and have non-empty bytes.
976
1043
  *
977
- * Detection strategy — two layers, either sufficient:
1044
+ * Detection strategy — three layers, ordered cheapest-first.
978
1045
  *
979
1046
  * 1. Platform — `process.platform === 'win32'` always skips the
980
1047
  * exec-bit check (native Windows has no POSIX mode bit; node's
981
1048
  * `stat.mode` is a translation that may or may not preserve the
982
1049
  * 0o111 bit depending on the source).
983
- * 2. Sample — even on Linux/macOS, when crossing into a Windows-
984
- * backed filesystem (WSL bind-mount onto `/mnt/c/`, an SMB
985
- * share, etc.), `stat.mode` returns a value whose `0o777`
986
- * portion is zero. We detect this by sampling the FIRST `.sh`
987
- * file in the hooks directory and checking whether ANY of the
988
- * `0o777` bits are set; if none are, treat as mode-less.
1050
+ * 2. Unambiguous shapes via sample — sample the FIRST `.sh` file:
1051
+ *
1052
+ * - All 0o777 bits clear (`0o000`) — historical mode-less shape.
1053
+ * On a genuine Unix install no shipped hook is ever 0o000,
1054
+ * and a chmod-stripped install (the only innocuous source of
1055
+ * 0o000) would already be unusable so a false skip there is
1056
+ * harmless (the substitute presence + non-empty check still
1057
+ * fires).
1058
+ * - All 0o777 bits set (`0o777`) — "no info, everything exec";
1059
+ * some SMB / NTFS-via-FUSE mounts surface this so file IO
1060
+ * works regardless of source mode.
1061
+ *
1062
+ * 3. Active mode-bit probe (0.45.0 codex round-1 P1 fix) — for
1063
+ * ambiguous shapes like `0o644` / `0o666` where the sample
1064
+ * COULD be "mode-less mount surfacing as 0o644" OR "chmod-
1065
+ * stripped genuine Unix install", do an active probe:
1066
+ *
1067
+ * a. Write a temporary file with mode `0o755`.
1068
+ * b. Stat it back; if the kernel returned a value missing
1069
+ * the exec bits we just set, the FS truly ignores mode
1070
+ * bits — mode-less.
1071
+ * c. If the kernel returned `0o755` (preserved the mode),
1072
+ * the FS DOES respect mode bits — the sampled hook's
1073
+ * lack of exec bits is a real install failure, NOT a
1074
+ * mode-less mount. Return false so the caller emits the
1075
+ * genuine "zero executable .sh files" error.
1076
+ * d. If the probe itself fails (EROFS, EPERM, ENOSPC,
1077
+ * anything), fall through to false — let the caller
1078
+ * surface the real installation failure rather than
1079
+ * hide it behind an advisory.
1080
+ *
1081
+ * Pre-fix the `0o644` branch suppressed the exec-bit check
1082
+ * unconditionally, masking genuinely broken Unix installs.
989
1083
  *
990
1084
  * Returns true when the exec-bit check should be SKIPPED.
991
1085
  *
992
1086
  * Exported for testability — callers can stub the filesystem and
993
- * exercise both shapes (mode-aware vs mode-less) without spinning
994
- * up an actual Windows VM.
1087
+ * exercise all three shapes without spinning up an actual Windows VM.
995
1088
  */
996
1089
  export function isModeLessFilesystem(hooksDir) {
997
1090
  if (process.platform === 'win32')
@@ -1008,12 +1101,21 @@ export function isModeLessFilesystem(hooksDir) {
1008
1101
  return false;
1009
1102
  }
1010
1103
  const stat = fs.statSync(path.join(hooksDir, firstSh));
1011
- // If ALL 0o777 bits are clear, the FS is not preserving Unix
1012
- // mode bits. Genuine Unix installs always have at least the
1013
- // owner-read bit (0o400) set, so an entirely-zero perms triple
1014
- // means we're on a mode-less mount.
1015
- if ((stat.mode & 0o777) === 0)
1104
+ const perm = stat.mode & 0o777;
1105
+ // (a) All 0o777 bits clear — historical mode-less detection.
1106
+ if (perm === 0)
1107
+ return true;
1108
+ // (b) All 0o777 bits set — some SMB / FUSE mounts surface this.
1109
+ if (perm === 0o777)
1016
1110
  return true;
1111
+ // (c) 0.45.0 codex round-1 P1 fix: when 0o111 bits are clear
1112
+ // (e.g. 0o644 / 0o666), we MUST disambiguate "mode-less
1113
+ // mount that surfaces as 0o644" from "chmod-stripped Unix
1114
+ // install" via an active write-then-stat probe. The pre-fix
1115
+ // unconditional skip masked genuinely-broken Unix installs.
1116
+ if ((perm & 0o111) === 0) {
1117
+ return filesystemIgnoresModeBits(hooksDir);
1118
+ }
1017
1119
  return false;
1018
1120
  }
1019
1121
  catch {
@@ -1023,6 +1125,61 @@ export function isModeLessFilesystem(hooksDir) {
1023
1125
  return false;
1024
1126
  }
1025
1127
  }
1128
+ /**
1129
+ * 0.45.0 codex round-1 P1 fix: active probe to disambiguate a
1130
+ * mode-less filesystem from a chmod-stripped genuine Unix install.
1131
+ *
1132
+ * Writes a temporary file with mode `0o755` and stats it back. If
1133
+ * the kernel returns a value that LACKS the exec bits we just set,
1134
+ * the filesystem is ignoring mode bits — it's truly mode-less.
1135
+ * Otherwise (kernel preserves the mode, OR the probe fails for any
1136
+ * reason), return false so the caller surfaces the real install
1137
+ * failure instead of hiding it behind an advisory.
1138
+ *
1139
+ * Probe file is written into `hooksDir` to match the exact mount
1140
+ * the caller is checking — sampling a different directory could
1141
+ * cross a mount boundary and lie about the target FS. The file is
1142
+ * always unlinked, even on probe failure.
1143
+ *
1144
+ * Exported for testability.
1145
+ */
1146
+ export function filesystemIgnoresModeBits(hooksDir) {
1147
+ const probePath = path.join(hooksDir, `.rea-modeless-probe-${process.pid}-${Date.now()}`);
1148
+ try {
1149
+ // 0.45.0 codex round-2 P2: write WITHOUT the mode option, then
1150
+ // explicitly chmod to 0o755. `writeFileSync({ mode })` is filtered
1151
+ // through the process umask, so a caller running under e.g.
1152
+ // `umask 0111` would have their probe land as 0o644 even on a
1153
+ // real Unix FS — falsely flagging mode-less and re-introducing
1154
+ // the bug the round-1 fix was trying to close. Explicit chmod
1155
+ // bypasses umask and always lands exactly the bits we asked for
1156
+ // (when the FS honors them, which is the property we're probing).
1157
+ fs.writeFileSync(probePath, '');
1158
+ fs.chmodSync(probePath, 0o755);
1159
+ const stat = fs.statSync(probePath);
1160
+ const perm = stat.mode & 0o777;
1161
+ // If the kernel preserved any of our exec bits, the FS honors
1162
+ // mode bits — NOT mode-less.
1163
+ if ((perm & 0o111) !== 0)
1164
+ return false;
1165
+ // Kernel stripped every exec bit we wrote — mode-less.
1166
+ return true;
1167
+ }
1168
+ catch {
1169
+ // Probe write/stat failed (read-only mount, EPERM, ENOSPC).
1170
+ // Conservative: return false so the caller emits the real error
1171
+ // rather than swallow it behind an advisory.
1172
+ return false;
1173
+ }
1174
+ finally {
1175
+ try {
1176
+ fs.unlinkSync(probePath);
1177
+ }
1178
+ catch {
1179
+ // best-effort cleanup
1180
+ }
1181
+ }
1182
+ }
1026
1183
  /**
1027
1184
  * 0.43.0 UX polish: post-install sanity check. Runs synchronously
1028
1185
  * after the file-write phase to catch installs that completed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bookedsolid/rea",
3
- "version": "0.44.0",
3
+ "version": "0.45.0",
4
4
  "description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
5
5
  "license": "MIT",
6
6
  "author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",
@@ -98,6 +98,7 @@
98
98
  "lint": "pnpm run lint:regex && pnpm run lint:awk-quotes && eslint .",
99
99
  "lint:regex": "node scripts/lint-safe-regex.mjs",
100
100
  "lint:awk-quotes": "node scripts/lint-awk-shim-quotes.mjs",
101
+ "perf:hooks": "pnpm run build && node scripts/profile-hooks.mjs",
101
102
  "format": "prettier --write .",
102
103
  "format:check": "prettier --check .",
103
104
  "test": "pnpm run build && pnpm run test:dogfood && pnpm run test:bash-syntax && node scripts/run-vitest.mjs",
@@ -105,6 +106,7 @@
105
106
  "test:coverage": "vitest run --coverage",
106
107
  "test:dogfood": "node tools/check-dogfood-drift.mjs",
107
108
  "test:bash-syntax": "bash -c 'for f in hooks/*.sh hooks/_lib/*.sh; do bash -n \"$f\" || exit 1; done && echo \"[bash-syntax] OK — all hooks parse cleanly\"'",
109
+ "test:perf": "pnpm run build && REA_INCLUDE_PERF=1 vitest run __tests__/scripts/profile-hooks.test.ts",
108
110
  "type-check": "tsc --noEmit",
109
111
  "changeset": "changeset",
110
112
  "changeset:version": "changeset version",
@@ -0,0 +1,478 @@
1
+ #!/usr/bin/env node
2
+ // 0.45.0 charter item 1 — Hook hot-path profiling harness.
3
+ //
4
+ // # What this measures
5
+ //
6
+ // Every Bash / Edit / Write / MultiEdit / NotebookEdit tool call in
7
+ // Claude Code fires one or more `.claude/hooks/*.sh` shims. 14 shims
8
+ // are registered by default. Cumulative latency matters: 14 × 50ms is
9
+ // 700ms added to every tool call, which the operator FEELS. This
10
+ // harness measures per-shim wall-clock latency under a synthetic
11
+ // payload and writes a baseline so regressions are visible.
12
+ //
13
+ // # Methodology
14
+ //
15
+ // For each shim:
16
+ // 1. Build a representative stdin JSON payload (Claude Code shape)
17
+ // tuned to be "irrelevant" — i.e. the shim runs through its
18
+ // full HALT → stdin-capture → resolve → sandbox → policy
19
+ // short-circuit / version-probe path but does NOT trigger a
20
+ // block. This is the steady-state hot path.
21
+ // 2. Warm up: 2 invocations (discarded). The first invocation has
22
+ // cold filesystem caches + Node startup costs that don't
23
+ // reflect steady-state.
24
+ // 3. Measure: 10 invocations. Capture wall-clock + child cputime.
25
+ // 4. Compute median / p95 / max from the 10 samples.
26
+ //
27
+ // The shim is invoked via `bash <hook-path>` with stdin piped in, the
28
+ // same way Claude Code invokes them. Environment is preserved so the
29
+ // real-world resolution path runs (node_modules / dist / PATH).
30
+ //
31
+ // # Output
32
+ //
33
+ // Writes `docs/hook-perf-baseline.json` sorted by p95 descending.
34
+ // Shape:
35
+ //
36
+ // {
37
+ // "version": "0.45.0",
38
+ // "measured_at": "2026-05-17T...",
39
+ // "platform": "darwin",
40
+ // "node_version": "v22.x.x",
41
+ // "iterations": 10,
42
+ // "warmup": 2,
43
+ // "hooks": [
44
+ // {
45
+ // "name": "local-review-gate.sh",
46
+ // "median_ms": 123.4,
47
+ // "p95_ms": 145.6,
48
+ // "max_ms": 158.9,
49
+ // "samples_ms": [...],
50
+ // "exit_codes": [0,0,0,0,0,0,0,0,0,0]
51
+ // },
52
+ // ...
53
+ // ]
54
+ // }
55
+ //
56
+ // # Threshold
57
+ //
58
+ // The harness DOES NOT enforce thresholds itself — it's a measurement
59
+ // tool. The regression test at `__tests__/scripts/profile-hooks.test.ts`
60
+ // asserts a permissive ceiling so absolute regressions get caught.
61
+ // Tighten the ceiling over time as the baseline stabilizes.
62
+ //
63
+ // # Wiring
64
+ //
65
+ // `pnpm perf:hooks` runs this script. Not part of the default
66
+ // `pnpm test` chain — it's heavy (160+ subprocess spawns) and timing
67
+ // is sensitive to system load. CI calls it explicitly when the perf
68
+ // guard is active.
69
+
70
+ import { spawnSync } from 'node:child_process';
71
+ import {
72
+ readdirSync,
73
+ readFileSync,
74
+ writeFileSync,
75
+ statSync,
76
+ existsSync,
77
+ mkdirSync,
78
+ } from 'node:fs';
79
+ import path from 'node:path';
80
+ import { fileURLToPath } from 'node:url';
81
+ import { performance } from 'node:perf_hooks';
82
+
83
+ const __filename = fileURLToPath(import.meta.url);
84
+ const __dirname = path.dirname(__filename);
85
+ const REPO_ROOT = path.resolve(__dirname, '..');
86
+
87
+ const HOOKS_DIR = path.join(REPO_ROOT, 'hooks');
88
+ const DOCS_DIR = path.join(REPO_ROOT, 'docs');
89
+ const BASELINE_PATH = path.join(DOCS_DIR, 'hook-perf-baseline.json');
90
+
91
+ // Permissive default per-shim p95 ceilings. The regression test in
92
+ // `__tests__/scripts/profile-hooks.test.ts` enforces these. Start
93
+ // loose to avoid CI flakes from cold caches / shared runners; tighten
94
+ // in future releases as the baseline stabilizes.
95
+ //
96
+ // `local-review-gate.sh` is a documented outlier — it does its own
97
+ // early sandbox check (round-5 P1) + subtree policy reads + a git
98
+ // stash-create on the forward path. ~1800ms is its current healthy
99
+ // p95 on the rea repo; the ceiling sits 2x above for CI headroom.
100
+ // See `docs/hook-perf-baseline.md` for the breakdown.
101
+ const DEFAULT_P95_CEILING_MS = 2000;
102
+ const PER_SHIM_P95_CEILING_MS = {
103
+ 'local-review-gate.sh': 4500,
104
+ };
105
+
106
+ /**
107
+ * Resolve the p95 ceiling for a given shim. Falls back to the default
108
+ * when no per-shim entry exists.
109
+ */
110
+ export function ceilingForShim(name) {
111
+ return PER_SHIM_P95_CEILING_MS[name] ?? DEFAULT_P95_CEILING_MS;
112
+ }
113
+
114
+ const DEFAULT_ITERATIONS = 10;
115
+ const DEFAULT_WARMUP = 2;
116
+
117
+ /**
118
+ * Per-hook stdin payload generator. Each shim sees a Claude Code
119
+ * PreToolUse/PostToolUse event JSON; the shape varies slightly per
120
+ * hook (Bash vs Edit vs Write). We use intentionally innocuous
121
+ * payloads so the shim runs through its full hot path without
122
+ * blocking — that's the realistic latency we want to measure.
123
+ *
124
+ * Returns the JSON string to pipe into the shim's stdin.
125
+ */
126
+ export function payloadForHook(name) {
127
+ // PreToolUse Bash event (Bash-tier hooks): a simple `ls` payload —
128
+ // not destructive, not policy-relevant, not a git push. The shim
129
+ // should run to completion without refusal.
130
+ const bashEvent = JSON.stringify({
131
+ tool_name: 'Bash',
132
+ tool_input: { command: 'ls -la', description: 'list current directory' },
133
+ hook_event_name: 'PreToolUse',
134
+ });
135
+
136
+ // PreToolUse Write event (Write-tier hooks): writing a benign .ts
137
+ // file with no secrets, no protected-path target.
138
+ const writeEvent = JSON.stringify({
139
+ tool_name: 'Write',
140
+ tool_input: { file_path: '/tmp/rea-profile-scratch.ts', content: 'export const x = 1;\n' },
141
+ hook_event_name: 'PreToolUse',
142
+ });
143
+
144
+ // PostToolUse Edit event (architecture-review-gate fires PostToolUse).
145
+ const postEditEvent = JSON.stringify({
146
+ tool_name: 'Edit',
147
+ tool_input: { file_path: '/tmp/scratch.ts', old_string: 'a', new_string: 'b' },
148
+ tool_response: { success: true },
149
+ hook_event_name: 'PostToolUse',
150
+ });
151
+
152
+ // PreToolUse Agent event (delegation-capture matches Agent|Skill).
153
+ const agentEvent = JSON.stringify({
154
+ tool_name: 'Agent',
155
+ tool_input: { subagent_type: 'general-purpose', prompt: 'noop' },
156
+ hook_event_name: 'PreToolUse',
157
+ });
158
+
159
+ switch (name) {
160
+ case 'architecture-review-gate.sh':
161
+ return postEditEvent;
162
+ case 'attribution-advisory.sh':
163
+ // Triggers on Bash `git commit` / `gh pr create`. We use a
164
+ // non-attribution payload so it runs through and exits clean.
165
+ return JSON.stringify({
166
+ tool_name: 'Bash',
167
+ tool_input: { command: 'git status', description: 'check status' },
168
+ hook_event_name: 'PreToolUse',
169
+ });
170
+ case 'blocked-paths-bash-gate.sh':
171
+ return bashEvent;
172
+ case 'blocked-paths-enforcer.sh':
173
+ return writeEvent;
174
+ case 'changeset-security-gate.sh':
175
+ return writeEvent;
176
+ case 'dangerous-bash-interceptor.sh':
177
+ return bashEvent;
178
+ case 'delegation-advisory.sh':
179
+ // Fires PostToolUse on Bash|Edit|Write|MultiEdit|NotebookEdit.
180
+ return JSON.stringify({
181
+ tool_name: 'Write',
182
+ tool_input: { file_path: '/tmp/scratch.ts', content: 'x' },
183
+ tool_response: { success: true },
184
+ hook_event_name: 'PostToolUse',
185
+ });
186
+ case 'delegation-capture.sh':
187
+ return agentEvent;
188
+ case 'dependency-audit-gate.sh':
189
+ // Fires on Bash. Payload is benign — not an install command.
190
+ return bashEvent;
191
+ case 'env-file-protection.sh':
192
+ return bashEvent;
193
+ case 'local-review-gate.sh':
194
+ // Fires on Bash. Use a non-push command so the gate runs through
195
+ // its policy-read path without triggering the actual
196
+ // local-review refusal.
197
+ return bashEvent;
198
+ case 'pr-issue-link-gate.sh':
199
+ // Fires on `gh pr create`. Benign Bash payload.
200
+ return bashEvent;
201
+ case 'protected-paths-bash-gate.sh':
202
+ return bashEvent;
203
+ case 'secret-scanner.sh':
204
+ return writeEvent;
205
+ case 'security-disclosure-gate.sh':
206
+ return bashEvent;
207
+ case 'settings-protection.sh':
208
+ return writeEvent;
209
+ default:
210
+ return bashEvent;
211
+ }
212
+ }
213
+
214
/**
 * Enumerate the shim scripts that should be profiled.
 *
 * Only entries directly inside `hooksDir` whose name ends in `.sh` and
 * that stat as regular files are returned. The directory is not walked
 * recursively, so sub-directories such as `_lib/` contribute nothing,
 * and an entry that cannot be stat'ed is silently dropped.
 *
 * @param hooksDir Directory to scan; defaults to `HOOKS_DIR`.
 * @returns File names (not full paths), sorted with the default
 *   `Array.prototype.sort` string ordering.
 */
export function listShims(hooksDir = HOOKS_DIR) {
  // statSync can throw (e.g. the entry vanished between readdir and
  // stat); treat that the same as "not a file".
  const isRegularFile = (entry) => {
    try {
      return statSync(path.join(hooksDir, entry)).isFile();
    } catch {
      return false;
    }
  };

  const shims = [];
  for (const entry of readdirSync(hooksDir)) {
    if (entry.endsWith('.sh') && isRegularFile(entry)) {
      shims.push(entry);
    }
  }
  return shims.sort();
}
230
+
231
/**
 * Execute one shim under `bash`, feeding `payload` to its stdin, and
 * time the synchronous spawn.
 *
 * The child inherits the current environment with `CLAUDE_PROJECT_DIR`
 * set to `REPO_ROOT`, and is given a 30 000 ms timeout.
 *
 * @param hookPath Path of the shim script passed to `bash`.
 * @param payload  Text written to the child's stdin.
 * @returns `{ ms, status }`: elapsed wall-clock milliseconds and the
 *   child's exit code, or `-1` when `spawnSync` reports a `null` status
 *   (timeout or termination by signal).
 */
function runOnce(hookPath, payload) {
  const startedAt = performance.now();
  const { status: rawStatus } = spawnSync('bash', [hookPath], {
    input: payload,
    encoding: 'utf8',
    timeout: 30000,
    env: { ...process.env, CLAUDE_PROJECT_DIR: REPO_ROOT },
  });
  const ms = performance.now() - startedAt;

  // No exit code was produced: normalize to -1 so the caller can flag
  // the sample instead of mistaking it for a clean run.
  if (rawStatus === null) {
    return { ms, status: -1 };
  }
  return { ms, status: rawStatus };
}
248
+
249
/**
 * Nearest-rank percentile of an ascending-sorted array of numbers.
 *
 * The rank `ceil(p / 100 * length)` is converted to a zero-based index
 * and clamped to the array bounds, so `p <= 0` selects the first element
 * and `p >= 100` the last. An empty array yields `0`.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) {
    return 0;
  }

  let idx = Math.ceil((p / 100) * count) - 1;
  if (idx < 0) {
    idx = 0;
  }
  if (idx > count - 1) {
    idx = count - 1;
  }
  return sorted[idx];
}
257
+
258
/**
 * Profile a single hook. Returns the measurement record.
 *
 * The shim is run `warmup` times without recording anything, then
 * `iterations` times with the wall-clock time and exit code of every run
 * recorded.
 *
 * 0.45.0 codex round-1 P2 #2: every shim is expected to exit 0 under
 * its synthetic non-blocking payload — that's the steady-state hot
 * path we want to measure. A non-zero exit (refusal, malformed
 * payload, timeout, CLI-missing) means the shim ran an ERROR path
 * instead of the hot path, and the resulting latency number does NOT
 * represent steady-state. The record carries an `error` field
 * surfacing any non-zero exit so callers can fail loudly rather than
 * silently shipping a "healthy" baseline that timed nothing but error
 * paths.
 *
 * @param name Shim file name, resolved against `opts.hooksDir`.
 * @param opts Optional overrides: `iterations` (default
 *   `DEFAULT_ITERATIONS`), `warmup` (default `DEFAULT_WARMUP`) and
 *   `hooksDir` (default `HOOKS_DIR`).
 * @returns `{ name, median_ms, p95_ms, max_ms, samples_ms, exit_codes,
 *   error }`. Timings are rounded via `round()`; `error` is `null` when
 *   every recorded run exited 0. When no run was recorded, all three
 *   summary timings are 0.
 */
export function profileHook(name, opts = {}) {
  const iterations = opts.iterations ?? DEFAULT_ITERATIONS;
  const warmup = opts.warmup ?? DEFAULT_WARMUP;
  const hooksDir = opts.hooksDir ?? HOOKS_DIR;
  const hookPath = path.join(hooksDir, name);
  const payload = payloadForHook(name);

  // Warm-up runs are executed but deliberately not recorded.
  for (let i = 0; i < warmup; i += 1) {
    runOnce(hookPath, payload);
  }

  const samples = [];
  const exitCodes = [];
  for (let i = 0; i < iterations; i += 1) {
    const r = runOnce(hookPath, payload);
    samples.push(r.ms);
    exitCodes.push(r.status);
  }

  // Sort a copy: `samples` keeps run order for the `samples_ms` field.
  const sorted = [...samples].sort((a, b) => a - b);
  const median = percentile(sorted, 50);
  const p95 = percentile(sorted, 95);
  // With zero recorded runs there is no last element. Report 0, matching
  // what percentile() returns for an empty array, instead of letting
  // `undefined` become NaN in round() and `null` in the JSON report.
  const max = sorted.length > 0 ? sorted[sorted.length - 1] : 0;

  // 0.45.0 codex round-1 P2 #2: surface non-zero exits. -1 marks a
  // timeout (runOnce normalizes spawnSync's null status). Any
  // non-zero value means the shim ran a refusal / error path, not
  // the steady-state hot path the measurement assumes.
  const nonZero = exitCodes.filter((c) => c !== 0);
  const error =
    nonZero.length > 0
      ? `${nonZero.length}/${exitCodes.length} samples exited non-zero ` +
        `(codes: ${exitCodes.join(',')}). Synthetic payload likely hit an ` +
        `error path; latency is NOT representative of the hot path. ` +
        `Tune the payload in payloadForHook() so this shim exits 0.`
      : null;

  return {
    name,
    median_ms: round(median),
    p95_ms: round(p95),
    max_ms: round(max),
    samples_ms: samples.map(round),
    exit_codes: exitCodes,
    error,
  };
}
318
+
319
/**
 * Round a number to two decimal places by scaling to hundredths,
 * applying `Math.round`, and scaling back.
 */
function round(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
322
+
323
/**
 * Profile every shim and assemble the report object.
 *
 * @param opts Optional overrides: `hooksDir` (default `HOOKS_DIR`),
 *   `iterations` (default `DEFAULT_ITERATIONS`), `warmup` (default
 *   `DEFAULT_WARMUP`) and `shims`, an explicit list of file names to
 *   profile instead of everything `listShims(hooksDir)` finds.
 * @returns The report: package version, timestamp, platform, Node
 *   version, run parameters, the ceiling configuration, and `hooks`, the
 *   per-shim records ordered by descending p95, each extended with
 *   `p95_ceiling_ms` and `over_budget`.
 */
export function runProfile(opts = {}) {
  const hooksDir = opts.hooksDir ?? HOOKS_DIR;
  const iterations = opts.iterations ?? DEFAULT_ITERATIONS;
  const warmup = opts.warmup ?? DEFAULT_WARMUP;

  // Defensive: drop any name that is not (or is no longer) a regular
  // file, which matters most for a caller-supplied `opts.shims` list.
  const existsAsFile = (n) => {
    try {
      return statSync(path.join(hooksDir, n)).isFile();
    } catch {
      return false;
    }
  };
  const candidates = opts.shims ?? listShims(hooksDir);
  const shims = candidates.filter(existsAsFile);

  const records = shims.map((name) => profileHook(name, { iterations, warmup, hooksDir }));

  // Slowest first, so the operator's eye lands on the p95 leaders
  // immediately.
  records.sort((a, b) => b.p95_ms - a.p95_ms);

  // Record the resolved ceiling next to each measurement so the baseline
  // JSON documents the per-shim threshold inline (avoids drift between
  // the doc and the regression test).
  const hooks = records.map((record) => {
    const ceiling = ceilingForShim(record.name);
    return {
      ...record,
      p95_ceiling_ms: ceiling,
      over_budget: record.p95_ms > ceiling,
    };
  });

  return {
    version: getPkgVersion(),
    measured_at: new Date().toISOString(),
    platform: process.platform,
    node_version: process.version,
    iterations,
    warmup,
    default_p95_ceiling_ms: DEFAULT_P95_CEILING_MS,
    per_shim_p95_ceiling_ms: PER_SHIM_P95_CEILING_MS,
    hooks,
  };
}
369
+
370
/**
 * Read the `version` field of the `package.json` found at `REPO_ROOT`.
 *
 * @returns The version, or `'0.0.0'` when the file cannot be read or
 *   parsed, or when the field is missing or `null`.
 */
function getPkgVersion() {
  const fallback = '0.0.0';
  try {
    const manifestPath = path.join(REPO_ROOT, 'package.json');
    const manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
    const declared = manifest.version;
    return declared ?? fallback;
  } catch {
    return fallback;
  }
}
378
+
379
/**
 * CLI entry. Profiles every shim, prints a summary to stderr, and
 * persists the baseline only when the whole run is trustworthy.
 *
 * Flags:
 *   --dry-run          print the report JSON to stdout instead of
 *                      writing `BASELINE_PATH`
 *   --iterations=<n>   measured runs per shim (integer >= 1)
 *   --warmup=<n>       unmeasured warm-up runs per shim (integer >= 0)
 *
 * Exits with code 2 when any shim exited non-zero and with code 1 when
 * any shim exceeded its p95 ceiling; in both cases the baseline file is
 * left untouched.
 *
 * @throws Error when `--iterations` / `--warmup` is not a usable
 *   integer. Without this check a value such as `--iterations=abc`
 *   (NaN) or `--iterations=0` records no samples, so every shim reports
 *   a p95 of 0 with no error, both guards below pass, and the
 *   last-known-good baseline is overwritten with zeros.
 */
async function main() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');

  // Parse `--<flag>=<n>`; absent flag → fallback, unusable value → throw.
  const readCountFlag = (flag, fallback, min) => {
    const arg = args.find((a) => a.startsWith(`--${flag}=`));
    if (arg === undefined) return fallback;
    const raw = arg.split('=')[1];
    const value = parseInt(raw, 10);
    if (!Number.isInteger(value) || value < min) {
      throw new Error(`invalid --${flag}=${raw}: expected an integer >= ${min}`);
    }
    return value;
  };
  const iterations = readCountFlag('iterations', DEFAULT_ITERATIONS, 1);
  const warmup = readCountFlag('warmup', DEFAULT_WARMUP, 0);

  process.stderr.write(
    `[profile-hooks] profiling ${listShims().length} shims ` +
      `(${iterations} iterations + ${warmup} warmup each) — this takes ~30-60s\n`,
  );

  const report = runProfile({ iterations, warmup });

  if (!existsSync(DOCS_DIR)) {
    mkdirSync(DOCS_DIR, { recursive: true });
  }

  const json = JSON.stringify(report, null, 2) + '\n';

  // Human-readable summary on stderr (top 5 by p95; runProfile returns
  // the hooks already sorted by descending p95).
  process.stderr.write('\n[profile-hooks] p95 leaders:\n');
  for (const r of report.hooks.slice(0, 5)) {
    process.stderr.write(
      `  ${r.name.padEnd(32)} ` +
        `p95=${String(r.p95_ms).padStart(7)}ms ` +
        `median=${String(r.median_ms).padStart(7)}ms ` +
        `max=${String(r.max_ms).padStart(7)}ms\n`,
    );
  }

  // 0.45.0 codex round-1 P2 #2: fail loudly if any shim ran a
  // non-zero-exit error path — the latency number is meaningless in
  // that case and the baseline would silently ship lies.
  //
  // 0.45.0 codex round-2 P2 #3: this AND the over-budget check below
  // run BEFORE the baseline write — a failed measurement run must
  // NOT clobber the checked-in last-known-good baseline. The dry-run
  // branch still emits JSON for inspection regardless.
  const errored = report.hooks.filter((h) => h.error !== null);
  if (errored.length > 0) {
    process.stderr.write(
      `\n[profile-hooks] ${errored.length} shim(s) ran a non-zero error path:\n`,
    );
    for (const h of errored) {
      process.stderr.write(`  ${h.name}: ${h.error}\n`);
    }
    process.stderr.write(
      `[profile-hooks] NOT writing ${BASELINE_PATH} — last-known-good baseline preserved.\n`,
    );
    if (dryRun) process.stdout.write(json);
    process.exit(2);
  }

  const overBudget = report.hooks.filter((h) => h.p95_ms > ceilingForShim(h.name));
  if (overBudget.length > 0) {
    process.stderr.write(
      `\n[profile-hooks] ${overBudget.length} shim(s) exceeded the p95 ceiling:\n`,
    );
    for (const h of overBudget) {
      process.stderr.write(
        `  ${h.name} p95=${h.p95_ms}ms (ceiling=${ceilingForShim(h.name)}ms)\n`,
      );
    }
    process.stderr.write(
      `[profile-hooks] NOT writing ${BASELINE_PATH} — last-known-good baseline preserved.\n`,
    );
    if (dryRun) process.stdout.write(json);
    process.exit(1);
  }

  // All checks passed — safe to persist the baseline.
  if (dryRun) {
    process.stdout.write(json);
  } else {
    writeFileSync(BASELINE_PATH, json);
    process.stderr.write(`[profile-hooks] wrote ${BASELINE_PATH}\n`);
  }
}
462
+
463
// Start the CLI only when this module is the script Node was launched
// with; importing it (e.g. from tests) must not trigger a profiling run.
const invokedDirectly = process.argv[1] && path.resolve(process.argv[1]) === __filename;
if (invokedDirectly) {
  // Any rejection from main() is reported on stderr and turned into
  // exit code 1.
  main().then(undefined, (e) => {
    process.stderr.write(`[profile-hooks] FAILED: ${e.message}\n`);
    process.exit(1);
  });
}
471
+
472
+ export {
473
+ BASELINE_PATH,
474
+ DEFAULT_P95_CEILING_MS,
475
+ PER_SHIM_P95_CEILING_MS,
476
+ DEFAULT_ITERATIONS,
477
+ DEFAULT_WARMUP,
478
+ };