unbound-cli 1.5.0 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/discover.js +1 -1
- package/src/commands/doctor.js +3 -2
- package/src/commands/setup.js +75 -14
- package/src/commands/status.js +1 -1
- package/src/toolHealth.js +64 -12
- package/test/setup-args.test.js +73 -1
- package/test/tool-health.test.js +111 -0
- package/PLAN-web-4887.md +0 -515
- package/PLAN.md +0 -106
package/package.json
CHANGED
package/src/commands/discover.js
CHANGED
|
@@ -16,7 +16,7 @@ const DISCOVERY_EXIT_UNSUPPORTED_OS = 3;
|
|
|
16
16
|
// indefinitely. Discovery enforces this itself — on expiry it releases its lock,
|
|
17
17
|
// reports the run as failed, and exits non-zero. Kept in sync with
|
|
18
18
|
// setup/mdm/onboard.py's DISCOVERY_TIMEOUT_SECONDS.
|
|
19
|
-
const DISCOVERY_TIMEOUT_SECONDS =
|
|
19
|
+
const DISCOVERY_TIMEOUT_SECONDS = 9000;
|
|
20
20
|
|
|
21
21
|
// Classifies a discovery subprocess exit code:
|
|
22
22
|
// 'success' (scan ran), 'unsupported' (skipped on this OS), or 'failure'.
|
package/src/commands/doctor.js
CHANGED
|
@@ -90,7 +90,7 @@ Examples:
|
|
|
90
90
|
}
|
|
91
91
|
|
|
92
92
|
const C = output.colors;
|
|
93
|
-
const labelOf = (t) => t.label + (t.mode ? ` (${t.mode})` : '');
|
|
93
|
+
const labelOf = (t) => t.label + (t.mode && t.mode !== 'subscription' ? ` (${t.mode})` : '');
|
|
94
94
|
const W = Math.max(7, ...tools.map((t) => labelOf(t).length)); // 7 = "API key"
|
|
95
95
|
|
|
96
96
|
console.log('');
|
|
@@ -150,7 +150,8 @@ Examples:
|
|
|
150
150
|
return;
|
|
151
151
|
}
|
|
152
152
|
const fixNow = root ? allFix : userFix;
|
|
153
|
-
|
|
153
|
+
const fixDisplay = (root ? tampered : userTampered).map(labelOf).join(', ');
|
|
154
|
+
output.info(`Reinstalling: ${fixDisplay}${root ? ' (org-wide, all users)' : ''}`);
|
|
154
155
|
console.log('');
|
|
155
156
|
const r = spawnSync(process.argv[0], [process.argv[1], 'setup', ...fixNow], { stdio: 'inherit' });
|
|
156
157
|
if (!root && mdmFix.length) console.error(` ${mdmNames} ${mdmFix.length === 1 ? 'is' : 'are'} set up by your organization — run ${C.bold('sudo unbound doctor --fix')} to repair ${mdmFix.length === 1 ? 'it' : 'them'}.`);
|
package/src/commands/setup.js
CHANGED
|
@@ -35,14 +35,16 @@ const SETUP_TOOLS = [
|
|
|
35
35
|
{ label: 'Codex \u2014 gateway (gateway)', value: 'codex-gw', script: 'codex/gateway/setup.py', group: 'codex' },
|
|
36
36
|
];
|
|
37
37
|
|
|
38
|
+
// Labels drop the `(subscription)` suffix on the default mode and keep
|
|
39
|
+
// `(gateway)` as the differentiator — matches what doctor/status render.
|
|
38
40
|
const MDM_TOOLS = {
|
|
39
|
-
'cursor': { label: 'Cursor',
|
|
40
|
-
'copilot': { label: 'GitHub Copilot',
|
|
41
|
-
'claude-code-subscription': { label: 'Claude Code
|
|
42
|
-
'claude-code-gateway': { label: 'Claude Code (gateway)',
|
|
43
|
-
'gemini-cli': { label: 'Gemini CLI',
|
|
44
|
-
'codex-subscription': { label: 'Codex
|
|
45
|
-
'codex-gateway': { label: 'Codex (gateway)',
|
|
41
|
+
'cursor': { label: 'Cursor', script: 'cursor/mdm/setup.py' },
|
|
42
|
+
'copilot': { label: 'GitHub Copilot', script: 'copilot/hooks/mdm/setup.py' },
|
|
43
|
+
'claude-code-subscription': { label: 'Claude Code', script: 'claude-code/hooks/mdm/setup.py' },
|
|
44
|
+
'claude-code-gateway': { label: 'Claude Code (gateway)', script: 'claude-code/gateway/mdm/setup.py' },
|
|
45
|
+
'gemini-cli': { label: 'Gemini CLI', script: 'gemini-cli/gateway/mdm/setup.py' },
|
|
46
|
+
'codex-subscription': { label: 'Codex', script: 'codex/hooks/mdm/setup.py' },
|
|
47
|
+
'codex-gateway': { label: 'Codex (gateway)', script: 'codex/gateway/mdm/setup.py' },
|
|
46
48
|
};
|
|
47
49
|
|
|
48
50
|
// Default MDM tools for `sudo unbound onboard` (subscription mode for Claude Code/Codex since only one can be active)
|
|
@@ -59,13 +61,13 @@ const SETUP_ALL_TOOLS = ['cursor', 'claude-code-subscription', 'codex-subscripti
|
|
|
59
61
|
|
|
60
62
|
// Tool name → script mapping for automated tools
|
|
61
63
|
const SETUP_TOOL_MAP = {
|
|
62
|
-
'cursor': { label: 'Cursor',
|
|
63
|
-
'copilot': { label: 'GitHub Copilot',
|
|
64
|
-
'claude-code-subscription': { label: 'Claude Code
|
|
65
|
-
'claude-code-gateway': { label: 'Claude Code (gateway)',
|
|
66
|
-
'gemini-cli': { label: 'Gemini CLI',
|
|
67
|
-
'codex-subscription': { label: 'Codex
|
|
68
|
-
'codex-gateway': { label: 'Codex (gateway)',
|
|
64
|
+
'cursor': { label: 'Cursor', script: 'cursor/setup.py' },
|
|
65
|
+
'copilot': { label: 'GitHub Copilot', script: 'copilot/hooks/setup.py' },
|
|
66
|
+
'claude-code-subscription': { label: 'Claude Code', script: 'claude-code/hooks/setup.py' },
|
|
67
|
+
'claude-code-gateway': { label: 'Claude Code (gateway)', script: 'claude-code/gateway/setup.py' },
|
|
68
|
+
'gemini-cli': { label: 'Gemini CLI', script: 'gemini-cli/gateway/setup.py' },
|
|
69
|
+
'codex-subscription': { label: 'Codex', script: 'codex/hooks/setup.py' },
|
|
70
|
+
'codex-gateway': { label: 'Codex (gateway)', script: 'codex/gateway/setup.py' },
|
|
69
71
|
};
|
|
70
72
|
|
|
71
73
|
/**
|
|
@@ -338,6 +340,56 @@ function runScriptPiped(scriptPath, args) {
|
|
|
338
340
|
});
|
|
339
341
|
}
|
|
340
342
|
|
|
343
|
+
// Env vars Unbound writes during setup. The python `--clear` scripts strip
|
|
344
|
+
// these from ONE rc file (the current shell's), so a user who installed under
|
|
345
|
+
// zsh and runs nuke under bash leaves a stale `export …` in the other rc and
|
|
346
|
+
// then `status` reports tampered. Sweep every candidate rc to close that gap.
|
|
347
|
+
// Conservative: only UNBOUND_* names + ANTHROPIC_BASE_URL. Skip OPENAI_API_KEY
|
|
348
|
+
// — it's a generic user-owned var that can have non-Unbound uses.
|
|
349
|
+
const NUKE_ENV_VARS = [
|
|
350
|
+
'UNBOUND_API_KEY',
|
|
351
|
+
'UNBOUND_CLAUDE_API_KEY',
|
|
352
|
+
'UNBOUND_CODEX_API_KEY',
|
|
353
|
+
'UNBOUND_COPILOT_API_KEY',
|
|
354
|
+
'UNBOUND_CURSOR_API_KEY',
|
|
355
|
+
'ANTHROPIC_BASE_URL',
|
|
356
|
+
];
|
|
357
|
+
|
|
358
|
+
function nukeRcFiles() {
|
|
359
|
+
if (process.platform === 'win32') return [];
|
|
360
|
+
if (process.platform === 'darwin') return ['~/.zprofile', '~/.bash_profile', '~/.zshrc', '~/.bashrc'];
|
|
361
|
+
return ['~/.zshrc', '~/.bashrc', '~/.profile'];
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// Removes `export NAME=...` lines for NUKE_ENV_VARS from every candidate rc.
|
|
365
|
+
// On Windows, best-effort `reg delete` for each name. Returns a summary of
|
|
366
|
+
// what changed so the nuke command can surface it.
|
|
367
|
+
function clearUnboundEnvsEverywhere() {
|
|
368
|
+
const cleared = [];
|
|
369
|
+
if (process.platform === 'win32') {
|
|
370
|
+
for (const name of NUKE_ENV_VARS) {
|
|
371
|
+
const r = spawnSync('reg', ['delete', 'HKCU\\Environment', '/F', '/V', name],
|
|
372
|
+
{ stdio: 'ignore', windowsHide: true });
|
|
373
|
+
if (r.status === 0) cleared.push(`${name} (registry)`);
|
|
374
|
+
}
|
|
375
|
+
return cleared;
|
|
376
|
+
}
|
|
377
|
+
const home = os.homedir();
|
|
378
|
+
const exportRe = new RegExp(`^\\s*export\\s+(${NUKE_ENV_VARS.join('|')})=`);
|
|
379
|
+
for (const rc of nukeRcFiles()) {
|
|
380
|
+
const rcPath = rc.replace(/^~/, home);
|
|
381
|
+
let text;
|
|
382
|
+
try { text = fs.readFileSync(rcPath, 'utf8'); } catch { continue; }
|
|
383
|
+
const lines = text.split('\n');
|
|
384
|
+
const kept = lines.filter((line) => !exportRe.test(line));
|
|
385
|
+
if (kept.length !== lines.length) {
|
|
386
|
+
fs.writeFileSync(rcPath, kept.join('\n'));
|
|
387
|
+
cleared.push(`${lines.length - kept.length} line(s) from ${path.basename(rcPath)}`);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
return cleared;
|
|
391
|
+
}
|
|
392
|
+
|
|
341
393
|
/**
|
|
342
394
|
* Returns true when the process has the privileges needed to touch system-level
|
|
343
395
|
* (MDM) configuration. On Windows, `net session` succeeds only when elevated, so
|
|
@@ -911,6 +963,13 @@ Examples:
|
|
|
911
963
|
output.info('Skipped MDM (system-level) config — that needs root. Re-run with sudo to remove it too.');
|
|
912
964
|
}
|
|
913
965
|
|
|
966
|
+
// Sweep stale `export UNBOUND_*` / `ANTHROPIC_BASE_URL` lines from EVERY
|
|
967
|
+
// candidate rc file (the per-tool python clears only touch the current
|
|
968
|
+
// shell's rc, so a stale entry in another rc would survive and re-trip
|
|
969
|
+
// `unbound status` as tampered on the next run).
|
|
970
|
+
const envCleared = clearUnboundEnvsEverywhere();
|
|
971
|
+
if (envCleared.length) output.info(`Removed Unbound env: ${envCleared.join(', ')}.`);
|
|
972
|
+
|
|
914
973
|
// Wipe credentials + settings last, regardless of tool-clear outcomes.
|
|
915
974
|
config.clearConfig();
|
|
916
975
|
output.success('Stored credentials and settings removed.');
|
|
@@ -988,4 +1047,6 @@ module.exports = {
|
|
|
988
1047
|
buildScriptArgs,
|
|
989
1048
|
scriptSupportsBackfill,
|
|
990
1049
|
resolveSetupAllTools,
|
|
1050
|
+
clearUnboundEnvsEverywhere,
|
|
1051
|
+
NUKE_ENV_VARS,
|
|
991
1052
|
};
|
package/src/commands/status.js
CHANGED
|
@@ -105,7 +105,7 @@ Examples:
|
|
|
105
105
|
console.log(` ${C.dim('None set up yet.')} Run ${C.bold('unbound setup')} to wire a tool.`);
|
|
106
106
|
} else {
|
|
107
107
|
for (const t of connected) {
|
|
108
|
-
const mode = t.mode ? C.dim(` (${t.mode})`) : '';
|
|
108
|
+
const mode = t.mode && t.mode !== 'subscription' ? C.dim(` (${t.mode})`) : '';
|
|
109
109
|
let mark = C.green('✓');
|
|
110
110
|
let note = '';
|
|
111
111
|
if (t.status === 'tampered') { mark = C.red('✗'); note = C.dim(' — run `unbound doctor`'); }
|
package/src/toolHealth.js
CHANGED
|
@@ -20,6 +20,11 @@ const { spawnSync } = require('child_process');
|
|
|
20
20
|
|
|
21
21
|
const HOME = os.homedir();
|
|
22
22
|
const GATEWAY_DEFAULT = 'https://api.getunbound.ai';
|
|
23
|
+
// Anchored to the install root + binary name so a hypothetical neighbor
|
|
24
|
+
// (e.g. `unbound-hooks-v2`) can't accidentally satisfy the substring.
|
|
25
|
+
const BINARY_MARKER = 'unbound-hook/unbound-hook';
|
|
26
|
+
// Hard-coded by the ai.getunbound.runtime pkg (binary/src/unbound_hook/_resources.py).
|
|
27
|
+
const BINARY_PATH = '/opt/unbound/current/unbound-hook/unbound-hook';
|
|
23
28
|
|
|
24
29
|
function expand(p) {
|
|
25
30
|
return p.startsWith('~') ? path.join(HOME, p.slice(1)) : p;
|
|
@@ -129,6 +134,22 @@ function envCheck(label, name, expected, kind = 'aux') {
|
|
|
129
134
|
// A value that doesn't match what setup wrote (stale key, wrong gateway URL) is a
|
|
130
135
|
// real misconfiguration: mark it not-ok so the tool reports tampered, not healthy.
|
|
131
136
|
if (expected && r.value !== expected) {
|
|
137
|
+
// process.env can be a stale snapshot from a shell loaded before setup re-wrote
|
|
138
|
+
// the rc, so consult the rc files as a fallback before declaring a mismatch.
|
|
139
|
+
if (r.source === 'process env' && process.platform !== 'win32') {
|
|
140
|
+
const re = new RegExp(`^\\s*export\\s+${name}=(.*)$`, 'm');
|
|
141
|
+
for (const rc of rcFiles()) {
|
|
142
|
+
const text = readText(rc);
|
|
143
|
+
if (!text) continue;
|
|
144
|
+
const m = text.match(re);
|
|
145
|
+
if (!m) continue;
|
|
146
|
+
const rcVal = m[1].trim().replace(/^["']|["']$/g, '');
|
|
147
|
+
if (rcVal === expected) {
|
|
148
|
+
const rcSource = path.basename(expand(rc));
|
|
149
|
+
return { name: label, ok: true, kind, detail: `${name} set (${rcSource})`, summary: `${base} set` };
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
132
153
|
return { name: label, ok: false, kind, warn: true, summary: `${base} differs from setup`, detail: `${name} set (${r.source}) but differs from what setup configured` };
|
|
133
154
|
}
|
|
134
155
|
return { name: label, ok: true, kind, detail: `${name} set (${r.source})`, summary: `${base} set` };
|
|
@@ -139,7 +160,7 @@ function envCheck(label, name, expected, kind = 'aux') {
|
|
|
139
160
|
// A plain managed-settings.json (Claude Enterprise / generic MDM) does NOT count,
|
|
140
161
|
// and a managed config that points at a missing hook script is a broken (tampered)
|
|
141
162
|
// MDM install, not a healthy one. Returns { status: 'healthy'|'tampered'|null, checks }.
|
|
142
|
-
function mdmDetect(family, dirOverride) {
|
|
163
|
+
function mdmDetect(family, dirOverride, binaryPath) {
|
|
143
164
|
// Only Cursor / Claude Code / Codex have a managed (MDM) directory. Copilot and
|
|
144
165
|
// Gemini have none — their org install writes the same per-user config into
|
|
145
166
|
// every profile, so they're checked exactly like a user-level tool.
|
|
@@ -158,7 +179,28 @@ function mdmDetect(family, dirOverride) {
|
|
|
158
179
|
const scriptPath = path.join(dir, 'hooks', 'unbound.py');
|
|
159
180
|
|
|
160
181
|
const cfgText = readText(configPath);
|
|
161
|
-
if (cfgText == null
|
|
182
|
+
if (cfgText == null) return { status: null, checks: [] };
|
|
183
|
+
const hasBinary = cfgText.includes(BINARY_MARKER);
|
|
184
|
+
const hasPython = cfgText.includes('unbound.py');
|
|
185
|
+
if (!hasBinary && !hasPython) return { status: null, checks: [] };
|
|
186
|
+
|
|
187
|
+
// Binary install wins even when an `unbound.py` substring lingers (e.g.
|
|
188
|
+
// mid-migration the config can mention both; the per-MDM unbound.py script
|
|
189
|
+
// is no longer expected once binary is in play). Mirror the python branch's
|
|
190
|
+
// existence guard so a missing binary surfaces as tampered, not healthy.
|
|
191
|
+
if (hasBinary) {
|
|
192
|
+
const bp = binaryPath || BINARY_PATH;
|
|
193
|
+
const binaryOk = fileExists(bp);
|
|
194
|
+
return {
|
|
195
|
+
status: binaryOk ? 'healthy' : 'tampered',
|
|
196
|
+
checks: [
|
|
197
|
+
{ name: 'MDM config', ok: true, kind: 'structural', detail: configPath, summary: 'managed config (binary)' },
|
|
198
|
+
{ name: 'Hook binary', ok: binaryOk, kind: 'structural',
|
|
199
|
+
summary: binaryOk ? 'hook binary installed' : 'hook binary missing',
|
|
200
|
+
detail: binaryOk ? bp : `managed config references the hook binary but it isn't installed (${bp})` },
|
|
201
|
+
],
|
|
202
|
+
};
|
|
203
|
+
}
|
|
162
204
|
|
|
163
205
|
const scriptOk = fileExists(scriptPath);
|
|
164
206
|
const checks = [
|
|
@@ -170,12 +212,13 @@ function mdmDetect(family, dirOverride) {
|
|
|
170
212
|
|
|
171
213
|
// Marker that a claude/codex/cursor hooks block references unbound.py.
|
|
172
214
|
function refsUnbound(obj) {
|
|
173
|
-
|
|
215
|
+
const s = JSON.stringify(obj || {});
|
|
216
|
+
return s.includes('unbound.py') || s.includes(BINARY_MARKER);
|
|
174
217
|
}
|
|
175
218
|
|
|
176
219
|
// One descriptor per (tool, mode). `family` groups the two-mode tools so the
|
|
177
220
|
// collapsed view shows a single line per product.
|
|
178
|
-
function buildVariants(gatewayUrl, apiKey) {
|
|
221
|
+
function buildVariants(gatewayUrl, apiKey, binaryPath) {
|
|
179
222
|
const gw = (gatewayUrl || GATEWAY_DEFAULT).replace(/\/+$/, ''); // setup rstrips too
|
|
180
223
|
return [
|
|
181
224
|
{
|
|
@@ -221,14 +264,21 @@ function buildVariants(gatewayUrl, apiKey) {
|
|
|
221
264
|
},
|
|
222
265
|
{
|
|
223
266
|
key: 'copilot', label: 'GitHub Copilot', family: 'copilot', mode: null,
|
|
224
|
-
checks: () =>
|
|
267
|
+
checks: () => {
|
|
225
268
|
// Copilot has no managed (MDM) directory: the org install writes the same
|
|
226
269
|
// ~/.copilot config into every user profile, so it's checked like a
|
|
227
270
|
// user-level tool and never reports "managed by MDM".
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
271
|
+
// Binary install of Copilot replaces the per-user `unbound.py` script with
|
|
272
|
+
// a config that points straight at the binary; skip the script check in
|
|
273
|
+
// that case so a clean binary install doesn't read as tampered.
|
|
274
|
+
const cfgText = readText('~/.copilot/hooks/unbound.json');
|
|
275
|
+
const hasBinary = cfgText != null && cfgText.includes(BINARY_MARKER);
|
|
276
|
+
const checks = [configCheck('Config', '~/.copilot/hooks/unbound.json', { json: true, test: refsUnbound })];
|
|
277
|
+
if (hasBinary) checks.push(scriptCheck('Hook binary', binaryPath || BINARY_PATH));
|
|
278
|
+
else checks.push(scriptCheck('Hook script', '~/.copilot/hooks/unbound.py'));
|
|
279
|
+
checks.push(envCheck('API key env', 'UNBOUND_COPILOT_API_KEY', apiKey));
|
|
280
|
+
return checks;
|
|
281
|
+
},
|
|
232
282
|
},
|
|
233
283
|
// Gemini CLI is intentionally omitted here — it isn't part of `setup --all`
|
|
234
284
|
// and has no managed directory. Add it back when its scope is settled.
|
|
@@ -253,8 +303,10 @@ function detectVariant(variant) {
|
|
|
253
303
|
// not-installed. This is what both `doctor` and `status` render.
|
|
254
304
|
// `_mdmDirs` (test-only) overrides the system MDM directories per family so the
|
|
255
305
|
// org-managed scenarios can be exercised without writing under /Library or /etc.
|
|
256
|
-
|
|
257
|
-
|
|
306
|
+
// `_binaryPath` (test-only) overrides the system hook-binary path so binary-mode
|
|
307
|
+
// scenarios can be exercised without writing under /opt.
|
|
308
|
+
function detectTools({ gatewayUrl, apiKey, _mdmDirs, _binaryPath } = {}) {
|
|
309
|
+
const variants = buildVariants(gatewayUrl, apiKey, _binaryPath).map(detectVariant);
|
|
258
310
|
const families = [];
|
|
259
311
|
const seen = new Set();
|
|
260
312
|
for (const v of variants) {
|
|
@@ -272,7 +324,7 @@ function detectTools({ gatewayUrl, apiKey, _mdmDirs } = {}) {
|
|
|
272
324
|
} else {
|
|
273
325
|
const family = v.family;
|
|
274
326
|
const label = v.label.replace(/ \(.*\)$/, '');
|
|
275
|
-
const mdm = mdmDetect(family, _mdmDirs && _mdmDirs[family]);
|
|
327
|
+
const mdm = mdmDetect(family, _mdmDirs && _mdmDirs[family], _binaryPath);
|
|
276
328
|
if (mdm.status === 'healthy') {
|
|
277
329
|
families.push({ key: family, label, family, mode: null, status: 'managed-by-mdm', checks: mdm.checks, scope: 'mdm' });
|
|
278
330
|
} else if (mdm.status === 'tampered') {
|
package/test/setup-args.test.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
const { test } = require('node:test');
|
|
2
2
|
const assert = require('node:assert/strict');
|
|
3
|
-
const
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const os = require('os');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const { buildScriptArgs, scriptSupportsBackfill, resolveSetupAllTools, clearUnboundEnvsEverywhere, NUKE_ENV_VARS } = require('../src/commands/setup');
|
|
4
7
|
|
|
5
8
|
// shellEscape single-quotes every value, so a real key surfaces as
|
|
6
9
|
// --api-key '<key>' at the head of the argv tail.
|
|
@@ -147,3 +150,72 @@ test('resolveSetupAllTools(true): clear-all covers every tool incl. gateway mode
|
|
|
147
150
|
assert.ok(tools.includes(t), `clear-all missing install-bundle tool ${t}`);
|
|
148
151
|
}
|
|
149
152
|
});
|
|
153
|
+
|
|
154
|
+
// WEB-4886: nuke must strip stale UNBOUND_* / ANTHROPIC_BASE_URL lines from
|
|
155
|
+
// EVERY candidate rc file, not just the one the python --clear script reaches.
|
|
156
|
+
// Skip on Windows (rc-file sweep is POSIX-only; the registry path runs `reg`).
|
|
157
|
+
if (process.platform !== 'win32') {
|
|
158
|
+
test('clearUnboundEnvsEverywhere: removes Unbound exports from every candidate rc, preserves other lines', () => {
|
|
159
|
+
const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'unbound-nuke-'));
|
|
160
|
+
const origHome = process.env.HOME;
|
|
161
|
+
process.env.HOME = tmp;
|
|
162
|
+
try {
|
|
163
|
+
// .bashrc and .zshrc are in the candidate list on both darwin and linux,
|
|
164
|
+
// so the test is hermetic across CI (linux) and dev (darwin).
|
|
165
|
+
const bashrc = path.join(tmp, '.bashrc');
|
|
166
|
+
const zshrc = path.join(tmp, '.zshrc');
|
|
167
|
+
fs.writeFileSync(bashrc, [
|
|
168
|
+
'# user content',
|
|
169
|
+
'export UNBOUND_CURSOR_API_KEY="old"',
|
|
170
|
+
'export PATH=/usr/local/bin:$PATH',
|
|
171
|
+
'export UNBOUND_CLAUDE_API_KEY=abc',
|
|
172
|
+
'',
|
|
173
|
+
].join('\n'));
|
|
174
|
+
fs.writeFileSync(zshrc, [
|
|
175
|
+
'export ANTHROPIC_BASE_URL="https://gateway.example"',
|
|
176
|
+
'alias ll="ls -la"',
|
|
177
|
+
'',
|
|
178
|
+
].join('\n'));
|
|
179
|
+
|
|
180
|
+
const cleared = clearUnboundEnvsEverywhere();
|
|
181
|
+
assert.ok(cleared.length > 0, `expected something cleared, got ${JSON.stringify(cleared)}`);
|
|
182
|
+
|
|
183
|
+
const bNow = fs.readFileSync(bashrc, 'utf8');
|
|
184
|
+
assert.ok(!bNow.includes('UNBOUND_CURSOR_API_KEY'), bNow);
|
|
185
|
+
assert.ok(!bNow.includes('UNBOUND_CLAUDE_API_KEY'), bNow);
|
|
186
|
+
assert.ok(bNow.includes('# user content'), bNow);
|
|
187
|
+
assert.ok(bNow.includes('export PATH=/usr/local/bin:$PATH'), bNow);
|
|
188
|
+
|
|
189
|
+
const zNow = fs.readFileSync(zshrc, 'utf8');
|
|
190
|
+
assert.ok(!zNow.includes('ANTHROPIC_BASE_URL'), zNow);
|
|
191
|
+
assert.ok(zNow.includes('alias ll="ls -la"'), zNow);
|
|
192
|
+
} finally {
|
|
193
|
+
if (origHome === undefined) delete process.env.HOME;
|
|
194
|
+
else process.env.HOME = origHome;
|
|
195
|
+
fs.rmSync(tmp, { recursive: true, force: true });
|
|
196
|
+
}
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
test('clearUnboundEnvsEverywhere: leaves OPENAI_API_KEY alone (out of sweep scope)', () => {
|
|
200
|
+
const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'unbound-nuke-'));
|
|
201
|
+
const origHome = process.env.HOME;
|
|
202
|
+
process.env.HOME = tmp;
|
|
203
|
+
try {
|
|
204
|
+
const rc = path.join(tmp, '.zshrc');
|
|
205
|
+
fs.writeFileSync(rc, 'export OPENAI_API_KEY="user-owned"\n');
|
|
206
|
+
clearUnboundEnvsEverywhere();
|
|
207
|
+
assert.ok(fs.readFileSync(rc, 'utf8').includes('OPENAI_API_KEY'));
|
|
208
|
+
} finally {
|
|
209
|
+
if (origHome === undefined) delete process.env.HOME;
|
|
210
|
+
else process.env.HOME = origHome;
|
|
211
|
+
fs.rmSync(tmp, { recursive: true, force: true });
|
|
212
|
+
}
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
test('NUKE_ENV_VARS covers every UNBOUND_* env var named in the variant set + ANTHROPIC_BASE_URL', () => {
|
|
216
|
+
for (const name of ['UNBOUND_API_KEY', 'UNBOUND_CLAUDE_API_KEY', 'UNBOUND_CODEX_API_KEY',
|
|
217
|
+
'UNBOUND_COPILOT_API_KEY', 'UNBOUND_CURSOR_API_KEY', 'ANTHROPIC_BASE_URL']) {
|
|
218
|
+
assert.ok(NUKE_ENV_VARS.includes(name), `missing: ${name}`);
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
}
|
package/test/tool-health.test.js
CHANGED
|
@@ -208,3 +208,114 @@ test('only four tools are reported (Gemini CLI is omitted)', () => {
|
|
|
208
208
|
assert.deepEqual(new Set(keys), new Set(['cursor', 'claude-code', 'codex', 'copilot']));
|
|
209
209
|
});
|
|
210
210
|
});
|
|
211
|
+
|
|
212
|
+
test('detectTools: cursor MDM binary (unbound-hook in hooks.json, binary installed) → managed-by-mdm', () => {
|
|
213
|
+
withHome((tmp, th) => {
|
|
214
|
+
const mdmDir = path.join(tmp, 'mdm', 'Cursor');
|
|
215
|
+
const fakeBin = path.join(tmp, 'opt', 'unbound-hook');
|
|
216
|
+
writeFile(path.join(mdmDir, 'hooks.json'), JSON.stringify({
|
|
217
|
+
hooks: { PreToolUse: [{ command: '/opt/unbound/current/unbound-hook/unbound-hook hook cursor PreToolUse' }] },
|
|
218
|
+
}));
|
|
219
|
+
writeFile(fakeBin, '');
|
|
220
|
+
const t = th.detectTools({ apiKey: 'k', _mdmDirs: { cursor: mdmDir }, _binaryPath: fakeBin }).find((x) => x.family === 'cursor');
|
|
221
|
+
assert.equal(t.status, 'managed-by-mdm');
|
|
222
|
+
assert.equal(t.scope, 'mdm');
|
|
223
|
+
});
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
test('detectTools: cursor MDM binary (unbound-hook in hooks.json, binary MISSING) → tampered', () => {
|
|
227
|
+
withHome((tmp, th) => {
|
|
228
|
+
const mdmDir = path.join(tmp, 'mdm', 'Cursor');
|
|
229
|
+
writeFile(path.join(mdmDir, 'hooks.json'), JSON.stringify({
|
|
230
|
+
hooks: { PreToolUse: [{ command: '/opt/unbound/current/unbound-hook/unbound-hook hook cursor PreToolUse' }] },
|
|
231
|
+
}));
|
|
232
|
+
const t = th.detectTools({
|
|
233
|
+
apiKey: 'k', _mdmDirs: { cursor: mdmDir },
|
|
234
|
+
_binaryPath: path.join(tmp, 'no-such-binary'),
|
|
235
|
+
}).find((x) => x.family === 'cursor');
|
|
236
|
+
assert.equal(t.status, 'tampered');
|
|
237
|
+
assert.equal(t.scope, 'mdm');
|
|
238
|
+
});
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
test('detectTools: claude-code MDM binary (unbound-hook in managed-settings.json, binary installed) → managed-by-mdm', () => {
|
|
242
|
+
withHome((tmp, th) => {
|
|
243
|
+
const mdmDir = path.join(tmp, 'mdm', 'ClaudeCode');
|
|
244
|
+
const fakeBin = path.join(tmp, 'opt', 'unbound-hook');
|
|
245
|
+
writeFile(path.join(mdmDir, 'managed-settings.json'), JSON.stringify({
|
|
246
|
+
hooks: { PreToolUse: [{ command: '/opt/unbound/current/unbound-hook/unbound-hook hook claude-code PreToolUse' }] },
|
|
247
|
+
}));
|
|
248
|
+
writeFile(fakeBin, '');
|
|
249
|
+
const t = th.detectTools({ apiKey: 'k', _mdmDirs: { 'claude-code': mdmDir }, _binaryPath: fakeBin }).find((x) => x.family === 'claude-code');
|
|
250
|
+
assert.equal(t.status, 'managed-by-mdm');
|
|
251
|
+
assert.equal(t.scope, 'mdm');
|
|
252
|
+
});
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
test('detectTools: copilot user binary (unbound-hook in ~/.copilot/hooks/unbound.json, binary installed) → healthy', () => {
|
|
256
|
+
withHome((tmp, th) => {
|
|
257
|
+
const fakeBin = path.join(tmp, 'opt', 'unbound-hook');
|
|
258
|
+
writeFile(path.join(tmp, '.copilot', 'hooks', 'unbound.json'), JSON.stringify({
|
|
259
|
+
hooks: { PreToolUse: [{ command: '/opt/unbound/current/unbound-hook/unbound-hook hook copilot PreToolUse' }] },
|
|
260
|
+
}));
|
|
261
|
+
writeFile(fakeBin, '');
|
|
262
|
+
process.env.UNBOUND_COPILOT_API_KEY = 'k';
|
|
263
|
+
try {
|
|
264
|
+
const t = th.detectTools({ apiKey: 'k', _binaryPath: fakeBin }).find((x) => x.family === 'copilot');
|
|
265
|
+
assert.equal(t.status, 'healthy');
|
|
266
|
+
} finally {
|
|
267
|
+
delete process.env.UNBOUND_COPILOT_API_KEY;
|
|
268
|
+
}
|
|
269
|
+
});
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
test('detectTools: copilot user binary (unbound-hook in unbound.json, binary MISSING) → tampered', () => {
|
|
273
|
+
withHome((tmp, th) => {
|
|
274
|
+
writeFile(path.join(tmp, '.copilot', 'hooks', 'unbound.json'), JSON.stringify({
|
|
275
|
+
hooks: { PreToolUse: [{ command: '/opt/unbound/current/unbound-hook/unbound-hook hook copilot PreToolUse' }] },
|
|
276
|
+
}));
|
|
277
|
+
process.env.UNBOUND_COPILOT_API_KEY = 'k';
|
|
278
|
+
try {
|
|
279
|
+
const t = th.detectTools({ apiKey: 'k', _binaryPath: path.join(tmp, 'no-such-binary') }).find((x) => x.family === 'copilot');
|
|
280
|
+
assert.equal(t.status, 'tampered');
|
|
281
|
+
} finally {
|
|
282
|
+
delete process.env.UNBOUND_COPILOT_API_KEY;
|
|
283
|
+
}
|
|
284
|
+
});
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
test('detectTools: codex binary regression (wrapper at ~/.codex/hooks/unbound.py, hooks.json refs wrapper, config.toml flag, env) → healthy', () => {
|
|
288
|
+
withHome((tmp, th) => {
|
|
289
|
+
const wrapper = path.join(tmp, '.codex', 'hooks', 'unbound.py');
|
|
290
|
+
writeFile(path.join(tmp, '.codex', 'hooks.json'), JSON.stringify({
|
|
291
|
+
hooks: { PreToolUse: [{ command: wrapper }] },
|
|
292
|
+
}));
|
|
293
|
+
writeFile(wrapper, '#!/bin/sh\nexec /opt/unbound/current/unbound-hook/unbound-hook hook codex "$@"\n');
|
|
294
|
+
writeFile(path.join(tmp, '.codex', 'config.toml'), 'codex_hooks = true\n');
|
|
295
|
+
process.env.UNBOUND_CODEX_API_KEY = 'k';
|
|
296
|
+
try {
|
|
297
|
+
const t = th.detectTools({ apiKey: 'k' }).find((x) => x.family === 'codex');
|
|
298
|
+
assert.equal(t.status, 'healthy');
|
|
299
|
+
} finally {
|
|
300
|
+
delete process.env.UNBOUND_CODEX_API_KEY;
|
|
301
|
+
}
|
|
302
|
+
});
|
|
303
|
+
});
|
|
304
|
+
|
|
305
|
+
test('detectTools: envCheck rc-file fallback (process.env stale, shell rc holds expected value) → healthy', () => {
|
|
306
|
+
withHome((tmp, th) => {
|
|
307
|
+
const script = path.join(tmp, '.cursor', 'hooks', 'unbound.py');
|
|
308
|
+
writeFile(path.join(tmp, '.cursor', 'hooks.json'), JSON.stringify({ hooks: { PreToolUse: [{ command: script }] } }));
|
|
309
|
+
writeFile(script, '# unbound');
|
|
310
|
+
// rcFiles() lists different files per platform (zprofile on darwin,
|
|
311
|
+
// zshrc/bashrc/profile on linux). Write to .bashrc — both lists include it.
|
|
312
|
+
writeFile(path.join(tmp, '.bashrc'), 'export UNBOUND_CURSOR_API_KEY="fresh-key"\n');
|
|
313
|
+
process.env.UNBOUND_CURSOR_API_KEY = 'stale-key';
|
|
314
|
+
try {
|
|
315
|
+
const t = th.detectTools({ apiKey: 'fresh-key' }).find((x) => x.key === 'cursor');
|
|
316
|
+
assert.equal(t.status, 'healthy');
|
|
317
|
+
} finally {
|
|
318
|
+
delete process.env.UNBOUND_CURSOR_API_KEY;
|
|
319
|
+
}
|
|
320
|
+
});
|
|
321
|
+
});
|
package/PLAN-web-4887.md
DELETED
|
@@ -1,515 +0,0 @@
|
|
|
1
|
-
# WEB-4887 — AI-Assisted Tool Policy Creation in `unbound` CLI
|
|
2
|
-
|
|
3
|
-
**Linear:** https://linear.app/unboundsec/issue/WEB-4887
|
|
4
|
-
**Status:** Spec / implementation plan
|
|
5
|
-
**Owner:** Dinesh Veluswamy
|
|
6
|
-
**Last updated:** 2026-06-18
|
|
7
|
-
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
## 1. Background
|
|
11
|
-
|
|
12
|
-
The web UI's tool-policy creation flow has an AI-assist input that hits a server-side endpoint tuned with our policy schema and is materially better than Claude hand-authoring policy JSON. Today, when a Claude Code user asks `unbound` to create a tool policy, Claude reaches for the flag-based `unbound policy tool create-terminal` / `create-mcp` commands and constructs the arguments itself. That works but underperforms the in-product AI-assist for the same reason the ticket describes — Claude does not have our system prompt, our command-family inventory, or our MCP catalog in context.
|
|
13
|
-
|
|
14
|
-
This plan exposes the existing AI-assist endpoints to the CLI and ships a Claude Code skill that steers Claude to use them.
|
|
15
|
-
|
|
16
|
-
Endpoints already in production (see `ai-gateway-data/webapp/api/v1/command_policy_handlers.py`):
|
|
17
|
-
|
|
18
|
-
| Policy kind | Endpoint | Model | Backend fallback if LLM fails |
|
|
19
|
-
|---|---|---|---|
|
|
20
|
-
| Terminal command | `POST /api/v1/command-policies/assist/` | Cerebras → Groq via ai-gateway classifier | Deterministic family/field extraction |
|
|
21
|
-
| MCP tool | `POST /api/v1/command-policies/assist-mcp/` | Claude Haiku 4.5 (`temp=0.1`, `max_tokens=900`) | None — returns `success: false` |
|
|
22
|
-
|
|
23
|
-
Both are admin-only (`@require_admin`), enforce a hard 2000-character input cap (HTTP 200 + `success: false`), and collapse runs of newlines/strip non-printable control chars as prompt-injection defense.
|
|
24
|
-
|
|
25
|
-
---
|
|
26
|
-
|
|
27
|
-
## 2. Goals
|
|
28
|
-
|
|
29
|
-
1. Add an AI-assist code path to `unbound policy tool create-terminal` and `create-mcp` so Claude (and humans) can create policies from a natural-language description.
|
|
30
|
-
2. Ship a Claude Code skill bundled with the CLI install so Claude reliably picks the AI-assist path over hand-authored arguments.
|
|
31
|
-
3. Detect and clearly reject prompts that ask for fields the AI-assist endpoint cannot fill (env scoping, exception clauses, user-group conditions, etc.).
|
|
32
|
-
4. Surface backend errors — especially the 2000-char rejection and the LLM-could-not-determine cases — with actionable CLI messages.
|
|
33
|
-
5. Provide an eval harness that measures the acceptance criterion: "Claude Code should always pick our AI-assist endpoint over creating policies on its own."
|
|
34
|
-
|
|
35
|
-
## 3. Non-goals
|
|
36
|
-
|
|
37
|
-
- **LLM quality improvements.** The Cerebras terminal classifier and Haiku MCP classifier are out of scope. The user has explicitly flagged this as non-functional for this ticket. We will work around flakiness via UX, not by changing the model.
|
|
38
|
-
- **Cost / Model / Security policy types.** Only tool policies have AI-assist endpoints today. Adding AI-assist to the other three is a separate ticket.
|
|
39
|
-
- **Multi-turn refinement loops** with the user inside the CLI. The endpoints are one-shot; we keep that contract.
|
|
40
|
-
- **Authoring new backend fields.** The CLI must work strictly within the existing form schema and AI-assist response schema.
|
|
41
|
-
|
|
42
|
-
---
|
|
43
|
-
|
|
44
|
-
## 4. Field whitelist — what AI-assist actually fills
|
|
45
|
-
|
|
46
|
-
This is the boundary the skill must teach Claude to respect. Fields outside this set must be passed via existing flags, or the prompt must be rejected with a clear message.
|
|
47
|
-
|
|
48
|
-
### 4.1 Terminal command AI-assist (`assist/`)
|
|
49
|
-
|
|
50
|
-
**Fills** (from `form_updates` in the response — `command_policy_handlers.py:1340-1393`):
|
|
51
|
-
|
|
52
|
-
| Field | Type | Notes |
|
|
53
|
-
|---|---|---|
|
|
54
|
-
| `command_family` | string | Must match a known family (e.g. `filesystem`, `git`, `package_manager`). |
|
|
55
|
-
| `selected_field` | string | One of the family's fields, or the literal `ANY` wildcard. |
|
|
56
|
-
| `field_value` | string | Pattern — auto-detected as EXACT / GLOB / REGEX. `*` for "match everything". |
|
|
57
|
-
| `action` | enum | `audit` \| `block` \| `warn` \| `require_slack_approval`. Omitted if user didn't specify. |
|
|
58
|
-
| `name` | string ≤ 200 | Suggested policy name. Optional. |
|
|
59
|
-
| `description` | string ≤ 500 | Suggested description. Optional. |
|
|
60
|
-
|
|
61
|
-
**Does NOT fill** (form fields the user / Claude must set via flags or accept defaults):
|
|
62
|
-
|
|
63
|
-
| Form field | Source | CLI handling |
|
|
64
|
-
|---|---|---|
|
|
65
|
-
| `enabled` | `useCreatePolicyForm.ts:21` | Default `true`. Expose `--disabled` flag. |
|
|
66
|
-
| `custom_message` | `useCreatePolicyForm.ts:18` | Only relevant for `BLOCK`/`WARN`. Expose `--custom-message` flag; do not parse from prompt. |
|
|
67
|
-
| `scope_user_group_ids` | `useCreatePolicyForm.ts:39` | Expose `--group` / `--scope-group` flag; do not parse group names from prompt. |
|
|
68
|
-
| `suggestion_id` | UI-only, coverage-gap link | Not relevant to CLI. |
|
|
69
|
-
|
|
70
|
-
### 4.2 MCP tool AI-assist (`assist-mcp/`)
|
|
71
|
-
|
|
72
|
-
**Fills** (from `mcp_policy_assist_service.py:36-86`):
|
|
73
|
-
|
|
74
|
-
| Field | Type | Notes |
|
|
75
|
-
|---|---|---|
|
|
76
|
-
| `mcp_canonical_group_id` | int | Must match an entry in the org's MCP catalog. |
|
|
77
|
-
| `mcp_tools` | string[] | Tool names, post-validated against the group's actual tool list — invented names are dropped. |
|
|
78
|
-
| `name` | string ≤ 80 | Short policy name. |
|
|
79
|
-
| `description` | string | One sentence. |
|
|
80
|
-
| `action` | enum | `AUDIT` \| `BLOCK` \| `WARN` \| `REQUIRE_SLACK_APPROVAL`. Defaults to `AUDIT` if not in prompt. |
|
|
81
|
-
| `custom_message` | string | Only when action is `BLOCK`/`WARN`. |
|
|
82
|
-
|
|
83
|
-
**Does NOT fill** (same boundary as terminal, except that `custom_message` is AI-filled for MCP):
|
|
84
|
-
|
|
85
|
-
- `enabled` — default true, `--disabled` flag.
|
|
86
|
-
- `scope_user_group_ids` — `--group` flag.
|
|
87
|
-
- `suggested_tools` — UI-only, captured from a separate `/resolve-tools/` call.
|
|
88
|
-
|
|
89
|
-
### 4.3 Out-of-scope constructs (the "env, archived projects, nuance" cases)
|
|
90
|
-
|
|
91
|
-
These are things users will naturally try to express. None of them are representable in the current form schema, and the LLM will either silently drop them or hallucinate. The CLI must detect them in the prompt and warn before sending.
|
|
92
|
-
|
|
93
|
-
| Construct | Example phrasing | Why it can't work | CLI response |
|
|
94
|
-
|---|---|---|---|
|
|
95
|
-
| Environment scope | "in staging", "on prod", "only in dev" | No env field on policies. | Warn + ask user to remove or proceed knowing it's ignored. |
|
|
96
|
-
| Repo / project filter | "for the web repo", "except archived projects" | No project field. | Warn. |
|
|
97
|
-
| User-role conditions | "for non-admins", "when the user is on call" | Closest is `scope_user_group_ids`, settable via `--group` only. | Suggest `--group <name>`. |
|
|
98
|
-
| Time conditions | "after 6pm", "outside business hours" | Not representable. | Reject. |
|
|
99
|
-
| Exception clauses | "block all writes EXCEPT to docs" | Single-policy schema is allow-or-deny on one pattern. | Suggest creating two policies. |
|
|
100
|
-
| Compound asks | "block X and audit Y" | One policy per call. | Suggest splitting into N invocations. |
|
|
101
|
-
|
|
102
|
-
---
|
|
103
|
-
|
|
104
|
-
## 5. Architecture decision
|
|
105
|
-
|
|
106
|
-
### 5.1 Add `--prompt` to existing subcommands rather than introduce parallel `-ai` variants
|
|
107
|
-
|
|
108
|
-
The existing CLI surface is:
|
|
109
|
-
|
|
110
|
-
```
|
|
111
|
-
unbound policy tool create-terminal --name "..." --command-family ... --field K=V --action ... [flags]
|
|
112
|
-
unbound policy tool create-mcp --name "..." --mcp-server ... --mcp-action-type ... --action ... [flags]
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
We add a single `--prompt "<natural language>"` flag to each. When `--prompt` is present:
|
|
116
|
-
|
|
117
|
-
1. The CLI calls the corresponding AI-assist endpoint.
|
|
118
|
-
2. The response's `form_updates` are merged with any flags the user also passed (flags take precedence — they're explicit overrides).
|
|
119
|
-
3. The CLI shows the resolved policy to the user (or to Claude, who will surface it to the user) and asks for confirmation before calling the real create endpoint.
|
|
120
|
-
|
|
121
|
-
Rejected alternatives:
|
|
122
|
-
|
|
123
|
-
- **Parallel `create-terminal-ai` / `create-mcp-ai` subcommands.** Doubles the command surface; gives Claude two near-identical commands to pick between; complicates the skill.
|
|
124
|
-
- **Unified `unbound policy tool assist` that infers terminal vs MCP.** Inference itself is another LLM call. Cleaner UX, but adds a third moving part. Defer to a later ticket.
|
|
125
|
-
|
|
126
|
-
### 5.2 Two-phase delivery
|
|
127
|
-
|
|
128
|
-
| Phase | Scope | Why this order |
|
|
129
|
-
|---|---|---|
|
|
130
|
-
| **Phase 1** | `create-terminal --prompt` + skill v1 (terminal only) | Terminal endpoint has a deterministic fallback when the LLM fails. Lower-risk path to validate the surface. |
|
|
131
|
-
| **Phase 2** | `create-mcp --prompt` + skill v2 (covers both) | MCP endpoint has no fallback. Ship after Phase 1 has shaken out the error-handling patterns. |
|
|
132
|
-
|
|
133
|
-
In Phase 1, the skill explicitly tells Claude: "For MCP policies, use the existing flag-based `create-mcp` — AI-assist for MCP is coming in Phase 2."
|
|
134
|
-
|
|
135
|
-
---
|
|
136
|
-
|
|
137
|
-
## 6. Phase 1 — Terminal command AI-assist
|
|
138
|
-
|
|
139
|
-
### 6.1 CLI surface
|
|
140
|
-
|
|
141
|
-
```bash
|
|
142
|
-
unbound policy tool create-terminal \
|
|
143
|
-
--prompt "block rm -rf in the filesystem family" \
|
|
144
|
-
[--group <name>] \
|
|
145
|
-
[--disabled] \
|
|
146
|
-
[--custom-message "..."] \
|
|
147
|
-
[--yes] \
|
|
148
|
-
[--json]
|
|
149
|
-
```
|
|
150
|
-
|
|
151
|
-
Behavior:
|
|
152
|
-
|
|
153
|
-
- `--prompt` is mutually exclusive with the existing field-specifying flags (`--command-family`, `--field`, `--action`, `--name`, `--description`). If both are passed, error out: "Pass `--prompt` for AI-assist or the field flags for explicit creation, not both."
|
|
154
|
-
- `--group`, `--disabled`, `--custom-message` remain valid alongside `--prompt` because they're out-of-AI-scope (see §4.1).
|
|
155
|
-
- `--yes` skips the confirmation prompt (needed for Claude's non-interactive flows).
|
|
156
|
-
- `--json` emits the resolved policy and the create response as JSON, no TTY formatting.
|
|
157
|
-
|
|
158
|
-
### 6.2 Pre-flight checks (CLI side, before any network call)
|
|
159
|
-
|
|
160
|
-
In order, fail fast on the first hit:
|
|
161
|
-
|
|
162
|
-
1. **Auth present.** Refuse if `unbound login` has not been run.
|
|
163
|
-
2. **Admin role.** Hit `whoami` cache or pre-flight `/me`; if not admin, error: "Tool policy creation requires admin role; current role: <role>."
|
|
164
|
-
3. **Prompt length.** If trimmed length > 1800 characters, reject locally with the same wording the backend uses: "Input is too long (max 2000 characters)." Reject at 1800 to leave headroom for backend normalization.
|
|
165
|
-
4. **Out-of-scope keyword scan.** Lower-cased substring match against this list (kept small and explicit, not a regex puzzle):
|
|
166
|
-
- `staging`, `production`, `prod`, `dev`, `qa`, `testing`
|
|
167
|
-
- `archived`, `archive`
|
|
168
|
-
- `business hour`, `after hour`, `outside hour`, `weekend`
|
|
169
|
-
- `except`, `unless`, `but not`
|
|
170
|
-
- `private repo`, `public repo`, `private project`, `public project`
|
|
171
|
-
On match, warn and confirm: "Your prompt mentions `<token>`. Tool policies cannot scope by environment / project / time / exception clauses. Continue anyway? The endpoint will ignore those parts. [y/N]". Under `--yes`, log the warning but proceed.
|
|
172
|
-
5. **Newline normalization.** Collapse runs of ≥2 newlines to one. (Mirror backend defense — gives us identical sanitization regardless of which side rejects.)
|
|
173
|
-
|
|
174
|
-
### 6.3 Network call
|
|
175
|
-
|
|
176
|
-
```
|
|
177
|
-
POST {API_BASE}/api/v1/command-policies/assist/
|
|
178
|
-
Authorization: Bearer <token>
|
|
179
|
-
Content-Type: application/json
|
|
180
|
-
|
|
181
|
-
{
|
|
182
|
-
"user_input": "<sanitized prompt>",
|
|
183
|
-
"current_form_state": {
|
|
184
|
-
"command_family": "",
|
|
185
|
-
"selected_field": "",
|
|
186
|
-
"field_value": "",
|
|
187
|
-
"action": "",
|
|
188
|
-
"name": "",
|
|
189
|
-
"description": ""
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
```
|
|
193
|
-
|
|
194
|
-
Timeout: 20 seconds (backend timeout to the gateway is 15s; we add 5s for routing).
|
|
195
|
-
|
|
196
|
-
### 6.4 Response handling
|
|
197
|
-
|
|
198
|
-
| HTTP | Body | CLI behavior |
|
|
199
|
-
|---|---|---|
|
|
200
|
-
| 200 | `success: true`, `form_updates: {...}`, `explanation` | Merge `form_updates` ← user-passed flags, render preview, confirm. |
|
|
201
|
-
| 200 | `success: false`, `error: "Input is too long..."` | Print verbatim error + suggestion: "Try shortening to under 1800 characters." Exit 2. |
|
|
202
|
-
| 200 | `success: false`, `error: "Could not determine command family..."` | Print verbatim error + suggestion: "Try naming the command type explicitly (e.g. `block git pushes`, `audit npm installs`). Or use `unbound policy tool families` to see available families and pass `--command-family` directly." Exit 2. |
|
|
203
|
-
| 200 | `success: false`, `error: <other>` | Print verbatim error. Exit 2. |
|
|
204
|
-
| 401 / 403 | any | "Authentication failed / not authorized. Tool policies require admin. Run `unbound whoami` to check role." Exit 3. |
|
|
205
|
-
| 400 / 422 | any | "Request validation failed: `<body>`." Exit 2. |
|
|
206
|
-
| 5xx | any | "Server error. Try again, or fall back to flag-based creation: `unbound policy tool create-terminal --command-family ... --field ... --action ...`." Exit 4. |
|
|
207
|
-
| timeout / network | — | "Network error reaching `<host>`. Check connectivity. Falling back to flag-based creation is not blocked." Exit 4. |
|
|
208
|
-
|
|
209
|
-
**No automatic retries.** Re-sending the same prompt to the same flaky LLM endpoint won't change the answer in any meaningful way, and silent retries hide latency. If the user explicitly asks Claude to retry, that's a separate invocation.
|
|
210
|
-
|
|
211
|
-
### 6.5 Merging AI output with user-passed flags
|
|
212
|
-
|
|
213
|
-
Order of precedence (highest first):
|
|
214
|
-
|
|
215
|
-
1. Explicit flags on the command line (`--group`, `--custom-message`, `--disabled`).
|
|
216
|
-
2. AI response `form_updates` fields.
|
|
217
|
-
3. Defaults (`enabled: true`, `action: audit` if AI omitted it).
|
|
218
|
-
|
|
219
|
-
The merge happens before the confirmation step so the user sees the final shape.
|
|
220
|
-
|
|
221
|
-
### 6.6 Confirmation step
|
|
222
|
-
|
|
223
|
-
Default behavior — show the resolved policy, ask `Create policy? [Y/n]`. The
|
|
224
|
-
preview reuses the CLI's standard `output.keyValue` rendering (dimmed keys,
|
|
225
|
-
auto-padded columns) for visual consistency with `displayToolPolicy` and the
|
|
226
|
-
other policy-display surfaces.
|
|
227
|
-
|
|
228
|
-
Concept-order of rows (rows MAY be skipped when their value is empty — e.g.
|
|
229
|
-
Description and Custom message are dropped rather than printed as `(none)`):
|
|
230
|
-
|
|
231
|
-
- Name, Description, Type, Command family, Field, Pattern, Action,
|
|
232
|
-
Custom message, Scope (groups), Enabled.
|
|
233
|
-
|
|
234
|
-
Action is colored — `BLOCK` red, `WARN`/`REQUIRE_SLACK_APPROVAL` yellow,
|
|
235
|
-
`AUDIT` dimmed (low-emphasis default). The AI explanation prints below the
|
|
236
|
-
table in dim text and is omitted entirely when the backend returns an empty
|
|
237
|
-
explanation.
|
|
238
|
-
|
|
239
|
-
Exact column widths and labels are not pinned — tests assert on meaningful
|
|
240
|
-
tokens (Name value, Action enum, explanation body), not on column-aligned
|
|
241
|
-
strings.
|
|
242
|
-
|
|
243
|
-
Under `--yes`, skip the prompt but always print the resolved policy (Claude will surface it to the user in chat).
|
|
244
|
-
|
|
245
|
-
### 6.7 Create call
|
|
246
|
-
|
|
247
|
-
On confirmation, call the existing create endpoint exactly as the flag-based path does today (`POST /api/v1/command-policies/`) with the merged payload. No new backend code on the create side.
|
|
248
|
-
|
|
249
|
-
### 6.8 Backend change (optional, recommended)
|
|
250
|
-
|
|
251
|
-
Add a `source` field to both AI-assist endpoints (`"web"` | `"cli"`, defaulting to `"web"` for backward compat). Lets us measure CLI adoption and the AC pick-rate without parsing user-agent headers. One-line wire change, two-line server change, no behavioral impact.
|
|
252
|
-
|
|
253
|
-
### 6.9 Tests
|
|
254
|
-
|
|
255
|
-
- **Unit (CLI):**
|
|
256
|
-
- Prompt length pre-flight at 1799 / 1800 / 1801 chars.
|
|
257
|
-
- Out-of-scope keyword detection for each token in §6.2.
|
|
258
|
-
- Flag/prompt mutual exclusion error path.
|
|
259
|
-
- Merge precedence: flag wins over AI value.
|
|
260
|
-
- Each HTTP status path in §6.4 routes to the right CLI message.
|
|
261
|
-
- **Integration (against staging):**
|
|
262
|
-
- 10 representative single-intent prompts → expect `success: true` and a valid policy.
|
|
263
|
-
- 3 oversize prompts → expect length error, no policy created.
|
|
264
|
-
- 3 nonsense prompts → expect "could not determine" error.
|
|
265
|
-
- **Eval harness (see §8):**
|
|
266
|
-
- Pick-rate test: spawn Claude Code with the skill installed, give it 10 natural-language asks ("create a policy that…"), measure whether it invokes `create-terminal --prompt` vs hand-rolls `--command-family ...`.
|
|
267
|
-
|
|
268
|
-
---
|
|
269
|
-
|
|
270
|
-
## 7. Phase 2 — MCP tool AI-assist — **DELIVERED**
|
|
271
|
-
|
|
272
|
-
Phase 2 ships on the same branch (`web-4887-cli-ai-assist-policy`, PR #54) as
|
|
273
|
-
Phase 1. New exports in `src/lib/policy-ai-assist.js`: `runMcpPromptCreate`,
|
|
274
|
-
`mergeAiAndFlagsMcp`, `renderMcpPreview`. The `create-mcp` subcommand now
|
|
275
|
-
accepts `--prompt <text>` as a first-class alternative to the flag-based path.
|
|
276
|
-
|
|
277
|
-
### 7.1 CLI surface
|
|
278
|
-
|
|
279
|
-
```bash
|
|
280
|
-
unbound policy tool create-mcp \
|
|
281
|
-
--prompt "audit all Linear writes" \
|
|
282
|
-
[--group <name>] \
|
|
283
|
-
[--disabled] \
|
|
284
|
-
[--custom-message "..."] \
|
|
285
|
-
[--yes] \
|
|
286
|
-
[--json]
|
|
287
|
-
```
|
|
288
|
-
|
|
289
|
-
Same flag rules as Phase 1.
|
|
290
|
-
|
|
291
|
-
### 7.2 Pre-flight additions on top of §6.2
|
|
292
|
-
|
|
293
|
-
- **Catalog awareness.** If the prompt names a service that isn't in the org's MCP catalog (fetched via the existing `unbound policy tool mcp-servers` call, cached for the session), warn before sending: "Your prompt mentions `<service>`, which isn't in this org's MCP catalog. The endpoint will likely return an error. Available services: `<list>`. Continue? [y/N]". Optional optimization; skip in v1 if scope creeps.
|
|
294
|
-
|
|
295
|
-
### 7.3 Network call
|
|
296
|
-
|
|
297
|
-
`POST {API_BASE}/api/v1/command-policies/assist-mcp/` — identical envelope shape to Phase 1, with the MCP-specific `current_form_state` keys.
|
|
298
|
-
|
|
299
|
-
### 7.4 Response handling — same matrix as §6.4, with one addition
|
|
300
|
-
|
|
301
|
-
| HTTP | Body | CLI behavior |
|
|
302
|
-
|---|---|---|
|
|
303
|
-
| 200 | `success: true`, `mcp_tools: []` (empty array after backend post-validation drops hallucinated names) | Treat as a soft failure: "AI assist could not match any tools in `<group>` for your description. Try naming the tools more directly, or use `unbound policy tool create-mcp --mcp-server ... --mcp-tools tool1,tool2`." Do NOT auto-create a policy with no tools. Exit 2. |
|
|
304
|
-
|
|
305
|
-
The MCP endpoint has no backend deterministic fallback, so flakiness surfaces as either `success: false` or `mcp_tools: []`. The CLI handles both as "couldn't figure it out; here's the manual escape hatch."
|
|
306
|
-
|
|
307
|
-
Note: the `assist-mcp/` response has no `explanation` field (asymmetric with the
|
|
308
|
-
terminal `assist/` endpoint). `renderMcpPreview` is passed `undefined` for the
|
|
309
|
-
explanation and skips the trailing dim line entirely. The render code is
|
|
310
|
-
forward-compatible: if the backend later adds `explanation`, the existing
|
|
311
|
-
`if (explanation)` guard picks it up with zero CLI change.
|
|
312
|
-
|
|
313
|
-
### 7.5 Tests — extend §6.9 with MCP-specific cases
|
|
314
|
-
|
|
315
|
-
- Empty `mcp_tools` response routes to the "couldn't match any tools" message.
|
|
316
|
-
- Out-of-catalog service mention warns before sending.
|
|
317
|
-
- Integration: 10 single-service prompts + 3 multi-service prompts (latter should warn or split).
|
|
318
|
-
|
|
319
|
-
---
|
|
320
|
-
|
|
321
|
-
## 8. Claude Code skill — distribution and content
|
|
322
|
-
|
|
323
|
-
> ~~DEPRECATED — replaced by CLI-side guards in §14~~. The external Claude Code skill described below was never shipped; setup-repo PR #163 was closed unmerged. CLI-side deterministic enforcement (layers 1+2+3) supersedes this approach. See §14 for the shipped design.
|
|
324
|
-
|
|
325
|
-
The CLI cannot edit Claude's system prompt, but it can drop a skill file. Skills are the closest analog to a CLAUDE.md hook that we can install programmatically.
|
|
326
|
-
|
|
327
|
-
### 8.1 Where it lives
|
|
328
|
-
|
|
329
|
-
`~/.claude/skills/unbound-tool-policy/SKILL.md` at runtime.
|
|
330
|
-
|
|
331
|
-
Source-of-truth lives in the **`setup` repo** at `claude-code/skills/unbound-tool-policy/SKILL.md`, NOT in unbound-cli. The `claude-code/hooks/setup.py` and `claude-code/gateway/setup.py` scripts (which `unbound setup claude-code` and `unbound onboard` download and run) copy it from the setup repo's main branch on every install. This co-locates the skill with the rest of the claude-code per-tool install artifacts (`unbound.py`, `anthropic_key.sh`, etc.) and lets skill content updates ship the moment they merge to setup-repo main — no unbound-cli release required.
|
|
332
|
-
|
|
333
|
-
Installed by `unbound setup claude-code` (both subscription and gateway modes) and by `unbound onboard`. Idempotent: overwritten on each install so content updates propagate.
|
|
334
|
-
|
|
335
|
-
### 8.2 Phase 1 skill content (terminal only)
|
|
336
|
-
|
|
337
|
-
The SKILL.md frontmatter is what Claude reads to decide whether to invoke. Keep the description imperative and specific:
|
|
338
|
-
|
|
339
|
-
```markdown
|
|
340
|
-
---
|
|
341
|
-
name: unbound-tool-policy
|
|
342
|
-
description: |
|
|
343
|
-
Use when the user asks to create a terminal command tool policy in Unbound
|
|
344
|
-
(e.g. "block rm -rf", "audit git pushes to main"). ALWAYS prefer
|
|
345
|
-
`unbound policy tool create-terminal --prompt "<NL>"` over hand-authoring
|
|
346
|
-
the --command-family / --field / --action flags. The CLI calls a
|
|
347
|
-
server-side AI-assist endpoint tuned on Unbound's policy schema; it
|
|
348
|
-
outperforms hand-authoring.
|
|
349
|
-
|
|
350
|
-
For MCP tool policies (Linear, GitHub, etc.), AI-assist is not available
|
|
351
|
-
yet — fall back to flag-based `unbound policy tool create-mcp` for those.
|
|
352
|
-
---
|
|
353
|
-
|
|
354
|
-
# Creating an Unbound terminal command policy
|
|
355
|
-
|
|
356
|
-
When the user asks for a tool policy targeting terminal commands, invoke:
|
|
357
|
-
|
|
358
|
-
unbound policy tool create-terminal --prompt "<natural language>" [--group <name>] [--yes]
|
|
359
|
-
|
|
360
|
-
## Rules for the --prompt string
|
|
361
|
-
|
|
362
|
-
1. **One policy per invocation.** If the user wants two things, run the
|
|
363
|
-
command twice.
|
|
364
|
-
2. **Single-intent, imperative.** Phrase it as "block git pushes to main",
|
|
365
|
-
"audit npm installs", "warn on rm -rf". Avoid multi-paragraph briefs.
|
|
366
|
-
3. **Stay under 1500 characters.** The endpoint caps at 2000; the CLI
|
|
367
|
-
pre-flight rejects anything over 1800 rather than trimming it. Long prompts get rejected.
|
|
368
|
-
4. **Do NOT include** these — the endpoint cannot represent them and the
|
|
369
|
-
CLI will warn about them:
|
|
370
|
-
- environment scope (staging, prod, dev)
|
|
371
|
-
- project / repo filters
|
|
372
|
-
- time-based conditions
|
|
373
|
-
- exception clauses ("except for X")
|
|
374
|
-
- user-role conditions
|
|
375
|
-
5. **Group scoping** goes on the `--group` flag, not in the prompt.
|
|
376
|
-
6. **Custom block messages** go on `--custom-message`, not in the prompt.
|
|
377
|
-
|
|
378
|
-
## When AI-assist fails
|
|
379
|
-
|
|
380
|
-
If the CLI returns "Could not determine command family", try one of:
|
|
381
|
-
|
|
382
|
-
- Re-phrase the user's intent to name the command type explicitly.
|
|
383
|
-
- Fall back to flag-based: `unbound policy tool families` to list families,
|
|
384
|
-
then `unbound policy tool create-terminal --command-family ... --field ...`.
|
|
385
|
-
|
|
386
|
-
## What success looks like
|
|
387
|
-
|
|
388
|
-
The CLI prints a resolved policy preview and asks for confirmation.
|
|
389
|
-
Pass `--yes` to skip the confirm; the preview still prints so the user
|
|
390
|
-
can sanity-check.
|
|
391
|
-
|
|
392
|
-
## MCP policies (not this skill, for now)
|
|
393
|
-
|
|
394
|
-
For MCP tool policies, use the flag-based form:
|
|
395
|
-
|
|
396
|
-
unbound policy tool create-mcp --name "..." --mcp-server <server> \
|
|
397
|
-
--mcp-action-type <read|write|destructive> --action AUDIT|BLOCK|WARN
|
|
398
|
-
```
|
|
399
|
-
|
|
400
|
-
### 8.3 Phase 2 skill content updates
|
|
401
|
-
|
|
402
|
-
When Phase 2 ships, replace the "MCP policies (not this skill, for now)" section with the MCP variant of the same rules, and update the frontmatter description to drop the "MCP not available yet" caveat.
|
|
403
|
-
|
|
404
|
-
### 8.4 Belt-and-suspenders: README and `--help`
|
|
405
|
-
|
|
406
|
-
- Update `unbound policy tool create-terminal --help` so the imperative language is also in the help output. Claude reads `--help` when uncertain.
|
|
407
|
-
- Update `README.md` "Tool policy examples" section so the first example uses `--prompt`. Manual flag examples remain below as a fallback.
|
|
408
|
-
|
|
409
|
-
---
|
|
410
|
-
|
|
411
|
-
## 9. Acceptance criterion — testable definition
|
|
412
|
-
|
|
413
|
-
The Linear ticket says "Claude code should always pick our AI assist endpoint over creating policies on its own." Rewritten as testable:
|
|
414
|
-
|
|
415
|
-
1. **Pick-rate ≥ 90%** on the eval set below.
|
|
416
|
-
2. **Success-rate ≥ 80%** — at least 80% of in-scope eval prompts that reach the endpoint return `success: true`.
|
|
417
|
-
3. **Zero hand-authored YAML / JSON files** in the eval transcript. Claude should never write a policy file to disk and then upload — the CLI is the only path.
|
|
418
|
-
|
|
419
|
-
### 9.1 Eval set
|
|
420
|
-
|
|
421
|
-
`tests/eval/policy-prompts.json` in this repo. ~20 prompts across:
|
|
422
|
-
|
|
423
|
-
- 6 single-intent in-scope ("block rm -rf", "audit git pushes to main", …)
|
|
424
|
-
- 4 detailed in-scope (300–800 chars, single intent, more context)
|
|
425
|
-
- 4 out-of-scope (env, exception, time, compound) — expected outcome: skill steers Claude to warn the user or split
|
|
426
|
-
- 3 oversize (> 2000 chars) — expected outcome: pre-flight rejects
|
|
427
|
-
- 3 nonsense — expected outcome: `success: false`, surfaced
|
|
428
|
-
|
|
429
|
-
Eval harness: a script that spawns Claude Code in a clean workspace with the skill installed, feeds each prompt as a user message, and records: which command Claude invoked, whether `--prompt` was used, the endpoint response, the final state. Aggregate into a pick-rate / success-rate report.
|
|
430
|
-
|
|
431
|
-
This eval is the artifact that closes the ticket. Without it, "always picks" is unverifiable.
|
|
432
|
-
|
|
433
|
-
---
|
|
434
|
-
|
|
435
|
-
## 10. Implementation order
|
|
436
|
-
|
|
437
|
-
### Phase 1 (1–2 dev-days)
|
|
438
|
-
|
|
439
|
-
1. `src/commands/policy.js` — add `--prompt` flag and AI path to `create-terminal`. Branch on `--prompt` presence; preserve all existing flag paths.
|
|
440
|
-
2. New module `src/lib/policy-ai-assist.js` — HTTP client, pre-flight checks, response routing, preview rendering.
|
|
441
|
-
3. Skill file at `skills/unbound-tool-policy/SKILL.md` in the repo; build step copies to `~/.claude/skills/` during `setup claude-code` and `onboard`.
|
|
442
|
-
4. Update `--help` text and `README.md` policy examples.
|
|
443
|
-
5. Unit tests (§6.9).
|
|
444
|
-
6. Eval harness skeleton + the in-scope prompts (§9.1).
|
|
445
|
-
7. Integration sanity check against staging.
|
|
446
|
-
|
|
447
|
-
### Phase 2 (1 dev-day, after Phase 1 lands)
|
|
448
|
-
|
|
449
|
-
1. Extend `create-mcp` with `--prompt`.
|
|
450
|
-
2. Reuse `src/lib/policy-ai-assist.js`; add MCP-specific pre-flight and response routing.
|
|
451
|
-
3. Update skill file and `--help`.
|
|
452
|
-
4. Extend eval harness with MCP prompts.
|
|
453
|
-
|
|
454
|
-
### Backend (optional, parallel to Phase 1)
|
|
455
|
-
|
|
456
|
-
Add the `source: "cli" | "web"` field to both AI-assist request handlers and log it. PR in `ai-gateway-data`.
|
|
457
|
-
|
|
458
|
-
---
|
|
459
|
-
|
|
460
|
-
## 11. Open questions / risks
|
|
461
|
-
|
|
462
|
-
1. **Catalog awareness in MCP pre-flight.** The `mcp-servers` list is org-specific and changes. Caching policy: per-session in memory, or persist to `~/.unbound/cache/`? Defer to Phase 2 review.
|
|
463
|
-
2. **Admin-key blast radius.** AI-assist is admin-only. End-user Claude Code sessions running under a non-admin key will get 403s. The skill should detect that on first use and tell Claude to advise the user to ask their admin, instead of looping on retries.
|
|
464
|
-
3. **Eval cost.** Running the eval against real Claude Code instances burns tokens. Cap the eval to one run per CI build, gated on changes to `src/commands/policy.js` or the skill file.
|
|
465
|
-
|
|
466
|
-
## 12. Deferred to follow-up tickets
|
|
467
|
-
|
|
468
|
-
These are intentionally out of scope for WEB-4887 to keep the surface narrow. Spin off tickets when these become live:
|
|
469
|
-
|
|
470
|
-
1. **MCP catalog cache for pre-flight.** Phase 2 pre-flight could check the prompted service against the org's MCP catalog before sending. Cache location (`~/.unbound/cache/`?) and invalidation policy need design. Defer until we see whether the bare endpoint surfaces "no matching service" errors well enough on its own.
|
|
471
|
-
2. **`source: "cli" | "web"` request field on both AI-assist endpoints.** Lets us distinguish CLI usage from web usage in production logs, which is the only way to measure the AC pick-rate at scale without parsing user-agents. Punt to a post-Phase-2 ticket so backend work doesn't gate the CLI rollout.
|
|
472
|
-
3. **AI-assist for Cost / Model / Security policy types.** Today only tool policies have AI-assist endpoints. If pick-rate data shows users want NL creation for the other three, design endpoints and extend the skill.
|
|
473
|
-
~~4. **Per-user skill install for MDM deployments.**~~ — DELIVERED in setup PR #163. Both MDM setup scripts now enumerate device users via `get_all_user_homes()` and install the skill into each user's `~/.claude/skills/` under `_run_as_user(username, _do)` (the existing security-critical privilege-drop primitive). Best-effort per user; one locked-down home does not abort the device rollout. See `claude-code/{hooks,gateway}/mdm/setup.py:setup_tool_policy_skill_for_user` and the README updates in each `mdm/` directory. (Note: §8 and §14 record that setup PR #163 was later closed unmerged, so treat this item as superseded by the CLI-side guards described in §14.)
|
|
474
|
-
|
|
475
|
-
## 13. Skill reinstall policy
|
|
476
|
-
|
|
477
|
-
Each `unbound setup claude-code` and `unbound onboard` run **overwrites** `~/.claude/skills/unbound-tool-policy/SKILL.md` with the version fetched from the `setup` repo's main branch at install time. No checksum gating, no diff-and-prompt. Rationale: the skill is a derived artifact of whatever's in the setup repo's main; treating it as an install-time output (not user state) keeps the mental model simple and ensures Claude is always steered with the current rules.
|
|
478
|
-
|
|
479
|
-
(The skill source-of-truth was originally bundled in unbound-cli during initial implementation; relocated to the setup repo to follow the existing per-tool-artifact convention. See companion PR https://github.com/websentry-ai/setup/pull/163.)
|
|
480
|
-
|
|
481
|
-
---
|
|
482
|
-
|
|
483
|
-
## 14. Steering enforcement (post-skill-kill)
|
|
484
|
-
|
|
485
|
-
§8 (external Claude Code skill) is deprecated. Setup-repo PR #163 was closed unmerged, and the install-side artifact path was abandoned in favor of CLI-side deterministic enforcement that ships in the same binary as the commands it protects. The enforcement has three layers, evaluated in order; they are implemented in `src/lib/no-ai-guard.js` and wired into the `.action()` handlers of `create-terminal` (line 1430) and `create-mcp` (line 1602) immediately after `requireLogin()`.
|
|
486
|
-
|
|
487
|
-
### 14.1 Layer 1 — hard guard at the action boundary
|
|
488
|
-
|
|
489
|
-
`unbound policy tool create-terminal` and `create-mcp` reject invocations that pass neither `--prompt` nor an explicit `--no-ai` opt-out. The error message names the AI-assist form, instructs the caller to retry with `--prompt "<text>"`, and surfaces `--no-ai` as the documented opt-out for raw classification flags. Exit code is `2` (invalid usage), routed through `err.exitCode = 2` and propagated by the `process.exitCode = err.exitCode || 1` catch block.
|
|
490
|
-
|
|
491
|
-
`--prompt` and `--no-ai` are also declared mutex: passing both exits 2 with a "not both" message. This avoids ambiguity about which path the caller intended.
|
|
492
|
-
|
|
493
|
-
Why an action-time guard and not a commander.js parse-time validator: commander has no first-class mutex API for "exactly one of A, B" that also accommodates a default-to-A steering nudge, and threading the AI-assist routing through commander's option parsing would complicate the existing `--prompt` branch in `policy.js`. Throwing from `.action()` keeps the guard local to the two subcommands it protects.
|
|
494
|
-
|
|
495
|
-
### 14.2 Layer 2 — Claude Code detection + env-gated escape hatch
|
|
496
|
-
|
|
497
|
-
When `process.env.CLAUDECODE === '1'`, `--no-ai` is additionally rejected unless `process.env.UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE === '1'` is also set. The rejection message explicitly says "intended for interactive humans, not for agents." This is steering, not security: the env var is in plain sight in the error message, and an adversarial agent that reads the message could set it itself. Accepted residual risk per the original principal-architect review. Server-side detection is deferred to a separate ticket, to be picked up only if pick-rate telemetry later shows leakage.
|
|
498
|
-
|
|
499
|
-
`CLAUDECODE=1` is set by current Claude Code variants. If future variants do not set it, layer 1 still steers to `--prompt`, and the layer-2 nudge silently no-ops — an acceptable degradation. Future Claude Code variants can set `CLAUDECODE=1` to remain steered by this layer.
|
|
500
|
-
|
|
501
|
-
### 14.3 Layer 3 — banner-first `--help`
|
|
502
|
-
|
|
503
|
-
`unbound policy tool create-terminal --help` (and `create-mcp --help`) opens with a short banner that names the AI-assisted form before commander's auto-generated `Usage:` line. This is the first text a reader (human or agent) sees on `--help`, so it leads the eye to `--prompt`. Implementation: `.addHelpText('before', helpBannerFor(subcommandName))`. The existing `addHelpText('after', ...)` examples are updated so every manual-flag example prepends `--no-ai`, making the documented opt-out form unambiguous.
|
|
504
|
-
|
|
505
|
-
### 14.4 Why the external skill was killed
|
|
506
|
-
|
|
507
|
-
The original §8 plan installed `~/.claude/skills/unbound-tool-policy/SKILL.md` via `unbound setup claude-code` and `unbound onboard`, with per-user MDM rollout in setup-repo PR #163. Three problems killed it:
|
|
508
|
-
|
|
509
|
-
1. **Install-time artifact, runtime enforcement.** A skill file in `~/.claude/` is a soft nudge — Claude reads it, but nothing in the CLI enforces what the skill recommends. CLI-side guards close the loop: the binary that creates the policy is also the binary that decides which path to route through.
|
|
510
|
-
2. **Distribution sprawl.** Skill content lived in the setup repo to follow the per-tool-artifact convention. Updating steering rules meant a setup-repo PR, an MDM rollout, and a CLI release on different cadences. Folding the rules into the CLI collapses that to one release surface.
|
|
511
|
-
3. **MDM rollout complexity.** Per-user install across `get_all_user_homes()` under `_run_as_user(username, ...)` was correct but added scope to a security-critical primitive for a feature that the CLI can enforce on its own.
|
|
512
|
-
|
|
513
|
-
### 14.5 Versioning
|
|
514
|
-
|
|
515
|
-
Minor bump: `1.4.0` → `1.5.0`. The breaking surface is two subcommands and the migration is a one-flag (`--no-ai`) prepend. Major bump would over-signal. README ships a `BREAKING CHANGE in 1.5.0` callout at the top of the Tool policy section; PR description names both subcommands and shows the migration.
|
package/PLAN.md
DELETED
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
# Implementation Plan: WEB-4887 CLI `--no-ai` steering guard (layers 1+2+3)
|
|
2
|
-
|
|
3
|
-
> Generated by /implementation-plan on 2026-06-19. Source: principal-architect.
|
|
4
|
-
>
|
|
5
|
-
> Successor to PR #54 (`web-4887-cli-ai-assist-policy`) which shipped `--prompt`-based AI-assist for `create-terminal` / `create-mcp`. This plan replaces the killed external Claude Code skill (`unbound-tool-policy`, setup repo PR #163 closed unmerged) with CLI-side deterministic enforcement.
|
|
6
|
-
|
|
7
|
-
## Goal
|
|
8
|
-
`unbound policy tool create-terminal` and `unbound policy tool create-mcp` deterministically route Claude Code agents through the AI-assist `--prompt` path: invocations without `--prompt` and without an explicit `--no-ai` opt-out exit with code 2 and a copy-paste-ready remediation message; under `CLAUDECODE=1` even `--no-ai` is rejected unless an env-gated escape hatch is set; `--help` leads with the AI-assisted form.
|
|
9
|
-
|
|
10
|
-
## Established Context (do not override)
|
|
11
|
-
- Repo `/Users/dinesh/Code/unbound-cli` — Node.js CLI on commander.js `^12.1.0`, tests via `node --test`.
|
|
12
|
-
- Active branch `web-4887-cli-noai-guard`, stacked on `web-4887-cli-ai-assist-policy` (PR #54). The eventual PR base = `web-4887-cli-ai-assist-policy`; GitHub auto-retargets to `main` after #54 merges.
|
|
13
|
-
- `src/commands/policy.js` — `create-terminal` at line 1430, `create-mcp` at line 1602. Both have an existing `opts.prompt`-presence branch (lines 1473 / 1646) with an empty-`--prompt` guard that exits 2 (lines 1475 / 1648). Flag-path required-field errors at lines 1539–1547 (terminal) / 1708–1715 (MCP), thrown into the handler `catch` which sets `process.exitCode = 1` (lines 1595–1598 / 1767–1770).
|
|
14
|
-
- `src/lib/policy-ai-assist.js` owns the AI-assist `--prompt` path and **MUST NOT be modified** by this work — the new layers route BEFORE that module is reached.
|
|
15
|
-
- Test convention: integration tests at the `commander.parseAsync` layer. Fresh `new Command()` per test, `src/api.js` `.get`/`.post` and `src/config.js` `isLoggedIn`/`getApiKey`/`getBaseUrl` stubbed, `require.cache` invalidated to reset module-level caches. Pattern established in `test/policy-ai-assist.test.js` (`loadFreshModules`, `buildHarness`, `runCreate`).
|
|
16
|
-
- Exit-code convention: `output.error(msg)` + `process.exitCode = N` (no `process.exit`). Exit 2 is already used for invalid-flag failures (line 1476). Exit 1 is reserved for runtime/operational failures.
|
|
17
|
-
- This is a **breaking change** for any caller of `create-terminal` / `create-mcp` with raw flags (no `--prompt`). Clean break recommended over deprecation window; breaking surface is exactly two subcommands.
|
|
18
|
-
- Version bump: `package.json` `1.4.0` → `1.5.0` (minor; team versions liberally; breaking surface is narrow).
|
|
19
|
-
- Do NOT bundle a Claude Code skill or anything that writes to `~/.claude/` — the whole point of this work is to eliminate that external artifact.
|
|
20
|
-
- The original spec doc `PLAN-web-4887.md` is the source of truth for §1–§13 (Phase 1 and Phase 2 AI-assist scope) and must not be overwritten — only extended with a new §14 and a deprecation marker on §8.
|
|
21
|
-
|
|
22
|
-
## Out of Scope
|
|
23
|
-
- Modifying `src/lib/policy-ai-assist.js`. Routing decisions in this work happen BEFORE the AI-assist module is reached.
|
|
24
|
-
- Any Claude Code skill, hook, or `~/.claude/` artifact. The point of this work is to remove that external steering layer.
|
|
25
|
-
- Changes to `create-terminal`/`create-mcp` AI-assist behavior, prompt handling, preview rendering, merge precedence, BLOCK/WARN custom-message guard, or response routing.
|
|
26
|
-
- Changes to `update`, `delete`, `get`, `list` tool-policy subcommands. The guard does NOT extend to those (no AI-assist exists for them).
|
|
27
|
-
- Changes to non-tool policy subcommands (cost, model, security).
|
|
28
|
-
- Eval harness updates (`test/eval/`).
|
|
29
|
-
- Telemetry on guard fires (deferred until adoption data demands it).
|
|
30
|
-
- Deprecation-window plumbing (one-release-with-warning option (b)). Clean break (option (a)) is committed.
|
|
31
|
-
- Server-side detection of agents bypassing the guard (deferred — separate ticket if pick-rate telemetry shows leakage).
|
|
32
|
-
- Major-version bump to `2.0.0` (architect endorses minor `1.5.0` per team versioning practice).
|
|
33
|
-
|
|
34
|
-
## Files to Change
|
|
35
|
-
| Path | Change | Acceptance criterion |
|
|
36
|
-
|------|--------|----------------------|
|
|
37
|
-
| `src/commands/policy.js` | (a) On both `create-terminal` (line 1430) and `create-mcp` (line 1602), add `.option('--no-ai', 'Opt out of the AI-assist guard and use raw classification flags. Mutually exclusive with --prompt.')`. (b) Insert `.addHelpText('before', helpBannerFor('create-terminal'))` (resp. `'create-mcp'`) above the existing `.action(...)`. (c) Update the existing `.addHelpText('after', ...)` examples (lines 1444–1468 and 1617–1641) so every manual-flag example prepends `--no-ai`; AI-assisted (`--prompt`) examples unchanged. (d) At the top of each `.action()` handler — inside the existing `try`, immediately after `requireLogin()` (lines 1471 / 1644) — call `assertSteering(opts, { subcommandName: 'create-terminal' })` (resp. `'create-mcp'`). (e) Change the outer handler `catch` (lines 1596–1598 and 1768–1770) from `process.exitCode = 1` to `process.exitCode = err.exitCode || 1` so the guard's `err.exitCode = 2` propagates (mirrors the already-existing pattern on the AI-assist branch at line 1512). | #1, #2, #3, #4, #5, #6 |
|
|
38
|
-
| `package.json` | Bump `"version"` from `"1.4.0"` to `"1.5.0"`. | #7 |
|
|
39
|
-
| `README.md` | In the "Tool policy" examples section, update every manual-flag example for `create-terminal` and `create-mcp` to prepend `--no-ai`. AI-assisted (`--prompt`) examples unchanged. Add a one-paragraph note under the section header documenting (i) the guard, (ii) the `--no-ai` opt-out, (iii) the `UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE=1` env-var escape hatch for Claude Code interactive humans. Add a top-of-section `BREAKING CHANGE in 1.5.0:` callout. | #9 |
|
|
40
|
-
| `PLAN-web-4887.md` | Append §14 "Steering enforcement (post-skill-kill)" documenting (i) layer 1 hard guard, (ii) layer 2 `CLAUDECODE=1` detection + `UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE=1` escape hatch, (iii) layer 3 banner-first `--help`, (iv) why the external skill (PR #163) was killed. Mark §8 "Claude Code skill — distribution and content" as `~~DEPRECATED — replaced by CLI-side guards in §14~~` with a forward pointer to §14. Do NOT alter §1–§7 or §9–§13. | #10 |
|
|
41
|
-
|
|
42
|
-
## Files to Create
|
|
43
|
-
| Path | Purpose | Acceptance criterion |
|
|
44
|
-
|------|---------|----------------------|
|
|
45
|
-
| `src/lib/no-ai-guard.js` | New module, ~80 LOC, no external dependencies. Exports two pure functions: `assertSteering(opts, { subcommandName, env = process.env })` (runs layers 1 + 2; throws an `Error` with `err.exitCode = 2` and the appropriate stderr message — see AD-8 wording in the source plan) and `helpBannerFor(subcommandName)` (returns the multi-line banner string for `.addHelpText('before', ...)`). The `env` parameter is dependency-injected (defaults to `process.env`) so tests can exercise the production codepath without touching global env. | #1, #3, #4 |
|
|
46
|
-
| `test/no-ai-guard.test.js` | New integration test file at `commander.parseAsync` layer. Uses `node:test` + `node:assert/strict`. Reuses the harness pattern from `test/policy-ai-assist.test.js` (`loadFreshModules`, fresh `new Command()`, stubbed `api.get`/`api.post`, captured `output.error`/`output.success`/`output.warn`, `program.exitOverride()`). Adds a local `withEnv(overrides, fn)` helper that saves+restores `process.env.CLAUDECODE` and `process.env.UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE` per test. Defines 11 `test(...)` cases (T1–T11) covering every acceptance criterion below; `beforeEach`/`after` hooks reset `process.exitCode = 0` (same convention as `test/policy-ai-assist.test.js` lines 97–106). | #1, #2, #3, #4, #5, #6, #8 |
|
|
47
|
-
|
|
48
|
-
## Test Surface
|
|
49
|
-
- **Layer:** CLI command — `commander.parseAsync(['node','unbound','policy','tool','create-terminal'|'create-mcp', ...argv])` against a fresh `new Command()` with `src/commands/policy.js`'s `register` invoked per test. `src/api.js` `.get`/`.post` and `src/config.js` `isLoggedIn`/`getApiKey`/`getBaseUrl` are stubbed. `process.env.CLAUDECODE` and `process.env.UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE` are set per-test via a save+restore helper. NO unit tests on `assertSteering` or `helpBannerFor` directly — coverage is end-to-end through `parseAsync`.
|
|
50
|
-
- **Tests to add or extend:**
|
|
51
|
-
- `test/no-ai-guard.test.js` — covers acceptance criteria #1, #2, #3, #4, #5, #6, #8. Test cases:
|
|
52
|
-
1. **T1** — `create-terminal` with no `--prompt` and no `--no-ai`, no `CLAUDECODE` → `process.exitCode === 2` AND captured `output.error` contains `"AI-assisted"`, `"Retry with"`, AND `"--prompt"`. No `api.post` calls observed. (#1)
|
|
53
|
-
2. **T2** — `create-mcp` with no `--prompt` and no `--no-ai`, no `CLAUDECODE` → same shape as T1, scoped to `create-mcp`. (#1)
|
|
54
|
-
3. **T3** — `create-terminal --no-ai --name X --command-family git --field command='git push*' --action AUDIT` with no `CLAUDECODE` → `api.post('/api/v1/command-policies/', ...)` fires (observed in stub). `process.exitCode === 0`. (#2)
|
|
55
|
-
4. **T4** — `create-mcp --no-ai --name X --mcp-server linear --mcp-action-type read --action AUDIT` with no `CLAUDECODE` → `api.post('/api/v1/command-policies/', ...)` fires. `process.exitCode === 0`. (#2)
|
|
56
|
-
5. **T5** — `create-terminal --prompt "block rm -rf" --no-ai` → `process.exitCode === 2` AND captured `output.error` contains `"not both"`. No `api.post` calls. (#3)
|
|
57
|
-
6. **T6** — `create-terminal --no-ai` under `CLAUDECODE=1`, `UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE` unset → `process.exitCode === 2` AND captured `output.error` contains `"CLAUDECODE=1"` AND `"intended for interactive humans"`. No `api.post` calls. (#4)
|
|
58
|
-
7. **T7** — `create-terminal --no-ai` under `CLAUDECODE=1` AND `UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE=1` → layer 2 bypassed, falls into existing flag-path, which then exits 1 with `"--name is required"` error (asserting the env-escape hatch worked end-to-end). (#4)
|
|
59
|
-
8. **T8** — `create-terminal --prompt "block rm -rf"` under `CLAUDECODE=1` → assist endpoint stub observes `api.post('/api/v1/command-policies/assist/', ...)`. Layer 2 does not interfere. (#5)
|
|
60
|
-
9. **T9** — `create-terminal --help` (collected via `program.outputHelp()` redirected stdout or `--help` triggering commander's help output capture) → stdout contains the AI-assisted banner block AND `idx(banner) < idx("Usage:")` (banner appears before commander's auto-generated Usage line). (#6)
|
|
61
|
-
10. **T10** — same as T9 for `create-mcp --help`. (#6)
|
|
62
|
-
11. **T11** (regression smoke) — `create-terminal --prompt "audit npm installs"` with no `CLAUDECODE`, no `--no-ai` → assist endpoint stub observes the assist POST. Confirms the existing Phase-1 happy-path is unaffected by the new guard. (#8)
|
|
63
|
-
|
|
64
|
-
## Acceptance Criteria
|
|
65
|
-
1. Given `unbound policy tool create-terminal` (resp. `create-mcp`) invoked with neither `--prompt` nor `--no-ai`, when the command runs, then `process.exitCode === 2`, `output.error` is called with a message containing the substrings `"AI-assisted"`, `"Retry with"`, and `"--prompt"`, and no network call to `api.post` is made.
|
|
66
|
-
2. Given `unbound policy tool create-terminal --no-ai --name X --command-family git --field command='git push*' --action AUDIT` (resp. `create-mcp --no-ai --name X --mcp-server linear --mcp-action-type read --action AUDIT`) with `process.env.CLAUDECODE` unset, when the command runs, then `api.post` is called with path `/api/v1/command-policies/` and `process.exitCode === 0`.
|
|
67
|
-
3. Given `unbound policy tool create-terminal --prompt "X" --no-ai`, when the command runs, then `process.exitCode === 2`, `output.error` is called with a message containing the substring `"not both"`, and no network call is made.
|
|
68
|
-
4. Given `unbound policy tool create-terminal --no-ai` under `process.env.CLAUDECODE === '1'` and `process.env.UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE` unset, when the command runs, then `process.exitCode === 2` AND `output.error` is called with a message containing `"CLAUDECODE=1"` AND `"intended for interactive humans"`. Given the same invocation but with `process.env.UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE === '1'`, layer 2 is bypassed and the existing flag-path errors out on `"--name is required"` (with `process.exitCode === 1`).
|
|
69
|
-
5. Given `unbound policy tool create-terminal --prompt "block rm -rf"` under `process.env.CLAUDECODE === '1'`, when the command runs, then `api.post('/api/v1/command-policies/assist/', ...)` is invoked (assist endpoint is reached; layer 2 does not interfere).
|
|
70
|
-
6. Given `unbound policy tool create-terminal --help` (resp. `create-mcp --help`), when the command runs, then the help output (captured stdout) contains the AI-assisted banner block (substring `"AI-ASSISTED (preferred):"`) AND the index of that banner is strictly less than the index of `"Usage:"`.
|
|
71
|
-
7. `package.json` `version` is `"1.5.0"`.
|
|
72
|
-
8. `npm test` exits 0 with all existing tests in `test/*.test.js` (including `test/policy-ai-assist.test.js` and `test/policy-ai-assist-mcp.test.js`) passing unmodified.
|
|
73
|
-
9. `README.md` Tool policy section contains: (i) every manual-flag example using `--no-ai`, (ii) a paragraph documenting the guard + `--no-ai` + `UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE` env var, (iii) a `BREAKING CHANGE in 1.5.0:` callout at the top of the section.
|
|
74
|
-
10. `PLAN-web-4887.md` has a new §14 "Steering enforcement (post-skill-kill)" documenting the three layers and the env var, AND §8 begins with `~~DEPRECATED — replaced by CLI-side guards in §14~~` and a forward pointer to §14.
|
|
75
|
-
|
|
76
|
-
## Risks & Mitigations
|
|
77
|
-
- **Risk R1: BREAKING CHANGE breaks existing scripts.** Any CI / shell script today running `unbound policy tool create-terminal --name X --command-family git --field K=V --action AUDIT` starts exiting 2 immediately on upgrade.
|
|
78
|
-
- **Mitigation:** PR description includes a `BREAKING CHANGE:` callout naming both subcommands and showing the one-flag (`--no-ai`) migration. README breaking-change banner. Team automation audited in PR review. Breaking surface is narrow (two subcommands).
|
|
79
|
-
- **Risk R2: Claude reads layer-2 stderr and sets `UNBOUND_ALLOW_NO_AI_UNDER_CLAUDE=1` itself.** The env var is in plain sight in the error message; an adversarial reading would lead the agent to set the var and bypass.
|
|
80
|
-
- **Mitigation:** Message wording explicitly says `"This is intended for interactive humans, not for agents."` Accepted residual risk — this is steering, not security. Server-side detection is deferred to a separate ticket, to be picked up only if pick-rate telemetry later shows leakage.
|
|
81
|
-
- **Risk R3: `addHelpText('before', ...)` placement varies across commander.js versions.** Could cause the banner to render in the wrong slot.
|
|
82
|
-
- **Mitigation:** Project pins `commander ^12.1.0`. T9/T10 assert `idx(banner) < idx("Usage:")` and fail loudly on drift. Smoke-verify locally before commit.
|
|
83
|
-
- **Risk R4: `process.env.CLAUDECODE` may not be `'1'` in every Claude Code variant.** Worst case: layer 2 silently no-ops.
|
|
84
|
-
- **Mitigation:** Layer 1 still steers in the absence of layer 2. Acceptable degradation; document the env var the CLI checks for in `PLAN-web-4887.md` §14 so future Claude Code variants can match.
|
|
85
|
-
- **Risk R5: Test brittleness around `process.exitCode`.** The runner conflates a leftover non-zero `process.exitCode` with a test-file failure.
|
|
86
|
-
- **Mitigation:** Reuse `beforeEach(() => { process.exitCode = 0; })` and `after(() => { process.exitCode = 0; })` pattern verbatim from `test/policy-ai-assist.test.js` lines 97–106. Do not invent a new convention.
|
|
87
|
-
- **Risk R6: `catch`-block change (`process.exitCode = err.exitCode || 1`) subtly alters exit codes for existing thrown `Error`s that lack `.exitCode`.**
|
|
88
|
-
- **Mitigation:** Today's behavior for such errors is `process.exitCode = 1`; the `|| 1` fallback preserves that exactly. Change is purely additive for errors that DO carry `.exitCode`. Existing `test/policy-ai-assist.test.js` and `test/policy-ai-assist-mcp.test.js` confirm regression-free behavior.
|
|
89
|
-
- **Risk R7: Test harness module-cache bleed between `test/no-ai-guard.test.js` and the existing assist tests.** Module-level caches (e.g. `_privilegesCache` in `src/lib/policy-ai-assist.js`) could leak state.
|
|
90
|
-
- **Mitigation:** Copy the `loadFreshModules` pattern from `test/policy-ai-assist.test.js` lines 9–25 verbatim — it already invalidates `require.cache` for the relevant modules.
|
|
91
|
-
|
|
92
|
-
## Sequencing
|
|
93
|
-
1. Create `src/lib/no-ai-guard.js` with `assertSteering` and `helpBannerFor`. No call sites yet. Verify the module loads cleanly: `node -e "require('./src/lib/no-ai-guard')"`.
|
|
94
|
-
2. In `src/commands/policy.js`, add `.option('--no-ai', ...)` to both `create-terminal` (line 1430 block) and `create-mcp` (line 1602 block).
|
|
95
|
-
3. In `src/commands/policy.js`, wire `assertSteering(opts, { subcommandName: ... })` into both `.action()` handlers immediately after `requireLogin()` (lines 1471 / 1644).
|
|
96
|
-
4. In `src/commands/policy.js`, change the outer handler `catch` blocks (lines 1596–1598 and 1768–1770) to use `process.exitCode = err.exitCode || 1`.
|
|
97
|
-
5. In `src/commands/policy.js`, add `.addHelpText('before', helpBannerFor('create-terminal'))` / `'create-mcp'` and update the manual examples in the existing `.addHelpText('after', ...)` blocks (lines 1444–1468 and 1617–1641) to prepend `--no-ai`.
|
|
98
|
-
6. Bump `package.json` `version` to `1.5.0`.
|
|
99
|
-
7. Write `test/no-ai-guard.test.js` (T1–T11). Run `npm test`. Iterate stderr wording in `src/lib/no-ai-guard.js` until tests pass and copy remains human-readable.
|
|
100
|
-
8. Update `README.md` Tool policy section per the table above.
|
|
101
|
-
9. Update `PLAN-web-4887.md` — append §14, mark §8 deprecated.
|
|
102
|
-
10. Run `npm test` once more for full regression confirmation.
|
|
103
|
-
11. Self-review the diff. Commit. Open PR with base `web-4887-cli-ai-assist-policy`; PR description includes the `BREAKING CHANGE:` callout and the `--no-ai` migration example.
|
|
104
|
-
|
|
105
|
-
## Open Questions
|
|
106
|
-
- None.
|