npm - unbound-cli - Versions diffs - 1.3.1 → 1.4.0 - Mend

unbound-cli 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/PLAN-web-4887.md +477 -0
package/PLAN.md +117 -0
package/README.md +8 -2
package/package.json +1 -1
package/src/commands/policy.js +203 -16
package/src/commands/setup.js +4 -2
package/src/lib/policy-ai-assist.js +503 -0
package/test/eval/README.md +45 -0
package/test/eval/policy-prompts.json +122 -0
package/test/eval/run-eval.js +57 -0
package/test/policy-ai-assist-mcp.test.js +606 -0
package/test/policy-ai-assist-preflight.test.js +66 -0
package/test/policy-ai-assist.test.js +884 -0
package/test/policy-conditions.test.js +28 -0

package/src/lib/policy-ai-assist.js ADDED Viewed

@@ -0,0 +1,503 @@
+const readline = require('readline');
+const api = require('../api');
+const output = require('../output');
+const config = require('../config');
+// Backend caps at 2000 chars; we cap at 1800 to leave headroom. The limit is
+// checked against the sanitized prompt (see validatePromptPreflight below).
+const MAX_PROMPT_LEN = 1800;
+// Mirrors TOOL_ACTIONS in src/commands/policy.js. Inlined intentionally to keep
+// this module standalone; the enum is short and unlikely to change frequently.
+// Used by validateActionOverride to accept or reject the --action flag.
+const TOOL_ACTIONS = ['AUDIT', 'BLOCK', 'WARN', 'REQUIRE_SLACK_APPROVAL'];
+// Lower-case phrases that validatePromptPreflight searches for (by substring)
+// in the lower-cased prompt. A hit only produces a warning, never a rejection.
+const OUT_OF_SCOPE_KEYWORDS = Object.freeze([
+  'staging', 'production', 'prod', 'dev', 'qa', 'testing', // environment
+  'archived', 'archive',
+  'business hour', 'after hour', 'outside hour', 'weekend', // time
+  'except', 'unless', 'but not', // exception clauses
+  'private repo', 'public repo', 'private project', 'public project', // project
+]);
+// Single source of truth for --action override validation. Used both early
+// (in runTerminalPromptCreate, to fail before the assist POST) and late
+// (in mergeAiAndFlags, when applying the override). Returns the normalized
+// upper-cased value, or undefined when --action was not provided.
+function validateActionOverride(action) {
+  if (typeof action !== 'string' || !action.trim().length) return undefined;
+  const up = action.toUpperCase().trim();
+  if (!TOOL_ACTIONS.includes(up)) {
+    const e = new Error(`--action must be one of: ${TOOL_ACTIONS.join(', ')}.`);
+    e.exitCode = 2;
+    throw e;
+  }
+  return up;
+}
+// Module-level cache. Each CLI invocation is a fresh Node process so cache
+// lifetime is exactly one command.
+let _privilegesCache = null;
+async function loadPrivileges() {
+  if (_privilegesCache) return _privilegesCache;
+  _privilegesCache = await api.get('/api/v1/users/privileges/');
+  return _privilegesCache;
+}
+function validatePromptPreflight(opts) {
+  if (!opts || typeof opts.prompt !== 'string') {
+    throw new Error('--prompt is required.');
+  }
+  const collapsed = opts.prompt
+    .replace(/\n{2,}/g, '\n')
+    .replace(/[^\x20-\x7e\n\t]/g, '');
+  const sanitizedPrompt = collapsed.trim();
+  if (sanitizedPrompt.length > MAX_PROMPT_LEN) {
+    const e = new Error(`Input is too long (max ${MAX_PROMPT_LEN} characters).`);
+    e.exitCode = 2;
+    throw e;
+  }
+  const warnings = [];
+  const lower = sanitizedPrompt.toLowerCase();
+  for (const token of OUT_OF_SCOPE_KEYWORDS) {
+    if (lower.includes(token)) warnings.push(token);
+  }
+  return { sanitizedPrompt, warnings };
+}
+function mergeAiAndFlags(formUpdates, opts) {
+  const f = formUpdates || {};
+  const body = {
+    policy_type: 'TERMINAL_COMMAND',
+  };
+  if (f.name) body.name = f.name;
+  if (f.description) body.description = f.description;
+  if (f.command_family) body.command_family = f.command_family;
+  const field = f.selected_field || '';
+  const value = f.field_value || '';
+  body.config = field ? { [field]: value } : {};
+  body.action = (f.action ? String(f.action).toUpperCase() : 'AUDIT');
+  // Flag overrides win over AI-supplied values. --action is validated against
+  // TOOL_ACTIONS via validateActionOverride. --name and --description are
+  // trimmed before they override; whitespace-only inputs fall back silently to
+  // the AI value (non-interactive callers may template these and an empty
+  // result is recoverable).
+  const flagName = typeof opts.name === 'string' ? opts.name.trim() : '';
+  if (flagName) body.name = flagName;
+  const flagDesc = typeof opts.description === 'string' ? opts.description.trim() : '';
+  if (flagDesc) body.description = flagDesc;
+  const normalizedAction = validateActionOverride(opts.action);
+  if (normalizedAction) body.action = normalizedAction;
+  body.enabled = !opts.disabled;
+  if (opts.customMessage) body.custom_message = opts.customMessage;
+  if (Array.isArray(opts.scopeUserGroupIds) && opts.scopeUserGroupIds.length) {
+    body.scope_user_group_ids = opts.scopeUserGroupIds;
+  }
+  return body;
+}
+function colorAction(action) {
+  const colors = output.colors;
+  if (!action) return '(unspecified)';
+  if (action === 'BLOCK') return colors.red(action);
+  if (action === 'WARN' || action === 'REQUIRE_SLACK_APPROVAL') return colors.yellow(action);
+  if (action === 'AUDIT') return colors.dim(action);
+  return action;
+}
+function renderTerminalPreview(body, explanation) {
+  const scope = body.scope_user_group_ids && body.scope_user_group_ids.length
+    ? `groups: ${body.scope_user_group_ids.join(', ')}`
+    : '(org-wide)';
+  const family = body.command_family || '(unspecified)';
+  const cfg = body.config || {};
+  const cfgKeys = Object.keys(cfg);
+  const fieldKey = cfgKeys[0] || '(unspecified)';
+  const pattern = cfgKeys.length ? String(cfg[cfgKeys[0]]) : '(unspecified)';
+  // Header goes to stdout (NOT output.info, which writes to stderr) so the
+  // whole preview — header, keyValue rows, explanation — is captured by a
+  // single `tee` / `> file`. Leading blank line gives the header breathing room.
+  console.log('');
+  console.log(output.colors.cyan('ℹ Resolved policy (from AI assist):'));
+  const rows = [
+    ['Name', body.name || '(unspecified)'],
+  ];
+  if (body.description) rows.push(['Description', body.description]);
+  rows.push(['Type', body.policy_type]);
+  rows.push(['Command family', family]);
+  rows.push(['Field', fieldKey]);
+  rows.push(['Pattern', pattern]);
+  rows.push(['Action', colorAction(body.action)]);
+  if (body.custom_message) rows.push(['Custom message', body.custom_message]);
+  rows.push(['Scope (groups)', scope]);
+  rows.push(['Enabled', String(body.enabled)]);
+  output.keyValue(rows);
+  if (explanation) {
+    console.log('');
+    console.log(`AI assist explanation: ${output.colors.dim(explanation)}`);
+  }
+  console.log('');
+}
+function routeBackendError(err) {
+  // ApiError carries statusCode + body. Non-ApiError (network/timeout) reaches
+  // this from a try/catch wrapper — we treat anything without statusCode as
+  // network/timeout.
+  if (!err || !err.statusCode) {
+    let host = '';
+    try { host = new URL(config.getBaseUrl()).host; } catch { host = config.getBaseUrl(); }
+    return {
+      message: `Network error reaching ${host}: ${(err && err.message) || 'unknown'}. Check connectivity. Falling back to flag-based creation is not blocked.`,
+      exitCode: 4,
+    };
+  }
+  const code = err.statusCode;
+  if (code === 401 || code === 403) {
+    return {
+      message: 'Authentication failed / not authorized. Tool policies require admin. Run `unbound whoami` to check role.',
+      exitCode: 3,
+    };
+  }
+  if (code === 400 || code === 422) {
+    const bodyStr = typeof err.body === 'string' ? err.body : JSON.stringify(err.body);
+    return {
+      message: `Request validation failed: ${bodyStr}.`,
+      exitCode: 2,
+    };
+  }
+  if (code >= 500) {
+    return {
+      message: 'Server error. Try again, or fall back to flag-based creation (see `unbound policy tool --help`).',
+      exitCode: 4,
+    };
+  }
+  // Other non-2xx — treat as backend error with whatever body it gave.
+  const bodyStr = typeof err.body === 'string' ? err.body : JSON.stringify(err.body);
+  return { message: `Request failed: ${bodyStr}.`, exitCode: 2 };
+}
+function routeSuccessFalse(error) {
+  const msg = error || '';
+  if (msg.toLowerCase().includes('input is too long')) {
+    return { message: `${msg} Try shortening to under 1800 characters.`, exitCode: 2 };
+  }
+  if (msg.toLowerCase().includes('could not determine command family')) {
+    return {
+      message: `${msg} Try naming the command type explicitly (e.g. \`block git pushes\`, \`audit npm installs\`). Or use \`unbound policy tool families\` to see available families and pass \`--command-family\` directly.`,
+      exitCode: 2,
+    };
+  }
+  return { message: msg, exitCode: 2 };
+}
+// Asks whether to create the previewed policy. Pressing Enter counts as yes.
+// Returns the promise produced by promptYesNo.
+function confirmCreate() {
+  return promptYesNo('Create policy? [Y/n] ', true);
+}
+// Asks whether to go ahead after out-of-scope keyword warnings. Pressing Enter
+// counts as no. Returns the promise produced by promptYesNo.
+function confirmContinue() {
+  return promptYesNo('Continue anyway? The endpoint will ignore those parts. [y/N] ', false);
+}
+function promptYesNo(question, defaultYes) {
+  return new Promise((resolve) => {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    let resolved = false;
+    const done = (v) => { if (!resolved) { resolved = true; resolve(v); rl.close(); } };
+    rl.on('close', () => done(false));
+    rl.question(question, (answer) => {
+      const a = (answer || '').trim().toLowerCase();
+      if (a === '') return done(defaultYes);
+      done(a === 'y' || a === 'yes');
+    });
+  });
+}
+// Public entry — orchestrates pre-flight, admin probe, assist call, merge,
+// preview, and returns {body, confirmed, explanation}. Throws on any non-success
+// branch with err.exitCode set so the caller can map it to process.exitCode.
+async function runTerminalPromptCreate(opts) {
+  if (!config.isLoggedIn()) {
+    const e = new Error('Not logged in. Run `unbound login` first.');
+    e.exitCode = 3;
+    throw e;
+  }
+  // Validate --action override early so we don't fire the assist call only to
+  // discover the override is invalid afterwards. Result is discarded — we only
+  // care about the throw side-effect; mergeAiAndFlags re-validates and applies.
+  validateActionOverride(opts.action);
+  const { sanitizedPrompt, warnings } = validatePromptPreflight(opts);
+  for (const w of warnings) {
+    output.warn(`Your prompt mentions \`${w}\`. Tool policies cannot scope by environment / project / time / exception clauses. The endpoint will ignore those parts.`);
+  }
+  if (warnings.length && !opts.yes && !opts.json) {
+    const cont = await confirmContinue();
+    if (!cont) {
+      const e = new Error('Aborted by user (out-of-scope keywords in prompt).');
+      e.exitCode = 0;
+      throw e;
+    }
+  }
+  let privileges;
+  try {
+    privileges = await loadPrivileges();
+  } catch (err) {
+    const routed = routeBackendError(err);
+    const e = new Error(routed.message);
+    e.exitCode = routed.exitCode;
+    throw e;
+  }
+  if (!privileges || !privileges.is_admin) {
+    let role = 'unknown';
+    if (privileges && privileges.is_manager) role = 'Manager';
+    else if (privileges && privileges.is_member) role = 'Member';
+    const e = new Error(`Tool policy creation requires admin role; current role: ${role}.`);
+    e.exitCode = 3;
+    throw e;
+  }
+  let response;
+  try {
+    response = await api.post('/api/v1/command-policies/assist/', {
+      body: {
+        user_input: sanitizedPrompt,
+        current_form_state: {
+          command_family: '',
+          selected_field: '',
+          field_value: '',
+          action: '',
+          name: '',
+          description: '',
+        },
+      },
+    });
+  } catch (err) {
+    const routed = routeBackendError(err);
+    const e = new Error(routed.message);
+    e.exitCode = routed.exitCode;
+    throw e;
+  }
+  if (!response || response.success !== true) {
+    const routed = routeSuccessFalse(response && response.error);
+    const e = new Error(routed.message);
+    e.exitCode = routed.exitCode;
+    throw e;
+  }
+  const body = mergeAiAndFlags(response.form_updates, opts);
+  if ((body.action === 'BLOCK' || body.action === 'WARN') && !body.custom_message) {
+    // Attribute the action choice correctly: if the user passed --action, the
+    // AI didn't set it. Misattribution is confusing in BLOCK/WARN flows where
+    // the user explicitly opted in.
+    const userSet = typeof opts.action === 'string' && opts.action.trim().length > 0;
+    const msg = userSet
+      ? `--action ${body.action} requires --custom-message. BLOCK and WARN policies need a user-facing message — add --custom-message "<text>" or drop --action ${body.action}.`
+      : `AI assist set --action to ${body.action} but no --custom-message was provided. BLOCK and WARN policies require a user-facing message. Re-run with --custom-message "<text>".`;
+    const e = new Error(msg);
+    e.exitCode = 2;
+    throw e;
+  }
+  if (!opts.json) renderTerminalPreview(body, response.explanation);
+  let confirmed = true;
+  if (!opts.yes && !opts.json) {
+    confirmed = await confirmCreate();
+  }
+  return { body, confirmed, explanation: response.explanation };
+}
+function routeMcpSuccessFalse(error) {
+  const msg = error || '';
+  if (msg.toLowerCase().includes('input is too long')) {
+    return { message: `${msg} Try shortening to under 1800 characters.`, exitCode: 2 };
+  }
+  return { message: msg, exitCode: 2 };
+}
+function mergeAiAndFlagsMcp(aiResponse, opts) {
+  const r = aiResponse || {};
+  const body = {
+    policy_type: 'MCP_TOOL',
+    mcp_canonical_group_id: r.canonical_group_id,
+    mcp_tools: Array.isArray(r.mcp_tools) ? r.mcp_tools : [],
+  };
+  // Backend serializer treats `config` as a required object on MCP_TOOL
+  // policies; the flag-path always sends `config: {}`. Match it here so the
+  // AI-assist path doesn't 422 when the backend tightens this validation.
+  body.config = {};
+  if (r.name) body.name = r.name;
+  if (r.description) body.description = r.description;
+  body.action = (r.action ? String(r.action).toUpperCase() : 'AUDIT');
+  if (r.custom_message) body.custom_message = r.custom_message;
+  const flagName = typeof opts.name === 'string' ? opts.name.trim() : '';
+  if (flagName) body.name = flagName;
+  const flagDesc = typeof opts.description === 'string' ? opts.description.trim() : '';
+  if (flagDesc) body.description = flagDesc;
+  const normalizedAction = validateActionOverride(opts.action);
+  if (normalizedAction) body.action = normalizedAction;
+  const flagCustom = typeof opts.customMessage === 'string' ? opts.customMessage.trim() : '';
+  if (flagCustom) body.custom_message = flagCustom;
+  body.enabled = !opts.disabled;
+  if (Array.isArray(opts.scopeUserGroupIds) && opts.scopeUserGroupIds.length) {
+    body.scope_user_group_ids = opts.scopeUserGroupIds;
+  }
+  return body;
+}
+function renderMcpPreview(body, explanation) {
+  const scope = body.scope_user_group_ids && body.scope_user_group_ids.length
+    ? `groups: ${body.scope_user_group_ids.join(', ')}`
+    : '(org-wide)';
+  const tools = Array.isArray(body.mcp_tools) ? body.mcp_tools.join(', ') : '';
+  console.log('');
+  console.log(output.colors.cyan('ℹ Resolved MCP policy (from AI assist):'));
+  const rows = [
+    ['Name', body.name || '(unspecified)'],
+  ];
+  if (body.description) rows.push(['Description', body.description]);
+  rows.push(['Type', body.policy_type]);
+  rows.push(['Service', `canonical_group_id: ${body.mcp_canonical_group_id}`]);
+  rows.push(['Tools', tools]);
+  rows.push(['Action', colorAction(body.action)]);
+  if (body.custom_message) rows.push(['Custom message', body.custom_message]);
+  rows.push(['Scope (groups)', scope]);
+  rows.push(['Enabled', String(body.enabled)]);
+  output.keyValue(rows);
+  if (explanation) {
+    console.log('');
+    console.log(`AI assist explanation: ${output.colors.dim(explanation)}`);
+  }
+  console.log('');
+}
+async function runMcpPromptCreate(opts) {
+  if (!config.isLoggedIn()) {
+    const e = new Error('Not logged in. Run `unbound login` first.');
+    e.exitCode = 3;
+    throw e;
+  }
+  validateActionOverride(opts.action);
+  const { sanitizedPrompt, warnings } = validatePromptPreflight(opts);
+  for (const w of warnings) {
+    output.warn(`Your prompt mentions \`${w}\`. Tool policies cannot scope by environment / project / time / exception clauses. The endpoint will ignore those parts.`);
+  }
+  if (warnings.length && !opts.yes && !opts.json) {
+    const cont = await confirmContinue();
+    if (!cont) {
+      const e = new Error('Aborted by user (out-of-scope keywords in prompt).');
+      e.exitCode = 0;
+      throw e;
+    }
+  }
+  let privileges;
+  try {
+    privileges = await loadPrivileges();
+  } catch (err) {
+    const routed = routeBackendError(err);
+    const e = new Error(routed.message);
+    e.exitCode = routed.exitCode;
+    throw e;
+  }
+  if (!privileges || !privileges.is_admin) {
+    let role = 'unknown';
+    if (privileges && privileges.is_manager) role = 'Manager';
+    else if (privileges && privileges.is_member) role = 'Member';
+    const e = new Error(`Tool policy creation requires admin role; current role: ${role}.`);
+    e.exitCode = 3;
+    throw e;
+  }
+  let response;
+  try {
+    response = await api.post('/api/v1/command-policies/assist-mcp/', {
+      body: {
+        user_input: sanitizedPrompt,
+        current_form_state: {
+          mcp_canonical_group_id: '',
+          mcp_tools: [],
+          name: '',
+          description: '',
+          action: '',
+        },
+      },
+    });
+  } catch (err) {
+    const routed = routeBackendError(err);
+    const e = new Error(routed.message);
+    e.exitCode = routed.exitCode;
+    throw e;
+  }
+  if (!response || response.success !== true) {
+    const routed = routeMcpSuccessFalse(response && response.error);
+    const e = new Error(routed.message);
+    e.exitCode = routed.exitCode;
+    throw e;
+  }
+  if (!response.canonical_group_id
+    || !Array.isArray(response.mcp_tools)
+    || response.mcp_tools.length === 0) {
+    const e = new Error('AI assist could not match any tools for your description. Try naming the service and tools more directly, or use `unbound policy tool create-mcp --name "..." --mcp-server <server> (--mcp-tool <tool> | --mcp-action-type <read|write|destructive>) --action <action>`.');
+    e.exitCode = 2;
+    throw e;
+  }
+  const body = mergeAiAndFlagsMcp(response, opts);
+  if ((body.action === 'BLOCK' || body.action === 'WARN') && !body.custom_message) {
+    const userSet = typeof opts.action === 'string' && opts.action.trim().length > 0;
+    const msg = userSet
+      ? `--action ${body.action} requires --custom-message. BLOCK and WARN policies need a user-facing message — add --custom-message "<text>" or drop --action ${body.action}.`
+      : `AI assist set --action to ${body.action} but no --custom-message was provided. BLOCK and WARN policies require a user-facing message. Re-run with --custom-message "<text>".`;
+    const e = new Error(msg);
+    e.exitCode = 2;
+    throw e;
+  }
+  if (!opts.json) renderMcpPreview(body, response.explanation);
+  let confirmed = true;
+  if (!opts.yes && !opts.json) {
+    confirmed = await confirmCreate();
+  }
+  return { body, confirmed, explanation: response.explanation };
+}
+// Public surface of this module: the two run*PromptCreate entry points plus
+// the constants and helpers listed here. The remaining helpers in this file
+// (validateActionOverride, loadPrivileges, colorAction, the prompt helpers
+// and the route*SuccessFalse mappers) are not exported.
+module.exports = {
+  MAX_PROMPT_LEN,
+  OUT_OF_SCOPE_KEYWORDS,
+  validatePromptPreflight,
+  mergeAiAndFlags,
+  renderTerminalPreview,
+  routeBackendError,
+  runTerminalPromptCreate,
+  mergeAiAndFlagsMcp,
+  renderMcpPreview,
+  runMcpPromptCreate,
+};

package/test/eval/README.md ADDED Viewed

@@ -0,0 +1,45 @@
+# Eval harness — WEB-4887 Phase 1
+Twenty prompts that exercise the `unbound policy tool create-terminal --prompt`
+path end-to-end. Used to measure the ticket's acceptance criterion: "Claude Code
+should always pick our AI assist endpoint over creating policies on its own."
+This eval is **manual**. CI does NOT run it — running against the live AI-assist
+endpoint burns tokens and produces non-deterministic results.
+## Composition
+| Category | Count | Expected outcome |
+|---|---|---|
+| `single_intent_in_scope` | 6 | success — assist returns `success: true` |
+| `detailed_in_scope`      | 4 | success — long but single-intent prompts |
+| `out_of_scope`           | 4 | success — CLI warns when it spots an out-of-scope keyword, but proceeds; endpoint usually still answers |
+| `oversize`               | 3 | `preflight_rejected` — CLI rejects locally before sending |
+| `nonsense`               | 3 | `success_false` — endpoint returns `success: false` |
+## Fixture shape
+`policy-prompts.json` is an array of `{id, category, prompt, expected_outcome}`.
+The three `oversize` entries store a marker (`OVERSIZE_FILL_<N>`) and the runner
+expands it to an N-character string at execution time so the JSON stays
+readable in source.
+## Manual run
+```bash
+# Make sure you are logged in as an admin against staging:
+unbound login --api-key <STAGING_ADMIN_KEY> --backend-url https://staging-backend.getunbound.ai
+# Then run the skeleton — today it just summarizes the prompt set.
+node test/eval/run-eval.js
+```
+The skeleton stops at the "load + summary" boundary. To turn it into a real
+eval, fill in the TODO in `run-eval.js`: for each prompt, either spawn
+`unbound policy tool create-terminal --prompt "<p>" --yes` directly and grade
+the exit code / response, OR (for the pick-rate measurement) ask Claude Code to
+satisfy the natural-language ask and check whether it invoked the `--prompt`
+flag. Aggregate pick-rate (% of prompts where Claude picked our path) and
+success-rate (% of in-scope prompts where the endpoint returned `success: true`).
+Spec acceptance gate: pick-rate >= 90%, success-rate >= 80% on in-scope prompts.

package/test/eval/policy-prompts.json ADDED Viewed

@@ -0,0 +1,122 @@
+[
+  {
+    "id": "single-1",
+    "category": "single_intent_in_scope",
+    "prompt": "block rm -rf",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "single-2",
+    "category": "single_intent_in_scope",
+    "prompt": "audit git pushes to main",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "single-3",
+    "category": "single_intent_in_scope",
+    "prompt": "warn on npm install",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "single-4",
+    "category": "single_intent_in_scope",
+    "prompt": "block curl to external hosts",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "single-5",
+    "category": "single_intent_in_scope",
+    "prompt": "audit docker push commands",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "single-6",
+    "category": "single_intent_in_scope",
+    "prompt": "block force-push on git",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "detailed-1",
+    "category": "detailed_in_scope",
+    "prompt": "Our team has had repeated incidents where engineers accidentally delete large amounts of work using recursive force removes. Please create a policy that blocks any invocation of rm with the recursive and force flags so destructive deletions are prevented at the shell level. The policy should fire whenever the command begins with rm and contains the -rf combination.",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "detailed-2",
+    "category": "detailed_in_scope",
+    "prompt": "We want to track every git push so we can build an internal audit log of who is pushing what and where. Create a policy that audits any git push command regardless of the remote or branch. We do not want to block these — only audit them. The policy should match the command pattern git push and any arguments that follow.",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "detailed-3",
+    "category": "detailed_in_scope",
+    "prompt": "Engineers occasionally try to install npm packages from inside our coding agents and we want a chance to review those installs before they happen. Please warn the user any time npm install is invoked. The custom message we want is: this install must be reviewed by your tech lead first. The action should be WARN, not BLOCK.",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "detailed-4",
+    "category": "detailed_in_scope",
+    "prompt": "We need to stop sudo from being used inside the coding agent shell entirely. Any sudo invocation should be blocked outright with a custom message explaining that elevated privileges are not permitted from the AI tool path. The policy should match any command beginning with sudo, regardless of the subcommand.",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "oos-env",
+    "category": "out_of_scope",
+    "prompt": "block rm -rf in the staging environment",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "oos-exception",
+    "category": "out_of_scope",
+    "prompt": "block all writes except in the docs directory",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "oos-time",
+    "category": "out_of_scope",
+    "prompt": "block git pushes after business hours",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "oos-compound",
+    "category": "out_of_scope",
+    "prompt": "block rm -rf and audit git push",
+    "expected_outcome": "success"
+  },
+  {
+    "id": "oversize-1",
+    "category": "oversize",
+    "prompt": "OVERSIZE_FILL_2200",
+    "expected_outcome": "preflight_rejected"
+  },
+  {
+    "id": "oversize-2",
+    "category": "oversize",
+    "prompt": "OVERSIZE_FILL_2500",
+    "expected_outcome": "preflight_rejected"
+  },
+  {
+    "id": "oversize-3",
+    "category": "oversize",
+    "prompt": "OVERSIZE_FILL_3000",
+    "expected_outcome": "preflight_rejected"
+  },
+  {
+    "id": "nonsense-1",
+    "category": "nonsense",
+    "prompt": "asdfqwer purple potato",
+    "expected_outcome": "success_false"
+  },
+  {
+    "id": "nonsense-2",
+    "category": "nonsense",
+    "prompt": "make a policy for the thing that does the thing",
+    "expected_outcome": "success_false"
+  },
+  {
+    "id": "nonsense-3",
+    "category": "nonsense",
+    "prompt": "lorem ipsum dolor sit amet",
+    "expected_outcome": "success_false"
+  }
+]