@occasiolabs/occasio 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +10 -0
  3. package/README.md +216 -0
  4. package/bin/occasio-mcp.js +5 -0
  5. package/bin/occasio.js +2 -0
  6. package/bin/supervisor/README.md +90 -0
  7. package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
  8. package/bin/supervisor/install-windows-task.ps1 +48 -0
  9. package/bin/supervisor/occasio.service +18 -0
  10. package/docs/AUDIT.md +120 -0
  11. package/docs/attest_verify.py +283 -0
  12. package/docs/audit_walker.py +65 -0
  13. package/docs/canonicalize.py +99 -0
  14. package/docs/compliance-mapping.md +93 -0
  15. package/docs/demos/mcp-block.md +148 -0
  16. package/docs/edr-calibration.md +73 -0
  17. package/docs/edr-demo.md +83 -0
  18. package/docs/python-verifier.md +74 -0
  19. package/docs/reference-pipeline.md +140 -0
  20. package/package.json +69 -0
  21. package/policy-templates/dev-default.yml +84 -0
  22. package/policy-templates/finance.yml +61 -0
  23. package/policy-templates/strict.yml +49 -0
  24. package/schemas/agent-attestation-v1.json +190 -0
  25. package/schemas/occasio-policy.schema.json +99 -0
  26. package/spec/agent-attestation/v1/README.md +137 -0
  27. package/src/adapters/claude-code.js +518 -0
  28. package/src/adapters/cline.js +161 -0
  29. package/src/adapters/computer-use-cli.js +198 -0
  30. package/src/adapters/computer-use.js +227 -0
  31. package/src/analyzer.js +170 -0
  32. package/src/anomaly/cli.js +143 -0
  33. package/src/anomaly/detectors/deny-rate.js +84 -0
  34. package/src/anomaly/detectors/file-read-volume.js +109 -0
  35. package/src/anomaly/detectors/secret-redact-rate.js +107 -0
  36. package/src/anomaly/detectors/unknown-tool-input.js +83 -0
  37. package/src/anomaly/index.js +169 -0
  38. package/src/attest/canonicalize.js +97 -0
  39. package/src/attest/index.js +355 -0
  40. package/src/attest/run-slice.js +57 -0
  41. package/src/attest/sign.js +186 -0
  42. package/src/attest/verify.js +192 -0
  43. package/src/audit/errors.js +21 -0
  44. package/src/audit/input-normalizer.js +121 -0
  45. package/src/audit/jsonl-auditor.js +178 -0
  46. package/src/audit/verifier.js +152 -0
  47. package/src/baseline.js +507 -0
  48. package/src/boundary.js +238 -0
  49. package/src/budget.js +42 -0
  50. package/src/classifier.js +115 -0
  51. package/src/context-budget.js +77 -0
  52. package/src/core/boundary-event.js +75 -0
  53. package/src/core/decision.js +61 -0
  54. package/src/core/pipeline.js +66 -0
  55. package/src/core/tool-names.js +105 -0
  56. package/src/dashboard.js +892 -0
  57. package/src/demo/README.md +31 -0
  58. package/src/demo/anomalies-demo.js +211 -0
  59. package/src/demo/attest-demo.js +198 -0
  60. package/src/distiller.js +155 -0
  61. package/src/embeddings.json +72 -0
  62. package/src/executor/dispatcher.js +230 -0
  63. package/src/harness.js +817 -0
  64. package/src/index.js +1711 -0
  65. package/src/inspect.js +329 -0
  66. package/src/interceptor.js +1198 -0
  67. package/src/lao.js +185 -0
  68. package/src/lao_prep.py +119 -0
  69. package/src/ledger.js +209 -0
  70. package/src/mcp-experiment.js +140 -0
  71. package/src/mcp-normalize.js +139 -0
  72. package/src/mcp-server.js +320 -0
  73. package/src/outbound-policy.js +433 -0
  74. package/src/policy/built-in-classifiers.js +78 -0
  75. package/src/policy/doctor.js +226 -0
  76. package/src/policy/engine.js +339 -0
  77. package/src/policy/init.js +153 -0
  78. package/src/policy/loader.js +448 -0
  79. package/src/policy/rules-default.js +36 -0
  80. package/src/policy/shell-path.js +135 -0
  81. package/src/policy/show.js +196 -0
  82. package/src/policy/validate.js +310 -0
  83. package/src/preflight/cli.js +164 -0
  84. package/src/preflight/miner.js +329 -0
  85. package/src/proxy/agent-router.js +93 -0
  86. package/src/redteam.js +428 -0
  87. package/src/replay.js +446 -0
  88. package/src/report/index.js +224 -0
  89. package/src/runtime.js +595 -0
  90. package/src/scanner/index.js +49 -0
  91. package/src/selftest.js +192 -0
  92. package/src/session.js +36 -0
@@ -0,0 +1,433 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * outbound-policy.js — Path-2 defense for deny_paths.
5
+ *
6
+ * The tool-call gate in src/policy/engine.js intercepts BoundaryEvents
7
+ * derived from `tool_use` blocks the cloud model emits in its response.
8
+ * That gate fires correctly. But Claude Code (and other agent runtimes)
9
+ * also INJECT synthetic tool_use + tool_result pairs into the OUTBOUND
10
+ * request body — file contents that have been read by the host process
11
+ * as agentic context BEFORE the model has had a chance to call any tool.
12
+ *
13
+ * Those pre-baked tool_results never trigger the engine because no
14
+ * agent-initiated tool call happened. The file content reaches the
15
+ * model anyway through the request body.
16
+ *
17
+ * This module walks the outbound body, finds Read-style tool_use blocks
18
+ * paired by tool_use_id with their tool_result content, and STRIPS the
19
+ * content of any tool_result whose source file path falls under
20
+ * deny_paths / outside allow_paths. The strip mirrors the redact-secrets
21
+ * TRANSFORM convention: the tool_result is preserved (so the model sees
22
+ * structural continuity), but its content is replaced with a one-line
23
+ * synthetic refusal marker.
24
+ *
25
+ * One audit row is written per stripped tool_result, with the same shape
26
+ * the engine writes for tool-call-time BLOCKs, so `occasio report` and
27
+ * `audit verify` see them uniformly. The audit reason is
28
+ * `outbound-context-denied` to distinguish it from `path-denied` (which
29
+ * is the tool-call-time path) — both are governance enforcement, but
30
+ * which gate caught it matters for diagnosing what kind of bypass attempt
31
+ * was made.
32
+ */
33
+
34
+ const fs = require('fs');
35
+ const os = require('os');
36
+ const path = require('path');
37
+
38
// One-line synthetic refusal text that replaces the content of every
// tool_result stripped by the outbound deny_paths gate.
const STRIP_MARKER = '[content stripped by Occasio outbound deny_paths — file is under a denied path]';
40
+
41
+ // Path normalisation mirrors src/policy/engine.js so deny_paths semantics
42
+ // stay byte-identical across both gates.
43
/**
 * Case-normalise a path string for comparison: lower-cased when running on
 * Windows (`process.platform === 'win32'`), returned untouched elsewhere.
 */
const normCase = (p) => (process.platform === 'win32' ? p.toLowerCase() : p);
46
+
47
/**
 * Replace a leading '~' with the current user's home directory.
 *
 * Every path whose first character is '~' is rewritten: the remainder of the
 * string is appended verbatim to `os.homedir()`. That includes '~name/...'
 * forms, which are NOT resolved to another user's home directory.
 *
 * @param {string} p Path as written by the tool input.
 * @returns {string} The path with the leading '~' expanded, or `p` unchanged.
 */
function expandHome(p) {
  if (!p.startsWith('~')) {
    return p;
  }
  const remainder = p.slice(1);
  return `${os.homedir()}${remainder}`;
}
50
+
51
/**
 * Turn a raw tool-input path into an absolute path suitable for the
 * deny/allow prefix checks.
 *
 * The path is home-expanded, then resolved through `fs.realpathSync`. If
 * that throws, the function falls back to a purely lexical `path.resolve`.
 *
 * @param {*} rawPath Value taken from the tool_use input.
 * @returns {string|null} Absolute path, or null when `rawPath` is empty or
 *   not a string.
 */
function resolveInputPath(rawPath) {
  const usable = typeof rawPath === 'string' && rawPath.length > 0;
  if (!usable) return null;

  const candidate = expandHome(rawPath);
  let resolved;
  try {
    resolved = fs.realpathSync(candidate);
  } catch {
    // realpath failed — resolve lexically instead of giving up.
    resolved = path.resolve(candidate);
  }
  return resolved;
}
57
+
58
/**
 * Test whether a normalised input path is the given root or lies beneath it.
 *
 * Matching is done on whole path segments: '/etc' covers '/etc' and
 * '/etc/passwd' but not '/etcetera'.
 *
 * A root that already ends in the platform separator (a filesystem root such
 * as '/' or 'c:\', or a policy entry written with a trailing slash) is used
 * as the prefix as-is. Appending a second separator to it would produce a
 * prefix like '//' that no resolved path starts with, silently disabling the
 * rule.
 *
 * @param {string} inputNorm Case-normalised absolute input path.
 * @param {string} denyNorm  Case-normalised root from deny_paths/allow_paths.
 * @returns {boolean} True when `inputNorm` equals or is under `denyNorm`.
 */
function matchesPrefix(inputNorm, denyNorm) {
  if (inputNorm === denyNorm) return true;
  const prefix = denyNorm.endsWith(path.sep) ? denyNorm : denyNorm + path.sep;
  return inputNorm.startsWith(prefix);
}
61
+
62
/**
 * Decide whether a resolved path is blocked by the policy's path rules.
 *
 * deny_paths is consulted first and always wins. allow_paths is only
 * enforced when it is non-empty, in which case the path must fall under at
 * least one entry.
 *
 * This function is exported, so it tolerates a missing policy: a
 * null/undefined policy is treated as "no path rules" rather than throwing.
 *
 * @param {string|null} resolved Absolute path (see resolveInputPath).
 * @param {object|null|undefined} policy Policy with optional
 *   `deny_paths` / `allow_paths` arrays.
 * @returns {'path-denied'|'path-not-allowed'|null} Reason code, or null when
 *   the path is permitted or `resolved` is empty.
 */
function pathIsDenied(resolved, policy) {
  if (!resolved) return null;

  const denyPaths = (policy && policy.deny_paths) || [];
  const allowPaths = (policy && policy.allow_paths) || [];
  const inputNorm = normCase(resolved);
  const isUnder = (root) => matchesPrefix(inputNorm, normCase(root));

  if (denyPaths.some((d) => isUnder(d))) return 'path-denied';
  if (allowPaths.length > 0 && !allowPaths.some((a) => isUnder(a))) {
    return 'path-not-allowed';
  }
  return null;
}
77
+
78
/**
 * Index the path-bearing `tool_use` blocks of an outbound body by their id.
 *
 * Returns Map<tool_use_id, { paths: string[], toolName: string, command?: string }>.
 * Only tool_use blocks from which at least one path could be extracted are
 * recorded:
 *   - Read / read_file        → input.file_path, else input.path
 *   - Glob / find_files, Grep → input.path (the search root)
 *   - Bash / PowerShell / shell_bash / shell_powershell
 *                             → every operand returned by extractShellReadPaths
 *                               for input.command; the raw command is kept on
 *                               the record as `command` so the shaping gate
 *                               can hand it to distill().
 * Tool names are matched case-insensitively.
 *
 * `paths[0]` is the primary path; further entries come from shell chains.
 * Callers strip when ANY listed path is denied.
 *
 * Malformed entries (null messages, non-array content, null blocks, blocks
 * without an id) are skipped instead of throwing. The shell-path helper is
 * loaded only when a shell tool_use is actually encountered.
 *
 * @param {Array|null|undefined} messages The request body's `messages`.
 * @returns {Map<string, {paths: string[], toolName: string, command?: string}>}
 */
function buildToolUsePathMap(messages) {
  const READ_TOOL = /^(Read|read_file)$/i;
  const SEARCH_TOOL = /^(Glob|find_files|Grep)$/i;
  const SHELL_TOOL = /^(Bash|PowerShell|shell_bash|shell_powershell)$/i;

  let extractShellReadPaths = null; // loaded on first shell tool_use
  const map = new Map();

  for (const msg of messages || []) {
    if (!msg || !Array.isArray(msg.content)) continue;

    for (const block of msg.content) {
      if (!block || block.type !== 'tool_use' || !block.id) continue;

      const inp = block.input || {};
      const name = block.name || '';
      const rec = { paths: [], toolName: name };

      if (READ_TOOL.test(name)) {
        // file_path takes precedence over the generic `path` field.
        if (typeof inp.file_path === 'string') rec.paths.push(inp.file_path);
        else if (typeof inp.path === 'string') rec.paths.push(inp.path);
      } else if (SEARCH_TOOL.test(name)) {
        if (typeof inp.path === 'string') rec.paths.push(inp.path);
      } else if (SHELL_TOOL.test(name) && typeof inp.command === 'string') {
        if (!extractShellReadPaths) {
          ({ extractShellReadPaths } = require('./policy/shell-path'));
        }
        for (const p of extractShellReadPaths(inp.command)) rec.paths.push(p);
        rec.command = inp.command;
      }

      // Blocks with no extractable path are not indexed.
      if (rec.paths.length > 0) map.set(block.id, rec);
    }
  }
  return map;
}
133
+
134
/**
 * Outbound deny_paths gate: replace the content of every tool_result whose
 * originating tool_use touched a path that policy forbids.
 *
 * The input is never mutated; changed messages and blocks are shallow
 * copies, untouched ones are returned by reference. Nothing is written to
 * disk here — the caller turns the returned `strips` into audit rows.
 *
 * @param {object} reqBody Parsed Anthropic request body (has .messages)
 * @param {object} policy  Loaded policy with .deny_paths / .allow_paths
 * @returns {{ messages: Array, strips: Array<{tool_use_id, path, toolName, reason}> }}
 */
function enforceOutboundDenyPaths(reqBody, policy) {
  const messages = (reqBody && reqBody.messages) || [];
  const unchanged = { messages, strips: [] };

  // No path rules at all → nothing to enforce.
  const denyCount = ((policy && policy.deny_paths) || []).length;
  const allowCount = ((policy && policy.allow_paths) || []).length;
  if (denyCount === 0 && allowCount === 0) return unchanged;

  const toolUses = buildToolUsePathMap(messages);
  if (toolUses.size === 0) return unchanged;

  // Decide once per tool_use id, so every tool_result carrying that id is
  // stripped. The first denied path of a record determines the verdict.
  const verdicts = new Map();
  toolUses.forEach((info, id) => {
    for (const raw of info.paths) {
      const resolved = resolveInputPath(raw);
      const reason = pathIsDenied(resolved, policy);
      if (!reason) continue;
      verdicts.set(id, { resolved, reason, toolName: info.toolName });
      return;
    }
  });
  if (verdicts.size === 0) return unchanged;

  const strips = [];
  const stripIfDenied = (block) => {
    if (block.type !== 'tool_result' || !block.tool_use_id) return block;
    const verdict = verdicts.get(block.tool_use_id);
    if (!verdict) return block;
    strips.push({
      tool_use_id: block.tool_use_id,
      path: verdict.resolved,
      toolName: verdict.toolName,
      reason: verdict.reason,
    });
    // Keep the tool_result itself; only its content becomes the marker.
    return { ...block, content: STRIP_MARKER };
  };

  const newMessages = messages.map((msg) => {
    if (!Array.isArray(msg.content)) return msg;
    const before = strips.length;
    const rewritten = msg.content.map((block) => stripIfDenied(block));
    return strips.length > before ? { ...msg, content: rewritten } : msg;
  });

  return { messages: newMessages, strips };
}
192
+
193
+ // ── Path-2 secret redaction (symmetric with path-1 redact-secrets) ──────────
194
+ //
195
+ // Path-1 (src/policy/engine.js + dispatcher TRANSFORM redact-secrets) scans
196
+ // the *result* of a tool_use the cloud model emits and replaces matching
197
+ // secret bytes before the result re-enters the model context. That path
198
+ // works correctly today.
199
+ //
200
+ // Path-2 covers the same defense gap as the deny_paths outbound fix: when
201
+ // Claude Code (or another agent runtime) injects pre-baked
202
+ // `tool_use Read` + `tool_result <content>` pairs into the OUTBOUND body
203
+ // as agentic context, those tool_results never trigger path-1 because no
204
+ // model-initiated tool call happened. Secrets in that content reach the
205
+ // model unless we scan and redact here.
206
+ //
207
+ // Design choice — REDACT, not request-BLOCK:
208
+ // At path-1, block_secrets_in_tool_results under `--preset strict` is a
209
+ // request-block. At path-2 the request body is already constructed by
210
+ // the agent runtime; refusing the whole turn is destructive (the agent
211
+ // loses its prompt round) while redaction is the smallest surgical fix
212
+ // that preserves the workflow. We always REDACT path-2 secrets, never
213
+ // request-block. The strict-mode tool-call-time block still fires for
214
+ // tool_use blocks the model emits — the two gates remain complementary.
215
+ //
216
+ // This gate fires when EITHER `redact_secrets_in_tool_results` OR
217
+ // `block_secrets_in_tool_results` is true. The latter, on its own, does
218
+ // not perform a request-block here (see above); it is treated as
219
+ // permission to redact at the outbound boundary. The audit row reason
220
+ // distinguishes the two flags so a reviewer can tell which policy was
221
+ // the proximate cause.
222
+ //
223
+ // Implementation reuses analyzer.scanSecrets / analyzer.redactSecrets so
224
+ // the path-1 and path-2 detection sets stay identical. `deny_patterns` is
225
+ // honoured via the existing extraPatterns surface.
226
+
227
// Audit reason codes for path-2 secret redaction, keyed by the policy flag
// that was the proximate cause.
const SECRET_REDACT_REASONS = Object.freeze({
  // redact_secrets_in_tool_results: the user explicitly opted into redaction.
  redact_flag: 'outbound-secret-redacted',
  // block_secrets_in_tool_results only: path-2 redacts instead of blocking.
  block_flag: 'outbound-secret-redacted-strict',
});
231
+
232
+ function enforceOutboundSecretRedaction(reqBody, policy) {
233
+ const messages = (reqBody && reqBody.messages) || [];
234
+ const noChange = { messages, redactions: [] };
235
+
236
+ const redactOn = policy && policy.redact_secrets_in_tool_results === true;
237
+ const blockOn = policy && policy.block_secrets_in_tool_results === true;
238
+ if (!redactOn && !blockOn) return noChange;
239
+
240
+ const { scanSecrets, redactSecrets } = require('./analyzer');
241
+ const denyPatternsRaw = (policy && policy.deny_patterns) || [];
242
+ // policy.deny_patterns is already normalized to [{ label, regex }] by
243
+ // src/policy/loader.js — scanSecrets expects exactly that shape.
244
+ const extraPatterns = denyPatternsRaw.length > 0 ? denyPatternsRaw : undefined;
245
+ const opts = extraPatterns ? { extraPatterns } : undefined;
246
+
247
+ // Source attribution (best-effort): if a tool_result is paired with a
248
+ // known tool_use Read/find/grep/shell, we can record the source path in
249
+ // the audit row. Untied tool_results still get redacted; their audit
250
+ // row just lacks a path.
251
+ const idToInfo = buildToolUsePathMap(messages);
252
+
253
+ const reason = redactOn ? SECRET_REDACT_REASONS.redact_flag
254
+ : SECRET_REDACT_REASONS.block_flag;
255
+
256
+ const redactions = [];
257
+ const newMessages = messages.map(msg => {
258
+ if (!Array.isArray(msg.content)) return msg;
259
+ let changed = false;
260
+ const newContent = msg.content.map(block => {
261
+ if (block.type !== 'tool_result') return block;
262
+ // v1 scope: scan string content. tool_results with array-of-text
263
+ // content also occur; v2 can extend to that shape if a real bypass
264
+ // surfaces through it.
265
+ if (typeof block.content !== 'string' || !block.content) return block;
266
+ const hits = scanSecrets(block.content, opts);
267
+ if (hits.length === 0) return block;
268
+ const redacted = redactSecrets(block.content, opts);
269
+ if (redacted === block.content) return block; // defensive
270
+ const info = idToInfo.get(block.tool_use_id);
271
+ redactions.push({
272
+ tool_use_id: block.tool_use_id,
273
+ path: (info && info.paths && info.paths[0]) || null,
274
+ toolName: info && info.toolName || null,
275
+ secretsRedacted: hits.length,
276
+ labels: [...new Set(hits.map(h => h.label))],
277
+ reason,
278
+ });
279
+ changed = true;
280
+ return { ...block, content: redacted };
281
+ });
282
+ return changed ? { ...msg, content: newContent } : msg;
283
+ });
284
+
285
+ return { messages: newMessages, redactions };
286
+ }
287
+
288
+ // ── Path-2 output shaping: distill-output + max_output_tokens ───────────────
289
+ //
290
+ // Symmetric to path-1's TRANSFORM distill-output and policy.tools[*]
291
+ // .max_output_tokens. At path-1, both shaping steps run on the result of a
292
+ // tool_use the cloud model emits, just before the result re-enters the
293
+ // model context. Path-2 covers the same gap as deny_paths and secret
294
+ // redaction: when an agent runtime injects pre-baked tool_use + tool_result
295
+ // pairs into the OUTBOUND body as agentic context, those tool_results
296
+ // never trigger path-1 because no model-initiated call happened — the
297
+ // huge output reaches the model unless this gate clips.
298
+ //
299
+ // Two shaping steps, applied in path-1 order:
300
+ // 1. distill-output — clips noisy tool output (grep / find / git log /
301
+ // test runners) to a per-tool line limit using the
302
+ // shared src/distiller.js logic. Fires when the
303
+ // global flag is set OR a per-tool TRANSFORM entry
304
+ // names distill-output.
305
+ // 2. max_output_tokens — hard token cap per tool category (chars/4 est.,
306
+ // marked '~' in the marker line). Fires when the
307
+ // per-tool entry sets max_output_tokens. Final
308
+ // clip, runs after distill so the budget can
309
+ // further trim an already-distilled output.
310
+ //
311
+ // The audit row reason names which step fired (or both as a chain).
312
+
313
/**
 * Map a protocol tool name (e.g. Claude Code's `Read`, `Bash`) or an
 * already-canonical name onto the canonical key used in `policy.tools`.
 * Matching is case-insensitive.
 *
 * @param {*} name Tool name from a tool_use block.
 * @returns {string|null} Canonical name, or null for empty/unrecognised names.
 */
function canonicalToolName(name) {
  if (!name) return null;

  switch (String(name).toLowerCase()) {
    case 'read':
    case 'read_file':
      return 'read_file';
    case 'glob':
    case 'find_files':
      return 'find_files';
    case 'grep':
      return 'grep';
    case 'bash':
    case 'shell_bash':
      return 'shell_bash';
    case 'powershell':
    case 'shell_powershell':
      return 'shell_powershell';
    default:
      return null;
  }
}
323
+
324
+ function enforceOutboundShaping(reqBody, policy) {
325
+ const messages = (reqBody && reqBody.messages) || [];
326
+ const noChange = { messages, shapings: [] };
327
+
328
+ const globalDistill = policy && policy.distill_tool_results === true;
329
+ const tools = (policy && policy.tools) || {};
330
+
331
+ // Quick exit: nothing configured anywhere
332
+ const anyConfigured = globalDistill || Object.keys(tools).some((k) => {
333
+ const t = tools[k] || {};
334
+ return (t.action === 'TRANSFORM' && /distill/.test(t.transform || '')) ||
335
+ typeof t.max_output_tokens === 'number';
336
+ });
337
+ if (!anyConfigured) return noChange;
338
+
339
+ const { distill } = require('./distiller');
340
+ const { enforceContextBudget } = require('./context-budget');
341
+ const idToInfo = buildToolUsePathMap(messages);
342
+
343
+ const shapings = [];
344
+ const newMessages = messages.map((msg) => {
345
+ if (!Array.isArray(msg.content)) return msg;
346
+ let changed = false;
347
+ const newContent = msg.content.map((block) => {
348
+ if (block.type !== 'tool_result' || !block.tool_use_id) return block;
349
+ if (typeof block.content !== 'string' || !block.content) return block;
350
+ const info = idToInfo.get(block.tool_use_id);
351
+ if (!info) return block;
352
+
353
+ const canonical = canonicalToolName(info.toolName);
354
+ const toolEntry = (canonical && tools[canonical]) || null;
355
+
356
+ const perToolDistill = !!(toolEntry && toolEntry.action === 'TRANSFORM' &&
357
+ /distill/.test(toolEntry.transform || ''));
358
+ const shouldDistill = globalDistill || perToolDistill;
359
+ const budgetTokens = toolEntry && typeof toolEntry.max_output_tokens === 'number'
360
+ ? toolEntry.max_output_tokens : null;
361
+
362
+ if (!shouldDistill && budgetTokens === null) return block;
363
+
364
+ let working = block.content;
365
+ const reasons = [];
366
+ let totalSaved = 0;
367
+ let distillLabel = null;
368
+
369
+ // 1. Distill (per tool's command type).
370
+ // distill() classifies on the first word of the cmd string (grep,
371
+ // find, ls, git-log, test runners). Auto-context tool_use blocks
372
+ // expose typed inputs (Grep has .pattern, Bash has .command) not
373
+ // a shell command line, so we synthesize a cmd verb that
374
+ // classifyCmd recognises. Read auto-context is intentionally a
375
+ // no-op here — Read is not in the distill rules.
376
+ if (shouldDistill) {
377
+ const tn = (info.toolName || '').toLowerCase();
378
+ let synthCmd = '';
379
+ if (tn === 'grep') synthCmd = 'grep _';
380
+ else if (tn === 'glob' || tn === 'find_files') synthCmd = 'find .';
381
+ else if (tn === 'bash' || tn === 'shell_bash') synthCmd = info.command || 'ls';
382
+ else if (tn === 'powershell' || tn === 'shell_powershell') synthCmd = info.command || 'ls';
383
+ else synthCmd = info.toolName || '';
384
+ if (synthCmd) {
385
+ const r = distill(synthCmd, working);
386
+ if (r && r.distilled && r.content !== working) {
387
+ working = r.content;
388
+ totalSaved += r.savedTokens || 0;
389
+ distillLabel = r.label || null;
390
+ reasons.push('distill-output');
391
+ }
392
+ }
393
+ }
394
+ // 2. max_output_tokens (final clip)
395
+ if (budgetTokens !== null) {
396
+ const r = enforceContextBudget(working, budgetTokens);
397
+ if (r && r.clipped) {
398
+ working = r.content;
399
+ totalSaved += r.prevented_tokens || 0;
400
+ reasons.push('context-budget');
401
+ }
402
+ }
403
+
404
+ if (working === block.content) return block;
405
+
406
+ shapings.push({
407
+ tool_use_id: block.tool_use_id,
408
+ path: (info.paths && info.paths[0]) || null,
409
+ toolName: info.toolName,
410
+ reasons,
411
+ savedTokens: totalSaved,
412
+ label: distillLabel,
413
+ });
414
+ changed = true;
415
+ return { ...block, content: working };
416
+ });
417
+ return changed ? { ...msg, content: newContent } : msg;
418
+ });
419
+
420
+ return { messages: newMessages, shapings };
421
+ }
422
+
423
+ module.exports = {
424
+ enforceOutboundDenyPaths,
425
+ enforceOutboundSecretRedaction,
426
+ enforceOutboundShaping,
427
+ buildToolUsePathMap,
428
+ pathIsDenied,
429
+ resolveInputPath,
430
+ canonicalToolName,
431
+ STRIP_MARKER,
432
+ SECRET_REDACT_REASONS,
433
+ };
@@ -0,0 +1,78 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Built-in classifier registry — names that policy.yml entries can reference
5
+ * via `classifier: <name>`. Each classifier takes a BoundaryEvent and returns
6
+ * { handled: boolean, reason: string }.
7
+ *
8
+ * Stage 3: classifiers wrap the existing JS validators. The user-facing
9
+ * surface is the *name* in policy.yml (e.g. `bash-allowlist`); the
10
+ * implementation stays in code where its accumulated correctness lives.
11
+ *
12
+ * Reason codes are preserved exactly (matching FALLBACK_REASONS) so that
13
+ * status / dashboard / debug-log surfaces don't change.
14
+ */
15
+
16
+ const {
17
+ isReadHandleable,
18
+ isGlobHandleable,
19
+ isGrepHandleable,
20
+ isTodoHandleable,
21
+ } = require('../runtime');
22
+
23
+ // classifyBlock owns the Bash/PowerShell allowlist + shell-meta + classifier
24
+ // gates. We delegate to it so reason codes match exactly. classifyBlock still
25
+ // uses Claude protocol names internally, so each shell classifier hardcodes
26
+ // the protocol name it represents (independent of the event's canonical name).
27
+ const lazyClassifyBlock = () => require('../interceptor').classifyBlock;
28
+
29
// Build the { handled, reason } verdict of an input validator from a single
// evaluation of its predicate.
const validatorVerdict = (ok, failReason) => ({
  handled: ok,
  reason: ok ? 'ok' : failReason,
});

/**
 * Registry of built-in classifiers, keyed by the name a policy entry uses in
 * `classifier: <name>`. Every classifier takes a BoundaryEvent and returns
 * `{ handled: boolean, reason: string }`; `reason` is 'ok' when handled.
 *
 * Each input validator calls its predicate exactly once per event and
 * derives both `handled` and `reason` from that one result, so the two
 * fields always agree.
 */
const CLASSIFIERS = Object.freeze({
  'read-input-validator': (event) =>
    validatorVerdict(!!isReadHandleable(event.toolInput), 'read_unsupported_type'),

  'glob-input-validator': (event) =>
    validatorVerdict(!!isGlobHandleable(event.toolInput), 'glob_injection_or_invalid'),

  // Grep distinguishes a multiline request from other invalid input.
  'grep-input-validator': (event) => {
    const failReason = event.toolInput?.multiline === true
      ? 'grep_multiline'
      : 'grep_invalid_input';
    return validatorVerdict(!!isGrepHandleable(event.toolInput), failReason);
  },

  'todo-write-validator': (event) =>
    validatorVerdict(!!isTodoHandleable(event.toolInput, 'TodoWrite'), 'tool_not_handled'),

  'todo-read-validator': (event) =>
    validatorVerdict(!!isTodoHandleable(event.toolInput, 'TodoRead'), 'tool_not_handled'),

  // Bash / PowerShell delegate fully to classifyBlock and return its result
  // as-is. Each one hardcodes the protocol tool name it represents rather
  // than using the event's own name.
  'bash-allowlist': (event) => {
    const classify = lazyClassifyBlock();
    return classify({ type: 'tool_use', id: event.id, name: 'Bash', input: event.toolInput });
  },
  'powershell-allowlist': (event) => {
    const classify = lazyClassifyBlock();
    return classify({ type: 'tool_use', id: event.id, name: 'PowerShell', input: event.toolInput });
  },
});
73
+
74
/**
 * Resolve a classifier by the name used in a policy entry.
 *
 * Only the registry's own keys are considered. Names that exist solely on
 * Object.prototype (e.g. 'constructor', 'toString') resolve to null instead
 * of returning an inherited function that is not a classifier.
 *
 * @param {string} name Classifier name from `classifier: <name>`.
 * @returns {Function|null} The classifier, or null when the name is unknown.
 */
function lookup(name) {
  const isRegistered = Object.prototype.hasOwnProperty.call(CLASSIFIERS, name);
  return isRegistered ? CLASSIFIERS[name] : null;
}
77
+
78
+ module.exports = { CLASSIFIERS, lookup };