@occasiolabs/occasio 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +10 -0
  3. package/README.md +216 -0
  4. package/bin/occasio-mcp.js +5 -0
  5. package/bin/occasio.js +2 -0
  6. package/bin/supervisor/README.md +90 -0
  7. package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
  8. package/bin/supervisor/install-windows-task.ps1 +48 -0
  9. package/bin/supervisor/occasio.service +18 -0
  10. package/docs/AUDIT.md +120 -0
  11. package/docs/attest_verify.py +283 -0
  12. package/docs/audit_walker.py +65 -0
  13. package/docs/canonicalize.py +99 -0
  14. package/docs/compliance-mapping.md +93 -0
  15. package/docs/demos/mcp-block.md +148 -0
  16. package/docs/edr-calibration.md +73 -0
  17. package/docs/edr-demo.md +83 -0
  18. package/docs/python-verifier.md +74 -0
  19. package/docs/reference-pipeline.md +140 -0
  20. package/package.json +69 -0
  21. package/policy-templates/dev-default.yml +84 -0
  22. package/policy-templates/finance.yml +61 -0
  23. package/policy-templates/strict.yml +49 -0
  24. package/schemas/agent-attestation-v1.json +190 -0
  25. package/schemas/occasio-policy.schema.json +99 -0
  26. package/spec/agent-attestation/v1/README.md +137 -0
  27. package/src/adapters/claude-code.js +518 -0
  28. package/src/adapters/cline.js +161 -0
  29. package/src/adapters/computer-use-cli.js +198 -0
  30. package/src/adapters/computer-use.js +227 -0
  31. package/src/analyzer.js +170 -0
  32. package/src/anomaly/cli.js +143 -0
  33. package/src/anomaly/detectors/deny-rate.js +84 -0
  34. package/src/anomaly/detectors/file-read-volume.js +109 -0
  35. package/src/anomaly/detectors/secret-redact-rate.js +107 -0
  36. package/src/anomaly/detectors/unknown-tool-input.js +83 -0
  37. package/src/anomaly/index.js +169 -0
  38. package/src/attest/canonicalize.js +97 -0
  39. package/src/attest/index.js +355 -0
  40. package/src/attest/run-slice.js +57 -0
  41. package/src/attest/sign.js +186 -0
  42. package/src/attest/verify.js +192 -0
  43. package/src/audit/errors.js +21 -0
  44. package/src/audit/input-normalizer.js +121 -0
  45. package/src/audit/jsonl-auditor.js +178 -0
  46. package/src/audit/verifier.js +152 -0
  47. package/src/baseline.js +507 -0
  48. package/src/boundary.js +238 -0
  49. package/src/budget.js +42 -0
  50. package/src/classifier.js +115 -0
  51. package/src/context-budget.js +77 -0
  52. package/src/core/boundary-event.js +75 -0
  53. package/src/core/decision.js +61 -0
  54. package/src/core/pipeline.js +66 -0
  55. package/src/core/tool-names.js +105 -0
  56. package/src/dashboard.js +892 -0
  57. package/src/demo/README.md +31 -0
  58. package/src/demo/anomalies-demo.js +211 -0
  59. package/src/demo/attest-demo.js +198 -0
  60. package/src/distiller.js +155 -0
  61. package/src/embeddings.json +72 -0
  62. package/src/executor/dispatcher.js +230 -0
  63. package/src/harness.js +817 -0
  64. package/src/index.js +1711 -0
  65. package/src/inspect.js +329 -0
  66. package/src/interceptor.js +1198 -0
  67. package/src/lao.js +185 -0
  68. package/src/lao_prep.py +119 -0
  69. package/src/ledger.js +209 -0
  70. package/src/mcp-experiment.js +140 -0
  71. package/src/mcp-normalize.js +139 -0
  72. package/src/mcp-server.js +320 -0
  73. package/src/outbound-policy.js +433 -0
  74. package/src/policy/built-in-classifiers.js +78 -0
  75. package/src/policy/doctor.js +226 -0
  76. package/src/policy/engine.js +339 -0
  77. package/src/policy/init.js +153 -0
  78. package/src/policy/loader.js +448 -0
  79. package/src/policy/rules-default.js +36 -0
  80. package/src/policy/shell-path.js +135 -0
  81. package/src/policy/show.js +196 -0
  82. package/src/policy/validate.js +310 -0
  83. package/src/preflight/cli.js +164 -0
  84. package/src/preflight/miner.js +329 -0
  85. package/src/proxy/agent-router.js +93 -0
  86. package/src/redteam.js +428 -0
  87. package/src/replay.js +446 -0
  88. package/src/report/index.js +224 -0
  89. package/src/runtime.js +595 -0
  90. package/src/scanner/index.js +49 -0
  91. package/src/selftest.js +192 -0
  92. package/src/session.js +36 -0
@@ -0,0 +1,198 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * computer-use-cli.js — `occasio computer-use` (dry-run demo).
5
+ *
6
+ * Reads a JSONL file of synthetic tool_use blocks (given via --from), applies a
7
+ * computer-use policy file, and prints what the engine would decide for
8
+ * each one. Lets us demo the governance story for Anthropic Computer Use
9
+ * without yet wiring up the live proxy adapter.
10
+ *
11
+ * Usage:
12
+ * occasio computer-use --dry-run --from <jsonl> [--policy <yml>]
13
+ * occasio computer-use --example # built-in sample
14
+ *
15
+ * Live mode (intercept a real Computer-Use session through the proxy) is
16
+ * a follow-up that requires:
17
+ * - extending the proxy SSE parser to recognise computer-use tool-use
18
+ * blocks (currently only the Claude Code shape is parsed)
19
+ * - injecting BLOCK results back as tool_result blocks (mirroring what
20
+ * the Claude Code adapter does)
21
+ * - end-to-end validation against the actual Anthropic API
22
+ *
23
+ * That work is deferred until at least one design partner is on it. The
24
+ * adapter logic in src/adapters/computer-use.js is the half that can be
25
+ * built and unit-tested without an API key.
26
+ */
27
+
28
+ const fs = require('fs');
29
+
30
+ const { normalizeToolUse, evaluate } = require('./computer-use');
31
+
32
// ANSI colour helpers for terminal output. Each entry wraps its argument in
// one SGR code and then resets styling:
//   r = red (31), g = green (32), y = yellow (33), d = dim (2), b = bold (1)
const C = Object.fromEntries(
  [['r', 31], ['g', 32], ['y', 33], ['d', 2], ['b', 1]]
    .map(([key, code]) => [key, s => `\x1b[${code}m${s}\x1b[0m`]),
);
39
+
40
// Minimal inline YAML reader for the dry-run CLI — just enough to parse the
// example policies we ship. Real users wire up `policy.yml` through the
// full policy/loader.js path; this CLI is a demo.

/**
 * Remove a trailing YAML comment from one line.
 *
 * A `#` only starts a comment when it is at the start of the line or preceded
 * by whitespace, and never inside a quoted scalar — otherwise a quoted regex
 * such as '(?i)#token' would be cut in half.
 *
 * @param {string} line A single line with no trailing `\r`.
 * @returns {string} The line without its comment (if any).
 */
function stripYamlComment(line) {
  let openQuote = null;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (openQuote) {
      if (openQuote === '"' && ch === '\\') i++; // skip the escaped character
      else if (ch === openQuote) openQuote = null;
      continue;
    }
    const atTokenStart = i === 0 || /\s/.test(line[i - 1]);
    if (!atTokenStart) continue;
    if (ch === '#') return line.slice(0, i);
    if (ch === "'" || ch === '"') openQuote = ch;
  }
  return line;
}

/**
 * Parse the small YAML subset used by computer-use policy files.
 *
 * Supported shapes:
 *   key: scalar                 → out[key] = scalar
 *   key:                        → out[key] = []
 *     - scalar                  →   pushes a scalar
 *     - name: scalar            →   pushes an object { name: scalar, ... }
 *       other: scalar           →   adds a field to that object
 *
 * Anything else (nested mappings, flow syntax, multi-line scalars) is ignored
 * or kept as a plain string.
 *
 * @param {string} text Raw YAML text; LF and CRLF line endings both work.
 * @returns {object} Plain object keyed by the top-level keys.
 */
function parsePolicyYaml(text) {
  const out = {};
  let currentArray = null; // list opened by the last `key:` that had no inline value
  let currentItem = null;  // object opened by the last `- name: value` list item
  for (const line of text.split('\n')) {
    // Drop the CR first: `.` never matches `\r`, so a comment regex anchored
    // on `$` would silently fail on every CRLF line.
    const clean = stripYamlComment(line.replace(/\r$/, ''));
    if (!clean.trim()) continue;

    const top = clean.match(/^([a-z_]+)\s*:\s*(.*)$/i);
    if (top) {
      const value = top[2].trim();
      if (!value) {
        out[top[1]] = [];
        currentArray = out[top[1]];
      } else {
        out[top[1]] = parseScalar(value);
        currentArray = null;
      }
      currentItem = null;
      continue;
    }
    if (!currentArray) continue;

    if (/^\s*-\s+/.test(clean)) {
      const body = clean.replace(/^\s*-\s+/, '');
      // `name: value` (colon followed by whitespace or end of line) opens a
      // mapping item; `http://x` and quoted strings stay plain scalars.
      const entry = body.match(/^([a-z_]+)\s*:(?:\s+(.*))?$/i);
      if (entry) {
        currentItem = { [entry[1]]: parseScalar(entry[2] || '') };
        currentArray.push(currentItem);
      } else {
        currentItem = null;
        currentArray.push(parseScalar(body));
      }
      continue;
    }

    // Indented `name: value` continues the mapping opened by the last item.
    const field = clean.match(/^\s+([a-z_]+)\s*:(?:\s+(.*))?$/i);
    if (currentItem && field) {
      currentItem[field[1]] = parseScalar(field[2] || '');
    }
  }
  return out;
}

/**
 * Convert one YAML scalar token to a JS value.
 *
 * Quoted strings lose their surrounding quotes (no escape processing),
 * all-digit tokens become integers, `true`/`false` become booleans, and
 * everything else is returned as the trimmed string.
 *
 * @param {string} s Raw scalar text.
 * @returns {string|number|boolean}
 */
function parseScalar(s) {
  const value = s.trim();
  const first = value[0];
  const isQuoted = value.length >= 2 &&
    (first === "'" || first === '"') &&
    value[value.length - 1] === first;
  if (isQuoted) return value.slice(1, -1);
  if (/^\d+$/.test(value)) return parseInt(value, 10);
  if (value === 'true') return true;
  if (value === 'false') return false;
  return value;
}
75
+
76
// Built-in sample traffic for `--example`: a mix of computer and bash
// tool_use blocks, listed as [tool name, input] pairs and expanded into the
// tool_use block shape below.
const EXAMPLE_TRAFFIC = [
  ['computer', { action: 'screenshot' }],
  ['computer', { action: 'mouse_move', coordinate: [400, 300] }],
  ['computer', { action: 'left_click', coordinate: [400, 300] }],
  ['computer', { action: 'type', text: 'Hello world' }],
  ['computer', { action: 'type', text: 'password: hunter2sekret' }],
  ['bash', { command: 'ls -la' }],
  ['bash', { command: 'sudo rm -rf /tmp/x' }],
  ['bash', { command: 'curl https://github.com/some/repo' }],
].map(([name, input]) => ({ type: 'tool_use', name, input }));
86
+
87
// Built-in sample policy, used by `--example` and whenever no `--policy` file
// is given. Keyboard rules are written as [pattern, reason] pairs plus one
// bare pattern string; command rules are bare pattern strings.
const EXAMPLE_POLICY = {
  deny_keyboard_patterns: [
    ['(?i)password\\s*[:=]\\s*\\S+', 'keyboard-secret-pattern'],
    ['(?i)secret\\s*[:=]\\s*\\S+', 'keyboard-secret-pattern'],
  ]
    .map(([pattern, reason]) => ({ pattern, reason }))
    .concat('(?i)(sk-ant-|ghp_|AKIA)[A-Za-z0-9]{16,}'),
  deny_command_patterns: [
    // exfil patterns
    '\\bcurl\\b.*\\b(api\\.|admin\\.|prod\\.)',
  ],
};
97
+
98
/**
 * Look up the value that follows a named flag in an argv-style array.
 *
 * @param {string[]} args Argument list.
 * @param {string} name Flag to look for, e.g. '--from'.
 * @returns {string|undefined} The element right after the first occurrence of
 *   `name`, or undefined when the flag is absent or is the last element.
 */
function flag(args, name) {
  const position = args.indexOf(name);
  if (position < 0) return undefined;
  return args[position + 1];
}
102
/**
 * Report whether a switch is present in an argv-style array.
 *
 * @param {string[]} args Argument list.
 * @param {string} name Switch to look for, e.g. '--dry-run'.
 * @returns {boolean} true when `name` appears anywhere in `args`.
 */
function bool(args, name) {
  return args.includes(name);
}
103
+
104
/**
 * Entry point for `occasio computer-use`: evaluates tool_use blocks against a
 * computer-use policy and prints one decision line per block plus a summary.
 *
 * Traffic comes from the built-in sample (`--example`) or from the JSONL file
 * named by `--from`; the policy comes from `--policy <yml>` or falls back to
 * the built-in sample policy. Blank lines in the JSONL file are ignored and
 * both LF and CRLF line endings are accepted.
 *
 * @param {string[]} [args] Command-line arguments after the subcommand.
 * @returns {number} Exit code: 0 when nothing was blocked (or help was
 *   shown), 1 when at least one block was denied, 2 on usage or file errors.
 */
function runComputerUseCli(args = []) {
  if (bool(args, '--help') || bool(args, '-h')) {
    process.stdout.write(
      'Usage:\n' +
      ' occasio computer-use --dry-run --from <jsonl> [--policy <yml>]\n' +
      ' occasio computer-use --example\n\n' +
      'Apply a computer-use policy to a JSONL of synthetic tool_use blocks\n' +
      'and report each decision. Live proxy interception is not yet wired —\n' +
      'this is the adapter\'s policy-engine half, validated in isolation.\n'
    );
    return 0;
  }

  const example = bool(args, '--example');
  const dryRun = bool(args, '--dry-run') || example;
  if (!dryRun) {
    process.stderr.write(`${C.r('error')}: pass --dry-run (live interception not yet wired)\n`);
    return 2;
  }

  let traffic;
  let policy;

  if (example) {
    traffic = EXAMPLE_TRAFFIC.slice();
    policy = EXAMPLE_POLICY;
    process.stdout.write(C.d('[example mode: built-in synthetic traffic + sample policy]\n\n'));
  } else {
    const fromPath = flag(args, '--from');
    if (!fromPath) {
      process.stderr.write(`${C.r('error')}: --from <jsonl> required (or use --example)\n`);
      return 2;
    }
    try {
      traffic = [];
      const lines = fs.readFileSync(fromPath, 'utf8').split(/\r?\n/);
      lines.forEach((line, idx) => {
        // Skip blank / whitespace-only lines: JSON.parse('') and
        // JSON.parse('\r') both throw, which used to abort the whole run.
        if (!line.trim()) return;
        try {
          traffic.push(JSON.parse(line));
        } catch (parseErr) {
          // Point at the offending line so a bad record is easy to find.
          throw new Error(`line ${idx + 1}: ${parseErr.message}`, { cause: parseErr });
        }
      });
    } catch (e) {
      process.stderr.write(`${C.r('error')}: cannot read traffic file: ${e.message}\n`);
      return 2;
    }
    const polPath = flag(args, '--policy');
    if (polPath) {
      try {
        policy = parsePolicyYaml(fs.readFileSync(polPath, 'utf8'));
      } catch (e) {
        process.stderr.write(`${C.r('error')}: cannot read policy: ${e.message}\n`);
        return 2;
      }
    } else {
      policy = EXAMPLE_POLICY;
      process.stdout.write(C.d('[no --policy given; using built-in sample policy]\n\n'));
    }
  }

  let blocked = 0;
  let allowed = 0;
  for (const raw of traffic) {
    const tool = normalizeToolUse(raw);
    const decision = evaluate(tool, policy);
    if (decision.decision === 'BLOCK') blocked++; else allowed++;
    renderDecision(raw, decision);
  }

  process.stdout.write(
    `\n${C.b('Summary')}: ${C.g(allowed + ' allowed')} · ${C.r(blocked + ' blocked')} ` +
    `· ${traffic.length} total\n`);
  return blocked > 0 ? 1 : 0;
}
171
+
172
/**
 * Print one decision line (plus a reason line for BLOCK) to stdout.
 *
 * `raw` is whatever came out of the traffic file, so it may be null or a
 * primitive: such entries are reported by the engine as BLOCK and must still
 * be printable rather than crash the run.
 *
 * @param {*} raw The original traffic entry (normally a tool_use block).
 * @param {{decision: string, reason?: string, detail?: string}} dec Decision
 *   object; only `decision`, `reason` and `detail` are read here.
 */
function renderDecision(raw, dec) {
  const block = (raw && typeof raw === 'object') ? raw : {};
  // String() so a non-string name cannot break padEnd() below.
  const name = String(block.name || 'unknown');
  const sub = block.input?.action || '';
  const tag = sub ? `${name}.${sub}` : name;
  const valuePreview = previewInput(block);
  if (dec.decision === 'BLOCK') {
    process.stdout.write(` ${C.r('✗ BLOCK')} ${C.b(tag.padEnd(28))} ${C.d(valuePreview)}\n`);
    process.stdout.write(` ${C.d('reason: ' + dec.reason + (dec.detail ? ' (' + dec.detail + ')' : ''))}\n`);
  } else {
    process.stdout.write(` ${C.g('✓ ALLOW')} ${C.b(tag.padEnd(28))} ${C.d(valuePreview)}\n`);
  }
}
184
+
185
/**
 * Build a short, printable preview of a tool_use block's input.
 *
 * Preference order: `text` (JSON-quoted), then `command`, then `coordinate`.
 * Text and command previews are cut to 60 characters. The traffic file is
 * untrusted, so unexpected shapes (null block, non-string command, non-array
 * coordinate) produce a best-effort preview instead of throwing.
 *
 * @param {*} raw Traffic entry, normally `{ input: {...} }`.
 * @returns {string} Preview text, or '' when there is nothing to show.
 */
function previewInput(raw) {
  const inp = (raw && typeof raw === 'object' && raw.input) || {};
  // JSON.stringify returns undefined for functions/symbols; fall back to ''.
  if (inp.text) return (JSON.stringify(inp.text) ?? '').slice(0, 60);
  if (inp.command) return String(inp.command).slice(0, 60);
  if (Array.isArray(inp.coordinate)) return `[${inp.coordinate.join(',')}]`;
  return '';
}
192
+
193
+ module.exports = {
194
+ runComputerUseCli,
195
+ EXAMPLE_TRAFFIC,
196
+ EXAMPLE_POLICY,
197
+ _parsePolicyYaml: parsePolicyYaml,
198
+ };
@@ -0,0 +1,227 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * adapters/computer-use.js — policy + decision engine for Anthropic Computer
5
+ * Use traffic.
6
+ *
7
+ * Status: tech-demo scaffold. This module defines the tool schemas,
8
+ * normalises raw tool-use blocks into a stable internal shape, and applies
9
+ * a small set of computer-use-specific deny directives. It is intentionally
10
+ * decoupled from the live proxy adapter — wiring it into the request path
11
+ * (so a real Computer-Use session is policed in real time) requires an
12
+ * Anthropic API key for validation and lives behind the `occasio
13
+ * computer-use` CLI as a dry-run demo until then.
14
+ *
15
+ * Why a separate adapter at all
16
+ * Coding agents touch files. Computer-use agents touch the whole
17
+ * operating system: screen pixels, keyboard, mouse, browser. The blast
18
+ * radius is qualitatively larger. The same governance primitives (deny,
19
+ * redact, audit, attest) need a different rule vocabulary:
20
+ * - "this URL pattern is forbidden navigation"
21
+ * - "the keyboard input must not match a known-secret pattern"
22
+ * - "the screenshot must not contain region X" (stub — pixel work
23
+ * is deferred until we have a real session to validate against)
24
+ *
25
+ * Tool inventory (Anthropic Computer Use API, v20241022 / v20250124)
26
+ * computer.screenshot → no inputs
27
+ * computer.mouse_move → { coordinate: [x, y] }
28
+ * computer.left_click → { coordinate? }
29
+ * computer.right_click → { coordinate? }
30
+ * computer.middle_click → { coordinate? }
31
+ * computer.double_click → { coordinate? }
32
+ * computer.left_click_drag → { start_coordinate, coordinate }
33
+ * computer.type → { text }
34
+ * computer.key → { text } (keysym)
35
+ * computer.cursor_position → no inputs
36
+ * bash → { command } (shell)
37
+ * str_replace_editor → file editing (out of scope here;
38
+ * handled by existing Read/Edit path)
39
+ *
40
+ * Decision codes match the rest of Occasio: ALLOW | BLOCK | TRANSFORM.
41
+ *
42
+ * NOT YET DONE (explicit deferrals — track at the call sites):
43
+ * - Live proxy adapter wiring (needs `x-occasio-agent: computer-use`
44
+ * header routing and tool-result injection back into the SSE stream).
45
+ * Today the proxy only knows Claude Code; the Cline adapter is
46
+ * synthetic; computer-use will be the third live adapter.
47
+ * - Pixel-region deny rules. Requires lifting screenshot tensors and
48
+ * either OCR or fixed-region template matching. Deferred until at
49
+ * least one paying customer asks.
50
+ * - URL extraction from screenshots / browser state. We can intercept
51
+ * `bash` calls that issue `curl`/`wget` etc. via existing rules, but
52
+ * mouse-driven browser navigation will need a hook into the agent's
53
+ * screen-state stream — likely an MCP-style "browser state" tool.
54
+ */
55
+
56
+ // ── Tool registry ────────────────────────────────────────────────────────────
57
+
58
// Actions accepted on the `computer` tool. Anything outside this set makes
// normalizeToolUse() return null.
const COMPUTER_ACTIONS = new Set([
  'screenshot',
  'mouse_move',
  'left_click',
  'right_click',
  'middle_click',
  'double_click',
  'left_click_drag',
  'type',
  'key',
  'cursor_position',
]);

// Tool names this adapter recognises (compared after lower-casing).
const KNOWN_TOOL_NAMES = new Set([
  'computer',
  'bash',
  'str_replace_editor',
]);
68
+
69
/**
 * Convert a raw tool_use block into the `{ kind, ... }` shape consumed by
 * evaluate().
 *
 * Result by tool (names and actions are lower-cased before matching):
 *   computer            → { kind, action, coordinate, start_coordinate, text, raw }
 *   bash                → { kind, command, raw }
 *   str_replace_editor  → { kind, raw }
 *
 * Coordinates are cut to their first two elements; fields with an unexpected
 * type become null. `raw` is the untouched input block.
 *
 * @param {*} block Candidate tool_use block.
 * @returns {object|null} Normalised tool, or null when the block is not an
 *   object, has a `type` other than 'tool_use', names an unknown tool, or
 *   uses an unknown computer action. Callers decide whether null is itself a
 *   policy violation.
 */
function normalizeToolUse(block) {
  if (!block || typeof block !== 'object') return null;
  if (block.type && block.type !== 'tool_use') return null;

  const toolName = String(block.name || '').toLowerCase();
  if (!KNOWN_TOOL_NAMES.has(toolName)) return null;

  const hasInputObject = block.input && typeof block.input === 'object';
  const args = hasInputObject ? block.input : {};
  const firstTwo = value => (Array.isArray(value) ? value.slice(0, 2) : null);
  const stringOrNull = value => (typeof value === 'string' ? value : null);

  switch (toolName) {
    case 'computer': {
      const action = String(args.action || '').toLowerCase();
      if (!COMPUTER_ACTIONS.has(action)) return null;
      return {
        kind: 'computer',
        action,
        coordinate: firstTwo(args.coordinate),
        start_coordinate: firstTwo(args.start_coordinate),
        text: stringOrNull(args.text),
        raw: block,
      };
    }
    case 'bash':
      return { kind: 'bash', command: stringOrNull(args.command), raw: block };
    case 'str_replace_editor':
      return { kind: 'str_replace_editor', raw: block };
    default:
      return null;
  }
}
108
+
109
+ // ── Policy rules ─────────────────────────────────────────────────────────────
110
+
111
+ /**
112
+ * A computer-use policy is a plain object — same flavour as policy.yml.
113
+ * Recognised directives (extensions to the existing engine):
114
+ *
115
+ * deny_keyboard_patterns:
116
+ * - pattern: '(?i)password\\s*[:=]\\s*\\S+'
117
+ * reason: 'keyboard-secret-pattern'
118
+ *
119
+ * deny_command_patterns: # bash commands
120
+ * - '(?i)\\bsudo\\b'
121
+ *
122
+ * deny_screen_regions: # stub: pixel work deferred
123
+ * - { x: 0, y: 0, w: 1920, h: 100, reason: 'top-bar-banner-info' }
124
+ *
125
+ * max_clicks_per_minute: 30 # rate-limit click bursts
126
+ * allow_browser_hosts: # if non-empty, anything else denied
127
+ * - 'github.com'
128
+ * - 'docs.anthropic.com'
129
+ *
130
+ * Missing directives → no constraint from that directive (additive).
131
+ */
132
+
133
// Shell one-liners that evaluate() always refuses for bash tools, regardless
// of what the policy says.
const RESERVED_SHELL_BLACKLIST = [
  // privilege escalation
  /\bsudo\b/i,
  /\bsu\b/i,
  // recursive force-delete from the filesystem root
  /\brm\s+-rf\s+\//i,
  // filesystem creation / raw disk copy
  /\bmkfs\b/i,
  /\bdd\s+if=/i,
  // fork bomb
  /:\(\)\s*\{\s*:\|:\&/,
];
139
+
140
/**
 * Turn a policy pattern string into a RegExp.
 *
 * JS has no PCRE/RE2-style inline flag prefix, so a leading `(?...)` group
 * made of the letters i, m, s, u, x is stripped from the pattern; of those
 * letters only i, m and s are carried over as RegExp flags (u and x are
 * dropped).
 *
 * @param {*} raw Pattern text, optionally starting with e.g. `(?i)`.
 * @returns {RegExp|null} The compiled pattern, or null when `raw` is not a
 *   non-empty string or does not compile. Callers skip null patterns so that
 *   one bad rule does not disable the rest of the policy.
 */
function compilePolicyPattern(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return null;

  const prefix = /^\(\?([imsux]+)\)/.exec(raw);
  const source = prefix ? raw.slice(prefix[0].length) : raw;
  const flags = prefix
    ? [...new Set(prefix[1])].filter(letter => 'ims'.includes(letter)).join('')
    : '';

  try {
    return new RegExp(source, flags);
  } catch {
    return null;
  }
}
159
+
160
/**
 * Coerce one deny directive into a list of `{ pattern, reason }` rules.
 *
 * Accepts an array whose entries are pattern strings or `{ pattern, reason }`
 * objects; a single bare string is treated as a one-element list. Anything
 * else (missing directive, numbers, null entries) contributes no rules.
 */
function toPatternRules(directive, defaultReason) {
  let entries = [];
  if (Array.isArray(directive)) entries = directive;
  else if (typeof directive === 'string') entries = [directive];

  const rules = [];
  for (const entry of entries) {
    if (typeof entry === 'string') {
      rules.push({ pattern: entry, reason: defaultReason });
    } else if (entry && typeof entry === 'object') {
      rules.push({ pattern: entry.pattern, reason: entry.reason || defaultReason });
    }
  }
  return rules;
}

/**
 * Decide what to do with a normalised tool call.
 *
 *   - null / undefined tool        → BLOCK 'unrecognised-tool-use'
 *   - computer `type` / `key`      → BLOCK when the text matches any
 *                                    `deny_keyboard_patterns` rule
 *   - other computer actions       → ALLOW 'pass-through'
 *   - bash                         → BLOCK on an empty command, on the
 *                                    built-in blacklist, or on any
 *                                    `deny_command_patterns` rule
 *   - str_replace_editor           → ALLOW 'delegated-to-editor-adapter'
 *   - any other kind               → BLOCK 'unrecognised-tool-kind'
 *
 * Both deny directives accept pattern strings and `{ pattern, reason }`
 * objects. Patterns that fail to compile are skipped, not fatal.
 *
 * @param {object|null} tool Output of normalizeToolUse().
 * @param {object} [policy] Policy object; missing directives add no rules.
 * @returns {{decision: string, reason: string, tool: object|null, detail?: string}}
 */
function evaluate(tool, policy = {}) {
  if (!tool) {
    return { decision: 'BLOCK', reason: 'unrecognised-tool-use', tool: null };
  }
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const directives = policy || {};

  if (tool.kind === 'computer') {
    if (tool.action === 'type' || tool.action === 'key') {
      const txt = tool.text || '';
      const rules = toPatternRules(directives.deny_keyboard_patterns, 'keyboard-pattern');
      for (const { pattern, reason } of rules) {
        const re = compilePolicyPattern(pattern);
        if (!re) continue; // malformed → skip, not fatal
        if (re.test(txt)) {
          return {
            decision: 'BLOCK', reason, tool,
            detail: 'keyboard input matches deny pattern',
          };
        }
      }
    }
    // mouse / screenshot / cursor: pass-through under current policy set.
    // Pixel-region rules and rate limits land here when implemented.
    return { decision: 'ALLOW', reason: 'pass-through', tool };
  }

  if (tool.kind === 'bash') {
    const cmd = tool.command || '';
    if (!cmd) return { decision: 'BLOCK', reason: 'empty-command', tool };

    // Built-in lethal-command blacklist — always on, not configurable.
    // Computer-Use agents can invoke shell with little oversight; we
    // refuse the most dangerous one-liners even if policy is permissive.
    for (const re of RESERVED_SHELL_BLACKLIST) {
      if (re.test(cmd)) {
        return { decision: 'BLOCK', reason: 'reserved-blacklist', tool, detail: re.source };
      }
    }
    const rules = toPatternRules(directives.deny_command_patterns, 'command-pattern');
    for (const { pattern, reason } of rules) {
      const re = compilePolicyPattern(pattern);
      if (!re) continue; // malformed → skip, not fatal
      if (re.test(cmd)) {
        return { decision: 'BLOCK', reason, tool, detail: pattern };
      }
    }
    return { decision: 'ALLOW', reason: 'pass-through', tool };
  }

  if (tool.kind === 'str_replace_editor') {
    // File-editor calls flow through the existing Read/Write/Edit path;
    // we forward unchanged here.
    return { decision: 'ALLOW', reason: 'delegated-to-editor-adapter', tool };
  }

  return { decision: 'BLOCK', reason: 'unrecognised-tool-kind', tool };
}
218
+
219
+ module.exports = {
220
+ normalizeToolUse,
221
+ evaluate,
222
+ COMPUTER_ACTIONS,
223
+ KNOWN_TOOL_NAMES,
224
+ // Exposed for tests + custom policy linters.
225
+ _RESERVED_SHELL_BLACKLIST: RESERVED_SHELL_BLACKLIST,
226
+ _compilePolicyPattern: compilePolicyPattern,
227
+ };
@@ -0,0 +1,170 @@
1
+ 'use strict';
2
+
3
/**
 * Flatten a message `content` value to plain text.
 *
 * Strings are returned as-is. Arrays are joined with newlines, taking string
 * entries verbatim and the `text` field of object entries; entries without
 * text (including null or other non-object values) contribute an empty
 * string. Any other input yields ''.
 *
 * @param {*} content Message content: a string or an array of content blocks.
 * @returns {string}
 */
function flattenContent(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  return content
    // `?.` keeps a null/undefined entry from throwing on `.text`.
    .map(block => (typeof block === 'string' ? block : block?.text || ''))
    .join('\n');
}
9
+
10
/**
 * Rough token estimate using the ~4 characters per token approximation.
 *
 * @param {string} [text] Text to measure; a falsy value counts as empty.
 * @returns {number} Character count divided by four, rounded up.
 */
function estimateTokens(text) {
  const charCount = (text || '').length;
  return Math.ceil(charCount / 4);
}
14
+
15
+ // ── Level 1: File-level token accounting ──────────────────────────────────────
16
+
17
/**
 * Estimate how many tokens each file read contributed to a conversation.
 *
 * Modern Claude Code sends structured tool_use / tool_result blocks:
 *   assistant: [{type:"tool_use", id:"toolu_01", name:"Read", input:{file_path:"src/foo.ts"}}]
 *   user:      [{type:"tool_result", tool_use_id:"toolu_01", content:"<file text>"}]
 *
 * The message history is walked twice: first to map tool_use id → file_path
 * for every `Read` call, then to match tool_result blocks by tool_use_id and
 * add up an estimate of their content tokens per file.
 *
 * When that yields nothing, the legacy `--- path ---` delimiter format is
 * parsed out of the flattened message text instead.
 *
 * @param {Array<object>} messages Message history; anything that is not an
 *   array is treated as empty, and null messages or blocks are skipped.
 * @returns {Array<{name: string, tokens: number}>} At most 20 entries, sorted
 *   by token count descending.
 */
function parseFileTokens(messages) {
  const fileMap = new Map();
  const msgs = Array.isArray(messages) ? messages : [];
  // Structured blocks of one message; tolerates null messages and null blocks.
  const blocksOf = msg => (Array.isArray(msg?.content)
    ? msg.content.filter(block => block && typeof block === 'object')
    : []);

  // Pass 1: collect Read tool_use blocks: id → file_path
  const readIdToPath = new Map();
  for (const msg of msgs) {
    for (const block of blocksOf(msg)) {
      if (block.type === 'tool_use' && block.name === 'Read' && block.id && block.input?.file_path) {
        readIdToPath.set(block.id, String(block.input.file_path));
      }
    }
  }

  // Pass 2: match tool_result content to file_path via tool_use_id
  if (readIdToPath.size > 0) {
    for (const msg of msgs) {
      for (const block of blocksOf(msg)) {
        if (block.type !== 'tool_result' || !block.tool_use_id) continue;
        const filePath = readIdToPath.get(block.tool_use_id);
        if (!filePath) continue;
        // content can be a string or an array of content blocks
        const text = flattenContent(block.content);
        fileMap.set(filePath, (fileMap.get(filePath) || 0) + estimateTokens(text));
      }
    }
  }

  // Fallback: legacy `--- path ---` delimiter format
  if (fileMap.size === 0) {
    // matchAll() works on a clone of the regex, so sharing one instance
    // across messages is safe.
    const delimRe = /^---[ \t]+(\S+\.[a-zA-Z0-9]\S*)[ \t]+---\r?\n([\s\S]*?)(?=^---[ \t]+\S+\.[\w]\S*[ \t]+---|$(?![\s\S]))/gm;
    for (const msg of msgs) {
      const text = flattenContent(msg?.content);
      for (const m of text.matchAll(delimRe)) {
        const name = m[1].trim();
        fileMap.set(name, (fileMap.get(name) || 0) + estimateTokens(m[2]));
      }
    }
  }

  return [...fileMap.entries()]
    .map(([name, tokens]) => ({ name, tokens }))
    .sort((a, b) => b.tokens - a.tokens)
    .slice(0, 20);
}
83
+
84
+ // ── Level 2: Secret scanning with line context ────────────────────────────────
85
+
86
// Built-in secret detectors, checked in order. Each entry pairs a regex with
// the label that is reported for a match.
const SECRET_PATTERNS = [
  // High-confidence structural patterns — match regardless of surrounding context
  { re: /sk-ant-api03-[A-Za-z0-9_-]{40,}/, label: 'anthropic-key' },
  { re: /ghp_[A-Za-z0-9]{36}/, label: 'github-pat' },
  { re: /AKIA[0-9A-Z]{16}/, label: 'aws-access-key' },
  // PEM private-key headers, including the DSA and ENCRYPTED (PKCS#8) forms.
  { re: /-----BEGIN (RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY/, label: 'private-key' },
  // Connection URLs with inline credentials. `postgresql://` and
  // `mongodb+srv://` are the alternate official schemes for the same drivers.
  { re: /(postgres(?:ql)?|mysql|mongodb(?:\+srv)?):\/\/[^:]+:[^@]+@/, label: 'db-url' },
  // Generic patterns — require direct assignment (key=value) to reduce false positives.
  // Original broad regexes matched mid-word (apiKeyValidator) and indirect context
  // (TOKEN anywhere then = anywhere on same line). These tightened forms require the
  // keyword immediately before the separator with no intervening tokens.
  { re: /api[_-]?key\s*[:=]\s*['"]?\S{16,}/i, label: 'api-key' },
  { re: /password\s*[:=]\s*['"]?\S{8,}/i, label: 'password' },
  { re: /(?:access|bearer|auth)[_-]?token\s*[:=]\s*['"]?\S{16,}/i, label: 'token' },
];
101
+
102
/**
 * Produce a log-safe rendering of a line: keep its punctuation and spacing
 * but replace every letter and digit with `*`.
 *
 * Only the first 80 characters are kept; longer lines get a trailing `…`.
 * Masking covers all Unicode letters and numbers, not just ASCII, so a
 * secret containing accented or non-Latin characters is not partly logged.
 *
 * @param {string} line Source line.
 * @returns {string} Masked, truncated line.
 */
function redactLine(line) {
  const truncated = line.slice(0, 80) + (line.length > 80 ? '…' : '');
  return truncated.replace(/[\p{L}\p{N}]/gu, '*');
}
107
+
108
/**
 * Scan text line by line for known secret patterns plus any caller-supplied
 * extras.
 *
 * Each line yields at most one hit: patterns are tried in order (built-ins
 * first, then extras) and the first one that matches wins.
 *
 * @param {string} text Text to scan; non-string input yields no hits.
 * @param {object} [opts]
 * @param {Array<{label:string, regex:RegExp}>} [opts.extraPatterns] Custom
 *   compiled patterns. Entries without a RegExp are ignored.
 * @returns {Array<{label:string, line:number, snippet:string}>} One entry per
 *   matching line, with a 1-based line number and a masked snippet.
 */
function scanSecrets(text, opts) {
  if (typeof text !== 'string') return [];
  const extra = (opts && Array.isArray(opts.extraPatterns)) ? opts.extraPatterns : [];
  const patterns = SECRET_PATTERNS.concat(
    extra
      .filter(p => p && p.regex instanceof RegExp)
      .map(p => ({ re: p.regex, label: p.label })),
  );

  // A /g or /y regex remembers lastIndex between test() calls; without a
  // reset, a match on one line makes the next line start scanning mid-string.
  const matches = (re, line) => {
    if (re.global || re.sticky) re.lastIndex = 0;
    const found = re.test(line);
    if (re.global || re.sticky) re.lastIndex = 0;
    return found;
  };

  const hits = [];
  text.split('\n').forEach((line, idx) => {
    const hit = patterns.find(({ re }) => matches(re, line));
    if (hit) {
      hits.push({ label: hit.label, line: idx + 1, snippet: redactLine(line) });
    }
  });
  return hits;
}
141
+
142
/**
 * Replace every secret found in `text` with a `[REDACTED:label]` token.
 *
 * Matching is done per line. All patterns are applied to every line, and all
 * occurrences are replaced, so a line holding several secrets (of the same or
 * different kinds) is fully redacted. The input string is not mutated.
 *
 * @param {string} text Text to redact; non-string input is returned unchanged.
 * @param {object} [opts]
 * @param {Array<{label:string, regex:RegExp}>} [opts.extraPatterns] Custom
 *   compiled patterns, applied after the built-ins. Entries without a RegExp
 *   are ignored.
 * @returns {string} The redacted text.
 */
function redactSecrets(text, opts) {
  if (typeof text !== 'string') return text;
  const extra = (opts && Array.isArray(opts.extraPatterns)) ? opts.extraPatterns : [];
  const patterns = SECRET_PATTERNS.concat(
    extra
      .filter(p => p && p.regex instanceof RegExp)
      .map(p => ({ re: p.regex, label: p.label })),
  );

  // Work on global copies: replaces every occurrence and leaves the caller's
  // regex objects (and their lastIndex) untouched.
  const replacers = patterns.map(({ re, label }) => ({
    re: new RegExp(re.source, `${re.flags.replace(/[gy]/g, '')}g`),
    token: `[REDACTED:${label}]`,
  }));

  return text
    .split('\n')
    .map(line => replacers.reduce(
      // Function replacer: a `$&` or `$1` inside a label must be inserted
      // literally, never expanded back into the matched secret.
      (current, { re, token }) => current.replace(re, () => token),
      line,
    ))
    .join('\n');
}
169
+
170
+ module.exports = { flattenContent, estimateTokens, parseFileTokens, SECRET_PATTERNS, redactLine, scanSecrets, redactSecrets };