@occasiolabs/occasio 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +10 -0
  3. package/README.md +216 -0
  4. package/bin/occasio-mcp.js +5 -0
  5. package/bin/occasio.js +2 -0
  6. package/bin/supervisor/README.md +90 -0
  7. package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
  8. package/bin/supervisor/install-windows-task.ps1 +48 -0
  9. package/bin/supervisor/occasio.service +18 -0
  10. package/docs/AUDIT.md +120 -0
  11. package/docs/attest_verify.py +283 -0
  12. package/docs/audit_walker.py +65 -0
  13. package/docs/canonicalize.py +99 -0
  14. package/docs/compliance-mapping.md +93 -0
  15. package/docs/demos/mcp-block.md +148 -0
  16. package/docs/edr-calibration.md +73 -0
  17. package/docs/edr-demo.md +83 -0
  18. package/docs/python-verifier.md +74 -0
  19. package/docs/reference-pipeline.md +140 -0
  20. package/package.json +69 -0
  21. package/policy-templates/dev-default.yml +84 -0
  22. package/policy-templates/finance.yml +61 -0
  23. package/policy-templates/strict.yml +49 -0
  24. package/schemas/agent-attestation-v1.json +190 -0
  25. package/schemas/occasio-policy.schema.json +99 -0
  26. package/spec/agent-attestation/v1/README.md +137 -0
  27. package/src/adapters/claude-code.js +518 -0
  28. package/src/adapters/cline.js +161 -0
  29. package/src/adapters/computer-use-cli.js +198 -0
  30. package/src/adapters/computer-use.js +227 -0
  31. package/src/analyzer.js +170 -0
  32. package/src/anomaly/cli.js +143 -0
  33. package/src/anomaly/detectors/deny-rate.js +84 -0
  34. package/src/anomaly/detectors/file-read-volume.js +109 -0
  35. package/src/anomaly/detectors/secret-redact-rate.js +107 -0
  36. package/src/anomaly/detectors/unknown-tool-input.js +83 -0
  37. package/src/anomaly/index.js +169 -0
  38. package/src/attest/canonicalize.js +97 -0
  39. package/src/attest/index.js +355 -0
  40. package/src/attest/run-slice.js +57 -0
  41. package/src/attest/sign.js +186 -0
  42. package/src/attest/verify.js +192 -0
  43. package/src/audit/errors.js +21 -0
  44. package/src/audit/input-normalizer.js +121 -0
  45. package/src/audit/jsonl-auditor.js +178 -0
  46. package/src/audit/verifier.js +152 -0
  47. package/src/baseline.js +507 -0
  48. package/src/boundary.js +238 -0
  49. package/src/budget.js +42 -0
  50. package/src/classifier.js +115 -0
  51. package/src/context-budget.js +77 -0
  52. package/src/core/boundary-event.js +75 -0
  53. package/src/core/decision.js +61 -0
  54. package/src/core/pipeline.js +66 -0
  55. package/src/core/tool-names.js +105 -0
  56. package/src/dashboard.js +892 -0
  57. package/src/demo/README.md +31 -0
  58. package/src/demo/anomalies-demo.js +211 -0
  59. package/src/demo/attest-demo.js +198 -0
  60. package/src/distiller.js +155 -0
  61. package/src/embeddings.json +72 -0
  62. package/src/executor/dispatcher.js +230 -0
  63. package/src/harness.js +817 -0
  64. package/src/index.js +1711 -0
  65. package/src/inspect.js +329 -0
  66. package/src/interceptor.js +1198 -0
  67. package/src/lao.js +185 -0
  68. package/src/lao_prep.py +119 -0
  69. package/src/ledger.js +209 -0
  70. package/src/mcp-experiment.js +140 -0
  71. package/src/mcp-normalize.js +139 -0
  72. package/src/mcp-server.js +320 -0
  73. package/src/outbound-policy.js +433 -0
  74. package/src/policy/built-in-classifiers.js +78 -0
  75. package/src/policy/doctor.js +226 -0
  76. package/src/policy/engine.js +339 -0
  77. package/src/policy/init.js +153 -0
  78. package/src/policy/loader.js +448 -0
  79. package/src/policy/rules-default.js +36 -0
  80. package/src/policy/shell-path.js +135 -0
  81. package/src/policy/show.js +196 -0
  82. package/src/policy/validate.js +310 -0
  83. package/src/preflight/cli.js +164 -0
  84. package/src/preflight/miner.js +329 -0
  85. package/src/proxy/agent-router.js +93 -0
  86. package/src/redteam.js +428 -0
  87. package/src/replay.js +446 -0
  88. package/src/report/index.js +224 -0
  89. package/src/runtime.js +595 -0
  90. package/src/scanner/index.js +49 -0
  91. package/src/selftest.js +192 -0
  92. package/src/session.js +36 -0
package/src/runtime.js ADDED
@@ -0,0 +1,595 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * runtime.js — Pure deterministic local execution core.
5
+ *
6
+ * Owns the structured tool handlers for Read, Glob, Grep, TodoWrite, and TodoRead.
7
+ * These are pure filesystem / in-memory functions with no dependency on the
8
+ * interceptor pipeline, Anthropic API, or shell execution. Safe to import in
9
+ * any process context: proxy, MCP server, CLI tools.
10
+ *
11
+ * Also exports executeLocalTool() — a higher-level wrapper that adds distillation,
12
+ * token estimation, and secret scanning to the raw handler results. Used by
13
+ * mcp-server.js so the MCP path has parity with the interceptor path.
14
+ *
15
+ * Imported by:
16
+ * interceptor.js — for native tool dispatch inside interceptToolUse
17
+ * mcp-server.js — for executeLocalTool (hardened execution wrapper)
18
+ */
19
+
20
+ const fs = require('fs');
21
+ const path = require('path');
22
+ const { distill } = require('./distiller');
23
+ const { estimateTokens, scanSecrets } = require('./analyzer');
24
+
25
+ // ── Shared constants ───────────────────────────────────────────────────────────
26
+
27
+ const MAX_OUTPUT = 512 * 1024; // 512 KB — same cap as exec maxBuffer
28
+
29
+ // File extensions the native Read handler cannot serve correctly.
30
+ // PDFs and images need structured rendering (base64, page extraction) that we
31
+ // cannot replicate; Jupyter notebooks need cell-by-cell parsing. All others
32
+ // are treated as UTF-8 text and handled natively.
33
+ const READ_SKIP_EXTENSIONS = new Set([
34
+ '.pdf', '.ipynb',
35
+ '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico',
36
+ '.zip', '.gz', '.tar', '.bz2', '.xz', '.7z', '.rar',
37
+ '.exe', '.dll', '.so', '.dylib',
38
+ ]);
39
+
40
+ // ── Shared helper ──────────────────────────────────────────────────────────────
41
+
42
/**
 * Read a file as UTF-8 text, capped at MAX_OUTPUT bytes.
 *
 * Files larger than the cap are cut at the last complete UTF-8 character at or
 * before MAX_OUTPUT bytes and a truncation marker is appended. Errors raised by
 * fs.readFileSync (ENOENT, EISDIR, ...) propagate to the caller.
 *
 * @param {string} absPath  Path of the file to read.
 * @returns {string} The file text, possibly truncated.
 */
function readFileNative(absPath) {
  const buf = fs.readFileSync(absPath);
  if (buf.length <= MAX_OUTPUT) return buf.toString('utf8');

  // A raw byte cut can land inside a multi-byte sequence, which would decode to
  // U+FFFD. Continuation bytes look like 10xxxxxx: back up (at most 3 bytes, the
  // longest possible tail) until the first excluded byte starts a new character.
  let end = MAX_OUTPUT;
  for (let back = 0; back < 3 && end > 0 && (buf[end] & 0xc0) === 0x80; back++) end--;
  return buf.subarray(0, end).toString('utf8') + '\n[truncated — file too large]';
}
49
+
50
+ // ── Read tool support ──────────────────────────────────────────────────────────
51
+
52
/**
 * Returns true when this Read input can be served natively.
 * Falls back (returns false) for malformed input, a missing/blank file_path,
 * any use of the `pages` parameter (implies PDF range extraction), and file
 * extensions listed in READ_SKIP_EXTENSIONS.
 *
 * The extension is taken from the trimmed path because handleReadTool trims
 * file_path before reading; checking the untrimmed value would let a path such
 * as "doc.pdf " slip past the skip list.
 *
 * @param {object} input  Read tool input ({ file_path, pages?, ... }).
 * @returns {boolean}
 */
function isReadHandleable(input) {
  if (!input || typeof input !== 'object') return false;
  if (typeof input.file_path !== 'string') return false;
  const fp = input.file_path.trim();
  if (!fp) return false;
  if (input.pages != null) return false;
  const ext = path.extname(fp).toLowerCase();
  return !READ_SKIP_EXTENSIONS.has(ext);
}
65
+
66
/**
 * Read a file natively and return content formatted like `cat -n` (1-based line
 * numbers), honouring the optional offset (0-based line index) and limit fields
 * that the Claude Code Read tool sends for partial reads.
 *
 * A trailing newline terminates the last line rather than starting a new one,
 * so "a\nb\n" yields two numbered lines, not three. Fractional offset/limit
 * values are floored so line numbers are always whole numbers.
 *
 * @param {object} input  { file_path: string, offset?: number, limit?: number }
 * @returns {{ output: string, exitCode: number }} exitCode is 1 when file_path
 *   is missing or the file cannot be read.
 */
function handleReadTool(input) {
  const fp = (input?.file_path || '').trim();
  if (!fp) return { output: '(no file_path provided)', exitCode: 1 };

  const abs = path.resolve(process.cwd(), fp);
  try {
    const content = readFileNative(abs); // already caps at MAX_OUTPUT
    const lines = content.split('\n');
    // Drop the empty element produced by a final "\n" (but keep a lone empty line).
    if (lines.length > 1 && lines[lines.length - 1] === '') lines.pop();

    const offset = (typeof input.offset === 'number' && input.offset >= 0)
      ? Math.floor(input.offset)
      : 0;
    const limit = (typeof input.limit === 'number' && input.limit > 0)
      ? Math.floor(input.limit)
      : lines.length;
    const slice = lines.slice(offset, offset + limit);
    // Line numbers reflect position in the file (not the slice), matching cat -n.
    const formatted = slice
      .map((l, i) => `${String(offset + i + 1).padStart(6)}\t${l}`)
      .join('\n');
    return { output: formatted, exitCode: 0 };
  } catch (e) {
    const msg = e.code === 'ENOENT'
      ? `${fp}: No such file or directory`
      : `${fp}: ${e.message}`;
    return { output: `Read: ${msg}`, exitCode: 1 };
  }
}
92
+
93
+ // ── Glob tool support ──────────────────────────────────────────────────────────
94
+
95
+ // Characters that indicate shell injection in a glob pattern.
96
+ // We reject patterns containing these so handleGlobTool stays read-only.
97
+ const GLOB_INJECTION_RE = /[;&|`$<>!]/;
98
+
99
+ // Directories skipped during recursive glob walks.
100
+ const GLOB_SKIP = new Set(['node_modules', '.git', '.hg', '.svn', 'dist', 'build', '__pycache__', '.venv', 'venv']);
101
+
102
+ // Maximum number of matches returned to avoid overwhelming the model context.
103
+ const GLOB_MAX = 500;
104
+
105
/**
 * Decide whether a Glob tool input can be served by the native handler.
 * Requires an object with a non-blank string `pattern` that contains none of
 * the characters in GLOB_INJECTION_RE, and — when present — a string `path`.
 */
function isGlobHandleable(input) {
  const isObject = Boolean(input) && typeof input === 'object';
  if (!isObject) return false;

  const { pattern, path: basePath } = input;
  const hasPattern = typeof pattern === 'string' && pattern.trim() !== '';
  if (!hasPattern) return false;

  if (GLOB_INJECTION_RE.test(pattern)) return false;

  const pathIsAcceptable = basePath == null || typeof basePath === 'string';
  return pathIsAcceptable;
}
113
+
114
/**
 * Escape regex metacharacters in a literal string segment so the result can be
 * embedded in a RegExp source and match the input text exactly.
 * Includes the quantifiers `*` and `?`: leaving them unescaped turns a literal
 * segment into a quantifier (or an invalid "nothing to repeat" pattern).
 *
 * @param {string} s  Literal text.
 * @returns {string} Regex-safe source matching exactly `s`.
 */
function escapeRegexChars(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
118
+
119
/**
 * Convert a glob pattern to an anchored RegExp.
 * Supports: ** (any path depth), * (single segment), ? (single char),
 * {ts,tsx} (alternation — each alternative may itself contain wildcards),
 * [abc] / [!abc] / [^abc] (character classes).
 * Exported for unit testing.
 *
 * A segment-leading double-star followed by a slash matches zero or more whole
 * directories, so `src/**` + `/foo.js` matches `src/foo.js` and `src/a/foo.js`
 * but not `src/barfoo.js`.
 *
 * @param {string} pattern  Glob pattern; backslashes are treated as separators.
 * @returns {RegExp} Case-insensitive on Windows, case-sensitive elsewhere.
 * @throws {SyntaxError} If the translated source is not a valid regex
 *   (e.g. a malformed character class).
 */
function globToRegex(pattern) {
  // Normalise Windows separators in the pattern.
  const source = globSourceToRegexSource(pattern.replace(/\\/g, '/'));

  // On Windows, matching is case-insensitive; on POSIX it's case-sensitive.
  const flags = process.platform === 'win32' ? 'i' : '';
  return new RegExp(`^${source}$`, flags);
}

// Translate a (separator-normalised) glob fragment into un-anchored regex source.
// Recurses for the alternatives inside {a,b}; nested braces are not supported.
function globSourceToRegexSource(p) {
  let re = '';
  let i = 0;
  while (i < p.length) {
    const ch = p[i];

    // ** — match any path depth.
    if (ch === '*' && p[i + 1] === '*') {
      const startsSegment = i === 0 || p[i - 1] === '/';
      i += 2;
      if (p[i] === '/') {
        i++; // consume separator after **
        // Only whole directories may be skipped, so the next literal must
        // begin at a segment boundary.
        re += startsSegment ? '(?:.*/)?' : '.*';
      } else {
        re += '.*';
      }
      continue;
    }
    // * — match within a single path segment
    if (ch === '*') { re += '[^/]*'; i++; continue; }
    // ? — match a single character within a segment
    if (ch === '?') { re += '[^/]'; i++; continue; }
    // {a,b,c} — alternation; each alternative is itself a glob fragment
    if (ch === '{') {
      const end = p.indexOf('}', i);
      if (end !== -1) {
        const alts = p.slice(i + 1, end).split(',').map((alt) => globSourceToRegexSource(alt));
        re += `(?:${alts.join('|')})`;
        i = end + 1;
        continue;
      }
    }
    // [abc] / [^abc] — pass through; translate glob negation [!abc] to [^abc]
    if (ch === '[') {
      const end = p.indexOf(']', i);
      if (end !== -1) {
        const body = p.slice(i + 1, end);
        re += body.startsWith('!') ? `[^${body.slice(1)}]` : `[${body}]`;
        i = end + 1;
        continue;
      }
    }
    re += escapeRegexChars(ch);
    i++;
  }
  return re;
}
166
+
167
/**
 * Depth-first walk of `dir`, appending to `results` every non-directory entry
 * whose path relative to `baseDir` (forward slashes) satisfies `regex`.
 * Entries named in GLOB_SKIP are ignored, unreadable directories are skipped
 * silently, and the walk stops once `results` holds GLOB_MAX items.
 */
function walkGlob(dir, baseDir, regex, results) {
  if (results.length >= GLOB_MAX) return;

  let dirents;
  try {
    dirents = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const dirent of dirents) {
    if (results.length >= GLOB_MAX) break;
    if (GLOB_SKIP.has(dirent.name)) continue;

    const fullPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      walkGlob(fullPath, baseDir, regex, results);
      continue;
    }

    // Normalise to forward slashes so matching is the same on every platform.
    const relPath = path.relative(baseDir, fullPath).replace(/\\/g, '/');
    if (regex.test(relPath)) results.push(relPath);
  }
}
190
+
191
/**
 * Resolve glob pattern + optional base path to a sorted list of matching paths.
 * Matches are reported relative to the base directory when it is the CWD, and
 * prefixed with the resolved base directory otherwise.
 *
 * The base path may lie outside the CWD: Glob only reads directory listings.
 * An inaccessible or non-directory base path is reported as an error (exit
 * code 1) instead of being indistinguishable from "(no matches)", mirroring
 * how handleGrepTool reports an unreachable search path.
 *
 * @param {object} input  { pattern: string, path?: string }
 * @returns {{ output: string, exitCode: number, matchCount: number }}
 */
function handleGlobTool(input) {
  const pattern = (input?.pattern || '').trim();
  if (!pattern) return { output: '(no pattern provided)', exitCode: 1, matchCount: 0 };

  const cwd = process.cwd();
  const baseDir = input?.path ? path.resolve(cwd, input.path) : cwd;

  try {
    if (!fs.statSync(baseDir).isDirectory()) {
      return { output: `Glob: cannot access path: ${baseDir}: not a directory`, exitCode: 1, matchCount: 0 };
    }
  } catch (e) {
    return { output: `Glob: cannot access path: ${e.message}`, exitCode: 1, matchCount: 0 };
  }

  let regex;
  try { regex = globToRegex(pattern); }
  catch (e) { return { output: `Glob: invalid pattern: ${e.message}`, exitCode: 1, matchCount: 0 }; }

  const results = [];
  walkGlob(baseDir, baseDir, regex, results);
  results.sort();

  // walkGlob stops collecting at GLOB_MAX, so reaching the cap means more may exist.
  const truncated = results.length >= GLOB_MAX;
  const prefix = baseDir !== cwd ? baseDir : '';
  const lines = results.map(r => path.join(prefix, r).replace(/\\/g, '/'));
  const output = lines.join('\n') + (truncated ? `\n(truncated at ${GLOB_MAX} results)` : '');
  return { output: output || '(no matches)', exitCode: 0, matchCount: results.length };
}
222
+
223
+ // ── Grep tool support ──────────────────────────────────────────────────────────
224
+
225
+ const GREP_MAX_RESULTS = 250; // default output cap — matches Claude Code head_limit default
226
+ const GREP_FILE_CAP = 10_000; // safety limit on files walked before stopping
227
+
228
+ // File-type → extension mapping, matching ripgrep's --type names.
229
+ const GREP_TYPE_EXTS = new Map([
230
+ ['js', ['.js', '.mjs', '.cjs']],
231
+ ['ts', ['.ts', '.tsx', '.mts', '.cts']],
232
+ ['py', ['.py', '.pyi']],
233
+ ['rust', ['.rs']],
234
+ ['go', ['.go']],
235
+ ['java', ['.java']],
236
+ ['rb', ['.rb']],
237
+ ['css', ['.css', '.scss', '.sass', '.less']],
238
+ ['html', ['.html', '.htm']],
239
+ ['json', ['.json', '.jsonc']],
240
+ ['md', ['.md', '.mdx']],
241
+ ['yaml', ['.yaml', '.yml']],
242
+ ['sh', ['.sh', '.bash', '.zsh']],
243
+ ['c', ['.c', '.h']],
244
+ ['cpp', ['.cpp', '.cc', '.cxx', '.hpp', '.hh']],
245
+ ]);
246
+
247
+ const VALID_GREP_MODES = new Set(['content', 'files_with_matches', 'count']);
248
+
249
/**
 * Decide whether a Grep tool input can be served by the native handler.
 * Needs an object with a non-blank string `pattern`; `path`, `glob` and `type`
 * must be strings when present; `output_mode`, when present, must be one of
 * VALID_GREP_MODES; and `multiline: true` is rejected because cross-line
 * matching (rg -U) is not supported natively.
 */
function isGrepHandleable(input) {
  if (!input || typeof input !== 'object') return false;

  const { pattern } = input;
  if (typeof pattern !== 'string' || pattern.trim() === '') return false;

  // Optional fields must be the right type when present.
  for (const key of ['path', 'glob', 'type']) {
    const value = input[key];
    if (value != null && typeof value !== 'string') return false;
  }

  const mode = input.output_mode;
  if (mode != null && !VALID_GREP_MODES.has(mode)) return false;

  return input.multiline !== true;
}
262
+
263
/**
 * Load a file for grep as UTF-8 text, using at most the first MAX_OUTPUT bytes.
 * Returns null when the file cannot be read or when a NUL byte appears in the
 * first 512 bytes (treated as a binary file and skipped).
 */
function tryReadGrep(absPath) {
  try {
    const bytes = fs.readFileSync(absPath);

    const looksBinary = bytes.subarray(0, 512).includes(0);
    if (looksBinary) return null;

    const capped = bytes.length > MAX_OUTPUT ? bytes.subarray(0, MAX_OUTPUT) : bytes;
    return capped.toString('utf8');
  } catch {
    return null;
  }
}
271
+
272
/**
 * Depth-first walk of `dir`, appending absolute paths of candidate files to
 * `results`. Entries named in GLOB_SKIP are ignored, unreadable directories are
 * skipped silently, and collection stops at GREP_FILE_CAP files.
 *
 * A file is kept when its lower-cased extension is in `typeExts` (if given) and
 * it satisfies `globRegex` (if given). Globs containing a directory part are
 * tested against the path relative to `baseDir`; plain filename globs
 * (e.g. "*.ts") are tested against the basename only.
 */
function walkGrepFiles(dir, baseDir, globRegex, globHasDir, typeExts, results) {
  if (results.length >= GREP_FILE_CAP) return;

  let dirents;
  try {
    dirents = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const dirent of dirents) {
    if (results.length >= GREP_FILE_CAP) break;
    if (GLOB_SKIP.has(dirent.name)) continue;

    const fullPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      walkGrepFiles(fullPath, baseDir, globRegex, globHasDir, typeExts, results);
      continue;
    }

    const wrongType = typeExts && !typeExts.includes(path.extname(fullPath).toLowerCase());
    if (wrongType) continue;

    if (globRegex) {
      const subject = globHasDir
        ? path.relative(baseDir, fullPath).replace(/\\/g, '/')
        : path.basename(fullPath);
      if (!globRegex.test(subject)) continue;
    }

    results.push(fullPath);
  }
}
298
+
299
/**
 * Execute a structured Grep tool call locally.
 *
 * Supports: pattern, path, glob, type, output_mode (files_with_matches | content | count),
 * -i (case-insensitive), -C / context / -A / -B (context lines), head_limit, offset.
 *
 * Does NOT support multiline (cross-line regex) — isGrepHandleable rejects those.
 * Matching is therefore line-oriented in every output mode, so anchors such as
 * `^` and `$` mean "start/end of a line" consistently.
 *
 * @param {object} input  Grep tool input (see supported fields above).
 * @returns {{ output: string, exitCode: number, matchCount: number }}
 *   matchCount is the number of matching files (files_with_matches) or matching
 *   lines (content, count) found before output was cut off.
 */
function handleGrepTool(input) {
  const pattern = (input?.pattern || '').trim();
  if (!pattern) return { output: '(no pattern provided)', exitCode: 1, matchCount: 0 };

  const searchRoot = input?.path
    ? path.resolve(process.cwd(), input.path)
    : process.cwd();

  const outputMode = input?.output_mode || 'files_with_matches';
  const caseInsens = input?.['-i'] === true;

  // Context sizes must be non-negative whole numbers; a negative value would
  // otherwise produce an inverted (empty) context window and hide the match.
  const lineCount = (value, fallback) =>
    (typeof value === 'number' && Number.isFinite(value) ? Math.max(0, Math.floor(value)) : fallback);
  const contextN = lineCount(input?.['-C'], lineCount(input?.context, 0));
  const linesBefore = lineCount(input?.['-B'], contextN);
  const linesAfter = lineCount(input?.['-A'], contextN);

  const headLimit = typeof input?.head_limit === 'number' && input.head_limit > 0
    ? Math.min(input.head_limit, GREP_MAX_RESULTS)
    : GREP_MAX_RESULTS;
  const skipLines = typeof input?.offset === 'number' && input.offset > 0 ? input.offset : 0;

  let regex;
  try {
    // No 'g' flag: a global regex makes .test() stateful through lastIndex.
    regex = new RegExp(pattern, caseInsens ? 'i' : '');
  } catch (e) {
    return { output: `Grep: invalid pattern: ${e.message}`, exitCode: 1, matchCount: 0 };
  }

  // Build type extension filter. Unknown type names are treated as an extension.
  let typeExts = null;
  if (input?.type) {
    const t = input.type.toLowerCase();
    typeExts = GREP_TYPE_EXTS.get(t) || [t.startsWith('.') ? t : `.${t}`];
  }

  // Build glob file filter.
  let globRegex = null;
  let globHasDir = false;
  if (input?.glob) {
    try {
      globRegex = globToRegex(input.glob);
      globHasDir = input.glob.includes('/') || input.glob.includes('**');
    } catch { /* ignore invalid glob — no filter applied */ }
  }

  // Collect candidate files. An explicit file path bypasses the glob/type filters.
  const files = [];
  try {
    const stat = fs.statSync(searchRoot);
    if (stat.isFile()) {
      files.push(searchRoot);
    } else {
      walkGrepFiles(searchRoot, searchRoot, globRegex, globHasDir, typeExts, files);
      files.sort();
    }
  } catch (e) {
    return { output: `Grep: cannot access path: ${e.message}`, exitCode: 1, matchCount: 0 };
  }

  const outputLines = [];
  let totalMatches = 0;
  let truncated = false;
  // Collect skipLines + headLimit lines so the offset window can be sliced out below.
  const wantMore = () => outputLines.length < skipLines + headLimit;
  const relOf = abs => path.relative(searchRoot, abs).replace(/\\/g, '/') || path.basename(abs);

  for (const absFile of files) {
    if (!wantMore()) { truncated = true; break; }
    const content = tryReadGrep(absFile);
    if (!content) continue; // unreadable, binary, or empty

    const fileLines = content.split('\n');

    if (outputMode === 'files_with_matches') {
      if (fileLines.some(line => regex.test(line))) {
        totalMatches++;
        outputLines.push(relOf(absFile));
      }
      continue;
    }

    if (outputMode === 'count') {
      const count = fileLines.filter(line => regex.test(line)).length;
      if (count > 0) {
        totalMatches += count;
        outputLines.push(`${relOf(absFile)}:${count}`);
      }
      continue;
    }

    // content mode
    const matchIdx = [];
    for (let i = 0; i < fileLines.length; i++) {
      if (regex.test(fileLines[i])) matchIdx.push(i);
    }
    if (!matchIdx.length) continue;
    totalMatches += matchIdx.length;
    const matchSet = new Set(matchIdx);
    const fileLabel = relOf(absFile);

    // Merge context windows into non-overlapping groups (matchIdx is ascending).
    const groups = [];
    let gs = -1, ge = -1;
    for (const idx of matchIdx) {
      const s = Math.max(0, idx - linesBefore);
      const e = Math.min(fileLines.length - 1, idx + linesAfter);
      if (gs === -1) { gs = s; ge = e; }
      else if (s <= ge + 1) { ge = Math.max(ge, e); }
      else { groups.push([gs, ge]); gs = s; ge = e; }
    }
    if (gs !== -1) groups.push([gs, ge]);

    let firstGroup = true;
    for (const [gStart, gEnd] of groups) {
      if (!wantMore()) { truncated = true; break; }
      if (!firstGroup) outputLines.push('--');
      firstGroup = false;
      let i = gStart;
      for (; i <= gEnd && wantMore(); i++) {
        const sep = matchSet.has(i) ? ':' : '-';
        outputLines.push(`${fileLabel}${sep}${i + 1}${sep}${fileLines[i]}`);
      }
      // The limit was hit inside this group: lines were dropped, so say so.
      if (i <= gEnd) { truncated = true; break; }
    }
  }

  const sliced = outputLines.slice(skipLines, skipLines + headLimit);
  const text = sliced.join('\n') || '(no matches)';
  const suffix = truncated ? '\n(truncated — use head_limit/offset to paginate)' : '';
  return { output: text + suffix, exitCode: 0, matchCount: totalMatches };
}
435
+
436
+ // ── Todo tool support ──────────────────────────────────────────────────────────
437
+
438
/**
 * Report whether a Todo tool call can be handled natively.
 * - 'TodoRead' is always accepted; it has no required inputs.
 * - 'TodoWrite' is accepted only when `input` is an object whose `todos` is an array.
 * - Any other tool name is rejected.
 */
function isTodoHandleable(input, toolName) {
  switch (toolName) {
    case 'TodoRead':
      return true;
    case 'TodoWrite':
      return Boolean(input) && typeof input === 'object' && Array.isArray(input.todos);
    default:
      return false;
  }
}
451
+
452
/**
 * Serve a TodoWrite call by replacing the contents of `todoStore` in place with
 * the entries of input.todos (the same array object is kept).
 * Success yields { output: '', exitCode: 0, taskCount: N } — Claude Code expects
 * an empty-string response from write tools. A non-array `todos` yields an
 * error result with exitCode 1 and leaves the store untouched.
 */
function handleTodoWriteTool(input, todoStore) {
  const incoming = input?.todos;

  if (Array.isArray(incoming)) {
    todoStore.splice(0, todoStore.length, ...incoming);
    return { output: '', exitCode: 0, taskCount: incoming.length };
  }

  return { output: 'TodoWrite: todos must be an array', exitCode: 1, taskCount: 0 };
}
465
+
466
/**
 * Serve a TodoRead call: serialise the session todo list as 2-space-indented
 * JSON. Always returns exitCode 0; taskCount is the number of stored entries.
 */
function handleTodoReadTool(todoStore) {
  return {
    output: JSON.stringify(todoStore, null, 2),
    exitCode: 0,
    taskCount: todoStore.length,
  };
}
474
+
475
+ // ── MCP execution wrapper ──────────────────────────────────────────────────────
476
+
477
/**
 * Execute a normalized local tool call with distillation, token estimation,
 * and secret scanning applied. Returns a canonical result shape for MCP use.
 *
 * `normalizedInput` may be omitted (e.g. for TodoRead, which has no required
 * inputs); the individual handlers report missing fields as error results.
 *
 * @param {string} toolName 'read_file' | 'find_files' | 'grep' | 'TodoWrite' | 'TodoRead'
 * @param {object} [normalizedInput] Already normalized (via mcp-normalize.js or similar)
 * @param {Array} [todoStore=[]] Mutable session todo list (passed through to Todo handlers)
 * @returns {{
 *   content: string,          // final text to send to the model
 *   exitCode: number,
 *   outputTokens: number,
 *   bytes: number,
 *   distilled: boolean,
 *   distillSaved: number,     // tokens saved by distillation (0 if not distilled)
 *   distillLabel: string|null,
 *   rawContent: string|null,  // original pre-distill content (null if not distilled)
 *   secrets: Array,           // [{label, line, snippet}]
 *   matchCount?: number,      // grep / glob only
 *   taskCount?: number,       // todo only
 * }}
 */
function executeLocalTool(toolName, normalizedInput, todoStore = []) {
  let raw;
  const extra = {};

  switch (toolName) {
    case 'read_file':
      raw = handleReadTool(normalizedInput);
      break;
    case 'find_files':
      raw = handleGlobTool(normalizedInput);
      extra.matchCount = raw.matchCount;
      break;
    case 'grep':
      raw = handleGrepTool(normalizedInput);
      extra.matchCount = raw.matchCount;
      break;
    case 'TodoWrite':
      raw = handleTodoWriteTool(normalizedInput, todoStore);
      extra.taskCount = raw.taskCount;
      break;
    case 'TodoRead':
      raw = handleTodoReadTool(todoStore);
      extra.taskCount = raw.taskCount;
      break;
    default:
      return {
        content: `Unknown tool: ${toolName}`, exitCode: 1,
        outputTokens: 0, bytes: 0,
        distilled: false, distillSaved: 0, distillLabel: null, rawContent: null,
        secrets: [],
      };
  }

  const rawOutput = raw.output;
  const bytes = Buffer.byteLength(rawOutput, 'utf8');

  // Choose synthetic cmd string so classifyCmd fires correctly.
  // Optional chaining: the handlers tolerate a missing input object, so this
  // wrapper must not throw on it after the handler has already succeeded.
  let distillCmd;
  if (toolName === 'grep') {
    distillCmd = 'grep ' + (normalizedInput?.pattern || '');
  } else if (toolName === 'find_files') {
    distillCmd = 'find . -name ' + (normalizedInput?.pattern || '');
  } else {
    // read_file and todo tools: no distillation category — pass file path so
    // classifyCmd returns null and the output passes through unchanged.
    distillCmd = normalizedInput?.file_path || toolName;
  }

  const dr = distill(distillCmd, rawOutput);
  const content = dr.content;
  const distilled = dr.distilled || false;
  const rawContent = distilled ? dr.rawContent : null;
  const distillSaved = distilled ? estimateTokens(rawOutput) - estimateTokens(content) : 0;
  const distillLabel = dr.label || null;
  const outputTokens = estimateTokens(content);
  const secrets = scanSecrets(content);

  return {
    content,
    exitCode: raw.exitCode,
    outputTokens,
    bytes,
    distilled,
    distillSaved,
    distillLabel,
    rawContent,
    secrets,
    ...extra,
  };
}
561
+
562
+ // ── Exports ────────────────────────────────────────────────────────────────────
563
+
564
+ module.exports = {
565
+ // Shared
566
+ MAX_OUTPUT,
567
+ readFileNative,
568
+ // Read
569
+ READ_SKIP_EXTENSIONS,
570
+ isReadHandleable,
571
+ handleReadTool,
572
+ // Glob
573
+ GLOB_INJECTION_RE,
574
+ GLOB_SKIP,
575
+ GLOB_MAX,
576
+ isGlobHandleable,
577
+ globToRegex,
578
+ walkGlob,
579
+ handleGlobTool,
580
+ // Grep
581
+ GREP_MAX_RESULTS,
582
+ GREP_FILE_CAP,
583
+ GREP_TYPE_EXTS,
584
+ VALID_GREP_MODES,
585
+ isGrepHandleable,
586
+ tryReadGrep,
587
+ walkGrepFiles,
588
+ handleGrepTool,
589
+ // Todo
590
+ isTodoHandleable,
591
+ handleTodoWriteTool,
592
+ handleTodoReadTool,
593
+ // MCP execution wrapper
594
+ executeLocalTool,
595
+ };
package/src/scanner/index.js ADDED
@@ -0,0 +1,49 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Scanner — uniform `transform(input) → { hits, content }` interface over
5
+ * the existing secret scanner and distiller.
6
+ *
7
+ * Stage 1 wraps:
8
+ * - analyzer.scanSecrets → secret detection on string content
9
+ * - distiller.distill → content reduction for tool results
10
+ *
11
+ * The dispatcher's TRANSFORM branch will call these in Stage 2 when the
12
+ * policy engine starts emitting TRANSFORM Decisions natively. For now the
13
+ * scanner is exported as a layer module so callers can use the canonical
14
+ * contract; legacy interceptToolUse still does its own scanner/distill calls.
15
+ */
16
+
17
+ const { scanSecrets, redactSecrets } = require('../analyzer');
18
+ const { distill } = require('../distiller');
19
+
20
/**
 * Secret-scan a piece of content.
 * Non-string input is converted with its own toString() when it has one,
 * otherwise (or when that yields an empty value) it is treated as ''.
 * @returns {{ hits: Array<{label, line, snippet}>, content: string }}
 *   `content` is the scanned text, unmodified.
 */
function scan(content) {
  let text = content;
  if (typeof text !== 'string') {
    text = content?.toString?.() || '';
  }
  const hits = scanSecrets(text);
  return { hits, content: text };
}
28
+
29
/**
 * Pass (label, content) through the distiller and hand back its result as-is,
 * leaving the choice between distilled and raw form to the caller.
 * A falsy label defaults to 'tool-result'; non-string content is converted
 * with its own toString() when available, otherwise treated as ''.
 */
function reduce(label, content) {
  const isText = typeof content === 'string';
  const text = isText ? content : (content?.toString?.() || '');
  const effectiveLabel = label || 'tool-result';
  return distill(effectiveLabel, text);
}
37
+
38
/**
 * Scan, then redact. The scan runs on the original text first so the reported
 * hits keep accurate line metadata; the returned content is the output of
 * redactSecrets on that same text.
 * Non-string input is converted with its own toString() when available,
 * otherwise treated as ''.
 * @returns {{ hits: Array<{label, line, snippet}>, content: string }}
 */
function redact(content) {
  let text;
  if (typeof content === 'string') {
    text = content;
  } else {
    text = content?.toString?.() || '';
  }

  const hits = scanSecrets(text);
  const redacted = redactSecrets(text);
  return { hits, content: redacted };
}
48
+
49
+ module.exports = { scan, reduce, redact };