@occasiolabs/occasio 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/NOTICE +10 -0
- package/README.md +216 -0
- package/bin/occasio-mcp.js +5 -0
- package/bin/occasio.js +2 -0
- package/bin/supervisor/README.md +90 -0
- package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
- package/bin/supervisor/install-windows-task.ps1 +48 -0
- package/bin/supervisor/occasio.service +18 -0
- package/docs/AUDIT.md +120 -0
- package/docs/attest_verify.py +283 -0
- package/docs/audit_walker.py +65 -0
- package/docs/canonicalize.py +99 -0
- package/docs/compliance-mapping.md +93 -0
- package/docs/demos/mcp-block.md +148 -0
- package/docs/edr-calibration.md +73 -0
- package/docs/edr-demo.md +83 -0
- package/docs/python-verifier.md +74 -0
- package/docs/reference-pipeline.md +140 -0
- package/package.json +69 -0
- package/policy-templates/dev-default.yml +84 -0
- package/policy-templates/finance.yml +61 -0
- package/policy-templates/strict.yml +49 -0
- package/schemas/agent-attestation-v1.json +190 -0
- package/schemas/occasio-policy.schema.json +99 -0
- package/spec/agent-attestation/v1/README.md +137 -0
- package/src/adapters/claude-code.js +518 -0
- package/src/adapters/cline.js +161 -0
- package/src/adapters/computer-use-cli.js +198 -0
- package/src/adapters/computer-use.js +227 -0
- package/src/analyzer.js +170 -0
- package/src/anomaly/cli.js +143 -0
- package/src/anomaly/detectors/deny-rate.js +84 -0
- package/src/anomaly/detectors/file-read-volume.js +109 -0
- package/src/anomaly/detectors/secret-redact-rate.js +107 -0
- package/src/anomaly/detectors/unknown-tool-input.js +83 -0
- package/src/anomaly/index.js +169 -0
- package/src/attest/canonicalize.js +97 -0
- package/src/attest/index.js +355 -0
- package/src/attest/run-slice.js +57 -0
- package/src/attest/sign.js +186 -0
- package/src/attest/verify.js +192 -0
- package/src/audit/errors.js +21 -0
- package/src/audit/input-normalizer.js +121 -0
- package/src/audit/jsonl-auditor.js +178 -0
- package/src/audit/verifier.js +152 -0
- package/src/baseline.js +507 -0
- package/src/boundary.js +238 -0
- package/src/budget.js +42 -0
- package/src/classifier.js +115 -0
- package/src/context-budget.js +77 -0
- package/src/core/boundary-event.js +75 -0
- package/src/core/decision.js +61 -0
- package/src/core/pipeline.js +66 -0
- package/src/core/tool-names.js +105 -0
- package/src/dashboard.js +892 -0
- package/src/demo/README.md +31 -0
- package/src/demo/anomalies-demo.js +211 -0
- package/src/demo/attest-demo.js +198 -0
- package/src/distiller.js +155 -0
- package/src/embeddings.json +72 -0
- package/src/executor/dispatcher.js +230 -0
- package/src/harness.js +817 -0
- package/src/index.js +1711 -0
- package/src/inspect.js +329 -0
- package/src/interceptor.js +1198 -0
- package/src/lao.js +185 -0
- package/src/lao_prep.py +119 -0
- package/src/ledger.js +209 -0
- package/src/mcp-experiment.js +140 -0
- package/src/mcp-normalize.js +139 -0
- package/src/mcp-server.js +320 -0
- package/src/outbound-policy.js +433 -0
- package/src/policy/built-in-classifiers.js +78 -0
- package/src/policy/doctor.js +226 -0
- package/src/policy/engine.js +339 -0
- package/src/policy/init.js +153 -0
- package/src/policy/loader.js +448 -0
- package/src/policy/rules-default.js +36 -0
- package/src/policy/shell-path.js +135 -0
- package/src/policy/show.js +196 -0
- package/src/policy/validate.js +310 -0
- package/src/preflight/cli.js +164 -0
- package/src/preflight/miner.js +329 -0
- package/src/proxy/agent-router.js +93 -0
- package/src/redteam.js +428 -0
- package/src/replay.js +446 -0
- package/src/report/index.js +224 -0
- package/src/runtime.js +595 -0
- package/src/scanner/index.js +49 -0
- package/src/selftest.js +192 -0
- package/src/session.js +36 -0
package/src/runtime.js
ADDED
|
@@ -0,0 +1,595 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* runtime.js — Pure deterministic local execution core.
|
|
5
|
+
*
|
|
6
|
+
* Owns the structured tool handlers for Read, Glob, Grep, TodoWrite, and TodoRead.
|
|
7
|
+
* These are pure filesystem / in-memory functions with no dependency on the
|
|
8
|
+
* interceptor pipeline, Anthropic API, or shell execution. Safe to import in
|
|
9
|
+
* any process context: proxy, MCP server, CLI tools.
|
|
10
|
+
*
|
|
11
|
+
* Also exports executeLocalTool() — a higher-level wrapper that adds distillation,
|
|
12
|
+
* token estimation, and secret scanning to the raw handler results. Used by
|
|
13
|
+
* mcp-server.js so the MCP path has parity with the interceptor path.
|
|
14
|
+
*
|
|
15
|
+
* Imported by:
|
|
16
|
+
* interceptor.js — for native tool dispatch inside interceptToolUse
|
|
17
|
+
* mcp-server.js — for executeLocalTool (hardened execution wrapper)
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
const fs = require('fs');
|
|
21
|
+
const path = require('path');
|
|
22
|
+
const { distill } = require('./distiller');
|
|
23
|
+
const { estimateTokens, scanSecrets } = require('./analyzer');
|
|
24
|
+
|
|
25
|
+
// ── Shared constants ───────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
/**
 * Byte cap applied to file content by the native read helpers (512 KB).
 * The original note states this mirrors the exec maxBuffer cap.
 */
const MAX_OUTPUT = 1024 * 512;

/**
 * Lower-case file extensions that isReadHandleable refuses to serve natively.
 * Grouped by reason: documents/notebooks that need structured rendering,
 * images, archives, and native binaries. Any other extension is read as
 * UTF-8 text.
 */
const READ_SKIP_EXTENSIONS = new Set([
  // Structured documents
  ...['.pdf', '.ipynb'],
  // Images
  ...['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico'],
  // Archives
  ...['.zip', '.gz', '.tar', '.bz2', '.xz', '.7z', '.rar'],
  // Executables and shared libraries
  ...['.exe', '.dll', '.so', '.dylib'],
]);
|
|
39
|
+
|
|
40
|
+
// ── Shared helper ──────────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
/**
 * Read a file as UTF-8 text, capped at MAX_OUTPUT bytes.
 *
 * Files larger than the cap are cut and get a trailing
 * "[truncated — file too large]" marker line. The cut is moved back to the
 * nearest UTF-8 character boundary, so a multi-byte character straddling the
 * cap is dropped whole instead of decoding to U+FFFD.
 *
 * @param {string} absPath Path handed straight to fs.readFileSync.
 * @returns {string} Decoded (possibly truncated) file content.
 * @throws Whatever fs.readFileSync throws (ENOENT, EISDIR, EACCES, ...).
 */
function readFileNative(absPath) {
  const buf = fs.readFileSync(absPath);
  if (buf.length <= MAX_OUTPUT) return buf.toString('utf8');

  // buf[end] is the first byte that is NOT emitted. If it is a UTF-8
  // continuation byte (10xxxxxx) the cut would land inside a character, so
  // step back to that character's lead byte. A valid sequence has at most
  // three continuation bytes, which bounds the loop for non-UTF-8 data.
  let end = MAX_OUTPUT;
  for (let back = 0; back < 3 && end > 0 && (buf[end] & 0xc0) === 0x80; back++) {
    end--;
  }
  return buf.toString('utf8', 0, end) + '\n[truncated — file too large]';
}
|
|
49
|
+
|
|
50
|
+
// ── Read tool support ──────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Decide whether a Read input can be served by the native handler.
 *
 * Returns false for malformed input (non-object, missing/blank/non-string
 * file_path), for any request carrying a `pages` value, and for paths whose
 * extension is listed in READ_SKIP_EXTENSIONS.
 *
 * The extension is taken from the trimmed path because handleReadTool trims
 * file_path before opening it; checking the untrimmed string would let
 * "report.pdf " slip past the extension filter and be read as text.
 *
 * @param {object} input Read tool input ({ file_path, pages?, ... }).
 * @returns {boolean} true when the native Read handler may be used.
 */
function isReadHandleable(input) {
  if (!input || typeof input !== 'object') return false;

  const fp = input.file_path;
  if (typeof fp !== 'string') return false;

  const trimmed = fp.trim();
  if (!trimmed) return false;
  if (input.pages != null) return false;

  const ext = path.extname(trimmed).toLowerCase();
  return !READ_SKIP_EXTENSIONS.has(ext);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Read a file natively and return its content with 1-based line numbers
 * (number right-aligned in 6 columns, then a tab, then the line).
 *
 * `offset` is a 0-based index of the first line to return and `limit` is the
 * maximum number of lines; both are optional. Fractional values are floored
 * so the printed line numbers stay integral. Line numbers always reflect the
 * position in the file, not in the returned slice.
 *
 * @param {object} input Read tool input ({ file_path, offset?, limit? }).
 * @returns {{ output: string, exitCode: number }} exitCode is 0 on success
 *   and 1 when the path is missing/invalid or the file cannot be read.
 */
function handleReadTool(input) {
  // A non-string file_path (number, object, ...) is treated as missing rather
  // than being allowed to throw on .trim().
  const rawPath = input?.file_path;
  const fp = typeof rawPath === 'string' ? rawPath.trim() : '';
  if (!fp) return { output: '(no file_path provided)', exitCode: 1 };

  const abs = path.resolve(process.cwd(), fp);
  try {
    // readFileNative already applies the size cap.
    const lines = readFileNative(abs).split('\n');

    const offset = (typeof input.offset === 'number' && input.offset >= 0)
      ? Math.floor(input.offset)
      : 0;
    const limit = (typeof input.limit === 'number' && input.limit > 0)
      ? Math.floor(input.limit)
      : lines.length;

    const formatted = lines
      .slice(offset, offset + limit)
      .map((line, i) => `${String(offset + i + 1).padStart(6)}\t${line}`)
      .join('\n');
    return { output: formatted, exitCode: 0 };
  } catch (e) {
    const msg = e.code === 'ENOENT'
      ? `${fp}: No such file or directory`
      : `${fp}: ${e.message}`;
    return { output: `Read: ${msg}`, exitCode: 1 };
  }
}
|
|
92
|
+
|
|
93
|
+
// ── Glob tool support ──────────────────────────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
/**
 * Matches any character treated as a sign of shell injection in a glob
 * pattern. isGlobHandleable rejects patterns that contain one of them.
 */
const GLOB_INJECTION_RE = /[;&|`$<>!]/;

/** Entry names that the recursive glob and grep walkers never descend into or report. */
const GLOB_SKIP = new Set([
  'node_modules',
  '.git',
  '.hg',
  '.svn',
  'dist',
  'build',
  '__pycache__',
  '.venv',
  'venv',
]);

/** Upper bound on the number of glob matches collected, to keep the model context small. */
const GLOB_MAX = 500;
|
|
104
|
+
|
|
105
|
+
/**
 * Decide whether a Glob input can be served by the native handler.
 *
 * Requires a non-blank string `pattern` free of the characters matched by
 * GLOB_INJECTION_RE, and, when `path` is present, that it is a string.
 *
 * @param {object} input Glob tool input ({ pattern, path? }).
 * @returns {boolean}
 */
function isGlobHandleable(input) {
  if (!input || typeof input !== 'object') return false;

  const glob = input.pattern;
  const usable = typeof glob === 'string'
    && glob.trim() !== ''
    && !GLOB_INJECTION_RE.test(glob);
  if (!usable) return false;

  return input.path == null || typeof input.path === 'string';
}
|
|
113
|
+
|
|
114
|
+
/**
 * Escape every RegExp metacharacter in `s` so it matches literally.
 * `*` and `?` are included: the glob translator handles wildcards itself
 * before falling back to this helper, so anything reaching it is a literal.
 *
 * @param {string} s Literal text.
 * @returns {string} RegExp-source-safe text.
 */
function escapeRegexChars(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// Translate a (separator-normalised) glob fragment into RegExp source.
// Called recursively for each alternative inside {a,b,c}, so wildcards are
// honoured there too instead of being emitted as raw, invalid regex.
function globSourceFor(p) {
  let re = '';
  let i = 0;
  while (i < p.length) {
    const ch = p[i];

    // Double star: any number of path segments, including none.
    if (ch === '*' && p[i + 1] === '*') {
      const atSegmentStart = i === 0 || p[i - 1] === '/';
      i += 2;
      if (p[i] === '/') {
        i++; // consume the separator that follows the double star
        // At a segment start the match must end on a directory boundary,
        // otherwise a pattern for "any-dir/foo.js" would also accept
        // "barfoo.js".
        re += atSegmentStart ? '(?:.*/)?' : '.*';
      } else {
        re += '.*';
      }
      continue;
    }

    // Single star: anything within one path segment.
    if (ch === '*') { re += '[^/]*'; i++; continue; }

    // Question mark: exactly one character within a segment.
    if (ch === '?') { re += '[^/]'; i++; continue; }

    // {a,b,c} alternation (no nesting: the first closing brace ends it).
    if (ch === '{') {
      const end = p.indexOf('}', i);
      if (end !== -1) {
        const alts = p.slice(i + 1, end).split(',').map(globSourceFor);
        re += `(?:${alts.join('|')})`;
        i = end + 1;
        continue;
      }
    }

    // [abc] / [^abc] / [!abc] character classes. Content is passed through
    // verbatim, except that the glob negation marker "!" becomes "^".
    if (ch === '[') {
      const end = p.indexOf(']', i);
      if (end !== -1) {
        const negated = p[i + 1] === '!' && end > i + 2;
        re += negated ? `[^${p.slice(i + 2, end)}]` : p.slice(i, end + 1);
        i = end + 1;
        continue;
      }
    }

    re += escapeRegexChars(ch);
    i++;
  }
  return re;
}

/**
 * Convert a glob pattern to an anchored RegExp.
 *
 * Supported syntax: double star (any path depth), single star (within one
 * segment), ? (one character), {ts,tsx} alternation whose alternatives may
 * themselves contain wildcards, and [abc] / [^abc] / [!abc] classes.
 * Backslashes in the pattern are treated as path separators. Matching is
 * case-insensitive on Windows and case-sensitive elsewhere.
 * Exported for unit testing.
 *
 * @param {string} pattern Glob pattern.
 * @returns {RegExp} Expression to test against forward-slash relative paths.
 * @throws {SyntaxError} If the translated pattern is not a valid RegExp
 *   (for example a malformed character class).
 */
function globToRegex(pattern) {
  // Normalise Windows separators in the pattern.
  const source = globSourceFor(pattern.replace(/\\/g, '/'));
  const flags = process.platform === 'win32' ? 'i' : '';
  return new RegExp(`^${source}$`, flags);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Depth-first walk of `dir` that appends every non-directory entry whose
 * path (relative to `baseDir`, forward slashes) satisfies `regex`.
 *
 * Entries named in GLOB_SKIP are ignored, unreadable directories are skipped
 * silently, and the walk stops once `results` holds GLOB_MAX items.
 *
 * @param {string} dir Directory to scan on this call.
 * @param {string} baseDir Root that reported paths are relative to.
 * @param {RegExp} regex Matcher applied to each relative path.
 * @param {string[]} results Output array, mutated in place.
 */
function walkGlob(dir, baseDir, regex, results) {
  const isFull = () => results.length >= GLOB_MAX;
  if (isFull()) return;

  let children;
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const child of children) {
    if (isFull()) return;
    if (GLOB_SKIP.has(child.name)) continue;

    const childPath = path.join(dir, child.name);
    if (child.isDirectory()) {
      walkGlob(childPath, baseDir, regex, results);
      continue;
    }

    // Forward slashes keep matching consistent across platforms.
    const relPosix = path.relative(baseDir, childPath).replace(/\\/g, '/');
    if (regex.test(relPosix)) results.push(relPosix);
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Resolve a glob pattern (plus optional base path) to a sorted list of
 * matching paths.
 *
 * When no `path` is given the search root is the current working directory
 * and results are relative to it; otherwise results are prefixed with the
 * resolved base directory. Base directories outside the CWD are allowed:
 * the handler only lists file names.
 *
 * @param {object} input Glob tool input ({ pattern, path? }).
 * @returns {{ output: string, exitCode: number, matchCount: number }}
 *   exitCode is 1 for a missing/invalid pattern or a non-string path.
 */
function handleGlobTool(input) {
  const fail = (output) => ({ output, exitCode: 1, matchCount: 0 });

  // Non-string values are reported as errors instead of throwing on
  // .trim() / path.resolve().
  const pattern = typeof input?.pattern === 'string' ? input.pattern.trim() : '';
  if (!pattern) return fail('(no pattern provided)');
  if (input.path != null && typeof input.path !== 'string') {
    return fail('Glob: path must be a string');
  }

  const cwd = process.cwd();
  const baseDir = input.path ? path.resolve(cwd, input.path) : cwd;

  let regex;
  try {
    regex = globToRegex(pattern);
  } catch (e) {
    return fail(`Glob: invalid pattern: ${e.message}`);
  }

  const results = [];
  walkGlob(baseDir, baseDir, regex, results);
  results.sort();

  // The walker stops at GLOB_MAX, so reaching the cap is reported as
  // truncation.
  const truncated = results.length >= GLOB_MAX;
  const prefix = baseDir !== cwd ? baseDir : '';
  const lines = results.map((r) => path.join(prefix, r).replace(/\\/g, '/'));
  const output = lines.join('\n') + (truncated ? `\n(truncated at ${GLOB_MAX} results)` : '');
  return { output: output || '(no matches)', exitCode: 0, matchCount: results.length };
}
|
|
222
|
+
|
|
223
|
+
// ── Grep tool support ──────────────────────────────────────────────────────────
|
|
224
|
+
|
|
225
|
+
/** Default and maximum number of grep output lines (the original note ties this to Claude Code's head_limit default). */
const GREP_MAX_RESULTS = 250;

/** Safety limit on how many candidate files a grep walk may collect. */
const GREP_FILE_CAP = 10000;

/**
 * File-type name -> list of extensions accepted for that type.
 * Names are meant to follow ripgrep's --type values (per the original note).
 */
const GREP_TYPE_EXTS = new Map(Object.entries({
  js: ['.js', '.mjs', '.cjs'],
  ts: ['.ts', '.tsx', '.mts', '.cts'],
  py: ['.py', '.pyi'],
  rust: ['.rs'],
  go: ['.go'],
  java: ['.java'],
  rb: ['.rb'],
  css: ['.css', '.scss', '.sass', '.less'],
  html: ['.html', '.htm'],
  json: ['.json', '.jsonc'],
  md: ['.md', '.mdx'],
  yaml: ['.yaml', '.yml'],
  sh: ['.sh', '.bash', '.zsh'],
  c: ['.c', '.h'],
  cpp: ['.cpp', '.cc', '.cxx', '.hpp', '.hh'],
}));

/** Values accepted for the Grep `output_mode` field. */
const VALID_GREP_MODES = new Set([
  'content',
  'files_with_matches',
  'count',
]);
|
|
248
|
+
|
|
249
|
+
/**
 * Decide whether a Grep input can be served by the native handler.
 *
 * Requires a non-blank string `pattern`; `path`, `glob` and `type` must be
 * strings when present; `output_mode` must be one of VALID_GREP_MODES when
 * present; and `multiline: true` is refused because the native handler
 * matches line by line.
 *
 * @param {object} input Grep tool input.
 * @returns {boolean}
 */
function isGrepHandleable(input) {
  if (!input || typeof input !== 'object') return false;

  const needle = input.pattern;
  if (typeof needle !== 'string' || needle.trim() === '') return false;

  // Optional string-typed fields.
  for (const key of ['path', 'glob', 'type']) {
    const value = input[key];
    if (value != null && typeof value !== 'string') return false;
  }

  const mode = input.output_mode;
  if (mode != null && !VALID_GREP_MODES.has(mode)) return false;

  // Cross-line matching needs a whole-file regex, which is not supported here.
  return input.multiline !== true;
}
|
|
262
|
+
|
|
263
|
+
/**
 * Load a file for grep as UTF-8 text, limited to the first MAX_OUTPUT bytes.
 *
 * @param {string} absPath File to read.
 * @returns {string|null} Decoded text, or null when the file cannot be read
 *   or a NUL byte appears in its first 512 bytes (treated as binary).
 */
function tryReadGrep(absPath) {
  let data;
  try {
    data = fs.readFileSync(absPath);
    const looksBinary = data.subarray(0, 512).includes(0);
    if (looksBinary) return null;
    const end = Math.min(data.length, MAX_OUTPUT);
    return data.toString('utf8', 0, end);
  } catch {
    return null;
  }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Depth-first walk of `dir` that appends the absolute path of every
 * non-directory entry passing the type and glob filters.
 *
 * Entries named in GLOB_SKIP are ignored, unreadable directories are skipped
 * silently, and the walk stops once `results` holds GREP_FILE_CAP items.
 *
 * @param {string} dir Directory to scan on this call.
 * @param {string} baseDir Root used to build relative paths for glob tests.
 * @param {RegExp|null} globRegex Optional glob filter.
 * @param {boolean} globHasDir When true the glob is tested against the
 *   relative path; otherwise against the file's basename only.
 * @param {string[]|null} typeExts Optional list of accepted extensions.
 * @param {string[]} results Output array, mutated in place.
 */
function walkGrepFiles(dir, baseDir, globRegex, globHasDir, typeExts, results) {
  const capped = () => results.length >= GREP_FILE_CAP;
  if (capped()) return;

  let children;
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  const passesFilters = (filePath) => {
    if (typeExts && !typeExts.includes(path.extname(filePath).toLowerCase())) return false;
    if (!globRegex) return true;
    const subject = globHasDir
      ? path.relative(baseDir, filePath).replace(/\\/g, '/')
      : path.basename(filePath);
    return globRegex.test(subject);
  };

  for (const child of children) {
    if (capped()) return;
    if (GLOB_SKIP.has(child.name)) continue;

    const childPath = path.join(dir, child.name);
    if (child.isDirectory()) {
      walkGrepFiles(childPath, baseDir, globRegex, globHasDir, typeExts, results);
    } else if (passesFilters(childPath)) {
      results.push(childPath);
    }
  }
}
|
|
298
|
+
|
|
299
|
+
/**
 * Execute a structured Grep tool call locally.
 *
 * Supported input fields: pattern, path, glob, type, output_mode
 * (files_with_matches | content | count; any other value behaves like
 * content), -i (case-insensitive), -C / context / -A / -B (context lines),
 * head_limit (capped at GREP_MAX_RESULTS) and offset (output lines to skip).
 *
 * Matching is line-oriented in every mode: the pattern is tested against
 * each line separately, so anchors and whitespace classes never span lines
 * and all three modes agree on which files match. Cross-line (multiline)
 * matching is not supported — isGrepHandleable rejects those requests.
 *
 * @param {object} input Grep tool input.
 * @returns {{ output: string, exitCode: number, matchCount: number }}
 *   matchCount is the number of matching files (files_with_matches) or
 *   matching lines (count / content) seen before output was cut off.
 */
function handleGrepTool(input) {
  const fail = (output) => ({ output, exitCode: 1, matchCount: 0 });

  // NOTE(review): the pattern is trimmed before compilation (unchanged from
  // the previous behaviour), so leading/trailing whitespace in a pattern is
  // not significant — confirm that is intended.
  const pattern = typeof input?.pattern === 'string' ? input.pattern.trim() : '';
  if (!pattern) return fail('(no pattern provided)');
  if (input.path != null && typeof input.path !== 'string') {
    return fail('Grep: path must be a string');
  }

  const searchRoot = input.path
    ? path.resolve(process.cwd(), input.path)
    : process.cwd();

  const outputMode = input.output_mode || 'files_with_matches';
  const caseInsens = input['-i'] === true;

  // Context sizes must be non-negative integers; anything else falls back.
  const lineCount = (value, fallback) =>
    (typeof value === 'number' && value >= 0 ? Math.floor(value) : fallback);
  const contextN = lineCount(input['-C'], lineCount(input.context, 0));
  const linesBefore = lineCount(input['-B'], contextN);
  const linesAfter = lineCount(input['-A'], contextN);

  const headLimit = typeof input.head_limit === 'number' && input.head_limit > 0
    ? Math.min(Math.max(1, Math.floor(input.head_limit)), GREP_MAX_RESULTS)
    : GREP_MAX_RESULTS;
  const skipLines = typeof input.offset === 'number' && input.offset > 0
    ? Math.floor(input.offset)
    : 0;

  // No "g" flag: .test() on a non-global regex is stateless, so there is no
  // lastIndex to reset between lines.
  let regex;
  try {
    regex = new RegExp(pattern, caseInsens ? 'i' : '');
  } catch (e) {
    return fail(`Grep: invalid pattern: ${e.message}`);
  }

  // Build type extension filter. Unknown type names are treated as a bare
  // extension ("toml" -> ".toml").
  let typeExts = null;
  if (typeof input.type === 'string' && input.type) {
    const t = input.type.toLowerCase();
    typeExts = GREP_TYPE_EXTS.get(t) || [t.startsWith('.') ? t : `.${t}`];
  }

  // Build glob file filter.
  let globRegex = null;
  let globHasDir = false;
  if (typeof input.glob === 'string' && input.glob) {
    try {
      globRegex = globToRegex(input.glob);
      globHasDir = input.glob.includes('/') || input.glob.includes('**');
    } catch { /* ignore invalid glob — no filter applied */ }
  }

  // Collect candidate files.
  const files = [];
  try {
    if (fs.statSync(searchRoot).isFile()) {
      files.push(searchRoot);
    } else {
      walkGrepFiles(searchRoot, searchRoot, globRegex, globHasDir, typeExts, files);
      files.sort();
    }
  } catch (e) {
    return fail(`Grep: cannot access path: ${e.message}`);
  }

  const outputLines = [];
  let totalMatches = 0;
  let truncated = false;
  const wantMore = () => outputLines.length < skipLines + headLimit;
  const relOf = (abs) => path.relative(searchRoot, abs).replace(/\\/g, '/') || path.basename(abs);
  const lineMatches = (line) => regex.test(line);

  for (const absFile of files) {
    if (!wantMore()) { truncated = true; break; }

    const content = tryReadGrep(absFile);
    if (!content) continue; // unreadable, binary, or empty
    const fileLines = content.split('\n');

    if (outputMode === 'files_with_matches') {
      if (fileLines.some(lineMatches)) {
        totalMatches++;
        outputLines.push(relOf(absFile));
      }
      continue;
    }

    if (outputMode === 'count') {
      const count = fileLines.filter(lineMatches).length;
      if (count > 0) {
        totalMatches += count;
        outputLines.push(`${relOf(absFile)}:${count}`);
      }
      continue;
    }

    // content mode
    const matchSet = new Set();
    fileLines.forEach((line, i) => { if (lineMatches(line)) matchSet.add(i); });
    if (!matchSet.size) continue;
    totalMatches += matchSet.size;

    // Merge context windows into non-overlapping groups. matchSet is filled
    // in ascending index order, so its iteration order is already sorted.
    const groups = [];
    let gs = -1;
    let ge = -1;
    for (const idx of matchSet) {
      const s = Math.max(0, idx - linesBefore);
      const e = Math.min(fileLines.length - 1, idx + linesAfter);
      if (gs === -1) { gs = s; ge = e; }
      else if (s <= ge + 1) { ge = Math.max(ge, e); }
      else { groups.push([gs, ge]); gs = s; ge = e; }
    }
    if (gs !== -1) groups.push([gs, ge]);

    const fileLabel = relOf(absFile);
    let firstGroup = true;
    for (const [gStart, gEnd] of groups) {
      if (!wantMore()) { truncated = true; break; }
      if (!firstGroup) outputLines.push('--');
      firstGroup = false;
      for (let i = gStart; i <= gEnd && wantMore(); i++) {
        // ":" marks a matching line, "-" a context line.
        const sep = matchSet.has(i) ? ':' : '-';
        outputLines.push(`${fileLabel}${sep}${i + 1}${sep}${fileLines[i]}`);
      }
    }
  }

  const sliced = outputLines.slice(skipLines, skipLines + headLimit);
  const text = sliced.join('\n') || '(no matches)';
  const suffix = truncated ? '\n(truncated — use head_limit/offset to paginate)' : '';
  return { output: text + suffix, exitCode: 0, matchCount: totalMatches };
}
|
|
435
|
+
|
|
436
|
+
// ── Todo tool support ──────────────────────────────────────────────────────────
|
|
437
|
+
|
|
438
|
+
/**
 * Decide whether a todo tool call can be served natively.
 *
 * - 'TodoRead' is always accepted; its input is not inspected.
 * - 'TodoWrite' is accepted only when `input` is an object whose `todos`
 *   property is an array.
 * - Any other tool name is refused.
 *
 * @param {object} input Tool input.
 * @param {string} toolName 'TodoRead' or 'TodoWrite'.
 * @returns {boolean}
 */
function isTodoHandleable(input, toolName) {
  switch (toolName) {
    case 'TodoRead':
      return true;
    case 'TodoWrite':
      return Boolean(input) && typeof input === 'object' && Array.isArray(input.todos);
    default:
      return false;
  }
}
|
|
451
|
+
|
|
452
|
+
/**
 * Handle a TodoWrite call: replace the contents of the session todo list
 * with `input.todos`, mutating `todoStore` in place so every holder of the
 * array sees the update.
 *
 * Items are copied one at a time rather than spread into a single splice()
 * call, so very large lists cannot exceed the engine's argument-count limit.
 *
 * @param {object} input TodoWrite input ({ todos: Array }).
 * @param {Array} todoStore Mutable session todo list.
 * @returns {{ output: string, exitCode: number, taskCount: number }}
 *   On success output is '' (the original note says Claude Code expects an
 *   empty string from write tools) and taskCount is the new list length.
 *   When `todos` is not an array, exitCode is 1 and the store is untouched.
 */
function handleTodoWriteTool(input, todoStore) {
  const todos = input?.todos;
  if (!Array.isArray(todos)) {
    return { output: 'TodoWrite: todos must be an array', exitCode: 1, taskCount: 0 };
  }

  // Snapshot first: `todos` may be the very array we are about to clear.
  const incoming = todos.slice();
  todoStore.length = 0;
  for (const item of incoming) todoStore.push(item);

  return { output: '', exitCode: 0, taskCount: incoming.length };
}
|
|
465
|
+
|
|
466
|
+
/**
 * Handle a TodoRead call by serialising the session todo list.
 *
 * @param {Array} todoStore Session todo list.
 * @returns {{ output: string, exitCode: number, taskCount: number }}
 *   output is the list as 2-space-indented JSON, exitCode is always 0, and
 *   taskCount is the list length.
 */
function handleTodoReadTool(todoStore) {
  return {
    output: JSON.stringify(todoStore, null, 2),
    exitCode: 0,
    taskCount: todoStore.length,
  };
}
|
|
474
|
+
|
|
475
|
+
// ── MCP execution wrapper ──────────────────────────────────────────────────────
|
|
476
|
+
|
|
477
|
+
/**
 * Execute a normalized local tool call and post-process its output with
 * distillation, token estimation and secret scanning. Returns a canonical
 * result shape for MCP use.
 *
 * `normalizedInput` may be null/undefined (e.g. TodoRead takes no input);
 * it is only dereferenced through optional chaining here.
 *
 * @param {string} toolName 'read_file' | 'find_files' | 'grep' | 'TodoWrite' | 'TodoRead'
 * @param {object} normalizedInput Already normalized (via mcp-normalize.js or similar)
 * @param {Array} [todoStore=[]] Mutable session todo list (passed through to Todo handlers)
 * @returns {{
 *   content: string,            // final text to send to the model
 *   exitCode: number,
 *   outputTokens: number,
 *   bytes: number,              // UTF-8 size of the handler's raw output
 *   distilled: boolean,
 *   distillSaved: number,       // tokens saved by distillation (0 if not distilled)
 *   distillLabel: string|null,
 *   rawContent: string|null,    // original pre-distill content (null if not distilled)
 *   secrets: Array,             // result of scanSecrets on the final content
 *   matchCount?: number,        // grep / glob only
 *   taskCount?: number,         // todo only
 * }}
 */
function executeLocalTool(toolName, normalizedInput, todoStore = []) {
  let raw;
  const extra = {};

  switch (toolName) {
    case 'read_file':
      raw = handleReadTool(normalizedInput);
      break;
    case 'find_files':
      raw = handleGlobTool(normalizedInput);
      extra.matchCount = raw.matchCount;
      break;
    case 'grep':
      raw = handleGrepTool(normalizedInput);
      extra.matchCount = raw.matchCount;
      break;
    case 'TodoWrite':
      raw = handleTodoWriteTool(normalizedInput, todoStore);
      extra.taskCount = raw.taskCount;
      break;
    case 'TodoRead':
      raw = handleTodoReadTool(todoStore);
      extra.taskCount = raw.taskCount;
      break;
    default:
      return {
        content: `Unknown tool: ${toolName}`, exitCode: 1,
        outputTokens: 0, bytes: 0,
        distilled: false, distillSaved: 0, distillLabel: null, rawContent: null,
        secrets: [],
      };
  }

  const rawOutput = raw.output;
  const bytes = Buffer.byteLength(rawOutput, 'utf8');

  // Synthetic command string handed to distill() so it can pick a category.
  // read_file and the todo tools get the file path / tool name, which (per
  // the original note) is expected to match no category and pass through.
  let distillCmd;
  if (toolName === 'grep') {
    distillCmd = `grep ${normalizedInput?.pattern || ''}`;
  } else if (toolName === 'find_files') {
    distillCmd = `find . -name ${normalizedInput?.pattern || ''}`;
  } else {
    distillCmd = normalizedInput?.file_path || toolName;
  }

  const dr = distill(distillCmd, rawOutput);
  const content = dr.content;
  const distilled = dr.distilled || false;
  const rawContent = distilled ? dr.rawContent : null;
  const distillSaved = distilled ? estimateTokens(rawOutput) - estimateTokens(content) : 0;
  const distillLabel = dr.label || null;
  const outputTokens = estimateTokens(content);
  const secrets = scanSecrets(content);

  return {
    content,
    exitCode: raw.exitCode,
    outputTokens,
    bytes,
    distilled,
    distillSaved,
    distillLabel,
    rawContent,
    secrets,
    ...extra,
  };
}
|
|
561
|
+
|
|
562
|
+
// ── Exports ────────────────────────────────────────────────────────────────────
|
|
563
|
+
|
|
564
|
+
module.exports = {
|
|
565
|
+
// Shared
|
|
566
|
+
MAX_OUTPUT,
|
|
567
|
+
readFileNative,
|
|
568
|
+
// Read
|
|
569
|
+
READ_SKIP_EXTENSIONS,
|
|
570
|
+
isReadHandleable,
|
|
571
|
+
handleReadTool,
|
|
572
|
+
// Glob
|
|
573
|
+
GLOB_INJECTION_RE,
|
|
574
|
+
GLOB_SKIP,
|
|
575
|
+
GLOB_MAX,
|
|
576
|
+
isGlobHandleable,
|
|
577
|
+
globToRegex,
|
|
578
|
+
walkGlob,
|
|
579
|
+
handleGlobTool,
|
|
580
|
+
// Grep
|
|
581
|
+
GREP_MAX_RESULTS,
|
|
582
|
+
GREP_FILE_CAP,
|
|
583
|
+
GREP_TYPE_EXTS,
|
|
584
|
+
VALID_GREP_MODES,
|
|
585
|
+
isGrepHandleable,
|
|
586
|
+
tryReadGrep,
|
|
587
|
+
walkGrepFiles,
|
|
588
|
+
handleGrepTool,
|
|
589
|
+
// Todo
|
|
590
|
+
isTodoHandleable,
|
|
591
|
+
handleTodoWriteTool,
|
|
592
|
+
handleTodoReadTool,
|
|
593
|
+
// MCP execution wrapper
|
|
594
|
+
executeLocalTool,
|
|
595
|
+
};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Scanner — uniform `transform(input) → { hits, content }` interface over
|
|
5
|
+
* the existing secret scanner and distiller.
|
|
6
|
+
*
|
|
7
|
+
* Stage 1 wraps:
|
|
8
|
+
* - analyzer.scanSecrets → secret detection on string content
|
|
9
|
+
* - distiller.distill → content reduction for tool results
|
|
10
|
+
*
|
|
11
|
+
* The dispatcher's TRANSFORM branch will call these in Stage 2 when the
|
|
12
|
+
* policy engine starts emitting TRANSFORM Decisions natively. For now the
|
|
13
|
+
* scanner is exported as a layer module so callers can use the canonical
|
|
14
|
+
* contract; legacy interceptToolUse still does its own scanner/distill calls.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const { scanSecrets, redactSecrets } = require('../analyzer');
|
|
18
|
+
const { distill } = require('../distiller');
|
|
19
|
+
|
|
20
|
+
/**
 * Run the secret scanner over `content`.
 *
 * Non-string input is coerced with its own toString() when it has one;
 * anything else (or an empty result) becomes ''.
 *
 * @param {*} content Text to scan.
 * @returns {{ hits: Array<{label, line, snippet}>, content: string }}
 *   The scanner hits plus the (coerced, otherwise unmodified) text.
 */
function scan(content) {
  let text;
  if (typeof content === 'string') {
    text = content;
  } else {
    text = content?.toString?.() || '';
  }
  const hits = scanSecrets(text);
  return { hits, content: text };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Run the distiller on `content` under the given label and hand back its
 * result object untouched, leaving the choice between distilled and raw
 * form to the caller.
 *
 * @param {string} label Distiller label; a falsy value becomes 'tool-result'.
 * @param {*} content Text to distill; non-strings are coerced via toString()
 *   when available, otherwise treated as ''.
 * @returns {*} Whatever distill() returns.
 */
function reduce(label, content) {
  const isText = typeof content === 'string';
  const text = isText ? content : (content?.toString?.() || '');
  const effectiveLabel = label || 'tool-result';
  return distill(effectiveLabel, text);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Scan `content` for secrets, then return a redacted copy of it.
 *
 * The scan runs on the un-redacted text first so the reported hits describe
 * the original content; redactSecrets() then produces the returned text.
 *
 * @param {*} content Text to redact; non-strings are coerced via toString()
 *   when available, otherwise treated as ''.
 * @returns {{ hits: Array<{label, line, snippet}>, content: string }}
 */
function redact(content) {
  let text = content;
  if (typeof text !== 'string') {
    text = content?.toString?.() || '';
  }
  const hits = scanSecrets(text);
  const redacted = redactSecrets(text);
  return { hits, content: redacted };
}
|
|
48
|
+
|
|
49
|
+
module.exports = { scan, reduce, redact };
|