@occasiolabs/occasio 0.8.4 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/ADAPTER-STAGE-2-MIGRATION.md +59 -0
- package/docs/STAGE-2-STEP-5-SHELL-PLAN.md +107 -0
- package/docs/THREAT-MODEL.md +195 -0
- package/docs/edr-calibration.md +29 -0
- package/package.json +7 -3
- package/src/adapters/claude-code.js +1 -2
- package/src/adapters/computer-use.js +1 -1
- package/src/anomaly/cli.js +4 -1
- package/src/anomaly/detectors/deny-rate.js +2 -1
- package/src/anomaly/detectors/file-read-volume.js +2 -1
- package/src/anomaly/index.js +5 -0
- package/src/boundary.js +1 -1
- package/src/classifier.js +1 -1
- package/src/cli/clear.js +4 -4
- package/src/cli/help.js +58 -37
- package/src/cli/status.js +1 -1
- package/src/dashboard.js +2 -3
- package/src/distiller.js +1 -1
- package/src/executor/dispatcher.js +2 -2
- package/src/executor/native-handlers/glob.js +173 -0
- package/src/executor/native-handlers/grep.js +258 -0
- package/src/executor/native-handlers/read.js +99 -0
- package/src/executor/native-handlers/todo.js +56 -0
- package/src/harness.js +8 -10
- package/src/index.js +13 -15
- package/src/inspect.js +1 -1
- package/src/interceptor.js +9 -29
- package/src/ledger.js +2 -3
- package/src/mcp-experiment.js +4 -4
- package/src/mcp-server.js +3 -3
- package/src/policy/doctor.js +2 -2
- package/src/policy/engine.js +0 -1
- package/src/policy/init.js +1 -1
- package/src/policy/loader.js +3 -3
- package/src/policy/show.js +1 -2
- package/src/preflight/cli.js +0 -1
- package/src/preflight/miner.js +3 -6
- package/src/redteam.js +1 -2
- package/src/replay.js +1 -1
- package/src/report/index.js +0 -4
- package/src/runtime.js +42 -444
- package/src/selftest.js +1 -1
- package/src/session.js +1 -1
package/src/cli/help.js
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
// `occasio help` — top-level usage. Pure text; no side effects other
|
|
2
2
|
// than console.log. Each CLI command lives in its own file under
|
|
3
3
|
// src/cli/ as part of the index.js decomposition (see CHANGELOG).
|
|
4
|
+
//
|
|
5
|
+
// Maturity tags follow the bewertung pillars:
|
|
6
|
+
// (stable) — load-bearing, has test coverage and field validation
|
|
7
|
+
// (beta) — works end-to-end but missing breadth (one detector, one preset)
|
|
8
|
+
// (alpha) — scaffold; needs operator calibration before relying on it
|
|
4
9
|
|
|
5
10
|
'use strict';
|
|
6
11
|
|
|
@@ -19,42 +24,58 @@ function run() {
|
|
|
19
24
|
console.log(`
|
|
20
25
|
${col.b(`⚡ Occasio v${VERSION}`)}
|
|
21
26
|
|
|
22
|
-
${col.b('
|
|
23
|
-
occasio
|
|
24
|
-
occasio
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
occasio
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
27
|
+
${col.b('60-Second Start:')}
|
|
28
|
+
${col.c('occasio init')} Create policy.yml from a template
|
|
29
|
+
${col.c('occasio register')} Install shell alias so 'claude' uses the proxy
|
|
30
|
+
${col.c('claude --version')} Confirm the wrapper resolves Claude Code
|
|
31
|
+
|
|
32
|
+
${col.b('Usage:')} occasio <command> [args...] (or oc <command>)
|
|
33
|
+
|
|
34
|
+
${col.b('Setup')} ${col.d('— one-time, per project')}
|
|
35
|
+
init ${col.d('(stable)')} Create starter policy.yml (--template strict|finance)
|
|
36
|
+
register ${col.d('(stable)')} Register shell alias (type 'claude' directly)
|
|
37
|
+
doctor ${col.d('(stable)')} Check setup: Node, claude CLI, port, Python, profile
|
|
38
|
+
|
|
39
|
+
${col.b('Run')} ${col.d('— start a session, observe live state')}
|
|
40
|
+
claude [args...] ${col.d('(stable)')} Start Claude with local proxy (intercept + log)
|
|
41
|
+
status ${col.d('(stable)')} Session stats, savings breakdown, coverage
|
|
42
|
+
clear ${col.d('(stable)')} Reset today's log and session data
|
|
43
|
+
clear --history ${col.d('(stable)')} Wipe all historical logs
|
|
44
|
+
ledger ${col.d('(stable)')} Inspect token ledger (--last N, --summary, --scope)
|
|
45
|
+
dashboard ${col.d('(beta)')} Open live dashboard at http://localhost:3001
|
|
46
|
+
|
|
47
|
+
${col.b('Inspect')} ${col.d('— forensics over what the agent did')}
|
|
48
|
+
replay ${col.d('(stable)')} Replay run audit (--last N, --detail, --run <id>)
|
|
49
|
+
boundary ${col.d('(stable)')} Per-request: produced / re-entered / prevented
|
|
50
|
+
inspect ${col.d('(stable)')} Cloud-boundary manifest (--last N, --entry N)
|
|
51
|
+
distill ${col.d('(stable)')} Inspect distilled outputs (--last N, --entry <N>)
|
|
52
|
+
report ${col.d('(stable)')} Governance export (--format csv for SIEM)
|
|
53
|
+
preflight ${col.d('(beta)')} Read-only miner over past logs
|
|
54
|
+
baseline ${col.d('(beta)')} Behavior baseline: [learn|show|compare|reset]
|
|
55
|
+
|
|
56
|
+
${col.b('Audit')} ${col.d('— tamper-evidence and attestation')}
|
|
57
|
+
audit verify ${col.d('(stable)')} Verify hash chain in pipeline-events.jsonl
|
|
58
|
+
audit repair ${col.d('(stable)')} Truncate crash-partial trailing line (--file --dry-run)
|
|
59
|
+
attest --run-id <uuid> ${col.d('(stable)')} Behavioral attestation: hash-chain + execution summary
|
|
60
|
+
${col.d('Add --sign in GitHub Actions for Sigstore keyless signing')}
|
|
61
|
+
attest verify <file> ${col.d('(stable)')} Re-verify signed attestation (bundle + DSSE + chain)
|
|
62
|
+
selftest ${col.d('(stable)')} Run governance self-checks on scratch chain
|
|
63
|
+
|
|
64
|
+
${col.b('Detect')} ${col.d('— anomalies, adversarial probes')}
|
|
65
|
+
anomalies ${col.d('(beta)')} Windowed EDR over the audit chain (--window 15m --json)
|
|
66
|
+
harness ${col.d('(alpha)')} Real Claude Code run vs. governance claims (API key required)
|
|
67
|
+
redteam ${col.d('(alpha)')} Autonomous adversarial probe (API key + SDK required)
|
|
68
|
+
|
|
69
|
+
${col.b('Policy & extras')}
|
|
70
|
+
policy [show] ${col.d('(stable)')} Show active policy: flags, routing, overrides
|
|
71
|
+
policy show --diff ${col.d('(stable)')} Only values that differ from defaults
|
|
72
|
+
policy validate ${col.d('(stable)')} Validate policy.yml and report errors/warnings
|
|
73
|
+
policy doctor ${col.d('(beta)')} Cross-reference logs with policy; suggest tightening
|
|
74
|
+
computer-use ${col.d('(alpha)')} Apply policy to a JSONL of tool_use blocks (--dry-run --example)
|
|
75
|
+
mcp-experiment ${col.d('(beta)')} MCP vs. built-in tool adoption stats
|
|
76
|
+
demo ${col.d('(stable)')} 10-second proof: see Occasio block real secrets
|
|
77
|
+
demo attest ${col.d('(stable)')} End-to-end attestation pipeline against a synthetic chain
|
|
78
|
+
demo anomalies ${col.d('(stable)')} End-to-end EDR test: synthetic adversarial chain
|
|
58
79
|
|
|
59
80
|
${col.b('Presets:')}
|
|
60
81
|
--preset balanced (default) Intercept safe reads locally, log all requests
|
|
@@ -68,7 +89,7 @@ ${col.b('Flags:')}
|
|
|
68
89
|
--log-only Alias for --preset off
|
|
69
90
|
--dashboard Open live dashboard at http://localhost:3001
|
|
70
91
|
--port <N> Proxy port (default: 8081)
|
|
71
|
-
--verbose Print live per-request chatter (off by default
|
|
92
|
+
--verbose Print live per-request chatter (off by default)
|
|
72
93
|
|
|
73
94
|
${col.b('Multi-agent routing:')}
|
|
74
95
|
Default → Claude Code adapter
|
package/src/cli/status.js
CHANGED
|
@@ -26,7 +26,7 @@ function todayStr() {
|
|
|
26
26
|
/**
 * Build the path of today's JSONL log file:
 * `<LOG_DIR>/logs/<todayStr()>.jsonl`.
 */
function getLogFile() {
  const fileName = `${todayStr()}.jsonl`;
  return path.join(LOG_DIR, 'logs', fileName);
}
|
|
27
27
|
|
|
28
28
|
function run() {
|
|
29
|
-
let s = null; try { s = JSON.parse(fs.readFileSync(SESSION_FILE, 'utf8')); } catch {}
|
|
29
|
+
let s = null; try { s = JSON.parse(fs.readFileSync(SESSION_FILE, 'utf8')); } catch { /* ignore */ }
|
|
30
30
|
console.log(col.b('\n⚡ Occasio\n'));
|
|
31
31
|
if (!s) { console.log(col.d(' No session data yet. Run: occasio claude\n')); return; }
|
|
32
32
|
|
package/src/dashboard.js
CHANGED
|
@@ -16,7 +16,6 @@ const path = require('path');
|
|
|
16
16
|
const os = require('os');
|
|
17
17
|
|
|
18
18
|
const DASHBOARD_PORT = 3001;
|
|
19
|
-
const PROXY_PORT = 8081;
|
|
20
19
|
const LOG_DIR = path.join(os.homedir(), '.occasio');
|
|
21
20
|
const SESSION_FILE = path.join(LOG_DIR, 'session.json');
|
|
22
21
|
|
|
@@ -97,8 +96,8 @@ const server = http.createServer((req, res) => {
|
|
|
97
96
|
}
|
|
98
97
|
|
|
99
98
|
if (req.url === '/api/clear' && req.method === 'POST') {
|
|
100
|
-
try { fs.writeFileSync(todayLogFile(), ''); } catch {}
|
|
101
|
-
try { fs.writeFileSync(SESSION_FILE, '{}'); } catch {}
|
|
99
|
+
try { fs.writeFileSync(todayLogFile(), ''); } catch { /* ignore */ }
|
|
100
|
+
try { fs.writeFileSync(SESSION_FILE, '{}'); } catch { /* ignore */ }
|
|
102
101
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
103
102
|
res.end('{"ok":true}');
|
|
104
103
|
broadcast({ type: 'update', session: {}, entries: [] });
|
package/src/distiller.js
CHANGED
|
@@ -120,7 +120,7 @@ const FAIL_RE = /\b(FAIL|FAILED|ERROR|error:|✗|×|AssertionError|not ok|ERRORE
|
|
|
120
120
|
* Keeps all failure-related lines (plus 1 line of context each side) and the
|
|
121
121
|
* last 15 lines (usually the summary). Clips total to TEST_MAX_LINES.
|
|
122
122
|
*/
|
|
123
|
-
function distillTestOutput(output, rawBytes,
|
|
123
|
+
function distillTestOutput(output, rawBytes, _cmd) {
|
|
124
124
|
const lines = output.split('\n');
|
|
125
125
|
const none = { content: output, distilled: false, savedTokens: 0, label: '', rawBytes, rawContent: null };
|
|
126
126
|
if (lines.length <= TEST_MAX_LINES) return none;
|
|
@@ -44,7 +44,7 @@ const NATIVE_HANDLERS = {
|
|
|
44
44
|
// but nativeHandle returned null, fall back to the exec subprocess. The
|
|
45
45
|
// returned `native` field tells the caller which path was taken.
|
|
46
46
|
[CANONICAL.SHELL_BASH]: async (input) => {
|
|
47
|
-
const cmd = (input?.command
|
|
47
|
+
const cmd = (typeof input?.command === 'string' ? input.command : '').trim();
|
|
48
48
|
if (!cmd) return null;
|
|
49
49
|
const nr = nativeHandle(cmd);
|
|
50
50
|
if (nr !== null) {
|
|
@@ -63,7 +63,7 @@ const NATIVE_HANDLERS = {
|
|
|
63
63
|
// then native-only execution. expandedCmd is returned so the caller can
|
|
64
64
|
// record the actually-executed command in toolsRun.
|
|
65
65
|
[CANONICAL.SHELL_POWERSHELL]: (input) => {
|
|
66
|
-
const rawCmd = (input?.command
|
|
66
|
+
const rawCmd = (typeof input?.command === 'string' ? input.command : '').trim();
|
|
67
67
|
if (!rawCmd) return null;
|
|
68
68
|
const cmd = expandPsEnvVars(rawCmd);
|
|
69
69
|
const nr = nativeHandle(cmd);
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Native handler for the Glob tool.
|
|
5
|
+
*
|
|
6
|
+
* Pure filesystem function: takes a glob pattern (+ optional base path) and
|
|
7
|
+
* returns a sorted list of matching paths. No dependency on the interceptor
|
|
8
|
+
* pipeline, Anthropic API, or shell execution. Safe to import in any process
|
|
9
|
+
* context.
|
|
10
|
+
*
|
|
11
|
+
* Extracted from src/runtime.js as Stage-2 Step 3 of the executor migration
|
|
12
|
+
* (see docs/ADAPTER-STAGE-2-MIGRATION.md). src/runtime.js re-exports these
|
|
13
|
+
* so existing consumers keep working unchanged.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const fs = require('fs');
|
|
17
|
+
const path = require('path');
|
|
18
|
+
|
|
19
|
+
// ── Glob tool support ──────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
// Characters that indicate shell injection in a glob pattern.
|
|
22
|
+
// We reject patterns containing these so handleGlobTool stays read-only.
|
|
23
|
+
const GLOB_INJECTION_RE = /[;&|`$<>!]/;
|
|
24
|
+
|
|
25
|
+
// Directories skipped during recursive glob walks.
|
|
26
|
+
const GLOB_SKIP = new Set(['node_modules', '.git', '.hg', '.svn', 'dist', 'build', '__pycache__', '.venv', 'venv']);
|
|
27
|
+
|
|
28
|
+
// Maximum number of matches returned to avoid overwhelming the model context.
|
|
29
|
+
const GLOB_MAX = 500;
|
|
30
|
+
|
|
31
|
+
// Maximum recursion depth from baseDir. Hard cap on path-traversal DoS
|
|
32
|
+
// (a fuzz-discovered class — see THREAT-MODEL.md residual risk #5).
|
|
33
|
+
// Tunable via env for special-case repos.
|
|
34
|
+
const GLOB_MAX_DEPTH = Number(process.env.OCCASIO_GLOB_MAX_DEPTH) || 16;
|
|
35
|
+
|
|
36
|
+
// Soft wall-clock limit per walk in ms. Stops a walk that strayed onto a huge
|
|
37
|
+
// subtree (e.g. agent globbed up from /) before it burns seconds. Stop is
|
|
38
|
+
// best-effort — the caller still receives whatever was collected so far.
|
|
39
|
+
const GLOB_MAX_MS = Number(process.env.OCCASIO_GLOB_MAX_MS) || 2_000;
|
|
40
|
+
|
|
41
|
+
/**
 * Decide whether a Glob tool input can be served by the native handler.
 *
 * Accepts only object inputs whose `pattern` is a non-blank string free of
 * the characters in GLOB_INJECTION_RE, and whose optional `path` is a string
 * (or null/undefined).
 *
 * @param {*} input - Raw tool input.
 * @returns {boolean} true when the input is safe and well-formed.
 */
function isGlobHandleable(input) {
  if (input === null || typeof input !== 'object') return false;

  const { pattern, path: basePath } = input;
  const hasPattern = typeof pattern === 'string' && pattern.trim() !== '';
  if (!hasPattern) return false;

  // Reject anything that looks like shell syntax.
  if (GLOB_INJECTION_RE.test(pattern)) return false;

  return basePath == null || typeof basePath === 'string';
}
|
|
49
|
+
|
|
50
|
+
// Escape regex metacharacters in a literal string segment.
// `*` and `?` are included so a stray wildcard can never reach the RegExp
// constructor as a bare quantifier.
function escapeRegexChars(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// Locate the `}` that closes the `{` at index `open` (nesting-aware) and split
// the enclosed text on its top-level commas.
// Returns { alts, end } or null when the brace is never closed.
function splitBraceGroup(p, open) {
  const alts = [];
  let depth = 0;
  let start = open + 1;
  for (let j = open; j < p.length; j++) {
    const ch = p[j];
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        alts.push(p.slice(start, j));
        return { alts, end: j };
      }
    } else if (ch === ',' && depth === 1) {
      alts.push(p.slice(start, j));
      start = j + 1;
    }
  }
  return null;
}

// Translate a glob fragment (separators already normalised to `/`) into
// unanchored regex source. Recurses into each `{a,b}` alternative so that
// wildcards inside braces are translated rather than treated as literals.
function globFragmentToSource(p) {
  let re = '';
  let i = 0;
  while (i < p.length) {
    const ch = p[i];
    // ** — match any path depth (including none).
    if (ch === '*' && p[i + 1] === '*') {
      i += 2;
      if (p[i] === '/') {
        // "**/" is zero or more WHOLE directory segments, so the next token
        // still has to start on a segment boundary.
        re += '(?:.*/)?';
        i++;
      } else {
        re += '.*';
      }
      continue;
    }
    // * — match within a single path segment
    if (ch === '*') { re += '[^/]*'; i++; continue; }
    // ? — match a single character within a segment
    if (ch === '?') { re += '[^/]'; i++; continue; }
    // {a,b,c} — alternation; each alternative is itself a glob fragment
    if (ch === '{') {
      const group = splitBraceGroup(p, i);
      if (group !== null) {
        const alts = group.alts.map((alt) => globFragmentToSource(alt));
        re += `(?:${alts.join('|')})`;
        i = group.end + 1;
        continue;
      }
      // Unclosed brace: fall through and treat `{` as a literal.
    }
    // [abc] / [^abc] / [!abc] — character classes; `!` is the POSIX spelling
    // of negation and is rewritten to the regex `^`.
    if (ch === '[') {
      const end = p.indexOf(']', i);
      if (end !== -1) {
        const cls = p.slice(i, end + 1);
        re += cls.startsWith('[!') ? `[^${cls.slice(2)}` : cls;
        i = end + 1;
        continue;
      }
    }
    re += escapeRegexChars(ch);
    i++;
  }
  return re;
}

/**
 * Convert a glob pattern to an anchored RegExp.
 *
 * Supports: ** (any path depth), * (single segment), ? (single char),
 * {ts,tsx} (alternation, may contain wildcards and nest),
 * [abc] / [^abc] / [!abc] (character classes).
 * Backslashes in the pattern are treated as path separators.
 * Exported for unit testing.
 *
 * @param {string} pattern - Glob pattern.
 * @returns {RegExp} Case-insensitive on win32, case-sensitive elsewhere.
 * @throws {SyntaxError} If the translated pattern is not a valid regex
 *   (e.g. a malformed character class).
 */
function globToRegex(pattern) {
  // Normalise Windows separators in the pattern.
  const source = globFragmentToSource(pattern.replace(/\\/g, '/'));

  // On Windows, matching is case-insensitive; on POSIX it's case-sensitive.
  const flags = process.platform === 'win32' ? 'i' : '';
  return new RegExp(`^${source}$`, flags);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Recursively collect files under `dir` whose path relative to `baseDir`
 * (forward-slash separated) matches `regex`, appending them to `results`.
 *
 * The walk stops as soon as `results` holds GLOB_MAX entries, `depth` reaches
 * GLOB_MAX_DEPTH, or the wall clock passes `deadline`. Entries named in
 * GLOB_SKIP are ignored, and unreadable directories are skipped silently.
 *
 * @param {string} dir - Directory to scan on this call.
 * @param {string} baseDir - Root that result paths are made relative to.
 * @param {RegExp} regex - Matcher applied to each file's relative path.
 * @param {string[]} results - Output array, mutated in place.
 * @param {number} [depth=0] - Current recursion depth.
 * @param {number} [deadline=Infinity] - Epoch-ms cutoff for the walk.
 */
function walkGlob(dir, baseDir, regex, results, depth = 0, deadline = Infinity) {
  const budgetSpent = () => results.length >= GLOB_MAX || Date.now() >= deadline;
  if (depth >= GLOB_MAX_DEPTH || budgetSpent()) return;

  let children;
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return; // unreadable directory — nothing to contribute
  }

  for (const child of children) {
    if (budgetSpent()) return;
    if (GLOB_SKIP.has(child.name)) continue;

    const childPath = path.join(dir, child.name);
    if (child.isDirectory()) {
      walkGlob(childPath, baseDir, regex, results, depth + 1, deadline);
      continue;
    }

    // Normalise to forward slashes so matching is consistent on all platforms.
    const relPath = path.relative(baseDir, childPath).replace(/\\/g, '/');
    if (regex.test(relPath)) results.push(relPath);
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Execute a Glob tool call locally.
 *
 * Walks `input.path` (resolved against the current working directory, or the
 * CWD itself when omitted) and lists files matching `input.pattern`, sorted.
 * Paths are printed relative to the CWD when the base directory IS the CWD;
 * otherwise each path is prefixed with the resolved base directory.
 *
 * @param {{pattern?: string, path?: string}} input - Glob tool input.
 * @returns {{output: string, exitCode: number, matchCount: number}}
 *   exitCode is 1 for a missing or invalid pattern, 0 otherwise.
 */
function handleGlobTool(input) {
  const rawPattern = typeof input?.pattern === 'string' ? input.pattern : '';
  const pattern = rawPattern.trim();
  if (pattern === '') {
    return { output: '(no pattern provided)', exitCode: 1, matchCount: 0 };
  }

  const cwd = process.cwd();
  let baseDir = cwd;
  if (input?.path) baseDir = path.resolve(cwd, input.path);

  let regex;
  try {
    regex = globToRegex(pattern);
  } catch (e) {
    return { output: `Glob: invalid pattern: ${e.message}`, exitCode: 1, matchCount: 0 };
  }

  const matches = [];
  const deadline = Date.now() + GLOB_MAX_MS;
  walkGlob(baseDir, baseDir, regex, matches, 0, deadline);
  const timedOut = Date.now() >= deadline;
  matches.sort();

  // Only prefix results when the walk started somewhere other than the CWD.
  const prefix = baseDir === cwd ? '' : baseDir;
  const listing = matches
    .map((rel) => path.join(prefix, rel).replace(/\\/g, '/'))
    .join('\n');

  let suffix = '';
  if (matches.length >= GLOB_MAX) {
    suffix = `\n(truncated at ${GLOB_MAX} results)`;
  } else if (timedOut) {
    suffix = `\n(truncated — walk exceeded ${GLOB_MAX_MS} ms)`;
  }

  const output = listing + suffix;
  return { output: output || '(no matches)', exitCode: 0, matchCount: matches.length };
}
|
|
162
|
+
|
|
163
|
+
module.exports = {
|
|
164
|
+
GLOB_INJECTION_RE,
|
|
165
|
+
GLOB_SKIP,
|
|
166
|
+
GLOB_MAX,
|
|
167
|
+
GLOB_MAX_DEPTH,
|
|
168
|
+
GLOB_MAX_MS,
|
|
169
|
+
isGlobHandleable,
|
|
170
|
+
globToRegex,
|
|
171
|
+
walkGlob,
|
|
172
|
+
handleGlobTool,
|
|
173
|
+
};
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Native handler for the Grep tool.
|
|
5
|
+
*
|
|
6
|
+
* Pure filesystem function: takes a regex pattern (+ optional path, glob, type,
|
|
7
|
+
* output_mode, context flags) and returns matches in one of three formats
|
|
8
|
+
* (files_with_matches | content | count). No dependency on the interceptor
|
|
9
|
+
* pipeline, Anthropic API, or shell execution.
|
|
10
|
+
*
|
|
11
|
+
* Extracted from src/runtime.js as Stage-2 Step 4 of the executor migration
|
|
12
|
+
* (see docs/ADAPTER-STAGE-2-MIGRATION.md). src/runtime.js re-exports these so
|
|
13
|
+
* existing consumers keep working unchanged.
|
|
14
|
+
*
|
|
15
|
+
* Imports `globToRegex` and `GLOB_SKIP` from the Glob handler — the Grep file
|
|
16
|
+
* filter shares the glob grammar, and both walks skip the same vendor dirs.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
|
|
22
|
+
const { MAX_OUTPUT } = require('./read');
|
|
23
|
+
const { globToRegex, GLOB_SKIP, GLOB_MAX_DEPTH, GLOB_MAX_MS } = require('./glob');
|
|
24
|
+
|
|
25
|
+
// ── Grep tool support ──────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
const GREP_MAX_RESULTS = 250; // default output cap — matches Claude Code head_limit default
|
|
28
|
+
const GREP_FILE_CAP = 10_000; // safety limit on files walked before stopping
|
|
29
|
+
|
|
30
|
+
// File-type → extension mapping, matching ripgrep's --type names.
|
|
31
|
+
const GREP_TYPE_EXTS = new Map([
|
|
32
|
+
['js', ['.js', '.mjs', '.cjs']],
|
|
33
|
+
['ts', ['.ts', '.tsx', '.mts', '.cts']],
|
|
34
|
+
['py', ['.py', '.pyi']],
|
|
35
|
+
['rust', ['.rs']],
|
|
36
|
+
['go', ['.go']],
|
|
37
|
+
['java', ['.java']],
|
|
38
|
+
['rb', ['.rb']],
|
|
39
|
+
['css', ['.css', '.scss', '.sass', '.less']],
|
|
40
|
+
['html', ['.html', '.htm']],
|
|
41
|
+
['json', ['.json', '.jsonc']],
|
|
42
|
+
['md', ['.md', '.mdx']],
|
|
43
|
+
['yaml', ['.yaml', '.yml']],
|
|
44
|
+
['sh', ['.sh', '.bash', '.zsh']],
|
|
45
|
+
['c', ['.c', '.h']],
|
|
46
|
+
['cpp', ['.cpp', '.cc', '.cxx', '.hpp', '.hh']],
|
|
47
|
+
]);
|
|
48
|
+
|
|
49
|
+
const VALID_GREP_MODES = new Set(['content', 'files_with_matches', 'count']);
|
|
50
|
+
|
|
51
|
+
/**
 * Decide whether a Grep tool input can be served by the native handler.
 *
 * Requires an object with a non-blank string `pattern`; optional `path`,
 * `glob` and `type` must be strings when present, `output_mode` must be one
 * of VALID_GREP_MODES, and `multiline: true` is refused because cross-line
 * matching is not implemented natively.
 *
 * @param {*} input - Raw tool input.
 * @returns {boolean} true when the native handler can take the call.
 */
function isGrepHandleable(input) {
  if (input === null || typeof input !== 'object') return false;

  const { pattern } = input;
  if (typeof pattern !== 'string' || pattern.trim() === '') return false;

  // Optional fields must be the right type when present.
  const optionalStrings = ['path', 'glob', 'type'];
  const stringsOk = optionalStrings.every(
    (key) => input[key] == null || typeof input[key] === 'string',
  );
  if (!stringsOk) return false;

  const mode = input.output_mode;
  if (mode != null && !VALID_GREP_MODES.has(mode)) return false;

  // Cross-line matching (rg -U) requires full-file regex — not supported natively.
  return input.multiline !== true;
}
|
|
64
|
+
|
|
65
|
+
// Read at most `maxBytes` from the start of a file without loading the rest.
// Throws on any fs error; the descriptor is always closed.
function readFilePrefix(absPath, maxBytes) {
  const fd = fs.openSync(absPath, 'r');
  try {
    const buf = Buffer.allocUnsafe(maxBytes);
    let filled = 0;
    // readSync may return fewer bytes than requested; loop until full or EOF.
    while (filled < maxBytes) {
      const n = fs.readSync(fd, buf, filled, maxBytes - filled, null);
      if (n === 0) break;
      filled += n;
    }
    return buf.subarray(0, filled);
  } finally {
    fs.closeSync(fd);
  }
}

/**
 * Read a file for grep.
 *
 * Returns the file's text (UTF-8), clipped to the first MAX_OUTPUT bytes.
 * Files larger than MAX_OUTPUT are read only up to that limit, so a huge file
 * on the search path cannot blow up memory.
 *
 * @param {string} absPath - File to read.
 * @returns {string|null} File text, or null for binary files (a NUL byte in
 *   the first 512 bytes) or on any read error.
 */
function tryReadGrep(absPath) {
  try {
    let buf;
    if (fs.statSync(absPath).size > MAX_OUTPUT) {
      buf = readFilePrefix(absPath, MAX_OUTPUT);
    } else {
      buf = fs.readFileSync(absPath);
      // The file may have grown between stat and read; keep the cap exact.
      if (buf.length > MAX_OUTPUT) buf = buf.subarray(0, MAX_OUTPUT);
    }
    if (buf.subarray(0, 512).includes(0)) return null; // binary file — skip
    return buf.toString('utf8');
  } catch {
    return null; // unreadable — the caller treats this as "skip"
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Recursively collect absolute file paths under `dir` into `results`,
 * keeping only files that pass the optional extension and glob filters.
 *
 * The walk stops once `results` holds GREP_FILE_CAP entries, `depth` reaches
 * GLOB_MAX_DEPTH, or the wall clock passes `deadline`. Entries named in
 * GLOB_SKIP are ignored, and unreadable directories are skipped silently.
 *
 * @param {string} dir - Directory to scan on this call.
 * @param {string} baseDir - Root used for relative-path glob matching.
 * @param {RegExp|null} globRegex - File filter, or null for none.
 * @param {boolean} globHasDir - When true the glob is tested against the path
 *   relative to `baseDir`; otherwise against the file's basename.
 * @param {string[]|null} typeExts - Allowed lower-case extensions, or null.
 * @param {string[]} results - Output array, mutated in place.
 * @param {number} [depth=0] - Current recursion depth.
 * @param {number} [deadline=Infinity] - Epoch-ms cutoff for the walk.
 */
function walkGrepFiles(dir, baseDir, globRegex, globHasDir, typeExts, results, depth = 0, deadline = Infinity) {
  const exhausted = () => results.length >= GREP_FILE_CAP || Date.now() >= deadline;
  if (depth >= GLOB_MAX_DEPTH || exhausted()) return;

  let listing;
  try {
    listing = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return; // unreadable directory — nothing to contribute
  }

  // True when a file passes both the extension filter and the glob filter.
  const accepts = (filePath) => {
    if (typeExts && !typeExts.includes(path.extname(filePath).toLowerCase())) return false;
    if (!globRegex) return true;
    // Glob patterns with path separators match against the relative path;
    // plain filename globs (e.g. "*.ts") match against the basename only.
    const subject = globHasDir
      ? path.relative(baseDir, filePath).replace(/\\/g, '/')
      : path.basename(filePath);
    return globRegex.test(subject);
  };

  for (const item of listing) {
    if (exhausted()) return;
    if (GLOB_SKIP.has(item.name)) continue;

    const full = path.join(dir, item.name);
    if (item.isDirectory()) {
      walkGrepFiles(full, baseDir, globRegex, globHasDir, typeExts, results, depth + 1, deadline);
    } else if (accepts(full)) {
      results.push(full);
    }
  }
}
|
|
103
|
+
|
|
104
|
+
// Merge per-match context windows into sorted, non-overlapping [start, end]
// line-index ranges. `hits` must be ascending; adjacent windows are joined.
function mergeContextWindows(hits, linesBefore, linesAfter, lineTotal) {
  const groups = [];
  let gs = -1;
  let ge = -1;
  for (const idx of hits) {
    const s = Math.max(0, idx - linesBefore);
    const e = Math.min(lineTotal - 1, idx + linesAfter);
    if (gs === -1) { gs = s; ge = e; }
    else if (s <= ge + 1) { ge = Math.max(ge, e); }
    else { groups.push([gs, ge]); gs = s; ge = e; }
  }
  if (gs !== -1) groups.push([gs, ge]);
  return groups;
}

/**
 * Execute a structured Grep tool call locally.
 *
 * Supports: pattern, path, glob, type, output_mode (files_with_matches | content | count),
 * -i (case-insensitive), -C / context / -A / -B (context lines), head_limit, offset.
 *
 * Matching is line-oriented in every output mode, so anchors (`^`, `$`) mean
 * start/end of a line regardless of mode.
 *
 * Does NOT support multiline (cross-line regex) — isGrepHandleable rejects those.
 *
 * @param {object} input - Grep tool input (see the supported fields above).
 * @returns {{output: string, exitCode: number, matchCount: number}}
 *   exitCode is 1 for a missing/invalid pattern or an inaccessible path.
 *   matchCount is the number of matching files (files_with_matches) or
 *   matching lines (content, count) seen before the scan stopped.
 */
function handleGrepTool(input) {
  const pattern = (typeof input?.pattern === 'string' ? input.pattern : '').trim();
  if (!pattern) return { output: '(no pattern provided)', exitCode: 1, matchCount: 0 };

  const searchRoot = typeof input?.path === 'string' && input.path
    ? path.resolve(process.cwd(), input.path)
    : process.cwd();

  const outputMode = input?.output_mode || 'files_with_matches';
  const caseInsens = input?.['-i'] === true;

  // Context sizes are used as line-index offsets, so only non-negative
  // numbers are usable; anything else falls back to the next default.
  const lineCount = (value, fallback) =>
    (typeof value === 'number' && value >= 0 ? Math.floor(value) : fallback);
  const contextN = lineCount(input?.['-C'], lineCount(input?.context, 0));
  const linesBefore = lineCount(input?.['-B'], contextN);
  const linesAfter = lineCount(input?.['-A'], contextN);

  const headLimit = typeof input?.head_limit === 'number' && input.head_limit > 0
    ? Math.min(input.head_limit, GREP_MAX_RESULTS)
    : GREP_MAX_RESULTS;
  const skipLines = typeof input?.offset === 'number' && input.offset > 0 ? input.offset : 0;

  // No `g` flag: the regex is only ever used with .test() on single lines,
  // and a stateless regex needs no lastIndex bookkeeping.
  let regex;
  try {
    regex = new RegExp(pattern, caseInsens ? 'i' : '');
  } catch (e) {
    return { output: `Grep: invalid pattern: ${e.message}`, exitCode: 1, matchCount: 0 };
  }
  const lineMatches = (line) => regex.test(line);

  // Build type extension filter.
  let typeExts = null;
  if (typeof input?.type === 'string' && input.type) {
    const t = input.type.toLowerCase();
    typeExts = GREP_TYPE_EXTS.get(t) || [t.startsWith('.') ? t : `.${t}`];
  }

  // Build glob file filter.
  let globRegex = null;
  let globHasDir = false;
  if (input?.glob) {
    try {
      globRegex = globToRegex(input.glob);
      globHasDir = input.glob.includes('/') || input.glob.includes('**');
    } catch { /* ignore invalid glob — no filter applied */ }
  }

  // Collect candidate files.
  const files = [];
  const deadline = Date.now() + GLOB_MAX_MS;
  try {
    const stat = fs.statSync(searchRoot);
    if (stat.isFile()) {
      files.push(searchRoot);
    } else {
      walkGrepFiles(searchRoot, searchRoot, globRegex, globHasDir, typeExts, files, 0, deadline);
      files.sort();
    }
  } catch (e) {
    return { output: `Grep: cannot access path: ${e.message}`, exitCode: 1, matchCount: 0 };
  }

  const outputLines = [];
  let totalMatches = 0;
  let limitHit = false; // stopped because offset + head_limit lines were collected
  let timedOut = false; // stopped because the wall-clock budget ran out
  const lineCap = skipLines + headLimit;
  // wantMore also enforces the per-call wall-clock budget so the
  // file-read+match loop can't blow past it even if walkGrepFiles already
  // collected thousands of paths before the walk-deadline tripped. It records
  // WHY it said no, so the caller gets the right truncation notice.
  const wantMore = () => {
    if (outputLines.length >= lineCap) { limitHit = true; return false; }
    if (Date.now() >= deadline) { timedOut = true; return false; }
    return true;
  };
  const relOf = (abs) => path.relative(searchRoot, abs).replace(/\\/g, '/') || path.basename(abs);

  for (const absFile of files) {
    if (!wantMore()) break;
    const content = tryReadGrep(absFile);
    if (!content) continue;
    const fileLines = content.split('\n');
    const fileLabel = relOf(absFile);

    if (outputMode === 'files_with_matches') {
      if (fileLines.some(lineMatches)) { totalMatches++; outputLines.push(fileLabel); }
      continue;
    }

    if (outputMode === 'count') {
      const count = fileLines.filter(lineMatches).length;
      if (count > 0) { totalMatches += count; outputLines.push(`${fileLabel}:${count}`); }
      continue;
    }

    // content mode
    const hits = [];
    for (let i = 0; i < fileLines.length; i++) {
      if (lineMatches(fileLines[i])) hits.push(i);
    }
    if (hits.length === 0) continue;
    totalMatches += hits.length;

    const hitSet = new Set(hits);
    const groups = mergeContextWindows(hits, linesBefore, linesAfter, fileLines.length);
    let firstGroup = true;
    for (const [gStart, gEnd] of groups) {
      if (!wantMore()) break;
      if (!firstGroup) outputLines.push('--');
      firstGroup = false;
      for (let i = gStart; i <= gEnd && wantMore(); i++) {
        const sep = hitSet.has(i) ? ':' : '-';
        outputLines.push(`${fileLabel}${sep}${i + 1}${sep}${fileLines[i]}`);
      }
    }
  }

  const sliced = outputLines.slice(skipLines, skipLines + headLimit);
  const text = sliced.join('\n') || '(no matches)';
  let suffix = '';
  if (limitHit) {
    suffix = '\n(truncated — use head_limit/offset to paginate)';
  } else if (timedOut || Date.now() >= deadline) {
    suffix = `\n(truncated — walk exceeded ${GLOB_MAX_MS} ms)`;
  }
  return { output: text + suffix, exitCode: 0, matchCount: totalMatches };
}
|
|
248
|
+
|
|
249
|
+
module.exports = {
|
|
250
|
+
GREP_MAX_RESULTS,
|
|
251
|
+
GREP_FILE_CAP,
|
|
252
|
+
GREP_TYPE_EXTS,
|
|
253
|
+
VALID_GREP_MODES,
|
|
254
|
+
isGrepHandleable,
|
|
255
|
+
tryReadGrep,
|
|
256
|
+
walkGrepFiles,
|
|
257
|
+
handleGrepTool,
|
|
258
|
+
};
|