@occasiolabs/occasio 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/docs/ADAPTER-STAGE-2-MIGRATION.md +59 -0
  2. package/docs/STAGE-2-STEP-5-SHELL-PLAN.md +107 -0
  3. package/docs/THREAT-MODEL.md +195 -0
  4. package/docs/edr-calibration.md +29 -0
  5. package/package.json +7 -3
  6. package/src/adapters/claude-code.js +1 -2
  7. package/src/adapters/computer-use.js +1 -1
  8. package/src/anomaly/cli.js +4 -1
  9. package/src/anomaly/detectors/deny-rate.js +2 -1
  10. package/src/anomaly/detectors/file-read-volume.js +2 -1
  11. package/src/anomaly/index.js +5 -0
  12. package/src/boundary.js +1 -1
  13. package/src/classifier.js +1 -1
  14. package/src/cli/clear.js +4 -4
  15. package/src/cli/help.js +58 -37
  16. package/src/cli/status.js +1 -1
  17. package/src/dashboard.js +2 -3
  18. package/src/distiller.js +1 -1
  19. package/src/executor/dispatcher.js +2 -2
  20. package/src/executor/native-handlers/glob.js +173 -0
  21. package/src/executor/native-handlers/grep.js +258 -0
  22. package/src/executor/native-handlers/read.js +99 -0
  23. package/src/executor/native-handlers/todo.js +56 -0
  24. package/src/harness.js +8 -10
  25. package/src/index.js +13 -15
  26. package/src/inspect.js +1 -1
  27. package/src/interceptor.js +9 -29
  28. package/src/ledger.js +2 -3
  29. package/src/mcp-experiment.js +4 -4
  30. package/src/mcp-server.js +3 -3
  31. package/src/policy/doctor.js +2 -2
  32. package/src/policy/engine.js +0 -1
  33. package/src/policy/init.js +1 -1
  34. package/src/policy/loader.js +3 -3
  35. package/src/policy/show.js +1 -2
  36. package/src/preflight/cli.js +0 -1
  37. package/src/preflight/miner.js +3 -6
  38. package/src/redteam.js +1 -2
  39. package/src/replay.js +1 -1
  40. package/src/report/index.js +0 -4
  41. package/src/runtime.js +42 -444
  42. package/src/selftest.js +1 -1
  43. package/src/session.js +1 -1
package/src/cli/help.js CHANGED
@@ -1,6 +1,11 @@
1
1
  // `occasio help` — top-level usage. Pure text; no side effects other
2
2
  // than console.log. Each CLI command lives in its own file under
3
3
  // src/cli/ as part of the index.js decomposition (see CHANGELOG).
4
+ //
5
+ // Maturity tags follow the bewertung pillars:
6
+ // (stable) — load-bearing, has test coverage and field validation
7
+ // (beta) — works end-to-end but missing breadth (one detector, one preset)
8
+ // (alpha) — scaffold; needs operator calibration before relying on it
4
9
 
5
10
  'use strict';
6
11
 
@@ -19,42 +24,58 @@ function run() {
19
24
  console.log(`
20
25
  ${col.b(`⚡ Occasio v${VERSION}`)}
21
26
 
22
- ${col.b('Usage:')}
23
- occasio claude [args...] Start Claude with local proxy (intercept + log)
24
- occasio demo 10-second proof: see Occasio block real secrets
25
- occasio demo attest End-to-end attestation pipeline against a synthetic audit chain
26
- occasio demo anomalies End-to-end EDR test: synthetic adversarial chain → all 4 detectors
27
- occasio dashboard Open live dashboard for the running session
28
- occasio register Register shell alias (type 'claude' directly)
29
- occasio status Show session stats and savings breakdown
30
- occasio doctor Check setup: Node, claude CLI, port, Python, profile
31
- occasio clear Reset today's log and session data
32
- occasio clear --history Wipe all historical logs
33
- occasio ledger Inspect token ledger (--last N, --summary, --scope session|today)
34
- occasio replay Replay run audit (--last N, --detail, --run <id>, --attribute)
35
- occasio distill Inspect distilled outputs (--last N, --entry <N> for raw)
36
- occasio inspect Cloud-boundary manifest (--last N, --entry N, --run <id>)
37
- occasio boundary Per-request three-column view: produced / re-entered / prevented
38
- occasio baseline Behavior baseline: [learn|show|compare|reset] (per project cwd)
39
- occasio harness Run a real Claude Code session against scratch fixtures and verify governance claims (needs ANTHROPIC_API_KEY)
40
- occasio redteam Autonomous adversarial test — tester LLM probes a subject Claude Code session under Occasio (needs ANTHROPIC_API_KEY + @anthropic-ai/sdk)
41
- occasio policy [show] Show active policy: flags, tool routing, overrides
42
- occasio policy show --diff Only values that differ from defaults
43
- occasio policy validate Validate policy.yml and report errors/warnings
44
- occasio policy init Create a starter policy.yml (safe, non-destructive)
45
- Use --template strict|finance for a non-default starter
46
- occasio policy doctor Cross-reference session logs with policy; surface suggestions
47
- occasio audit [verify] Verify tamper-evident hash chain in pipeline-events.jsonl
48
- occasio audit repair Truncate a crash-partial trailing line (--file <path> [--dry-run])
49
- occasio report Governance export: file access log, blocked paths, secret events
50
- occasio anomalies Live anomaly detection over the audit chain (--window 15m, --json)
51
- occasio computer-use Apply a Computer-Use policy to a JSONL of tool_use blocks (--dry-run --example)
52
- occasio attest --run-id <uuid> AI-Agent Behavioral Attestation v1: hash-chain commitment + execution summary for one run
53
- Add --sign in GitHub Actions (with permissions: id-token: write) for Sigstore keyless signing
54
- occasio attest verify <file> Re-verify a signed attestation: Sigstore bundle + DSSE payload match + audit chain integrity
55
- occasio selftest Run governance self-checks on a scratch chain (does not touch your audit log)
56
- occasio report --format csv CSV export for auditors / SIEM import
57
- occasio mcp-experiment MCP vs. built-in tool adoption stats (experiment)
27
+ ${col.b('60-Second Start:')}
28
+ ${col.c('occasio init')} Create policy.yml from a template
29
+ ${col.c('occasio register')} Install shell alias so 'claude' uses the proxy
30
+ ${col.c('claude --version')} Confirm the wrapper resolves Claude Code
31
+
32
+ ${col.b('Usage:')} occasio <command> [args...] (or oc <command>)
33
+
34
+ ${col.b('Setup')} ${col.d('— one-time, per project')}
35
+ init ${col.d('(stable)')} Create starter policy.yml (--template strict|finance)
36
+ register ${col.d('(stable)')} Register shell alias (type 'claude' directly)
37
+ doctor ${col.d('(stable)')} Check setup: Node, claude CLI, port, Python, profile
38
+
39
+ ${col.b('Run')} ${col.d('— start a session, observe live state')}
40
+ claude [args...] ${col.d('(stable)')} Start Claude with local proxy (intercept + log)
41
+ status ${col.d('(stable)')} Session stats, savings breakdown, coverage
42
+ clear ${col.d('(stable)')} Reset today's log and session data
43
+ clear --history ${col.d('(stable)')} Wipe all historical logs
44
+ ledger ${col.d('(stable)')} Inspect token ledger (--last N, --summary, --scope)
45
+ dashboard ${col.d('(beta)')} Open live dashboard at http://localhost:3001
46
+
47
+ ${col.b('Inspect')} ${col.d('— forensics over what the agent did')}
48
+ replay ${col.d('(stable)')} Replay run audit (--last N, --detail, --run <id>)
49
+ boundary ${col.d('(stable)')} Per-request: produced / re-entered / prevented
50
+ inspect ${col.d('(stable)')} Cloud-boundary manifest (--last N, --entry N)
51
+ distill ${col.d('(stable)')} Inspect distilled outputs (--last N, --entry <N>)
52
+ report ${col.d('(stable)')} Governance export (--format csv for SIEM)
53
+ preflight ${col.d('(beta)')} Read-only miner over past logs
54
+ baseline ${col.d('(beta)')} Behavior baseline: [learn|show|compare|reset]
55
+
56
+ ${col.b('Audit')} ${col.d('— tamper-evidence and attestation')}
57
+ audit verify ${col.d('(stable)')} Verify hash chain in pipeline-events.jsonl
58
+ audit repair ${col.d('(stable)')} Truncate crash-partial trailing line (--file --dry-run)
59
+ attest --run-id <uuid> ${col.d('(stable)')} Behavioral attestation: hash-chain + execution summary
60
+ ${col.d('Add --sign in GitHub Actions for Sigstore keyless signing')}
61
+ attest verify <file> ${col.d('(stable)')} Re-verify signed attestation (bundle + DSSE + chain)
62
+ selftest ${col.d('(stable)')} Run governance self-checks on scratch chain
63
+
64
+ ${col.b('Detect')} ${col.d('— anomalies, adversarial probes')}
65
+ anomalies ${col.d('(beta)')} Windowed EDR over the audit chain (--window 15m --json)
66
+ harness ${col.d('(alpha)')} Real Claude Code run vs. governance claims (API key required)
67
+ redteam ${col.d('(alpha)')} Autonomous adversarial probe (API key + SDK required)
68
+
69
+ ${col.b('Policy & extras')}
70
+ policy [show] ${col.d('(stable)')} Show active policy: flags, routing, overrides
71
+ policy show --diff ${col.d('(stable)')} Only values that differ from defaults
72
+ policy validate ${col.d('(stable)')} Validate policy.yml and report errors/warnings
73
+ policy doctor ${col.d('(beta)')} Cross-reference logs with policy; suggest tightening
74
+ computer-use ${col.d('(alpha)')} Apply policy to a JSONL of tool_use blocks (--dry-run --example)
75
+ mcp-experiment ${col.d('(beta)')} MCP vs. built-in tool adoption stats
76
+ demo ${col.d('(stable)')} 10-second proof: see Occasio block real secrets
77
+ demo attest ${col.d('(stable)')} End-to-end attestation pipeline against a synthetic chain
78
+ demo anomalies ${col.d('(stable)')} End-to-end EDR test: synthetic adversarial chain
58
79
 
59
80
  ${col.b('Presets:')}
60
81
  --preset balanced (default) Intercept safe reads locally, log all requests
@@ -68,7 +89,7 @@ ${col.b('Flags:')}
68
89
  --log-only Alias for --preset off
69
90
  --dashboard Open live dashboard at http://localhost:3001
70
91
  --port <N> Proxy port (default: 8081)
71
- --verbose Print live per-request chatter (off by default — quiet for Claude Code's TUI)
92
+ --verbose Print live per-request chatter (off by default)
72
93
 
73
94
  ${col.b('Multi-agent routing:')}
74
95
  Default → Claude Code adapter
package/src/cli/status.js CHANGED
@@ -26,7 +26,7 @@ function todayStr() {
26
26
  function getLogFile() { return path.join(LOG_DIR, 'logs', `${todayStr()}.jsonl`); }
27
27
 
28
28
  function run() {
29
- let s = null; try { s = JSON.parse(fs.readFileSync(SESSION_FILE, 'utf8')); } catch {}
29
+ let s = null; try { s = JSON.parse(fs.readFileSync(SESSION_FILE, 'utf8')); } catch { /* ignore */ }
30
30
  console.log(col.b('\n⚡ Occasio\n'));
31
31
  if (!s) { console.log(col.d(' No session data yet. Run: occasio claude\n')); return; }
32
32
 
package/src/dashboard.js CHANGED
@@ -16,7 +16,6 @@ const path = require('path');
16
16
  const os = require('os');
17
17
 
18
18
  const DASHBOARD_PORT = 3001;
19
- const PROXY_PORT = 8081;
20
19
  const LOG_DIR = path.join(os.homedir(), '.occasio');
21
20
  const SESSION_FILE = path.join(LOG_DIR, 'session.json');
22
21
 
@@ -97,8 +96,8 @@ const server = http.createServer((req, res) => {
97
96
  }
98
97
 
99
98
  if (req.url === '/api/clear' && req.method === 'POST') {
100
- try { fs.writeFileSync(todayLogFile(), ''); } catch {}
101
- try { fs.writeFileSync(SESSION_FILE, '{}'); } catch {}
99
+ try { fs.writeFileSync(todayLogFile(), ''); } catch { /* ignore */ }
100
+ try { fs.writeFileSync(SESSION_FILE, '{}'); } catch { /* ignore */ }
102
101
  res.writeHead(200, { 'Content-Type': 'application/json' });
103
102
  res.end('{"ok":true}');
104
103
  broadcast({ type: 'update', session: {}, entries: [] });
package/src/distiller.js CHANGED
@@ -120,7 +120,7 @@ const FAIL_RE = /\b(FAIL|FAILED|ERROR|error:|✗|×|AssertionError|not ok|ERRORE
120
120
  * Keeps all failure-related lines (plus 1 line of context each side) and the
121
121
  * last 15 lines (usually the summary). Clips total to TEST_MAX_LINES.
122
122
  */
123
- function distillTestOutput(output, rawBytes, cmd) {
123
+ function distillTestOutput(output, rawBytes, _cmd) {
124
124
  const lines = output.split('\n');
125
125
  const none = { content: output, distilled: false, savedTokens: 0, label: '', rawBytes, rawContent: null };
126
126
  if (lines.length <= TEST_MAX_LINES) return none;
@@ -44,7 +44,7 @@ const NATIVE_HANDLERS = {
44
44
  // but nativeHandle returned null, fall back to the exec subprocess. The
45
45
  // returned `native` field tells the caller which path was taken.
46
46
  [CANONICAL.SHELL_BASH]: async (input) => {
47
- const cmd = (input?.command || '').trim();
47
+ const cmd = (typeof input?.command === 'string' ? input.command : '').trim();
48
48
  if (!cmd) return null;
49
49
  const nr = nativeHandle(cmd);
50
50
  if (nr !== null) {
@@ -63,7 +63,7 @@ const NATIVE_HANDLERS = {
63
63
  // then native-only execution. expandedCmd is returned so the caller can
64
64
  // record the actually-executed command in toolsRun.
65
65
  [CANONICAL.SHELL_POWERSHELL]: (input) => {
66
- const rawCmd = (input?.command || '').trim();
66
+ const rawCmd = (typeof input?.command === 'string' ? input.command : '').trim();
67
67
  if (!rawCmd) return null;
68
68
  const cmd = expandPsEnvVars(rawCmd);
69
69
  const nr = nativeHandle(cmd);
@@ -0,0 +1,173 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Native handler for the Glob tool.
5
+ *
6
+ * Pure filesystem function: takes a glob pattern (+ optional base path) and
7
+ * returns a sorted list of matching paths. No dependency on the interceptor
8
+ * pipeline, Anthropic API, or shell execution. Safe to import in any process
9
+ * context.
10
+ *
11
+ * Extracted from src/runtime.js as Stage-2 Step 3 of the executor migration
12
+ * (see docs/ADAPTER-STAGE-2-MIGRATION.md). src/runtime.js re-exports these
13
+ * so existing consumers keep working unchanged.
14
+ */
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+
19
// ── Glob tool support ──────────────────────────────────────────────────────────

/**
 * Read a positive-integer tunable from the environment.
 *
 * @param {string} name      Environment variable to read.
 * @param {number} fallback  Value used when the variable is unset or unusable.
 * @returns {number} The variable floored to an integer when it parses to a
 *   finite number >= 1, otherwise `fallback`.
 *
 * A plain `Number(env) || fallback` accepts negative values, which would make
 * every depth/deadline guard trip immediately and silently disable the walk,
 * so anything that is not a usable positive number falls back to the default.
 */
function envPositiveInt(name, fallback) {
  const parsed = Number(process.env[name]);
  return Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : fallback;
}

// Characters that indicate shell injection in a glob pattern.
// We reject patterns containing these so handleGlobTool stays read-only.
const GLOB_INJECTION_RE = /[;&|`$<>!]/;

// Directories skipped during recursive glob walks.
const GLOB_SKIP = new Set(['node_modules', '.git', '.hg', '.svn', 'dist', 'build', '__pycache__', '.venv', 'venv']);

// Maximum number of matches returned to avoid overwhelming the model context.
const GLOB_MAX = 500;

// Maximum recursion depth from baseDir. Hard cap on path-traversal DoS
// (a fuzz-discovered class — see THREAT-MODEL.md residual risk #5).
// Tunable via env for special-case repos; invalid values fall back to 16.
const GLOB_MAX_DEPTH = envPositiveInt('OCCASIO_GLOB_MAX_DEPTH', 16);

// Soft wall-clock limit per walk in ms. Stops a walk that strayed onto a huge
// subtree (e.g. agent globbed up from /) before it burns seconds. Stop is
// best-effort — the caller still receives whatever was collected so far.
// Tunable via env; invalid values fall back to 2000.
const GLOB_MAX_MS = envPositiveInt('OCCASIO_GLOB_MAX_MS', 2_000);
40
+
41
+ function isGlobHandleable(input) {
42
+ if (!input || typeof input !== 'object') return false;
43
+ const pattern = input.pattern;
44
+ if (!pattern || typeof pattern !== 'string' || !pattern.trim()) return false;
45
+ if (GLOB_INJECTION_RE.test(pattern)) return false;
46
+ if (input.path != null && typeof input.path !== 'string') return false;
47
+ return true;
48
+ }
49
+
50
+ // Escape regex metacharacters in a literal string segment.
51
+ function escapeRegexChars(s) {
52
+ return s.replace(/[.+^${}()|[\]\\]/g, '\\$&');
53
+ }
54
+
55
+ /**
56
+ * Convert a glob pattern to a RegExp.
57
+ * Supports: ** (any path depth), * (single segment), ? (single char),
58
+ * {ts,tsx} (alternation), [abc] (character classes).
59
+ * Exported for unit testing.
60
+ */
61
+ function globToRegex(pattern) {
62
+ // Normalise Windows separators in the pattern.
63
+ const p = pattern.replace(/\\/g, '/');
64
+
65
+ let re = '';
66
+ let i = 0;
67
+ while (i < p.length) {
68
+ // ** — match any path segments (including none), consuming the trailing /
69
+ if (p[i] === '*' && p[i + 1] === '*') {
70
+ re += '.*';
71
+ i += 2;
72
+ if (p[i] === '/') i++; // consume separator after **
73
+ continue;
74
+ }
75
+ // * — match within a single path segment
76
+ if (p[i] === '*') { re += '[^/]*'; i++; continue; }
77
+ // ? — match a single character within a segment
78
+ if (p[i] === '?') { re += '[^/]'; i++; continue; }
79
+ // {a,b,c} — alternation
80
+ if (p[i] === '{') {
81
+ const end = p.indexOf('}', i);
82
+ if (end !== -1) {
83
+ const alts = p.slice(i + 1, end).split(',').map(escapeRegexChars);
84
+ re += `(?:${alts.join('|')})`;
85
+ i = end + 1;
86
+ continue;
87
+ }
88
+ }
89
+ // [abc] / [^abc] — pass character classes through verbatim
90
+ if (p[i] === '[') {
91
+ const end = p.indexOf(']', i);
92
+ if (end !== -1) { re += p.slice(i, end + 1); i = end + 1; continue; }
93
+ }
94
+ re += escapeRegexChars(p[i]);
95
+ i++;
96
+ }
97
+
98
+ // On Windows, matching is case-insensitive; on POSIX it's case-sensitive.
99
+ const flags = process.platform === 'win32' ? 'i' : '';
100
+ return new RegExp(`^${re}$`, flags);
101
+ }
102
+
103
/**
 * Depth-first walk of `dir` that appends every non-directory entry whose
 * path (relative to `baseDir`, forward slashes) satisfies `regex`.
 *
 * The walk stops early once `results` holds GLOB_MAX entries, `depth`
 * reaches GLOB_MAX_DEPTH, or the clock passes `deadline`. Unreadable
 * directories and names listed in GLOB_SKIP are skipped silently.
 *
 * @param {string} dir       Directory to list.
 * @param {string} baseDir   Root that result paths are made relative to.
 * @param {RegExp} regex     Matcher applied to each relative file path.
 * @param {string[]} results Output array, mutated in place.
 * @param {number} [depth=0] Current recursion depth.
 * @param {number} [deadline=Infinity] Epoch-ms cut-off for the walk.
 */
function walkGlob(dir, baseDir, regex, results, depth = 0, deadline = Infinity) {
  const budgetSpent = () => results.length >= GLOB_MAX || Date.now() >= deadline;
  if (budgetSpent() || depth >= GLOB_MAX_DEPTH) return;

  let children;
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const child of children) {
    if (budgetSpent()) return;
    if (GLOB_SKIP.has(child.name)) continue;

    const childPath = path.join(dir, child.name);
    if (child.isDirectory()) {
      walkGlob(childPath, baseDir, regex, results, depth + 1, deadline);
      continue;
    }

    // Forward slashes keep matching consistent on every platform.
    const relPath = path.relative(baseDir, childPath).replace(/\\/g, '/');
    if (regex.test(relPath)) results.push(relPath);
  }
}
129
+
130
/**
 * Resolve a glob pattern (+ optional base path) to a sorted list of matches.
 *
 * Path reporting: when no `path` is given (or it resolves to the current
 * working directory) matches are listed relative to the CWD; otherwise each
 * match is prefixed with the resolved base directory. Separators in the
 * output are always forward slashes.
 *
 * @param {{pattern?: string, path?: string}} input  Glob tool input.
 * @returns {{output: string, exitCode: number, matchCount: number}}
 *   `exitCode` is 1 (with an explanatory `output`) when the pattern is
 *   missing/invalid or the base path is not a usable directory; otherwise 0.
 *   `output` carries a trailing note when the result list hit GLOB_MAX or the
 *   walk ran past GLOB_MAX_MS.
 */
function handleGlobTool(input) {
  const fail = (output) => ({ output, exitCode: 1, matchCount: 0 });

  const pattern = (typeof input?.pattern === 'string' ? input.pattern : '').trim();
  if (!pattern) return fail('(no pattern provided)');

  // path.resolve throws on non-string arguments; report it instead of crashing.
  const requestedPath = input?.path;
  if (requestedPath != null && typeof requestedPath !== 'string') {
    return fail('Glob: path must be a string');
  }

  const cwd = process.cwd();
  const baseDir = requestedPath ? path.resolve(cwd, requestedPath) : cwd;

  // Surface an unusable base path explicitly (mirrors the Grep handler)
  // rather than letting the walk swallow the error and report "(no matches)".
  try {
    if (!fs.statSync(baseDir).isDirectory()) {
      return fail(`Glob: not a directory: ${baseDir}`);
    }
  } catch (e) {
    return fail(`Glob: cannot access path: ${e.message}`);
  }

  let regex;
  try {
    regex = globToRegex(pattern);
  } catch (e) {
    return fail(`Glob: invalid pattern: ${e.message}`);
  }

  const results = [];
  const deadline = Date.now() + GLOB_MAX_MS;
  walkGlob(baseDir, baseDir, regex, results, 0, deadline);
  const timedOut = Date.now() >= deadline;
  results.sort();

  const truncated = results.length >= GLOB_MAX;
  const prefix = baseDir !== cwd ? baseDir : '';
  const lines = results.map((r) => path.join(prefix, r).replace(/\\/g, '/'));

  let suffix = '';
  if (truncated) suffix = `\n(truncated at ${GLOB_MAX} results)`;
  else if (timedOut) suffix = `\n(truncated — walk exceeded ${GLOB_MAX_MS} ms)`;

  const output = lines.join('\n') + suffix;
  return { output: output || '(no matches)', exitCode: 0, matchCount: results.length };
}
162
+
163
+ module.exports = {
164
+ GLOB_INJECTION_RE,
165
+ GLOB_SKIP,
166
+ GLOB_MAX,
167
+ GLOB_MAX_DEPTH,
168
+ GLOB_MAX_MS,
169
+ isGlobHandleable,
170
+ globToRegex,
171
+ walkGlob,
172
+ handleGlobTool,
173
+ };
@@ -0,0 +1,258 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Native handler for the Grep tool.
5
+ *
6
+ * Pure filesystem function: takes a regex pattern (+ optional path, glob, type,
7
+ * output_mode, context flags) and returns matches in one of three formats
8
+ * (files_with_matches | content | count). No dependency on the interceptor
9
+ * pipeline, Anthropic API, or shell execution.
10
+ *
11
+ * Extracted from src/runtime.js as Stage-2 Step 4 of the executor migration
12
+ * (see docs/ADAPTER-STAGE-2-MIGRATION.md). src/runtime.js re-exports these so
13
+ * existing consumers keep working unchanged.
14
+ *
15
+ * Imports `globToRegex` and `GLOB_SKIP` from the Glob handler — the Grep file
16
+ * filter shares the glob grammar, and both walks skip the same vendor dirs.
17
+ */
18
+
19
+ const fs = require('fs');
20
+ const path = require('path');
21
+
22
+ const { MAX_OUTPUT } = require('./read');
23
+ const { globToRegex, GLOB_SKIP, GLOB_MAX_DEPTH, GLOB_MAX_MS } = require('./glob');
24
+
25
+ // ── Grep tool support ──────────────────────────────────────────────────────────
26
+
27
+ const GREP_MAX_RESULTS = 250; // default output cap — matches Claude Code head_limit default
28
+ const GREP_FILE_CAP = 10_000; // safety limit on files walked before stopping
29
+
30
+ // File-type → extension mapping, matching ripgrep's --type names.
31
+ const GREP_TYPE_EXTS = new Map([
32
+ ['js', ['.js', '.mjs', '.cjs']],
33
+ ['ts', ['.ts', '.tsx', '.mts', '.cts']],
34
+ ['py', ['.py', '.pyi']],
35
+ ['rust', ['.rs']],
36
+ ['go', ['.go']],
37
+ ['java', ['.java']],
38
+ ['rb', ['.rb']],
39
+ ['css', ['.css', '.scss', '.sass', '.less']],
40
+ ['html', ['.html', '.htm']],
41
+ ['json', ['.json', '.jsonc']],
42
+ ['md', ['.md', '.mdx']],
43
+ ['yaml', ['.yaml', '.yml']],
44
+ ['sh', ['.sh', '.bash', '.zsh']],
45
+ ['c', ['.c', '.h']],
46
+ ['cpp', ['.cpp', '.cc', '.cxx', '.hpp', '.hh']],
47
+ ]);
48
+
49
+ const VALID_GREP_MODES = new Set(['content', 'files_with_matches', 'count']);
50
+
51
/**
 * Decide whether a Grep tool input can be served by the native handler.
 *
 * @param {*} input  Raw tool input.
 * @returns {boolean} true only when `pattern` is a non-blank string, the
 *   optional `path` / `glob` / `type` fields are strings when present,
 *   `output_mode` (when present) is one of VALID_GREP_MODES, and
 *   `multiline` is not `true`.
 */
function isGrepHandleable(input) {
  if (input === null || typeof input !== 'object') return false;

  const { pattern } = input;
  if (typeof pattern !== 'string' || pattern.trim().length === 0) return false;

  // Optional fields must be the right type when present.
  const optionalStringFields = ['path', 'glob', 'type'];
  const stringsOk = optionalStringFields.every(
    (field) => input[field] == null || typeof input[field] === 'string',
  );
  if (!stringsOk) return false;

  const mode = input.output_mode;
  if (mode != null && !VALID_GREP_MODES.has(mode)) return false;

  // Cross-line matching (rg -U) requires full-file regex — not supported natively.
  return input.multiline !== true;
}
64
+
65
// Load a file's text for grepping. Yields null when the file cannot be read
// or when a NUL byte appears in its first 512 bytes (treated as binary).
// Content longer than MAX_OUTPUT bytes is clipped before UTF-8 decoding.
function tryReadGrep(absPath) {
  try {
    const raw = fs.readFileSync(absPath);
    const looksBinary = raw.subarray(0, 512).includes(0);
    if (looksBinary) return null;
    const clipped = raw.length > MAX_OUTPUT ? raw.subarray(0, MAX_OUTPUT) : raw;
    return clipped.toString('utf8');
  } catch {
    return null;
  }
}
73
+
74
// Recursively gather absolute paths of files under `dir` into `results`,
// keeping only files that pass the optional extension list (`typeExts`) and
// the optional glob regex. The walk ends early once `results` holds
// GREP_FILE_CAP entries, `depth` reaches GLOB_MAX_DEPTH, or the clock passes
// `deadline`; unreadable directories and GLOB_SKIP names are skipped.
function walkGrepFiles(dir, baseDir, globRegex, globHasDir, typeExts, results, depth = 0, deadline = Infinity) {
  const exhausted = () => results.length >= GREP_FILE_CAP || Date.now() >= deadline;
  if (exhausted() || depth >= GLOB_MAX_DEPTH) return;

  let listing;
  try {
    listing = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  const accepts = (absPath) => {
    if (typeExts && !typeExts.includes(path.extname(absPath).toLowerCase())) return false;
    if (!globRegex) return true;
    // Glob patterns with path separators match against the relative path;
    // plain filename globs (e.g. "*.ts") match against the basename only.
    const subject = globHasDir
      ? path.relative(baseDir, absPath).replace(/\\/g, '/')
      : path.basename(absPath);
    return globRegex.test(subject);
  };

  for (const item of listing) {
    if (exhausted()) return;
    if (GLOB_SKIP.has(item.name)) continue;

    const absPath = path.join(dir, item.name);
    if (item.isDirectory()) {
      walkGrepFiles(absPath, baseDir, globRegex, globHasDir, typeExts, results, depth + 1, deadline);
    } else if (accepts(absPath)) {
      results.push(absPath);
    }
  }
}
103
+
104
/**
 * Execute a structured Grep tool call locally.
 *
 * Supports: pattern, path, glob, type, output_mode (files_with_matches | content | count),
 * -i (case-insensitive), -C / context / -A / -B (context lines), head_limit, offset.
 *
 * Does NOT support multiline (cross-line regex) — isGrepHandleable rejects those.
 *
 * @param {object} input  Grep tool input (fields as listed above).
 * @returns {{output: string, exitCode: number, matchCount: number}}
 *   `exitCode` is 1 when the pattern is missing/invalid, an optional string
 *   field has the wrong type, or the search path cannot be accessed.
 *   `output` ends with a pagination note when lines were dropped because the
 *   offset + head_limit budget ran out, or with a time-budget note when the
 *   walk or the match loop hit the GLOB_MAX_MS deadline.
 */
function handleGrepTool(input) {
  const fail = (output) => ({ output, exitCode: 1, matchCount: 0 });

  const pattern = (typeof input?.pattern === 'string' ? input.pattern : '').trim();
  if (!pattern) return fail('(no pattern provided)');

  // path.resolve / toLowerCase throw on non-strings; report instead of crashing.
  for (const field of ['path', 'glob', 'type']) {
    if (input?.[field] != null && typeof input[field] !== 'string') {
      return fail(`Grep: ${field} must be a string`);
    }
  }

  const searchRoot = input?.path
    ? path.resolve(process.cwd(), input.path)
    : process.cwd();

  // Line counts must be non-negative integers: negative context would invert
  // the group bounds and fractional values would index lines that don't exist.
  const toLineCount = (v) => (typeof v === 'number' && v > 0 ? Math.floor(v) : 0);

  const outputMode = input?.output_mode || 'files_with_matches';
  const caseInsens = input?.['-i'] === true;
  const contextN = typeof input?.['-C'] === 'number'
    ? toLineCount(input['-C'])
    : toLineCount(input?.context);
  const linesBefore = typeof input?.['-B'] === 'number' ? toLineCount(input['-B']) : contextN;
  const linesAfter = typeof input?.['-A'] === 'number' ? toLineCount(input['-A']) : contextN;
  const headLimit = typeof input?.head_limit === 'number' && input.head_limit >= 1
    ? Math.min(Math.floor(input.head_limit), GREP_MAX_RESULTS)
    : GREP_MAX_RESULTS;
  const skipLines = toLineCount(input?.offset);

  let regex;
  try {
    regex = new RegExp(pattern, 'g' + (caseInsens ? 'i' : ''));
  } catch (e) {
    return fail(`Grep: invalid pattern: ${e.message}`);
  }

  // Build type extension filter.
  let typeExts = null;
  if (input?.type) {
    const t = input.type.toLowerCase();
    typeExts = GREP_TYPE_EXTS.get(t) || [t.startsWith('.') ? t : `.${t}`];
  }

  // Build glob file filter.
  let globRegex = null;
  let globHasDir = false;
  if (input?.glob) {
    try {
      globRegex = globToRegex(input.glob);
      globHasDir = input.glob.includes('/') || input.glob.includes('**');
    } catch { /* ignore invalid glob — no filter applied */ }
  }

  // Collect candidate files.
  const files = [];
  const deadline = Date.now() + GLOB_MAX_MS;
  let timedOut = false;
  try {
    const stat = fs.statSync(searchRoot);
    if (stat.isFile()) {
      files.push(searchRoot);
    } else {
      walkGrepFiles(searchRoot, searchRoot, globRegex, globHasDir, typeExts, files, 0, deadline);
      // The walk gives no signal when it stops on the deadline, so check here.
      timedOut = Date.now() >= deadline;
      files.sort();
    }
  } catch (e) {
    return fail(`Grep: cannot access path: ${e.message}`);
  }

  const outputLines = [];
  let totalMatches = 0;
  let truncated = false;
  const lineBudget = skipLines + headLimit;
  const atLimit = () => outputLines.length >= lineBudget;
  // wantMore also enforces the per-call wall-clock budget so the
  // file-read+match loop can't blow past it even if walkGrepFiles already
  // collected thousands of paths before the walk-deadline tripped.
  const wantMore = () => !atLimit() && Date.now() < deadline;
  // Record WHY we stopped so the trailing note tells the truth.
  const noteStop = () => { if (atLimit()) truncated = true; else timedOut = true; };
  // The regex carries the `g` flag, so reset lastIndex before every test.
  const matches = (text) => { regex.lastIndex = 0; return regex.test(text); };
  const relOf = (abs) => path.relative(searchRoot, abs).replace(/\\/g, '/') || path.basename(abs);

  for (const absFile of files) {
    if (!wantMore()) { noteStop(); break; }
    const content = tryReadGrep(absFile);
    if (!content) continue;

    if (outputMode === 'files_with_matches') {
      if (matches(content)) { totalMatches++; outputLines.push(relOf(absFile)); }
      continue;
    }

    const fileLines = content.split('\n');

    if (outputMode === 'count') {
      let count = 0;
      for (const line of fileLines) { if (matches(line)) count++; }
      if (count > 0) { totalMatches += count; outputLines.push(`${relOf(absFile)}:${count}`); }
      continue;
    }

    // content mode
    const fileLabel = relOf(absFile);
    const matchSet = new Set();
    for (let i = 0; i < fileLines.length; i++) {
      if (matches(fileLines[i])) matchSet.add(i);
    }
    if (!matchSet.size) continue;
    totalMatches += matchSet.size;

    // Merge context windows into non-overlapping groups. matchSet was filled
    // in ascending line order, so it can be iterated directly.
    const groups = [];
    for (const idx of matchSet) {
      const start = Math.max(0, idx - linesBefore);
      const end = Math.min(fileLines.length - 1, idx + linesAfter);
      const last = groups[groups.length - 1];
      if (last && start <= last[1] + 1) last[1] = Math.max(last[1], end);
      else groups.push([start, end]);
    }

    let firstGroup = true;
    for (const [gStart, gEnd] of groups) {
      if (!wantMore()) { noteStop(); break; }
      if (!firstGroup) outputLines.push('--');
      firstGroup = false;
      let i = gStart;
      for (; i <= gEnd && wantMore(); i++) {
        const sep = matchSet.has(i) ? ':' : '-';
        outputLines.push(`${fileLabel}${sep}${i + 1}${sep}${fileLines[i]}`);
      }
      // Stopped inside a group: lines were dropped, so say so even when this
      // is the last group of the last file.
      if (i <= gEnd) { noteStop(); break; }
    }
  }

  const sliced = outputLines.slice(skipLines, skipLines + headLimit);
  const text = sliced.join('\n') || '(no matches)';
  const suffix = truncated ? '\n(truncated — use head_limit/offset to paginate)'
               : timedOut  ? `\n(truncated — walk exceeded ${GLOB_MAX_MS} ms)`
               : '';
  return { output: text + suffix, exitCode: 0, matchCount: totalMatches };
}
248
+
249
+ module.exports = {
250
+ GREP_MAX_RESULTS,
251
+ GREP_FILE_CAP,
252
+ GREP_TYPE_EXTS,
253
+ VALID_GREP_MODES,
254
+ isGrepHandleable,
255
+ tryReadGrep,
256
+ walkGrepFiles,
257
+ handleGrepTool,
258
+ };