@occasiolabs/occasio 0.8.4 → 0.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +4 -3
  2. package/docs/ADAPTER-STAGE-2-MIGRATION.md +59 -0
  3. package/docs/STAGE-2-STEP-5-SHELL-PLAN.md +107 -0
  4. package/docs/THREAT-MODEL.md +195 -0
  5. package/docs/edr-calibration.md +29 -0
  6. package/package.json +8 -3
  7. package/src/adapters/claude-code.js +1 -2
  8. package/src/adapters/computer-use.js +1 -1
  9. package/src/anomaly/cli.js +4 -1
  10. package/src/anomaly/detectors/deny-rate.js +2 -1
  11. package/src/anomaly/detectors/file-read-volume.js +2 -1
  12. package/src/anomaly/index.js +5 -0
  13. package/src/boundary.js +1 -1
  14. package/src/classifier.js +1 -1
  15. package/src/cli/clear.js +4 -4
  16. package/src/cli/conversation.js +121 -0
  17. package/src/cli/help.js +62 -38
  18. package/src/cli/recap.js +367 -0
  19. package/src/cli/status.js +1 -1
  20. package/src/dashboard.js +2 -3
  21. package/src/demo/audit-demo.js +330 -0
  22. package/src/distiller.js +1 -1
  23. package/src/executor/dispatcher.js +2 -2
  24. package/src/executor/native-handlers/glob.js +173 -0
  25. package/src/executor/native-handlers/grep.js +258 -0
  26. package/src/executor/native-handlers/read.js +99 -0
  27. package/src/executor/native-handlers/todo.js +56 -0
  28. package/src/harness.js +8 -10
  29. package/src/index.js +118 -30
  30. package/src/inspect.js +1 -1
  31. package/src/interceptor.js +9 -29
  32. package/src/ledger.js +2 -3
  33. package/src/mcp-experiment.js +4 -4
  34. package/src/mcp-server.js +3 -3
  35. package/src/policy/doctor.js +2 -2
  36. package/src/policy/engine.js +0 -1
  37. package/src/policy/init.js +1 -1
  38. package/src/policy/loader.js +3 -3
  39. package/src/policy/show.js +1 -2
  40. package/src/preflight/cli.js +0 -1
  41. package/src/preflight/miner.js +3 -6
  42. package/src/redteam.js +1 -2
  43. package/src/replay.js +1 -1
  44. package/src/report/index.js +0 -4
  45. package/src/runtime.js +42 -444
  46. package/src/selftest.js +1 -1
  47. package/src/session.js +1 -1
@@ -0,0 +1,330 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * demo/audit-demo.js — `occasio demo audit`
5
+ *
6
+ * Hero demo: the auditor scenario. Shows the actual moat — a signed,
7
+ * offline-verifiable attestation of agent behavior that any third
8
+ * party can re-check with two independent verifiers (Node + Python).
9
+ *
10
+ * Flow:
11
+ * 1. Auditor question framed
12
+ * 2. Build richer synthetic chain (~12 rows: PASS + BLOCKs on the
13
+ * asked-about path + TRANSFORM + LOCAL)
14
+ * 3. Verify chain integrity (Node SHA-256 walker)
15
+ * 4. Build unsigned attestation
16
+ * 5. Answer the auditor's question from the chain
17
+ * 6. Re-verify with the independent Python verifier (docs/attest_verify.py)
18
+ * on the same artifact → byte-identical result
19
+ * 7. Show what --sign would add in real CI
20
+ * 8. CTA + spec link
21
+ *
22
+ * No API key, no network, no touching the user's real ~/.occasio chain.
23
+ */
24
+
25
+ const fs = require('fs');
26
+ const os = require('os');
27
+ const path = require('path');
28
+ const crypto = require('crypto');
29
+ const { spawnSync } = require('child_process');
30
+
31
+ const { buildAttestation } = require('../attest');
32
+ const { verifyFile } = require('../audit/verifier');
33
+ const { canonicalize } = require('../attest/canonicalize');
34
+
35
// Build a colouriser for one SGR parameter string: the returned function wraps
// its argument in the escape sequence and resets all attributes afterwards.
const sgr = (codes) => (s) => `\x1b[${codes}m${s}\x1b[0m`;

// Terminal colour helpers used throughout the demo output.
const C = {
  r: sgr('31'), // red
  g: sgr('32'), // green
  y: sgr('33'), // yellow
  c: sgr('36'), // cyan
  d: sgr('2'), // dim
  b: sgr('1'), // bold
  rb: sgr('31;1'), // red + bold
  gb: sgr('32;1'), // green + bold
};

// prev_hash used for the first row of a chain: 64 zero hex digits.
const GENESIS = ''.padEnd(64, '0');
// The path the demo's auditor asks about; reads of it are recorded as BLOCK rows.
const DENIED_PATH = '/etc/secrets/db.yml';
48
+
49
/**
 * Render a path for demo output with the OS temp directory replaced by a
 * generic placeholder, so the output is screen-recording-safe (Windows TEMP
 * contains the username; macOS/Linux temp dirs are still machine-specific).
 * Internal use still uses the real path.
 *
 * @param {string} p - Path to display. Falsy values are returned unchanged.
 * @returns {string} `%TEMP%\<suffix>` on Windows or `$TMPDIR/<suffix>` elsewhere
 *   when `p` lies inside `os.tmpdir()`; otherwise `p` itself.
 */
function displayPath(p) {
  const tmp = os.tmpdir();
  if (!p || !p.startsWith(tmp)) return p;

  const rest = p.slice(tmp.length);
  // A plain string-prefix match is not enough: with tmp = "/tmp", the sibling
  // "/tmp-other/x" also starts with "/tmp". Only rewrite when the prefix ends
  // on a path-component boundary.
  const onBoundary = rest === '' || /^[\\/]/.test(rest) || /[\\/]$/.test(tmp);
  if (!onBoundary) return p;

  const suffix = rest.replace(/^[\\/]+/, '');
  return process.platform === 'win32' ? `%TEMP%\\${suffix}` : `$TMPDIR/${suffix}`;
}
60
+
61
/**
 * Append one hash-linked row to a JSONL chain file.
 *
 * The row is extended with `prev_hash`, hashed (SHA-256 over its
 * `JSON.stringify` form), extended again with the resulting `hash`, and
 * written as a single line.
 *
 * @param {string} file - JSONL file to append to.
 * @param {string} prevHash - Hash of the preceding row (or the genesis value).
 * @param {object} row - Row fields; key order is preserved in the hashed JSON.
 * @returns {{hash: string, full: object}} The new row hash and the row as written.
 */
function appendRow(file, prevHash, row) {
  const linked = { ...row, prev_hash: prevHash };

  const hasher = crypto.createHash('sha256');
  hasher.update(JSON.stringify(linked), 'utf8');
  const hash = hasher.digest('hex');

  const full = { ...linked, hash };
  const line = JSON.stringify(full) + '\n';
  fs.appendFileSync(file, line);

  return { hash, full };
}
68
+
69
/**
 * Write the synthetic audit scenario: a small policy file and a 12-row
 * hash-linked chain (1 policy_loaded, 5 PASS reads, 2 LOCAL calls, 3 BLOCKed
 * reads of the denied path, 1 TRANSFORM).
 *
 * Both files are overwritten. Row timestamps are fixed (2026-05-16 09:30:xx
 * UTC); the run id and event ids are random on every call.
 *
 * @param {string} chainFile - JSONL chain file to (re)create.
 * @param {string} policyFile - Policy YAML file to (re)create.
 * @returns {{runId: string, blockedHashes: Array<{ts: string, hash: string, path: string}>}}
 *   The generated run id and one entry per BLOCK row.
 */
function buildAuditScenarioChain(chainFile, policyFile) {
  const policyYaml = [
    'version: 1',
    'deny_paths:',
    ' - "/etc/secrets/**"',
    ' - "**/.env"',
    ' - "**/.ssh/**"',
    'deny_patterns:',
    ' api_key: "(?i)api[_-]?key\\\\s*[:=]\\\\s*\\\\S+"',
    'block_secrets_in_tool_results: true',
    '',
  ].join('\n');
  fs.writeFileSync(chainFile, '');
  fs.writeFileSync(policyFile, policyYaml);

  // 16 random bytes formatted in the 8-4-4-4-12 UUID layout.
  const hex = crypto.randomBytes(16).toString('hex');
  const RUN_ID = [
    hex.slice(0, 8), hex.slice(8, 12), hex.slice(12, 16), hex.slice(16, 20), hex.slice(20, 32),
  ].join('-');
  const policyHash = crypto.createHash('sha256').update(fs.readFileSync(policyFile)).digest('hex');

  const stamp = (sec) => new Date(Date.UTC(2026, 4, 16, 9, 30, sec)).toISOString();

  let head = GENESIS;

  // Append one row and advance the chain head. The common fields come first
  // and `fields` is spread last, so the serialized key order (and therefore
  // the row hash) is fully determined by the caller's field order.
  const record = (sec, agent, fields) => {
    const { hash } = appendRow(chainFile, head, {
      ts: stamp(sec), event_id: crypto.randomUUID(), run_id: RUN_ID, agent, ...fields,
    });
    head = hash;
    return hash;
  };

  // Shorthand for a claude-code tool_call row; `extra` keys follow tool_inputs.
  const toolCall = (sec, toolName, action, toolInputs, extra = {}) => record(sec, 'claude-code', {
    kind: 'tool_call', tool_name: toolName, action, tool_inputs: toolInputs, ...extra,
  });

  // Row 1: policy_loaded
  record(0, 'occasio', {
    kind: 'policy_loaded', tool_name: 'policy_loaded', action: 'INFO',
    tool_inputs: { policy_hash: policyHash, policy_path: policyFile, version: 1 },
    policy_source: 'user', reason: 'policy-loaded',
  });

  // Rows 2-4: three normal PASS reads of source files
  [[5, 'src/server.js'], [8, 'src/db.js'], [12, 'package.json']].forEach(([sec, file]) => {
    toolCall(sec, 'Read', 'PASS', { path: file });
  });

  // Row 5: LOCAL Glob discovery
  toolCall(15, 'Glob', 'LOCAL', { pattern: 'src/**/*.js' });

  // Rows 6-8: three attempts to read the denied path, all BLOCKED
  // (this is what the auditor will ask about)
  const deniedAttempts = [
    [18, DENIED_PATH],
    [22, '/etc/secrets/../secrets/db.yml'], // traversal attempt
    [27, DENIED_PATH], // retry
  ];
  const blockedHashes = deniedAttempts.map(([sec, target]) => {
    const hash = toolCall(sec, 'Read', 'BLOCK', { path: target }, { reason: 'path-denied' });
    return { ts: stamp(sec), hash, path: target };
  });

  // Row 9: TRANSFORM — Grep result had an API key, redacted before forward
  toolCall(32, 'Grep', 'TRANSFORM', { pattern: 'config', path: 'src/' }, {
    secrets_redacted: 1,
    transform: 'redact-secrets',
  });

  // Rows 10-11: two more PASS reads
  [[36, 'src/auth.js'], [40, 'src/routes.js']].forEach(([sec, file]) => {
    toolCall(sec, 'Read', 'PASS', { path: file });
  });

  // Row 12: LOCAL grep
  toolCall(45, 'Grep', 'LOCAL', { pattern: 'TODO', path: 'src/' });

  return { runId: RUN_ID, blockedHashes };
}
160
+
161
/**
 * Run the Python verifier script (docs/attest_verify.py) against the demo
 * artifacts, using the first Python interpreter whose `--version` probe exits 0.
 *
 * @param {object} att - Attestation object (accepted for call-site symmetry; not read here).
 * @param {string} chainFile - Path passed to the verifier via `--chain`.
 * @param {string} attFile - Path of the attestation JSON passed to the verifier.
 * @returns {{ran: false, reason: string} |
 *   {ran: true, cmd: string, exitCode: (number|null), checks: Array<{status: string, name: string}>,
 *    anyFail: boolean, chainOk: boolean, stdout: string}}
 *   `ran: false` when no interpreter or no verifier script was found; otherwise
 *   the parsed "[STATUS] check name" lines and summary flags.
 */
function tryPython(att, chainFile, attFile) {
  const spawnOpts = { stdio: 'pipe', shell: false };
  const interpreters = process.platform === 'win32' ? ['python', 'py'] : ['python3', 'python'];

  // `find` stops at the first interpreter that answers, so later candidates
  // are never probed once one works.
  const cmd = interpreters.find((name) => spawnSync(name, ['--version'], spawnOpts).status === 0);
  if (cmd === undefined) {
    return { ran: false, reason: 'no python3/python on PATH' };
  }

  const verifierPath = path.join(__dirname, '..', '..', 'docs', 'attest_verify.py');
  if (!fs.existsSync(verifierPath)) {
    return { ran: false, reason: `verifier script not found at ${verifierPath}` };
  }

  const result = spawnSync(cmd, [verifierPath, attFile, '--chain', chainFile], {
    ...spawnOpts, encoding: 'utf8',
  });
  const stdout = result.stdout || '';
  const combined = stdout + (result.stderr || '');

  // Parse the verifier's "[STATUS] check name" lines. For an unsigned
  // attestation, sigstore steps SKIP and the chain step OKs — that is
  // the success shape for the demo. Real failure = any FAIL line.
  const checks = combined
    .split('\n')
    .map((line) => line.match(/\[\s*(OK|FAIL|SKIP)\s*\]\s+(.+?)\s*$/))
    .filter(Boolean)
    .map(([, status, name]) => ({ status, name }));

  return {
    ran: true,
    cmd,
    exitCode: result.status,
    checks,
    anyFail: checks.some(({ status }) => status === 'FAIL'),
    chainOk: checks.some(({ status, name }) => status === 'OK' && /chain/i.test(name)),
    stdout,
  };
}
198
+
199
/**
 * `occasio demo audit` entry point: builds a synthetic audit chain in a fresh
 * temp directory, verifies it, builds an unsigned attestation, answers the
 * auditor's question from the chain, and cross-checks with the Python verifier
 * when one is available. All output goes to the console.
 *
 * The scratch directory is intentionally left on disk; its location is printed
 * at the end so the artifacts can be inspected.
 *
 * @param {string[]} [_args] - CLI arguments (currently unused).
 * @returns {Promise<number>} Exit code: 0 on success; 1 when the chain fails
 *   verification, the attestation cannot be built, the canonical round-trip
 *   diverges, or the Python verifier reports a FAIL line. A missing Python
 *   interpreter or unrecognised verifier output is reported but is not a failure.
 */
async function runAuditDemoCli(_args = []) {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'lf-demo-audit-'));
  const chainFile = path.join(tmpDir, 'pipeline-events.jsonl');
  const policyFile = path.join(tmpDir, 'policy.yml');
  const attFile = path.join(tmpDir, 'occasio-attestation.json');

  // Stays 0 unless the independent verifier disagrees (see step 5).
  let exitCode = 0;

  console.log(C.b('\n⚡ Occasio — auditor demo ') + C.d('(no API key, no network, ~10s)\n'));

  // ── Scene ──────────────────────────────────────────────────────────────
  console.log(C.b('━━━ The question ━━━\n'));
  console.log(' ' + C.y('Auditor:') + ' "Your CI merged 47 AI-authored PRs last month. Prove the');
  console.log(' agent in build #2849 never read ' + C.c(DENIED_PATH) + '."');
  console.log('');
  console.log(' ' + C.d('Without Occasio: you grep through stderr logs, hope nothing was'));
  console.log(' ' + C.d('rotated, and email Anthropic for server-side traces.'));
  console.log(' ' + C.d('With Occasio: you hand the auditor one JSON file. They verify it offline.'));
  console.log('');

  // ── Step 1: chain ──────────────────────────────────────────────────────
  console.log(C.b('1.') + ' Synthesizing the CI-run audit chain ' + C.d('(12 rows, hash-linked)'));
  const { runId, blockedHashes } = buildAuditScenarioChain(chainFile, policyFile);
  console.log(' ' + C.g('✓') + ' run_id: ' + C.c(runId));
  console.log(' ' + C.d('chain: ' + displayPath(chainFile)));
  console.log('');

  // ── Step 2: verify ─────────────────────────────────────────────────────
  console.log(C.b('2.') + ' Verifying chain integrity ' + C.d('(SHA-256 walk: prev_hash → hash, GENESIS → HEAD)'));
  const ver = verifyFile(chainFile);
  if (!ver.ok) {
    console.error(' ' + C.r('✗') + ' chain broken — demo bug');
    return 1;
  }
  console.log(' ' + C.g('✓') + ' all ' + ver.chained + ' rows chained, no tamper gap');
  console.log('');

  // ── Step 3: build attestation ──────────────────────────────────────────
  console.log(C.b('3.') + ' Building behavioral attestation ' + C.d('(unsigned; --sign needs OIDC)'));
  const att = buildAttestation({ runId, logFile: chainFile, policyFile });
  if (!att) {
    console.error(' ' + C.r('✗') + ' buildAttestation returned null');
    return 1;
  }
  fs.writeFileSync(attFile, JSON.stringify(att, null, 2));
  console.log(' ' + C.g('✓') + ' ' + C.d(displayPath(attFile)));
  console.log(' ' + C.d('tool_calls: ' + att.execution_summary.tool_calls
    + ' blocked: ' + att.execution_summary.blocked
    + ' transformed: ' + att.execution_summary.transformed
    + ' secrets_redacted: ' + att.execution_summary.secrets_redacted));
  console.log('');

  // ── Answer the auditor (unnumbered in the output) ──────────────────────
  console.log(C.b('━━━ Answer to the auditor ━━━\n'));
  console.log(' ' + C.y('Q:') + ' "Did the agent read ' + C.c(DENIED_PATH) + '?"');
  console.log(' ' + C.gb('A: NO.') + ' ' + blockedHashes.length + ' attempts, all denied by policy. Evidence:');
  console.log('');
  for (const b of blockedHashes) {
    console.log(' ' + C.d(b.ts) + ' ' + C.rb('BLOCK') + ' '
      + b.path.padEnd(38) + ' ' + C.d('hash=' + b.hash.slice(0, 16) + '…'));
  }
  console.log('');
  console.log(' ' + C.d('These row hashes are linked into the chain. Tampering with any'));
  console.log(' ' + C.d('one of them breaks the SHA-256 walk and the attestation fails verify.'));
  console.log('');

  // ── Step 4: Node verifier (canonical round-trip) ───────────────────────
  console.log(C.b('4.') + ' Re-verifying with the Node verifier ' + C.d('(canonical JSON round-trip)'));
  const reparsed = JSON.parse(fs.readFileSync(attFile, 'utf8'));
  // Compare everything except the signature field on both sides.
  const { signature: _o1, ...expected } = att;
  const { signature: _o2, ...observed } = reparsed;
  if (canonicalize(expected) !== canonicalize(observed)) {
    console.error(' ' + C.r('✗') + ' canonical bytes diverged');
    return 1;
  }
  console.log(' ' + C.g('✓') + ' predicate canonical-stable across reparse');
  console.log('');

  // ── Step 5: Python cross-verifier ──────────────────────────────────────
  console.log(C.b('5.') + ' Re-verifying with the independent Python verifier ' + C.d('(docs/attest_verify.py)'));
  const py = tryPython(att, chainFile, attFile);
  if (!py.ran) {
    console.log(' ' + C.y('○') + ' Python verifier skipped — ' + py.reason);
    console.log(' ' + C.d(' install python3 to run the cross-verifier; the Node check above already passed'));
  } else if (py.anyFail) {
    // A disagreement between the two verifiers is a real failure, on par with
    // a broken chain: keep printing so the artifacts stay discoverable, but
    // make sure the exit code reflects it.
    exitCode = 1;
    console.log(' ' + C.r('✗') + ' Python verifier reported FAIL ' + C.d('(' + py.cmd + ' exit ' + py.exitCode + ')'));
    for (const c of py.checks) {
      const tag = c.status === 'OK' ? C.g('OK ') : c.status === 'FAIL' ? C.r('FAIL') : C.y('SKIP');
      console.log(' [' + tag + '] ' + c.name);
    }
  } else if (py.chainOk) {
    console.log(' ' + C.g('✓') + ' independent Python verifier agrees on the audit chain');
    for (const c of py.checks) {
      const tag = c.status === 'OK' ? C.g('OK ') : C.y('SKIP');
      console.log(' [' + tag + '] ' + c.name);
    }
    console.log(' ' + C.d(' (sigstore steps SKIP because the demo attestation is unsigned;'));
    console.log(' ' + C.d(' in real CI with --sign, all three rows would show OK)'));
  } else {
    console.log(' ' + C.y('○') + ' Python verifier ran but did not produce the expected check lines');
    console.log(' ' + C.d(' exit ' + py.exitCode + ' — see ' + displayPath(attFile) + ' for the artifact'));
  }
  console.log('');

  // ── What --sign adds in CI ─────────────────────────────────────────────
  console.log(C.b('━━━ In real CI: --sign adds the third check ━━━\n'));
  console.log(' ' + C.c('occasio attest --run-id ' + runId.slice(0, 8) + '… --sign'));
  console.log(' ' + C.d(' → signs via Sigstore keyless using GitHub Actions OIDC (no key mgmt)'));
  console.log(' ' + C.d(' → emits .json attestation + .bundle.json Sigstore bundle'));
  console.log(' ' + C.d(' → workflow posts a Check Run on the PR with the verify summary'));
  console.log('');
  console.log(' ' + C.b('Auditor verifies offline with three independent paths:'));
  console.log(' ' + C.d(' • ') + C.c('occasio attest verify <file>') + C.d(' (Node)'));
  console.log(' ' + C.d(' • ') + C.c('python3 docs/attest_verify.py <file>') + C.d(' (stdlib + sigstore-python)'));
  console.log(' ' + C.d(' • ') + C.c('cosign verify-blob …') + C.d(' (any sigstore-conformant tool)'));
  console.log('');
  console.log(' ' + C.d('All three must agree. None of them trust Occasio\'s own verifier.'));
  console.log('');

  // ── CTA ────────────────────────────────────────────────────────────────
  console.log(C.b('━━━ Try it on your own CI ━━━\n'));
  console.log(' ' + C.c('npm install -g @occasiolabs/occasio'));
  console.log(' ' + C.c('occasio policy init ') + C.d('# write ~/.occasio/policy.yml'));
  console.log(' ' + C.c('occasio register ') + C.d('# alias `claude` through the proxy'));
  console.log(' ' + C.d('Add .github/workflows/attest-on-pr.yml from docs/reference-pipeline.md'));
  console.log('');
  console.log(' ' + C.b('Spec:') + ' https://github.com/occasiolabs/occasio/tree/main/spec/agent-attestation/v1');
  console.log(' ' + C.b('Scratch artifacts kept at:') + ' ' + C.d(displayPath(tmpDir)));
  console.log('');

  return exitCode;
}
329
+
330
+ module.exports = { runAuditDemoCli, buildAuditScenarioChain };
package/src/distiller.js CHANGED
@@ -120,7 +120,7 @@ const FAIL_RE = /\b(FAIL|FAILED|ERROR|error:|✗|×|AssertionError|not ok|ERRORE
120
120
  * Keeps all failure-related lines (plus 1 line of context each side) and the
121
121
  * last 15 lines (usually the summary). Clips total to TEST_MAX_LINES.
122
122
  */
123
- function distillTestOutput(output, rawBytes, cmd) {
123
+ function distillTestOutput(output, rawBytes, _cmd) {
124
124
  const lines = output.split('\n');
125
125
  const none = { content: output, distilled: false, savedTokens: 0, label: '', rawBytes, rawContent: null };
126
126
  if (lines.length <= TEST_MAX_LINES) return none;
@@ -44,7 +44,7 @@ const NATIVE_HANDLERS = {
44
44
  // but nativeHandle returned null, fall back to the exec subprocess. The
45
45
  // returned `native` field tells the caller which path was taken.
46
46
  [CANONICAL.SHELL_BASH]: async (input) => {
47
- const cmd = (input?.command || '').trim();
47
+ const cmd = (typeof input?.command === 'string' ? input.command : '').trim();
48
48
  if (!cmd) return null;
49
49
  const nr = nativeHandle(cmd);
50
50
  if (nr !== null) {
@@ -63,7 +63,7 @@ const NATIVE_HANDLERS = {
63
63
  // then native-only execution. expandedCmd is returned so the caller can
64
64
  // record the actually-executed command in toolsRun.
65
65
  [CANONICAL.SHELL_POWERSHELL]: (input) => {
66
- const rawCmd = (input?.command || '').trim();
66
+ const rawCmd = (typeof input?.command === 'string' ? input.command : '').trim();
67
67
  if (!rawCmd) return null;
68
68
  const cmd = expandPsEnvVars(rawCmd);
69
69
  const nr = nativeHandle(cmd);
@@ -0,0 +1,173 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Native handler for the Glob tool.
5
+ *
6
+ * Pure filesystem function: takes a glob pattern (+ optional base path) and
7
+ * returns a sorted list of matching paths. No dependency on the interceptor
8
+ * pipeline, Anthropic API, or shell execution. Safe to import in any process
9
+ * context.
10
+ *
11
+ * Extracted from src/runtime.js as Stage-2 Step 3 of the executor migration
12
+ * (see docs/ADAPTER-STAGE-2-MIGRATION.md). src/runtime.js re-exports these
13
+ * so existing consumers keep working unchanged.
14
+ */
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+
19
+ // ── Glob tool support ──────────────────────────────────────────────────────────
20
+
21
// Characters that indicate shell injection in a glob pattern.
// We reject patterns containing these so handleGlobTool stays read-only.
const GLOB_INJECTION_RE = /[;&|`$<>!]/;

// Directories skipped during recursive glob walks.
const GLOB_SKIP = new Set(['node_modules', '.git', '.hg', '.svn', 'dist', 'build', '__pycache__', '.venv', 'venv']);

// Maximum number of matches returned to avoid overwhelming the model context.
const GLOB_MAX = 500;

/**
 * Read a numeric tuning knob from the environment.
 *
 * Unset, empty, non-numeric, zero and negative values all yield `fallback`.
 * Rejecting negatives matters: a negative depth or time budget would make
 * every walk stop before it collects anything.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is not a positive number.
 * @returns {number} The parsed positive number, or `fallback`.
 */
function positiveNumberFromEnv(name, fallback) {
  const parsed = Number(process.env[name]);
  return parsed > 0 ? parsed : fallback; // NaN > 0 is false, so junk falls back too
}

// Maximum recursion depth from baseDir. Hard cap on path-traversal DoS
// (a fuzz-discovered class — see THREAT-MODEL.md residual risk #5).
// Tunable via env for special-case repos.
const GLOB_MAX_DEPTH = positiveNumberFromEnv('OCCASIO_GLOB_MAX_DEPTH', 16);

// Soft wall-clock limit per walk in ms. Stops a walk that strayed onto a huge
// subtree (e.g. agent globbed up from /) before it burns seconds. Stop is
// best-effort — the caller still receives whatever was collected so far.
const GLOB_MAX_MS = positiveNumberFromEnv('OCCASIO_GLOB_MAX_MS', 2_000);
40
+
41
/**
 * Decide whether a Glob tool input can be served by the native handler.
 *
 * @param {*} input - Raw tool input.
 * @returns {boolean} true only when `input` is an object whose `pattern` is a
 *   non-blank string free of the characters in GLOB_INJECTION_RE, and whose
 *   `path` is either absent (null/undefined) or a string.
 */
function isGlobHandleable(input) {
  if (typeof input !== 'object' || input === null) return false;

  const { pattern } = input;
  const patternUsable = typeof pattern === 'string'
    && pattern.trim() !== ''
    && !GLOB_INJECTION_RE.test(pattern);
  if (!patternUsable) return false;

  return input.path == null || typeof input.path === 'string';
}
49
+
50
// Escape regex metacharacters in a literal string segment. `*` and `?` are
// included so that a literal containing them can never turn into a quantifier.
function escapeRegexChars(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Translate a glob fragment (already using `/` separators) into regex source
 * text, without anchors. Used for the whole pattern and, recursively, for
 * each alternative inside `{a,b}` so wildcards work there too.
 */
function globSourceFor(p) {
  let re = '';
  let i = 0;
  while (i < p.length) {
    const ch = p[i];

    // ** — match any path segments (including none)
    if (ch === '*' && p[i + 1] === '*') {
      const atSegmentStart = i === 0 || p[i - 1] === '/';
      if (atSegmentStart && p[i + 2] === '/') {
        // "**/" spans zero or more whole directories. Requiring the trailing
        // "/" inside the group keeps "**/foo.js" from matching "barfoo.js".
        re += '(?:.*/)?';
        i += 3;
      } else {
        re += '.*';
        i += 2;
        if (p[i] === '/') i++; // consume separator after **
      }
      continue;
    }
    // * — match within a single path segment
    if (ch === '*') { re += '[^/]*'; i++; continue; }
    // ? — match a single character within a segment
    if (ch === '?') { re += '[^/]'; i++; continue; }
    // {a,b,c} — alternation (not nested: the group ends at the first "}")
    if (ch === '{') {
      const end = p.indexOf('}', i);
      if (end !== -1) {
        const alts = p.slice(i + 1, end).split(',').map(globSourceFor);
        re += `(?:${alts.join('|')})`;
        i = end + 1;
        continue;
      }
    }
    // [abc] / [^abc] — pass character classes through; [!abc] is the glob
    // spelling of negation and becomes [^abc]
    if (ch === '[') {
      const end = p.indexOf(']', i);
      if (end !== -1) {
        const body = p.slice(i + 1, end);
        re += body.length > 1 && body[0] === '!' ? `[^${body.slice(1)}]` : `[${body}]`;
        i = end + 1;
        continue;
      }
    }
    re += escapeRegexChars(ch);
    i++;
  }
  return re;
}

/**
 * Convert a glob pattern to a RegExp.
 * Supports: ** (any path depth), * (single segment), ? (single char),
 * {ts,tsx} (alternation, wildcards allowed inside), [abc] / [^abc] / [!abc]
 * (character classes).
 * Exported for unit testing.
 *
 * @param {string} pattern - Glob pattern; backslashes are treated as separators.
 * @returns {RegExp} Anchored regex; case-insensitive on Windows only.
 * @throws {SyntaxError} If the pattern produces an invalid regular expression
 *   (e.g. a malformed character class).
 */
function globToRegex(pattern) {
  // Normalise Windows separators in the pattern.
  const source = globSourceFor(pattern.replace(/\\/g, '/'));

  // On Windows, matching is case-insensitive; on POSIX it's case-sensitive.
  const flags = process.platform === 'win32' ? 'i' : '';
  return new RegExp(`^${source}$`, flags);
}
102
+
103
/**
 * Depth-first walk of `dir` that appends every non-directory entry whose
 * path (relative to `baseDir`, forward slashes) matches `regex` to `results`.
 *
 * The walk stops silently when `results` reaches GLOB_MAX, when `depth`
 * reaches GLOB_MAX_DEPTH, or once `deadline` (epoch ms) has passed. Entries
 * named in GLOB_SKIP are ignored, and unreadable directories are skipped.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} baseDir - Root the reported paths are relative to.
 * @param {RegExp} regex - Matcher applied to each relative path.
 * @param {string[]} results - Output array, mutated in place.
 * @param {number} [depth=0] - Current recursion depth.
 * @param {number} [deadline=Infinity] - Epoch-ms cutoff for the walk.
 * @returns {void}
 */
function walkGlob(dir, baseDir, regex, results, depth = 0, deadline = Infinity) {
  if (results.length >= GLOB_MAX || depth >= GLOB_MAX_DEPTH || Date.now() >= deadline) return;

  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    // Unreadable or missing directory: contribute nothing.
    return;
  }

  for (const entry of entries) {
    if (results.length >= GLOB_MAX || Date.now() >= deadline) return;

    const { name } = entry;
    if (GLOB_SKIP.has(name)) continue;

    const abs = path.join(dir, name);
    if (entry.isDirectory()) {
      walkGlob(abs, baseDir, regex, results, depth + 1, deadline);
      continue;
    }

    // Normalise to forward slashes for matching (consistent on all platforms).
    const rel = path.relative(baseDir, abs).replace(/\\/g, '/');
    if (regex.test(rel)) results.push(rel);
  }
}
129
+
130
/**
 * Resolve glob pattern + optional base path to a sorted list of matching paths.
 *
 * Paths are reported relative to the current working directory when the walk
 * starts there; when `input.path` resolves to a different directory, each
 * result is prefixed with that resolved directory. Separators are always `/`.
 *
 * @param {{pattern?: string, path?: string}} input - Glob tool input.
 * @returns {{output: string, exitCode: number, matchCount: number}}
 *   `exitCode` is 1 for a missing pattern, a non-string path, or a pattern that
 *   does not compile; otherwise 0. `output` is the newline-joined matches (or
 *   "(no matches)"), with a trailing note when the result was cut short by
 *   GLOB_MAX or GLOB_MAX_MS.
 */
function handleGlobTool(input) {
  const pattern = (typeof input?.pattern === 'string' ? input.pattern : '').trim();
  if (!pattern) return { output: '(no pattern provided)', exitCode: 1, matchCount: 0 };

  // path.resolve throws a TypeError on non-string arguments; report that as a
  // normal tool error instead of letting it escape to the caller.
  const requestedPath = input?.path;
  if (requestedPath && typeof requestedPath !== 'string') {
    return { output: 'Glob: path must be a string', exitCode: 1, matchCount: 0 };
  }

  const cwd = process.cwd();
  const baseDir = requestedPath ? path.resolve(cwd, requestedPath) : cwd;

  let regex;
  try { regex = globToRegex(pattern); }
  catch (e) { return { output: `Glob: invalid pattern: ${e.message}`, exitCode: 1, matchCount: 0 }; }

  const results = [];
  const deadline = Date.now() + GLOB_MAX_MS;
  walkGlob(baseDir, baseDir, regex, results, 0, deadline);
  // Best-effort: the walk is considered timed out if the deadline has passed
  // by the time it returns.
  const timedOut = Date.now() >= deadline;
  results.sort();

  const truncated = results.length >= GLOB_MAX;
  const prefix = baseDir !== cwd ? baseDir : '';
  const lines = results.map(r => path.join(prefix, r).replace(/\\/g, '/'));
  const suffix = truncated ? `\n(truncated at ${GLOB_MAX} results)`
    : timedOut ? `\n(truncated — walk exceeded ${GLOB_MAX_MS} ms)`
    : '';
  const output = lines.join('\n') + suffix;
  return { output: output || '(no matches)', exitCode: 0, matchCount: results.length };
}
162
+
163
+ module.exports = {
164
+ GLOB_INJECTION_RE,
165
+ GLOB_SKIP,
166
+ GLOB_MAX,
167
+ GLOB_MAX_DEPTH,
168
+ GLOB_MAX_MS,
169
+ isGlobHandleable,
170
+ globToRegex,
171
+ walkGlob,
172
+ handleGlobTool,
173
+ };