@occasiolabs/occasio 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/docs/ADAPTER-STAGE-2-MIGRATION.md +59 -0
  2. package/docs/STAGE-2-STEP-5-SHELL-PLAN.md +107 -0
  3. package/docs/THREAT-MODEL.md +195 -0
  4. package/docs/edr-calibration.md +29 -0
  5. package/package.json +7 -3
  6. package/src/adapters/claude-code.js +1 -2
  7. package/src/adapters/computer-use.js +1 -1
  8. package/src/anomaly/cli.js +4 -1
  9. package/src/anomaly/detectors/deny-rate.js +2 -1
  10. package/src/anomaly/detectors/file-read-volume.js +2 -1
  11. package/src/anomaly/index.js +5 -0
  12. package/src/boundary.js +1 -1
  13. package/src/classifier.js +1 -1
  14. package/src/cli/clear.js +4 -4
  15. package/src/cli/help.js +58 -37
  16. package/src/cli/status.js +1 -1
  17. package/src/dashboard.js +2 -3
  18. package/src/distiller.js +1 -1
  19. package/src/executor/dispatcher.js +2 -2
  20. package/src/executor/native-handlers/glob.js +173 -0
  21. package/src/executor/native-handlers/grep.js +258 -0
  22. package/src/executor/native-handlers/read.js +99 -0
  23. package/src/executor/native-handlers/todo.js +56 -0
  24. package/src/harness.js +8 -10
  25. package/src/index.js +13 -15
  26. package/src/inspect.js +1 -1
  27. package/src/interceptor.js +9 -29
  28. package/src/ledger.js +2 -3
  29. package/src/mcp-experiment.js +4 -4
  30. package/src/mcp-server.js +3 -3
  31. package/src/policy/doctor.js +2 -2
  32. package/src/policy/engine.js +0 -1
  33. package/src/policy/init.js +1 -1
  34. package/src/policy/loader.js +3 -3
  35. package/src/policy/show.js +1 -2
  36. package/src/preflight/cli.js +0 -1
  37. package/src/preflight/miner.js +3 -6
  38. package/src/redteam.js +1 -2
  39. package/src/replay.js +1 -1
  40. package/src/report/index.js +0 -4
  41. package/src/runtime.js +42 -444
  42. package/src/selftest.js +1 -1
  43. package/src/session.js +1 -1
@@ -0,0 +1,59 @@
1
+ # Adapter Stage-2 Migration
2
+
3
+ Stage 1 (already complete) moved the cross-cutting pipeline plumbing —
4
+ boundary events, processToolEvent, tool-name canonicalisation — into
5
+ `src/core/`. The result is an interceptor that still owns dispatch logic
6
+ but no longer owns the canonical event-construction or the
7
+ decision/effect bookkeeping.
8
+
9
+ Stage 2 separates **tool execution** from **dispatch routing**. The goal
10
+ is that `src/interceptor.js` shrinks into a thin router whose only job is
11
+ to map an incoming Anthropic SSE tool_use block to a per-tool *native
12
+ handler*, and that those native handlers live in their own files under
13
+ `src/executor/native-handlers/`. Every native handler is a pure function
14
+ of `(input, sessionContext) → { output, exitCode, … }` with no
15
+ dispatch-loop awareness.
16
+
17
+ ## What has moved so far
18
+
19
+ | Step | Module | Origin | Destination | Status |
20
+ |---|---|---|---|---|
21
+ | 1 | TodoWrite / TodoRead native handlers | `src/runtime.js` | `src/executor/native-handlers/todo.js` | ✅ |
22
+ | 2 | Read native handler (+ `MAX_OUTPUT`, `READ_SKIP_EXTENSIONS`, `readFileNative`) | `src/runtime.js` | `src/executor/native-handlers/read.js` | ✅ |
23
+ | 3 | Glob native handler (+ `globToRegex`, `walkGlob`, `GLOB_*` constants) | `src/runtime.js` | `src/executor/native-handlers/glob.js` | ✅ |
24
+ | 4 | Grep native handler (+ `tryReadGrep`, `walkGrepFiles`, `GREP_*` constants) | `src/runtime.js` | `src/executor/native-handlers/grep.js` | ✅ this commit |
25
+
26
+ `src/runtime.js` re-exports the moved symbols, so every existing import
27
+ path (`src/interceptor.js`, tests, the MCP server) continues to work
28
+ without a code change.
29
+
30
+ ## What is still pending
31
+
32
+ The order below reflects the cleanest dependency boundary at each step:
33
+ moving a handler **without** also moving its tests and without breaking
34
+ `executeLocalTool()` is the constraint that controls sequencing.
35
+
36
+ | Step | Module | Origin | Proposed destination | Notes |
37
+ |---|---|---|---|---|
38
+ | 5 | Bash / PowerShell native dispatch | `src/interceptor.js` `nativeHandle` + per-family branches | `src/executor/native-handlers/shell-*.js` | Too big to land as a single step. Sub-plan in **`docs/STAGE-2-STEP-5-SHELL-PLAN.md`** decomposes it into 5a–5g (file-read / file-stat / list / search / git / compound / router). The original "Decision-shape mismatch" caveat is partially stale: `executor/dispatcher.js` already canonicalises the return value via `NATIVE_HANDLERS[SHELL_BASH]`. |
39
+ | 6 | `executeLocalTool()` wrapper | `src/runtime.js` | `src/executor/index.js` | Once steps 2–5 are done, the wrapper becomes the executor module's public surface. `runtime.js` is then a thin compatibility shim and can be deprecated. |
40
+ | 7 | Remove `runtime.js` shim | — | — | After two minor versions with `runtime.js` re-exporting from `executor/`, drop the file. |
41
+
42
+ ## Why incremental
43
+
44
+ A single big move would either: (a) keep `runtime.js` as a frozen
45
+ re-export forever, which obscures the real module graph; or (b) update
46
+ every import site in one commit, which is hostile to bisection. The
47
+ per-handler approach lets us validate each step against the full test
48
+ suite (`npm test`, all 2632 + 86 + 58 + 26 + 6 = 2808 tests passing as
49
+ of this commit) before moving the next.
50
+
51
+ ## Test hygiene
52
+
53
+ `test-interceptor.js` is currently ~10kLoC and groups its tests by
54
+ section number, not by handler. As each handler moves out of
55
+ `runtime.js`, the tests in that section should also relocate to a
56
+ dedicated `test-native-handlers.js` (or `test-native-<handler>.js` if
57
+ the volume warrants splitting further). The TodoWrite/TodoRead tests
58
+ are the first candidates — they cluster around section "9. TodoWrite"
59
+ and "9b. TodoRead" in `test-interceptor.js`.
@@ -0,0 +1,107 @@
1
+ # Stage-2 Step 5 — Shell native-handler extraction plan
2
+
3
+ Step 5 of `docs/ADAPTER-STAGE-2-MIGRATION.md`. Not yet executed because the
4
+ work is structurally bigger than Steps 1–4: `nativeHandle` is not a single
5
+ handler but an in-line dispatcher with ~30 command branches across Bash and
6
+ PowerShell. Splitting it cleanly requires a sub-plan.
7
+
8
+ This document is the sub-plan. It must be approved (or revised) before any
9
+ extraction code is written.
10
+
11
+ ## What is in scope
12
+
13
+ The starting point is `nativeHandle(cmd)` inside `src/interceptor.js`, plus
14
+ the helpers `runCompound`, `parseFlagsAndPath`, `stripQuotes`, and the
15
+ PowerShell-specific normalisation in `expandPsEnvVars`.
16
+
17
+ The compound-chain code (`runCompound`, `isCompoundHandleable`, the
18
+ `cd`/`Set-Location` cwd-prefix logic) is interleaved with the per-command
19
+ branches. It cannot move independently — handlers and the compound runner
20
+ share the same `cwd` tracking convention.
21
+
22
+ ## What is NOT in scope
23
+
24
+ - Bash/PowerShell dispatch routing through `pipeline.processToolEvent`.
25
+ This is already in place via `src/executor/dispatcher.js`
26
+ (`NATIVE_HANDLERS[CANONICAL.SHELL_BASH]` wraps `nativeHandle` and returns
27
+ the dispatcher-shaped `{ output, exitCode, native }` Result). The
28
+ "Decision-shape mismatch" caveat in the original migration doc is
29
+ partially stale: the dispatcher already canonicalises the return value.
30
+ What remains is whether `nativeHandle`'s own internal `null`-on-no-match
31
+ signal needs to become a Decision (`PASS` to fall back, `LOCAL` to handle).
32
+ Step 5 does NOT change that contract — it only relocates the code.
33
+ - The legacy `runLocally` exec path. That is the cloud-fallback subprocess
34
+ for shell commands the proxy decided not to intercept. Out of scope.
35
+ - Decision-shape unification across all dispatch surfaces. That is a
36
+ Stage-3 concern (per ARCHITECTURE.md "tool-name canonicalisation" note).
37
+
38
+ ## Sub-step plan
39
+
40
+ The constraint at each step is the same as in Steps 1–4: every step must
41
+ keep `npm test` green, must keep `runtime.js` / `interceptor.js` as
42
+ re-export shims, and must produce one refactor commit + one test commit.
43
+
44
+ | Sub-step | Module | Origin (src/interceptor.js) | Destination | Notes |
45
+ |---|---|---|---|---|
46
+ | 5a | shell-read handlers | `cat` / `bat` / `type`, `Get-Content`, `head`, `tail` branches | `src/executor/native-handlers/shell-read.js` | Smallest blast radius. All four share `parseFlagsAndPath` and `readFileNative` (already in read.js). |
47
+ | 5b | shell-stat handlers | `test -f\|-e\|-d`, `Test-Path` branches | `src/executor/native-handlers/shell-stat.js` | Self-contained. No shared helpers beyond `path.resolve`. |
48
+ | 5c | shell-list handlers | `dir`, `Get-ChildItem`, `find -name` branches | `src/executor/native-handlers/shell-list.js` | Uses an internal recursive `walk()` — needs to keep `SKIP` consistent with `GLOB_SKIP` from `glob.js` (or import it). |
49
+ | 5d | shell-search handler | `Select-String` branch | `src/executor/native-handlers/shell-search.js` | Single-file search. Comment in source explicitly says glob expansion is intentionally NOT supported here — keep that limit. |
50
+ | 5e | shell-git handler | `git status` / `git log` / `git -C <path>` / bare-git branches + `isBareGitReadOnly` / `isGitCSegment` | `src/executor/native-handlers/shell-git.js` | Largest single family. Shares `runOneShellCommand` exec helper with `runCompound` — must be lifted to a small shared util (`src/executor/native-handlers/shell-exec.js`) first. |
51
+ | 5f | shell-compound runner | `runCompound`, `isCompoundSegment` family, `cd`/`Set-Location` cwd-prefix logic, echo-segment passthrough | `src/executor/native-handlers/shell-compound.js` | Depends on 5a–5e being done first: the runner dispatches to per-family handlers and tracks cwd across segments. Pull that orchestration out only after the families are stable. |
52
+ | 5g | thin router | reduced `nativeHandle` becomes pure dispatch: lookup head → call family handler | `src/executor/native-handlers/shell.js` | Final step. Once the family handlers are in their own files, `nativeHandle` shrinks to ~30 lines. Move it into `shell.js`; `src/interceptor.js` re-exports for back-compat as everywhere else. |
53
+
54
+ Estimated effort: 5a–5d ≈ half a day each (well-trodden pattern). 5e is the
55
+ biggest — git semantics, multiple test sections in `test-interceptor.js`,
56
+ needs the shared exec helper first. 5f and 5g are smaller but order-dependent.
57
+
58
+ ## Why this order
59
+
60
+ - **Read-only-on-files first (5a, 5b)**. Lowest risk. Handlers are pure
61
+ filesystem reads; tests are stable; no shared state.
62
+ - **Searching/listing next (5c, 5d)**. Still read-only; just larger output.
63
+ - **Git last among families (5e)**. Touches the live `git` binary in tests
64
+ (via `execFileSync`), shares an exec helper with the compound runner. The
65
+ shared helper must move first.
66
+ - **Compound runner after families (5f)**. The runner orchestrates the
67
+ families. If extracted earlier, it would re-cross the boundary back into
68
+ `interceptor.js` to call branches that haven't moved. Cleaner to wait.
69
+ - **Router last (5g)**. By then `nativeHandle` is a pure routing function
70
+ and the rename + relocation is mechanical.
71
+
72
+ ## Test relocation
73
+
74
+ Each refactor commit pulls the corresponding section of `test-interceptor.js`
75
+ into a new module-mirroring section of `test-native-handlers.js`. Routing
76
+ tests (`isInterceptable`, `isNativeHandleable`, `isPowerShellNativeHandleable`)
77
+ stay in `test-interceptor.js`. Compound-chain tests stay until 5f.
78
+
79
+ ## Stop conditions / abort criteria
80
+
81
+ - If any sub-step requires changing handler behaviour (not just relocation),
82
+ revert and re-plan. A behavioural fix and a structural move must not ride
83
+ on the same commit.
84
+ - If `nativeHandle`'s `null`-on-no-match signal needs to become a Decision
85
+ during the move, abort and address that as Stage-3 work first. The Step 5
86
+ scope is "relocation without behaviour change".
87
+ - The `parseFlagsAndPath` and `stripQuotes` helpers may turn out to be
88
+ shared by more than one family. If so, lift them to a `shell-parse.js`
89
+ helper module before the family extractions, not during.
90
+
91
+ ## After Step 5
92
+
93
+ `src/interceptor.js` should be ≤ 600 lines (currently 1095, was 1098 before
94
+ the `anthropicRequest` removal). The remaining content will be:
95
+
96
+ - `parseSSE` (Anthropic SSE protocol)
97
+ - `interceptToolUse` orchestration (the main exported entrypoint)
98
+ - `runLocally` (legacy exec fallback)
99
+ - `buildFollowUpHeaders` (HTTP plumbing)
100
+ - `runOneRound`, `blocksToContent`, `classifyBlock`, `isInterceptable`,
101
+ `isNativeHandleable`, `isPowerShellNativeHandleable` (routing helpers)
102
+ - `FALLBACK_REASONS`, `LOCAL_BASH_CMDS` (constants)
103
+
104
+ That is the natural shape of an SSE-protocol interceptor — what is left
105
+ after extracting the per-tool execution. At that point `runtime.js` can be
106
+ deprecated (it would be a pure re-export shim) and dropped in a minor
107
+ version.
@@ -0,0 +1,195 @@
1
+ # Threat Model
2
+
3
+ This document is the first-party threat model for the occasio proxy.
4
+ Audience: security reviewers (internal, third-party), auditors evaluating
5
+ SOC2 / EU-AI-Act / NIST-AI-RMF alignment claims, and contributors
6
+ proposing changes that cross a trust boundary.
7
+
8
+ It is written using **STRIDE** (Spoofing, Tampering, Repudiation,
9
+ Information disclosure, Denial of service, Elevation of privilege) over
10
+ the five trust boundaries enumerated below. STRIDE is the right framing
11
+ because the threats here are concrete data-flow concerns; privacy-class
12
+ risks (LINDDUN) are addressed only briefly because the proxy explicitly
13
+ does not store user content beyond the audit chain and the ledger.
14
+
15
+ Scope status: **first-party, unaudited.** This document reflects the
16
+ author's view of the system. It has not been reviewed by an external
17
+ party. Treat findings as starting points for an audit, not as evidence
18
+ that the system is audited.
19
+
20
+ ## System overview
21
+
22
+ ```
23
+ ┌───────────────────┐ B1 ┌───────────────────┐ B2 ┌─────────────────┐
24
+ │ Claude Code │ ◀────▶ │ occasio proxy │ ◀────▶ │ Anthropic API │
25
+ │ (or other CLI) │ SSE │ (interceptor) │ TLS │ │
26
+ └───────────────────┘ └──┬──────────┬─────┘ └─────────────────┘
27
+ │ │
28
+ B3│ │B4
29
+ ▼ ▼
30
+ ┌──────────────┐ ┌──────────────────┐
31
+ │ MCP server │ │ local FS / shell │
32
+ │ (parity) │ │ (native handlers)│
33
+ └──────────────┘ └──────────────────┘
34
+
35
+ B5│
36
+
37
+ ┌─────────────────────┐
38
+ │ audit chain + │
39
+ │ attestation bundle │
40
+ └─────────────────────┘
41
+ ```
42
+
43
+ ### Trust boundaries
44
+
45
+ - **B1 — Agent ↔ Proxy.** The agent (Claude Code or another caller) is
46
+ treated as **untrusted input** to the proxy. The proxy parses
47
+ the Anthropic SSE protocol, tool-use blocks, and follow-up headers.
48
+ - **B2 — Proxy ↔ Anthropic API.** The proxy is a TLS client of the
49
+ upstream. The upstream is trusted as a service, not as an oracle —
50
+ the proxy must not let upstream responses dictate local actions.
51
+ - **B3 — MCP path.** When invoked as `occasio-mcp`, the proxy speaks
52
+ the MCP JSON-RPC frame protocol on stdin/stdout. Caller is
53
+ untrusted; the wire format must not let a malformed frame escape.
54
+ - **B4 — Proxy ↔ local resources.** Native handlers read files and
55
+ invoke shell subprocesses. Inputs originating from B1 reach this
56
+ boundary as tool-use blocks. Path traversal, shell injection, and
57
+ symlink-following are the concrete concerns here.
58
+ - **B5 — Audit / attest write surface.** The hash-chained JSONL ledger
59
+ and the Sigstore-signed in-toto attestation bundles. The integrity
60
+ guarantee is the basis for every compliance claim downstream.
61
+
62
+ ## Out of scope
63
+
64
+ - **Operating-system privilege escalation.** If an attacker can already
65
+ write to `~/.occasio/` or run code as the user, all bets are off.
66
+ The proxy assumes filesystem ACLs on the home directory are intact.
67
+ - **TLS / cert-pinning of the upstream.** The proxy uses Node's default
68
+ TLS stack against `api.anthropic.com`. We rely on the platform CA
69
+ store. MITM with a CA-store compromise is out of scope.
70
+ - **Supply-chain compromise of `sigstore` or `proper-lockfile`.** These
71
+ are the two runtime deps. Pinning + lockfile audit is the
72
+ responsibility of the deployment, not the proxy.
73
+ - **Side-channels on the host (timing, cache, EM).** Out of scope.
74
+ - **Anthropic API quota exhaustion attacks.** Anthropic's concern, not
75
+ the proxy's. The budget gate (`--budget`) reduces but does not
76
+ eliminate cost exposure.
77
+
78
+ ## STRIDE — boundary-by-boundary
79
+
80
+ ### B1 — Agent ↔ Proxy (SSE / tool-use blocks)
81
+
82
+ | Class | Threat | Current mitigation | Residual |
83
+ |---|---|---|---|
84
+ | **S** | A non-Anthropic SSE-shaped payload pretending to be a tool-use block. | Tool-use blocks must match the schema enforced in `parseSSE`. Unknown `name` falls back to cloud (not executed locally). | If the agent itself is compromised, the proxy will faithfully execute whatever it asks. Out of scope (the agent is the trust source for the operator). |
85
+ | **T** | Modified SSE frames designed to inject extra tool-use blocks downstream. | The proxy re-emits a synthesized SSE stream to the agent — it does not blindly forward upstream bytes. Tool-use blocks are reconstructed from parsed structures. | A bug in `parseSSE` or `runOneRound` that lets a forged block slip through would defeat this. **Fuzz target.** |
86
+ | **R** | Agent denies having issued a tool call. | Every block is appended to `pipeline-events.jsonl` with `tool_inputs` captured (post-ARCH-27 governance milestone). The hash-chain makes selective deletion detectable. | The proxy cannot prove the agent's *intent* — only that the call was made. |
87
+ | **I** | A malicious tool-use response from the upstream leaks proxy state (cwd, env) back to the agent. | Native handlers never read process env vars and only resolve paths under `process.cwd()` (with explicit absolute-path opt-in for Read). Synthetic responses (`BLOCK`) are templated, not interpolated from upstream. | `expandPsEnvVars` resolves `$env:VAR` in PowerShell input. If a policy author writes a rule that echoes env-expanded input back, the value reaches the agent. Document this in policy-author guidance. |
88
+ | **D** | Huge SSE payload starves the proxy. | `MAX_OUTPUT` (512 KB) caps file reads; shell exec uses `maxBuffer: 512_000`. SSE chunks are processed streamingly, not buffered whole. | An attacker can still cause many small allocations. No per-connection rate limit. **Add later.** |
89
+ | **E** | Tool-use block escalates from a non-interceptable tool (e.g. `Write`) to a privileged dispatch path. | `isInterceptable` is a pure function on `block.name + block.input`; non-whitelisted names return `false` and always go to the cloud (where the agent's own confirmation prompts apply). | The proxy does not *prevent* the cloud from executing dangerous tools — it only declines to execute them locally. The policy engine's `deny_patterns` is the mechanism to actually block. Document the distinction. |
90
+
91
+ ### B2 — Proxy ↔ Anthropic API
92
+
93
+ | Class | Threat | Current mitigation | Residual |
94
+ |---|---|---|---|
95
+ | **S** | A man-in-the-middle pretending to be `api.anthropic.com`. | Node TLS with system CA store. | No cert pinning. Out of scope per "Out of scope" above. |
96
+ | **T** | Upstream response modified to inject tool-use blocks the agent didn't authorize. | Tool-use blocks coming back are parsed structurally and dispatched through the same gate as user-originated blocks (`isInterceptable` → handler/cloud). A tampered upstream cannot achieve more than a malicious agent already could. | If the upstream is fully compromised, it can mint any tool-use block. The agent's local-confirmation UX is the final defense; the proxy does not add one. |
97
+ | **R** | Proxy denies having sent a request. | The audit chain records every `tool_use` row including `run_id`, `iso`, `cwd`. The cloud send itself is logged separately. Selective deletion is detectable via hash-chain verifier (`audit/verifier.js`). | The proxy cannot prove *what* it sent (only that a send happened) unless full-body capture is enabled. That is intentional to limit content exposure. |
98
+ | **I** | Secrets in tool output (cred files, env dumps) get sent upstream. | `scanSecrets` runs over Read output; `block_secrets` policy mode aborts the round. `redact_secrets` mode substitutes redacted tokens. | Pattern-based detection misses novel secret formats. No entropy-based detection. **Calibration gap.** |
99
+ | **D** | Cost-amplification: the upstream is induced to bill the operator (compromised agent in a loop). | `--budget N` gate: 80% warning, 100% block (402). | The block fires *after* the request that crosses the threshold. Single-call cost spikes are not pre-flighted. |
100
+ | **E** | Upstream-controlled `tool_use` block names a privileged action. | Whitelist-based interception; everything not on the list falls through and is shown to the agent. The agent's own confirmation governs whether it runs. | Defense-in-depth gap: the proxy does not add its own confirmation step. By design. |
101
+
102
+ ### B3 — MCP path
103
+
104
+ | Class | Threat | Current mitigation | Residual |
105
+ |---|---|---|---|
106
+ | **S** | A non-MCP client speaking JSON-RPC into stdin. | JSON-RPC frames validated structurally (method whitelist, params schema in `mcp-normalize.js`). Unknown method → error response. | The MCP server inherits the trust profile of whoever spawned it (CLI parent process). |
107
+ | **T** | Frame splicing — partial JSON across multiple writes designed to confuse the parser. | Line-buffered (`split('\n')`); each line parsed independently. Malformed frames are dropped with a logged error (per `mcp-server.js` line 304). | Buffering is unbounded if no newline arrives. **Add a max-line-length gate.** |
108
+ | **R** | MCP caller denies having issued a tool call. | Same audit chain as B1, with the same guarantees. `tools_mcp_count` and `mcp-experiment.jsonl` distinguish MCP rows from interceptor rows. | — |
109
+ | **I** | A response under MCP leaks more than the equivalent interceptor response. | `executeLocalTool` is the shared wrapper; both paths produce the same shape including `secrets` scan. | — |
110
+ | **D** | Same as B1. | — | No per-client rate limit. |
111
+ | **E** | A misconfigured MCP server runs in a different cwd than expected. | The cwd at spawn time is captured in `tool_use` rows (post-ARCH-26 cwd-in-log work). Path enforcement honours this cwd. | If two MCP servers share a `.occasio` log dir but different cwds, audit interpretation needs the row's cwd, not the verifier's cwd. Documented. |
112
+
113
+ ### B4 — Proxy ↔ local FS / shell
114
+
115
+ | Class | Threat | Current mitigation | Residual |
116
+ |---|---|---|---|
117
+ | **S** | Symlink pointing at a sensitive file. Read tool dereferences. | `handleReadTool` uses `fs.readFileSync` which follows symlinks. Path-policy enforcement (`deny_paths`) is evaluated against the *resolved* path, not the requested path. | Race condition: TOCTOU between policy check and read. No `O_NOFOLLOW` on Node's fs API for sync calls. **Document; consider switching to `lstat`-then-`open` pattern.** |
118
+ | **T** | Shell command modified mid-flight to alter behaviour. | Shell strings are passed verbatim to `child_process.exec` with `maxBuffer`. No shell-string concatenation from upstream. | The native shell handler is the cleaner path — it never invokes a shell. Commands that fall back to `runLocally` do go through a shell. By design (existing user workflows depend on shell features). |
119
+ | **R** | A subprocess writes to the audit log to mask its activity. | Audit log lives under `~/.occasio/` and is opened append-only by the proxy itself. Subprocesses are spawned with the proxy's environment but do not inherit the file descriptor. | OS-level privesc covers this; out of scope. |
120
+ | **I** | Path traversal via `..` in Read input. | `handleReadTool` calls `path.resolve(process.cwd(), fp)` — this normalises `..` segments but *does not* confine the resolved path to `cwd`. Policy `deny_paths` then evaluates the resolved path. UNC / network paths (`\\server\share\…` and `//server/share/…`) are rejected at the `isReadHandleable` gate to prevent SMB-resolution DoS. **Fuzz-verified.** | Absolute paths are accepted by design (the agent often needs to read system files like `/etc/hosts`). The defense is the policy layer, not the handler. |
121
+ | **D** | `**/**/**/...` glob causing a deep walk. | `GLOB_MAX = 500` matches; `walkGlob` skips `node_modules`, `.git`, etc.; `GLOB_MAX_DEPTH = 16` caps recursion depth; `GLOB_MAX_MS = 2000` caps wall-clock per walk. The depth and wall-clock caps are both env-tunable. | An attacker can still consume ~2 s per call. Stacking many calls in a round is the residual vector — partially covered by `--budget` (cost) but not by a per-round count cap. |
122
+ | **E** | `nativeHandle` executes a command it shouldn't (e.g. a `git` subcommand that mutates). | `isBareGitReadOnly` and `isGitCSegment` whitelist subcommands (status, log, diff, …). Unknown subcommands return `null` (= fall through, not execute). | The whitelist is the integrity guarantee. Regression in the whitelist is the highest-impact local bug class. **Fuzz target.** |
123
+
124
+ ### B5 — Audit / attest
125
+
126
+ | Class | Threat | Current mitigation | Residual |
127
+ |---|---|---|---|
128
+ | **S** | A different process appends a forged row to `pipeline-events.jsonl`. | Hash-chain: each row's `prev_hash` is the SHA-256 of the previous canonical row. The verifier (`audit/verifier.js`) detects any insertion or modification. Optional file-locking (`proper-lockfile`) for multi-writer scenarios (audit v0.8.4). | If an attacker rewrites the entire chain (recomputing every hash), the GENESIS sentinel is the only fixed anchor. Bundle this into the Sigstore attestation for external attestability. |
129
+ | **T** | Selective deletion of rows. | Same hash chain as for **S**: chain verification fails on any gap. | — |
130
+ | **R** | Operator denies a tool call happened. | Sigstore-signed attestation bundles cryptographically commit to the chain head. | Signing is opt-in (`occasio attest sign`); a non-signing operator has only the local hash chain. |
131
+ | **I** | `tool_inputs` recorded in the chain contain secrets that propagate downstream. | Audit-time secret scanning is **not** applied; the chain captures inputs as-is for forensic value. The expectation is that operators consume the chain in a trusted environment. | If the chain is sent to a third party (compliance vendor), pre-redaction is the operator's responsibility. **Document this prominently.** |
132
+ | **D** | Audit write failure aborts the proxy. | `AuditWriteError` is intentionally session-fatal (per `pipeline.js` line 39). No silent fallback. | A consistently failing audit (full disk, permissions) bricks the proxy. This is the right tradeoff, but the failure must be loud — and it currently is. |
133
+ | **E** | The `attest` CLI signs a bundle the chain does not support. | `canonicalize.js` produces a stable byte representation; `sign.js` signs that. Verification recomputes both. Any drift fails. | A bug in `canonicalize.js` is the single point of failure for the attestation pipeline. The "canonical-byte round-trip stable" test is the primary mitigation; broaden it. |
134
+
135
+ ## Mitigation matrix — which control covers which boundary
136
+
137
+ | Control | B1 | B2 | B3 | B4 | B5 |
138
+ |---|---|---|---|---|---|
139
+ | `isInterceptable` whitelist | ✓ | — | ✓ | ✓ | — |
140
+ | `scanSecrets` | ✓ | ✓ | ✓ | — | — |
141
+ | `--budget` gate | — | ✓ | — | — | — |
142
+ | Native handler (no shell) | — | — | — | ✓ | — |
143
+ | `deny_paths` / `deny_patterns` | — | — | — | ✓ | — |
144
+ | Hash-chain audit | ✓ | ✓ | ✓ | ✓ | ✓ |
145
+ | Sigstore attestation | — | — | — | — | ✓ |
146
+ | `MAX_OUTPUT` / `GLOB_MAX` caps | — | — | — | ✓ | — |
147
+
148
+ ## Residual risks worth naming
149
+
150
+ These are real and currently unmitigated, except where an item is marked as addressed:
151
+
152
+ 1. **No per-connection rate limit** on the SSE interceptor or MCP server.
153
+ DoS from a buggy or malicious agent is possible.
154
+ 2. **No symlink-following control.** `handleReadTool` will follow a
155
+ symlink that escapes `cwd`. Policy `deny_paths` against the *resolved*
156
+ path is the only defense.
157
+ 3. **Pattern-based secret detection only.** No entropy heuristic.
158
+ `redact_secrets` mode mitigates accidental leaks; targeted exfil with
159
+ custom-format keys would slip through.
160
+ 4. **Anomaly detector calibrated on synthetic data.** FP-rate is
161
+ documented but empirical baselines from real sessions are not yet
162
+ gathered. The detector currently performs more like a smoke detector
163
+ than a precision instrument.
164
+ 5. ~~**No glob depth limit.**~~ **Addressed** in the same commit set as this
165
+ document: `GLOB_MAX_DEPTH=16` and `GLOB_MAX_MS=2000` cap recursion and
166
+ wall-clock per walk. A pathological symlink loop now stops at depth 16
167
+ or 2 s, whichever fires first. Both are env-tunable.
168
+ 6. **The whitelist in `isBareGitReadOnly` is the highest-impact integrity
169
+ surface.** A regression that admits `git push` would let a compromised
170
+ agent leak via the local git remote.
171
+ 7. **Audit chain captures `tool_inputs` verbatim.** Operators sending
172
+ the chain to third parties must pre-redact.
173
+ 8. **Sigstore caret-pin (`^3.1.0`).** A compromised minor version of
174
+ sigstore is admitted by the resolver. Lockfile commits help but do not
175
+ eliminate this.
176
+ 9. **No replay protection on the MCP frame parser.** A replayed valid
177
+ frame is treated as a new request. Acceptable because the MCP server
178
+ is stateless per call, but worth knowing.
179
+
180
+ ## Verification mapping
181
+
182
+ | Mitigation | Where proven |
183
+ |---|---|
184
+ | `isInterceptable` whitelist correctness | `test-interceptor.js` §2, §19–§22 + `test-native-handlers.js` §1–§4 |
185
+ | Hash-chain tamper detection | `test-audit-chain.js` (86 tests inc. integrity, GENESIS, repair) |
186
+ | Sigstore round-trip | `test-attest.js` (58 tests) + CI-gated `test:e2e` |
187
+ | `deny_paths` enforcement | `test-policy-paths.js` (26 tests) |
188
+ | Public-API export drift | `test-native-handlers.js` §5 (drift guard) |
189
+ | Native handler robustness against adversarial inputs | `test-fuzz.js` (new — this commit) |
190
+
191
+ ## Change protocol
192
+
193
+ Any change that crosses one of these boundaries must update this
194
+ document in the same PR. Changes inside a boundary (refactor, perf,
195
+ ergonomics) do not. The author judges; reviewers can require an update.
@@ -66,6 +66,35 @@ node scripts/calibrate-anomaly-detectors.js --window 30m --step 10m
66
66
 
67
67
  The script prints per-detector tallies, alert rates, one example per severity level, and a heuristic suggestion ("threshold likely too tight" if a detector fires >1 HIGH/day on normal usage). Use the suggestion as a starting point, not a verdict — your chain's activity profile may legitimately push a detector higher than the heuristic expects.
68
68
 
69
+ ## Synthetic profiles (false-positive matrix)
70
+
71
+ `scripts/edr-synthetic.js` generates a hash-chained `pipeline-events.jsonl` matching one of four profiles. Use it to validate detector behavior without relying on your own chain history.
72
+
73
+ ```bash
74
+ node scripts/edr-synthetic.js --profile low-activity --rows 600 --out /tmp/lo.jsonl
75
+ node bin/occasio.js anomalies --chain /tmp/lo.jsonl --window 15m
76
+ ```
77
+
78
+ | Profile | What it models | deny-rate | file-read-volume | secret-redact-rate | unknown-tool-input |
79
+ |--- |--- |---: |---: |---: |---: |
80
+ | `low-activity` | 1 tool-call / 5 min, no BLOCKs, no redactions | quiet | quiet | quiet | quiet |
81
+ | `bursty` | 60 distinct file reads in 5-min spike late in the window | quiet | **fires** | quiet | quiet |
82
+ | `secret-heavy` | Periodic redaction stretches (8-15 per window) over 10 hrs | quiet | quiet | **fires** MED/HIGH | quiet |
83
+ | `denied-heavy` | 12 BLOCK rows clustered in the final 15 minutes | **fires** HIGH | quiet | quiet | quiet |
84
+
85
+ The `low-activity` profile is the canonical false-positive (FP) smoke test: any HIGH-severity alert on this profile is a regression. `test-anomaly.js` asserts this on every CI run.
86
+
87
+ ### Threshold overrides per profile
88
+
89
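+ If your own chain looks more like `bursty` or `denied-heavy` on a steady-state basis (a legitimately busy team), raise the threshold with `--threshold-multiplier`. A chain that resembles `secret-heavy` is not quieted by this flag, because `secret-redact-rate` does not currently apply the multiplier: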
90
+
91
+ ```bash
92
+ occasio anomalies --threshold-multiplier 2 # halve sensitivity
93
+ occasio anomalies --threshold-multiplier 0.5 # double sensitivity
94
+ ```
95
+
96
+ The multiplier currently affects `deny-rate` and `file-read-volume` (the two rate detectors with a continuous threshold). Categorical detectors (`unknown-tool-input`, `secret-redact-rate`'s "first-time-leak" branch) ignore it on purpose — those signals are not threshold-tunable.
97
+
69
98
  ## What this is not
70
99
 
71
100
  - **Not a replacement for adversarial validation.** Calibration tells us whether the threshold is too tight for normal use. It does not tell us whether the threshold is loose enough to catch genuine attacks. That is the job of `occasio demo anomalies` (which constructs a synthetic adversarial chain that must trigger all four detectors) and the [EDR demo walkthrough](edr-demo.md) (which runs a real Claude Code session against the policy and confirms the detectors fire on the resulting chain).
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "@occasiolabs/occasio",
3
- "version": "0.8.4",
3
+ "version": "0.8.5",
4
+ "mcpName": "io.github.occasiolabs/occasio",
4
5
  "description": "Occasio — cryptographically verifiable behavioral attestation for AI coding agents. Tool-call interception + policy enforcement + tamper-evident audit chain + Sigstore-signed in-toto attestations + windowed EDR detection. Same engine for Claude Code and MCP; Computer-Use scaffold included.",
5
6
  "main": "src/index.js",
6
7
  "files": [
@@ -14,10 +15,13 @@
14
15
  "NOTICE"
15
16
  ],
16
17
  "scripts": {
17
- "test": "node test-interceptor.js && node test-audit-chain.js && node test-attest.js && node test-policy-paths.js",
18
- "lint": "eslint src/audit src/attest",
18
+ "pretest": "npm run lint:all",
19
+ "test": "node test-interceptor.js && node test-native-handlers.js && node test-audit-chain.js && node test-attest.js && node test-policy-paths.js && node test-anomaly.js",
20
+ "lint": "eslint src/audit src/attest src/core src/executor",
19
21
  "lint:all": "eslint src bin",
20
22
  "smoke": "node test-smoke.js",
23
+ "fuzz": "node test-fuzz.js",
24
+ "test:e2e": "node test-attest-e2e.js",
21
25
  "test:mcp": "node test-mcp-server.js",
22
26
  "restart-check": "node scripts/restart-check.js",
23
27
  "check-validation": "node scripts/check-validation.js",
@@ -224,9 +224,8 @@ async function runToolLoop({
224
224
  const fs = require('fs');
225
225
  const path = require('path');
226
226
  const {
227
- classifyBlock, isInterceptable,
228
227
  blocksToContent, runOneRound,
229
- scanToolResults, FALLBACK_REASONS,
228
+ FALLBACK_REASONS,
230
229
  } = require('../interceptor');
231
230
 
232
231
  const { blocks: initialBlocks, stopReason: initialStop, message: initialMessage } =
@@ -134,7 +134,7 @@ const RESERVED_SHELL_BLACKLIST = [
134
134
  /\bsudo\b/i, /\bsu\b/i,
135
135
  /\brm\s+-rf\s+\//i,
136
136
  /\bmkfs\b/i, /\bdd\s+if=/i,
137
- /:\(\)\s*\{\s*:\|:\&/, // fork bomb
137
+ /:\(\)\s*\{\s*:\|:&/, // fork bomb
138
138
  ];
139
139
 
140
140
  // Compile a policy pattern string into a JS RegExp. PCRE/RE2-style inline
@@ -51,6 +51,7 @@ function runAnomaliesCli(args = []) {
51
51
  process.stdout.write(
52
52
  'Usage:\n' +
53
53
  ' occasio anomalies [--window 15m] [--since <ISO>] [--chain <path>] [--json]\n' +
54
+ ' [--threshold-multiplier <n>] raise (>1) or lower (<1) deny/file-read thresholds\n' +
54
55
  '\n' +
55
56
  'Detectors:\n' +
56
57
  ' deny-rate BLOCK rate spike vs historical baseline\n' +
@@ -65,8 +66,10 @@ function runAnomaliesCli(args = []) {
65
66
  const since = flag(args, '--since');
66
67
  const chain = flag(args, '--chain') || DEFAULT_CHAIN;
67
68
  const asJson = bool(args, '--json');
69
+ const mult = parseFloat(flag(args, '--threshold-multiplier', '1') || '1');
70
+ const thresholdMultiplier = Number.isFinite(mult) && mult > 0 ? mult : 1;
68
71
 
69
- const result = runDetectors({ chainFile: chain, windowMs, now: since });
72
+ const result = runDetectors({ chainFile: chain, windowMs, now: since, thresholdMultiplier });
70
73
 
71
74
  if (asJson) {
72
75
  process.stdout.write(JSON.stringify(result, null, 2) + '\n');
@@ -63,7 +63,8 @@ function evaluate(windowRows, historicalRows, opts) {
63
63
  }
64
64
 
65
65
  const ratio = winBlocks / histRatePerWindow;
66
- if (ratio < MULTIPLIER_THRESHOLD) return [];
66
+ const effectiveThreshold = MULTIPLIER_THRESHOLD * (opts.thresholdMultiplier || 1);
67
+ if (ratio < effectiveThreshold) return [];
67
68
 
68
69
  const severity = ratio > 10 ? 'high' : 'medium';
69
70
  return [{
@@ -87,7 +87,8 @@ function evaluate(windowRows, historicalRows, opts) {
87
87
  }
88
88
 
89
89
  if (p95 === 0) return [];
90
- if (winSet.size < p95 * P95_MULTIPLIER) return [];
90
+ const effectiveMult = P95_MULTIPLIER * (opts.thresholdMultiplier || 1);
91
+ if (winSet.size < p95 * effectiveMult) return [];
91
92
 
92
93
  const ratio = winSet.size / Math.max(p95, 1);
93
94
  const severity = ratio > 4 ? 'high' : 'medium';
@@ -110,6 +110,10 @@ function runDetectors({
110
110
  windowMs = DEFAULT_WINDOW_MS,
111
111
  now = null,
112
112
  detectors = null,
113
+ // Multiplier applied to detector internal thresholds. >1 = more permissive
114
+ // (fewer alerts), <1 = more sensitive. Detectors choose how to apply this;
115
+ // categorical detectors may ignore it.
116
+ thresholdMultiplier = 1,
113
117
  // For tests: pass rows directly instead of reading from disk.
114
118
  rows = null,
115
119
  } = {}) {
@@ -123,6 +127,7 @@ function runDetectors({
123
127
  try {
124
128
  const out = d.evaluate(split.window, split.historical, {
125
129
  windowMs, windowStartMs: split.windowStartMs, windowEndMs: split.windowEndMs,
130
+ thresholdMultiplier,
126
131
  });
127
132
  if (!Array.isArray(out)) continue;
128
133
  for (const a of out) {
package/src/boundary.js CHANGED
@@ -116,7 +116,7 @@ function fmtBytes(b) {
116
116
  return `${(b / 1024).toFixed(1)} KB`;
117
117
  }
118
118
 
119
- function renderBoundaryView(view, opts = {}) {
119
+ function renderBoundaryView(view, _opts = {}) {
120
120
  if (!view) return '';
121
121
  const lines = [];
122
122
  const tag = view.event_type ? `[${view.event_type}]` : '';
package/src/classifier.js CHANGED
@@ -37,7 +37,7 @@ const FEEDBACK_LOG = path.join(os.homedir(), '.occasio', 'routing-feedback.jsonl
37
37
  * @param {string} [context] reserved for future ML use
38
38
  * @returns {{ local: boolean, confidence: number, reason: string }}
39
39
  */
40
- function routeLocally(toolName, command, context = '') {
40
+ function routeLocally(toolName, command, _context = '') {
41
41
  if (toolName !== 'Bash') {
42
42
  return { local: false, confidence: 1.0, reason: 'non-bash tool' };
43
43
  }
package/src/cli/clear.js CHANGED
@@ -40,13 +40,13 @@ function run(args) {
40
40
  const blockedDir = path.join(LOG_DIR, 'blocked');
41
41
  let n = 0;
42
42
  for (const dir of [logsDir, blockedDir]) {
43
- try { for (const f of fs.readdirSync(dir)) { fs.unlinkSync(path.join(dir, f)); n++; } } catch {}
43
+ try { for (const f of fs.readdirSync(dir)) { fs.unlinkSync(path.join(dir, f)); n++; } } catch { /* ignore */ }
44
44
  }
45
- try { fs.unlinkSync(SESSION_FILE); } catch {}
45
+ try { fs.unlinkSync(SESSION_FILE); } catch { /* ignore */ }
46
46
  console.log(col.g(`✓ Cleared all history (${n} log files) and session data`));
47
47
  } else {
48
- try { fs.unlinkSync(getLogFile()); } catch {}
49
- try { fs.unlinkSync(SESSION_FILE); } catch {}
48
+ try { fs.unlinkSync(getLogFile()); } catch { /* ignore */ }
49
+ try { fs.unlinkSync(SESSION_FILE); } catch { /* ignore */ }
50
50
  console.log(col.g("✓ Cleared today's log and session data"));
51
51
  console.log(col.d(' Use --history to wipe all historical logs'));
52
52
  }