npm - @occasiolabs/occasio - Versions diffs - 0.8.4 → 0.8.5 - Mend

@occasiolabs/occasio 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/docs/ADAPTER-STAGE-2-MIGRATION.md +59 -0
package/docs/STAGE-2-STEP-5-SHELL-PLAN.md +107 -0
package/docs/THREAT-MODEL.md +195 -0
package/docs/edr-calibration.md +29 -0
package/package.json +7 -3
package/src/adapters/claude-code.js +1 -2
package/src/adapters/computer-use.js +1 -1
package/src/anomaly/cli.js +4 -1
package/src/anomaly/detectors/deny-rate.js +2 -1
package/src/anomaly/detectors/file-read-volume.js +2 -1
package/src/anomaly/index.js +5 -0
package/src/boundary.js +1 -1
package/src/classifier.js +1 -1
package/src/cli/clear.js +4 -4
package/src/cli/help.js +58 -37
package/src/cli/status.js +1 -1
package/src/dashboard.js +2 -3
package/src/distiller.js +1 -1
package/src/executor/dispatcher.js +2 -2
package/src/executor/native-handlers/glob.js +173 -0
package/src/executor/native-handlers/grep.js +258 -0
package/src/executor/native-handlers/read.js +99 -0
package/src/executor/native-handlers/todo.js +56 -0
package/src/harness.js +8 -10
package/src/index.js +13 -15
package/src/inspect.js +1 -1
package/src/interceptor.js +9 -29
package/src/ledger.js +2 -3
package/src/mcp-experiment.js +4 -4
package/src/mcp-server.js +3 -3
package/src/policy/doctor.js +2 -2
package/src/policy/engine.js +0 -1
package/src/policy/init.js +1 -1
package/src/policy/loader.js +3 -3
package/src/policy/show.js +1 -2
package/src/preflight/cli.js +0 -1
package/src/preflight/miner.js +3 -6
package/src/redteam.js +1 -2
package/src/replay.js +1 -1
package/src/report/index.js +0 -4
package/src/runtime.js +42 -444
package/src/selftest.js +1 -1
package/src/session.js +1 -1

package/docs/ADAPTER-STAGE-2-MIGRATION.md ADDED Viewed

@@ -0,0 +1,59 @@
+# Adapter Stage-2 Migration
+Stage 1 (already complete) moved the cross-cutting pipeline plumbing —
+boundary events, processToolEvent, tool-name canonicalisation — into
+`src/core/`. The result is an interceptor that still owns dispatch logic
+but no longer owns the canonical event-construction or the
+decision/effect bookkeeping.
+Stage 2 separates **tool execution** from **dispatch routing**. The goal
+is that `src/interceptor.js` shrinks into a thin router whose only job is
+to map an incoming Anthropic SSE tool_use block to a per-tool *native
+handler*, and that those native handlers live in their own files under
+`src/executor/native-handlers/`. Every native handler is a pure function
+of `(input, sessionContext) → { output, exitCode, … }` with no
+dispatch-loop awareness.
+## What has moved so far
+| Step | Module | Origin | Destination | Status |
+|---|---|---|---|---|
+| 1 | TodoWrite / TodoRead native handlers | `src/runtime.js` | `src/executor/native-handlers/todo.js` | ✅ |
+| 2 | Read native handler (+ `MAX_OUTPUT`, `READ_SKIP_EXTENSIONS`, `readFileNative`) | `src/runtime.js` | `src/executor/native-handlers/read.js` | ✅ |
+| 3 | Glob native handler (+ `globToRegex`, `walkGlob`, `GLOB_*` constants) | `src/runtime.js` | `src/executor/native-handlers/glob.js` | ✅ |
+| 4 | Grep native handler (+ `tryReadGrep`, `walkGrepFiles`, `GREP_*` constants) | `src/runtime.js` | `src/executor/native-handlers/grep.js` | ✅ this commit |
+`src/runtime.js` re-exports the moved symbols, so every existing import
+path (`src/interceptor.js`, tests, the MCP server) continues to work
+without a code change.
+## What is still pending
+The order below reflects the cleanest dependency boundary at each step:
+moving a handler **without** also moving its tests and without breaking
+`executeLocalTool()` is the constraint that controls sequencing.
+| Step | Module | Origin | Proposed destination | Notes |
+|---|---|---|---|---|
+| 5 | Bash / PowerShell native dispatch | `src/interceptor.js` `nativeHandle` + per-family branches | `src/executor/native-handlers/shell-*.js` | Too big to land as a single step. Sub-plan in **`docs/STAGE-2-STEP-5-SHELL-PLAN.md`** decomposes it into 5a–5g (file-read / file-stat / list / search / git / compound / router). The original "Decision-shape mismatch" caveat is partially stale: `executor/dispatcher.js` already canonicalises the return value via `NATIVE_HANDLERS[SHELL_BASH]`. |
+| 6 | `executeLocalTool()` wrapper | `src/runtime.js` | `src/executor/index.js` | Once steps 2–5 are done, the wrapper becomes the executor module's public surface. `runtime.js` is then a thin compatibility shim and can be deprecated. |
+| 7 | Remove `runtime.js` shim | — | — | After two minor versions with `runtime.js` re-exporting from `executor/`, drop the file. |
+## Why incremental
+A single big move would either: (a) keep `runtime.js` as a frozen
+re-export forever, which obscures the real module graph; or (b) update
+every import site in one commit, which is hostile to bisection. The
+per-handler approach lets us validate each step against the full test
+suite (`npm test`, all 2632 + 86 + 58 + 26 + 6 = 2808 tests passing as
+of this commit) before moving the next.
+## Test hygiene
+`test-interceptor.js` is currently ~10kLoC and groups its tests by
+section number, not by handler. As each handler moves out of
+`runtime.js`, the tests in that section should also relocate to a
+dedicated `test-native-handlers.js` (or `test-native-<handler>.js` if
+the volume warrants splitting further). The TodoWrite/TodoRead tests
+are the first candidates — they cluster around section "9. TodoWrite"
+and "9b. TodoRead" in `test-interceptor.js`.

package/docs/STAGE-2-STEP-5-SHELL-PLAN.md ADDED Viewed

@@ -0,0 +1,107 @@
+# Stage-2 Step 5 — Shell native-handler extraction plan
+Step 5 of `docs/ADAPTER-STAGE-2-MIGRATION.md`. Not yet executed because the
+work is structurally bigger than Steps 1–4: `nativeHandle` is not a single
+handler but an in-line dispatcher with ~30 command branches across Bash and
+PowerShell. Splitting it cleanly requires a sub-plan.
+This document is the sub-plan. It must be approved (or revised) before any
+extraction code is written.
+## What is in scope
+The starting point is `nativeHandle(cmd)` inside `src/interceptor.js`, plus
+the helpers `runCompound`, `parseFlagsAndPath`, `stripQuotes`, and the
+PowerShell-specific normalisation in `expandPsEnvVars`.
+The compound-chain code (`runCompound`, `isCompoundHandleable`, the
+`cd`/`Set-Location` cwd-prefix logic) is interleaved with the per-command
+branches. It cannot move independently — handlers and the compound runner
+share the same `cwd` tracking convention.
+## What is NOT in scope
+- Bash/PowerShell dispatch routing through `pipeline.processToolEvent`.
+  This is already in place via `src/executor/dispatcher.js`
+  (`NATIVE_HANDLERS[CANONICAL.SHELL_BASH]` wraps `nativeHandle` and returns
+  the dispatcher-shaped `{ output, exitCode, native }` Result). The
+  "Decision-shape mismatch" caveat in the original migration doc is
+  partially stale: the dispatcher already canonicalises the return value.
+  What remains is whether `nativeHandle`'s own internal `null`-on-no-match
+  signal needs to become a Decision (`PASS` to fall back, `LOCAL` to handle).
+  Step 5 does NOT change that contract — it only relocates the code.
+- The legacy `runLocally` exec path. That is the cloud-fallback subprocess
+  for shell commands the proxy decided not to intercept. Out of scope.
+- Decision-shape unification across all dispatch surfaces. That is a
+  Stage-3 concern (per ARCHITECTURE.md "tool-name canonicalisation" note).
+## Sub-step plan
+The constraint at each step is the same as in Steps 1–4: every step must
+keep `npm test` green, must keep `runtime.js` / `interceptor.js` as
+re-export shims, and must produce one refactor commit + one test commit.
+| Sub-step | Module | Origin (src/interceptor.js) | Destination | Notes |
+|---|---|---|---|---|
+| 5a | shell-read handlers | `cat` / `bat` / `type`, `Get-Content`, `head`, `tail` branches | `src/executor/native-handlers/shell-read.js` | Smallest blast radius. All four share `parseFlagsAndPath` and `readFileNative` (already in read.js). |
+| 5b | shell-stat handlers | `test -f\|-e\|-d`, `Test-Path` branches | `src/executor/native-handlers/shell-stat.js` | Self-contained. No shared helpers beyond `path.resolve`. |
+| 5c | shell-list handlers | `dir`, `Get-ChildItem`, `find -name` branches | `src/executor/native-handlers/shell-list.js` | Uses an internal recursive `walk()` — needs to keep `SKIP` consistent with `GLOB_SKIP` from `glob.js` (or import it). |
+| 5d | shell-search handler | `Select-String` branch | `src/executor/native-handlers/shell-search.js` | Single-file search. Comment in source explicitly says glob expansion is intentionally NOT supported here — keep that limit. |
+| 5e | shell-git handler | `git status` / `git log` / `git -C <path>` / bare-git branches + `isBareGitReadOnly` / `isGitCSegment` | `src/executor/native-handlers/shell-git.js` | Largest single family. Shares `runOneShellCommand` exec helper with `runCompound` — must be lifted to a small shared util (`src/executor/native-handlers/shell-exec.js`) first. |
+| 5f | shell-compound runner | `runCompound`, `isCompoundSegment` family, `cd`/`Set-Location` cwd-prefix logic, echo-segment passthrough | `src/executor/native-handlers/shell-compound.js` | Depends on 5a–5e being done first: the runner dispatches to per-family handlers and tracks cwd across segments. Pull that orchestration out only after the families are stable. |
+| 5g | thin router | reduced `nativeHandle` becomes pure dispatch: lookup head → call family handler | `src/executor/native-handlers/shell.js` | Final step. Once the family handlers are in their own files, `nativeHandle` shrinks to ~30 lines. Move it into `shell.js`; `src/interceptor.js` re-exports for back-compat as everywhere else. |
+Estimated effort: 5a-5d ≈ half-day each (well-trodden pattern). 5e is the
+biggest — git semantics, multiple test sections in `test-interceptor.js`,
+needs the shared exec helper first. 5f and 5g are smaller but order-dependent.
+## Why this order
+- **Read-only-on-files first (5a, 5b)**. Lowest risk. Handlers are pure
+  filesystem reads; tests are stable; no shared state.
+- **Searching/listing next (5c, 5d)**. Still read-only; just larger output.
+- **Git last among families (5e)**. Touches the live `git` binary in tests
+  (via `execFileSync`), shares an exec helper with the compound runner. The
+  shared helper must move first.
+- **Compound runner after families (5f)**. The runner orchestrates the
+  families. If extracted earlier, it would re-cross the boundary back into
+  `interceptor.js` to call branches that haven't moved. Cleaner to wait.
+- **Router last (5g)**. By then `nativeHandle` is a pure routing function
+  and the rename + relocation is mechanical.
+## Test relocation
+Each refactor commit pulls the corresponding section of `test-interceptor.js`
+into a new module-mirroring section of `test-native-handlers.js`. Routing
+tests (`isInterceptable`, `isNativeHandleable`, `isPowerShellNativeHandleable`)
+stay in `test-interceptor.js`. Compound-chain tests stay until 5f.
+## Stop conditions / abort criteria
+- If any sub-step requires changing handler behaviour (not just relocation),
+  revert and re-plan. A behavioural fix and a structural move must not ride
+  on the same commit.
+- If `nativeHandle`'s `null`-on-no-match signal needs to become a Decision
+  during the move, abort and address that as Stage-3 work first. The Step 5
+  scope is "relocation without behaviour change".
+- The `parseFlagsAndPath` and `stripQuotes` helpers may turn out to be
+  shared by more than one family. If so, lift them to a `shell-parse.js`
+  helper module before the family extractions, not during.
+## After Step 5
+`src/interceptor.js` should be ≤ 600 lines (currently 1095, was 1098 before
+the `anthropicRequest` removal). The remaining content will be:
+- `parseSSE` (Anthropic SSE protocol)
+- `interceptToolUse` orchestration (the main exported entrypoint)
+- `runLocally` (legacy exec fallback)
+- `buildFollowUpHeaders` (HTTP plumbing)
+- `runOneRound`, `blocksToContent`, `classifyBlock`, `isInterceptable`,
+  `isNativeHandleable`, `isPowerShellNativeHandleable` (routing helpers)
+- `FALLBACK_REASONS`, `LOCAL_BASH_CMDS` (constants)
+That is the natural shape of an SSE-protocol interceptor — what is left
+after extracting the per-tool execution. At that point `runtime.js` can be
+deprecated (it would be a pure re-export shim) and dropped in a minor
+version.

package/docs/THREAT-MODEL.md ADDED Viewed

@@ -0,0 +1,195 @@
+# Threat Model
+This document is the first-party threat model for the occasio proxy.
+Audience: security reviewers (internal, third-party), auditors evaluating
+SOC2 / EU-AI-Act / NIST-AI-RMF alignment claims, and contributors
+proposing changes that cross a trust boundary.
+It is written using **STRIDE** (Spoofing, Tampering, Repudiation,
+Information disclosure, Denial of service, Elevation of privilege) over
+the five trust boundaries enumerated below. STRIDE is the right framing
+because the threats here are concrete data-flow concerns; privacy-class
+risks (LINDDUN) are addressed only briefly because the proxy explicitly
+does not store user content beyond the audit chain and the ledger.
+Scope status: **first-party, unaudited.** This document reflects the
+author's view of the system. It has not been reviewed by an external
+party. Treat findings as starting points for an audit, not as evidence
+that the system is audited.
+## System overview
+```
+┌───────────────────┐   B1   ┌───────────────────┐   B2   ┌─────────────────┐
+│   Claude Code     │ ◀────▶ │   occasio proxy   │ ◀────▶ │  Anthropic API  │
+│   (or other CLI)  │  SSE   │   (interceptor)   │  TLS   │                 │
+└───────────────────┘        └──┬──────────┬─────┘        └─────────────────┘
+                                │          │
+                              B3│          │B4
+                                ▼          ▼
+                       ┌──────────────┐   ┌──────────────────┐
+                       │  MCP server  │   │  local FS / shell │
+                       │  (parity)    │   │  (native handlers)│
+                       └──────────────┘   └──────────────────┘
+                                            │
+                                          B5│
+                                            ▼
+                                  ┌─────────────────────┐
+                                  │  audit chain +      │
+                                  │  attestation bundle │
+                                  └─────────────────────┘
+```
+### Trust boundaries
+- **B1 — Agent ↔ Proxy.** The agent (Claude Code or another caller) is
+  treated as **untrusted input** to the proxy. The proxy parses the
+  Anthropic SSE protocol, tool-use blocks, and follow-up headers.
+- **B2 — Proxy ↔ Anthropic API.** The proxy is a TLS client of the
+  upstream. The upstream is trusted as a service, not as an oracle —
+  the proxy must not let upstream responses dictate local actions.
+- **B3 — MCP path.** When invoked as `occasio-mcp`, the proxy speaks
+  the MCP JSON-RPC frame protocol on stdin/stdout. Caller is
+  untrusted; the wire format must not let a malformed frame escape.
+- **B4 — Proxy ↔ local resources.** Native handlers read files and
+  invoke shell subprocesses. Inputs originating from B1 reach this
+  boundary as tool-use blocks. Path traversal, shell injection, and
+  symlink-following are the concrete concerns here.
+- **B5 — Audit / attest write surface.** The hash-chained JSONL ledger
+  and the Sigstore-signed in-toto attestation bundles. The integrity
+  guarantee is the basis for every compliance claim downstream.
+## Out of scope
+- **Operating-system privilege escalation.** If an attacker can already
+  write to `~/.occasio/` or run code as the user, all bets are off.
+  The proxy assumes filesystem ACLs on the home directory are intact.
+- **TLS / cert-pinning of the upstream.** The proxy uses Node's default
+  TLS stack against `api.anthropic.com`. We rely on the platform CA
+  store. MITM with a CA-store compromise is out of scope.
+- **Supply-chain compromise of `sigstore` or `proper-lockfile`.** These
+  are the two runtime deps. Pinning + lockfile audit is the
+  responsibility of the deployment, not the proxy.
+- **Side-channels on the host (timing, cache, EM).** Out of scope.
+- **Anthropic API quota exhaustion attacks.** Anthropic's concern, not
+  the proxy's. The budget gate (`--budget`) reduces but does not
+  eliminate cost exposure.
+## STRIDE — boundary-by-boundary
+### B1 — Agent ↔ Proxy (SSE / tool-use blocks)
+| Class | Threat | Current mitigation | Residual |
+|---|---|---|---|
+| **S** | A non-Anthropic SSE-shaped payload pretending to be a tool-use block. | Tool-use blocks must match the schema enforced in `parseSSE`. Unknown `name` falls back to cloud (not executed locally). | If the agent itself is compromised, the proxy will faithfully execute whatever it asks. Out of scope (the agent is the trust source for the operator). |
+| **T** | Modified SSE frames designed to inject extra tool-use blocks downstream. | The proxy re-emits a synthesized SSE stream to the agent — it does not blindly forward upstream bytes. Tool-use blocks are reconstructed from parsed structures. | A bug in `parseSSE` or `runOneRound` that lets a forged block slip through would defeat this. **Fuzz target.** |
+| **R** | Agent denies having issued a tool call. | Every block is appended to `pipeline-events.jsonl` with `tool_inputs` captured (post-ARCH-27 governance milestone). The hash-chain makes selective deletion detectable. | The proxy cannot prove the agent's *intent* — only that the call was made. |
+| **I** | A malicious tool-use response from the upstream leaks proxy state (cwd, env) back to the agent. | Native handlers never read process env vars and only resolve paths under `process.cwd()` (with explicit absolute-path opt-in for Read). Synthetic responses (`BLOCK`) are templated, not interpolated from upstream. | `expandPsEnvVars` resolves `$env:VAR` in PowerShell input. If a policy author writes a rule that echoes env-expanded input back, the value reaches the agent. Document this in policy-author guidance. |
+| **D** | Huge SSE payload starves the proxy. | `MAX_OUTPUT` (512 KB) caps file reads; shell exec uses `maxBuffer: 512_000`. SSE chunks are processed streamingly, not buffered whole. | An attacker can still cause many small allocations. No per-connection rate limit. **Add later.** |
+| **E** | Tool-use block escalates from a non-interceptable tool (e.g. `Write`) to a privileged dispatch path. | `isInterceptable` is a pure function on `block.name + block.input`; non-whitelisted names return `false` and always go to the cloud (where the agent's own confirmation prompts apply). | The proxy does not *prevent* the cloud from executing dangerous tools — it only declines to execute them locally. The policy engine's `deny_patterns` is the mechanism to actually block. Document the distinction. |
+### B2 — Proxy ↔ Anthropic API
+| Class | Threat | Current mitigation | Residual |
+|---|---|---|---|
+| **S** | A man-in-the-middle pretending to be `api.anthropic.com`. | Node TLS with system CA store. | No cert pinning. Out of scope per "Out of scope" above. |
+| **T** | Upstream response modified to inject tool-use blocks the agent didn't authorize. | Tool-use blocks coming back are parsed structurally and dispatched through the same gate as user-originated blocks (`isInterceptable` → handler/cloud). A tampered upstream cannot achieve more than a malicious agent already could. | If the upstream is fully compromised, it can mint any tool-use block. The agent's local-confirmation UX is the final defense; the proxy does not add one. |
+| **R** | Proxy denies having sent a request. | The audit chain records every `tool_use` row including `run_id`, `iso`, `cwd`. The cloud send itself is logged separately. Selective deletion is detectable via hash-chain verifier (`audit/verifier.js`). | The proxy cannot prove *what* it sent (only that a send happened) unless full-body capture is enabled. That is intentional to limit content exposure. |
+| **I** | Secrets in tool output (cred files, env dumps) get sent upstream. | `scanSecrets` runs over Read output; `block_secrets` policy mode aborts the round. `redact_secrets` mode substitutes redacted tokens. | Pattern-based detection misses novel secret formats. No entropy-based detection. **Calibration gap.** |
+| **D** | Cost-amplification: the upstream is induced to bill the operator (compromised agent in a loop). | `--budget N` gate: 80% warning, 100% block (402). | The block fires *after* the request that crosses the threshold. Single-call cost spikes are not pre-flighted. |
+| **E** | Upstream-controlled `tool_use` block names a privileged action. | Whitelist-based interception; everything not on the list falls through and is shown to the agent. The agent's own confirmation governs whether it runs. | Defense-in-depth gap: the proxy does not add its own confirmation step. By design. |
+### B3 — MCP path
+| Class | Threat | Current mitigation | Residual |
+|---|---|---|---|
+| **S** | A non-MCP client speaking JSON-RPC into stdin. | JSON-RPC frames validated structurally (method whitelist, params schema in `mcp-normalize.js`). Unknown method → error response. | The MCP server inherits the trust profile of whoever spawned it (CLI parent process). |
+| **T** | Frame splicing — partial JSON across multiple writes designed to confuse the parser. | Line-buffered (`split('\n')`); each line parsed independently. Malformed frames are dropped with a logged error (per `mcp-server.js` line 304). | Buffering is unbounded if no newline arrives. **Add a max-line-length gate.** |
+| **R** | Same audit chain as B1; same guarantees. | `tools_mcp_count` and `mcp-experiment.jsonl` distinguish MCP rows from interceptor rows. | — |
+| **I** | A response under MCP leaks more than the equivalent interceptor response. | `executeLocalTool` is the shared wrapper; both paths produce the same shape including `secrets` scan. | — |
+| **D** | Same as B1. | — | No per-client rate limit. |
+| **E** | A misconfigured MCP server runs in a different cwd than expected. | The cwd at spawn time is captured in `tool_use` rows (post-ARCH-26 cwd-in-log work). Path enforcement honours this cwd. | If two MCP servers share a `.occasio` log dir but different cwds, audit interpretation needs the row's cwd, not the verifier's cwd. Documented. |
+### B4 — Proxy ↔ local FS / shell
+| Class | Threat | Current mitigation | Residual |
+|---|---|---|---|
+| **S** | Symlink pointing at a sensitive file. Read tool dereferences. | `handleReadTool` uses `fs.readFileSync` which follows symlinks. Path-policy enforcement (`deny_paths`) is evaluated against the *resolved* path, not the requested path. | Race condition: TOCTOU between policy check and read. No `O_NOFOLLOW` on Node's fs API for sync calls. **Document; consider switching to `lstat`-then-`open` pattern.** |
+| **T** | Shell command modified mid-flight to alter behaviour. | Shell strings are passed verbatim to `child_process.exec` with `maxBuffer`. No shell-string concatenation from upstream. | The native shell handler is the cleaner path — it never invokes a shell. Commands that fall back to `runLocally` do go through a shell. By design (existing user workflows depend on shell features). |
+| **R** | A subprocess writes to the audit log to mask its activity. | Audit log lives under `~/.occasio/` and is opened append-only by the proxy itself. Subprocesses are spawned with the proxy's environment but do not inherit the file descriptor. | OS-level privesc covers this; out of scope. |
+| **I** | Path traversal via `..` in Read input. | `handleReadTool` calls `path.resolve(process.cwd(), fp)` — this normalises `..` segments but *does not* confine the result to `cwd`, so `..` escapes are not stopped by the handler itself. Policy `deny_paths` then evaluates the resolved path. UNC / network paths (`\\server\share\…` and `//server/share/…`) are rejected at the `isReadHandleable` gate to prevent SMB-resolution DoS. **Fuzz-verified.** | Absolute paths are accepted by design (the agent often needs to read system files like `/etc/hosts`). The defense is the policy layer, not the handler. |
+| **D** | `**/**/**/...` glob causing a deep walk. | `GLOB_MAX = 500` matches; `walkGlob` skips `node_modules`, `.git`, etc.; `GLOB_MAX_DEPTH = 16` caps recursion depth; `GLOB_MAX_MS = 2000` caps wall-clock per walk. `GLOB_MAX_DEPTH` and `GLOB_MAX_MS` are both env-tunable. | An attacker can still consume ~2 s per call. Stacking many calls in a round is the residual vector — partially covered by `--budget` (cost) but not by a per-round count cap. |
+| **E** | `nativeHandle` executes a command it shouldn't (e.g. a `git` subcommand that mutates). | `isBareGitReadOnly` and `isGitCSegment` whitelist subcommands (status, log, diff, …). Unknown subcommands return `null` (= fall through, not execute). | The whitelist is the integrity guarantee. Regression in the whitelist is the highest-impact local bug class. **Fuzz target.** |
+### B5 — Audit / attest
+| Class | Threat | Current mitigation | Residual |
+|---|---|---|---|
+| **S** | A different process appends a forged row to `pipeline-events.jsonl`. | Hash-chain: each row's `prev_hash` is the SHA-256 of the previous canonical row. The verifier (`audit/verifier.js`) detects any insertion or modification. Optional file-locking (`proper-lockfile`) for multi-writer scenarios (audit v0.8.4). | If an attacker fully replays the chain (recomputing hashes), the GENESIS sentinel is the only fixed anchor. Bundle this into the Sigstore attestation for external attestability. |
+| **T** | Selective deletion of rows. | Same. Chain verification fails on any gap. | — |
+| **R** | Operator denies a tool call happened. | Sigstore-signed attestation bundles cryptographically commit to the chain head. | Signing is opt-in (`occasio attest sign`); a non-signing operator has only the local hash chain. |
+| **I** | `tool_inputs` recorded in the chain contain secrets that propagate downstream. | Audit-time secret scanning is **not** applied; the chain captures inputs as-is for forensic value. The expectation is that operators consume the chain in a trusted environment. | If the chain is sent to a third party (compliance vendor), pre-redaction is the operator's responsibility. **Document this prominently.** |
+| **D** | Audit write failure aborts the proxy. | `AuditWriteError` is intentionally session-fatal (per `pipeline.js` line 39). No silent fallback. | A consistently failing audit (full disk, permissions) bricks the proxy. This is the right tradeoff but it must be loud — currently is. |
+| **E** | The `attest` CLI signs a bundle the chain does not support. | `canonicalize.js` produces a stable byte representation; `sign.js` signs that. Verification recomputes both. Any drift fails. | A bug in `canonicalize.js` is the single point of failure for the attestation pipeline. The "canonical-byte round-trip stable" test is the primary mitigation; broaden it. |
+## Mitigation matrix — which control covers which boundary
+| Control | B1 | B2 | B3 | B4 | B5 |
+|---|---|---|---|---|---|
+| `isInterceptable` whitelist | ✓ | — | ✓ | ✓ | — |
+| `scanSecrets` | ✓ | ✓ | ✓ | — | — |
+| `--budget` gate | — | ✓ | — | — | — |
+| Native handler (no shell) | — | — | — | ✓ | — |
+| `deny_paths` / `deny_patterns` | — | — | — | ✓ | — |
+| Hash-chain audit | ✓ | ✓ | ✓ | ✓ | ✓ |
+| Sigstore attestation | — | — | — | — | ✓ |
+| `MAX_OUTPUT` / `GLOB_MAX` caps | — | — | — | ✓ | — |
+## Residual risks worth naming
+These are real and, unless marked as addressed, currently unmitigated:
+1. **No per-connection rate limit** on the SSE interceptor or MCP server.
+   DoS from a buggy or malicious agent is possible.
+2. **No symlink-following control.** `handleReadTool` will follow a
+   symlink that escapes `cwd`. Policy `deny_paths` against the *resolved*
+   path is the only defense.
+3. **Pattern-based secret detection only.** No entropy heuristic.
+   `redact_secrets` mode mitigates accidental leaks; targeted exfil with
+   custom-format keys would slip through.
+4. **Anomaly detector calibrated on synthetic data.** FP-rate is
+   documented but empirical baselines from real sessions are not yet
+   gathered. The detector currently performs more like a smoke detector
+   than a precision instrument.
+5. ~~**No glob depth limit.**~~ **Addressed** in the same commit set as this
+   document: `GLOB_MAX_DEPTH=16` and `GLOB_MAX_MS=2000` cap recursion and
+   wall-clock per walk. A pathological symlink loop now stops at depth 16
+   or 2 s, whichever fires first. Both are env-tunable.
+6. **The whitelist in `isBareGitReadOnly` is the highest-impact integrity
+   surface.** A regression that admits `git push` would let a compromised
+   agent leak via the local git remote.
+7. **Audit chain captures `tool_inputs` verbatim.** Operators sending
+   the chain to third parties must pre-redact.
+8. **Sigstore caret-pin (`^3.1.0`).** A compromised minor version of
+   sigstore is admitted by the resolver. Lockfile commits help but do not
+   eliminate this.
+9. **No replay protection on the MCP frame parser.** A replayed valid
+   frame is treated as a new request. Acceptable because the MCP server
+   is stateless per call, but worth knowing.
+## Verification mapping
+| Mitigation | Where proven |
+|---|---|
+| `isInterceptable` whitelist correctness | `test-interceptor.js` §2, §19–§22 + `test-native-handlers.js` §1–§4 |
+| Hash-chain tamper detection | `test-audit-chain.js` (86 tests inc. integrity, GENESIS, repair) |
+| Sigstore round-trip | `test-attest.js` (58 tests) + CI-gated `test:e2e` |
+| `deny_paths` enforcement | `test-policy-paths.js` (26 tests) |
+| Public-API export drift | `test-native-handlers.js` §5 (drift guard) |
+| Native handler robustness against adversarial inputs | `test-fuzz.js` (new — this commit) |
+## Change protocol
+Any change that crosses one of these boundaries must update this
+document in the same PR. Changes inside a boundary (refactor, perf,
+ergonomics) do not. The author judges; reviewers can require an update.

package/docs/edr-calibration.md CHANGED Viewed

@@ -66,6 +66,35 @@ node scripts/calibrate-anomaly-detectors.js --window 30m --step 10m
 The script prints per-detector tallies, alert rates, one example per severity level, and a heuristic suggestion ("threshold likely too tight" if a detector fires >1 HIGH/day on normal usage). Use the suggestion as a starting point, not a verdict — your chain's activity profile may legitimately push a detector higher than the heuristic expects.
+## Synthetic profiles (false-positive matrix)
+`scripts/edr-synthetic.js` generates a hash-chained `pipeline-events.jsonl` matching one of four profiles. Use it to validate detector behavior without relying on your own chain history.
+```bash
+node scripts/edr-synthetic.js --profile low-activity --rows 600 --out /tmp/lo.jsonl
+node bin/occasio.js anomalies --chain /tmp/lo.jsonl --window 15m
+```
+| Profile         | What it models                                              | deny-rate | file-read-volume | secret-redact-rate | unknown-tool-input |
+|---              |---                                                          |---:       |---:              |---:                |---:                |
+| `low-activity`  | 1 tool-call / 5 min, no BLOCKs, no redactions               | quiet     | quiet            | quiet              | quiet              |
+| `bursty`        | 60 distinct file reads in 5-min spike late in the window    | quiet     | **fires**        | quiet              | quiet              |
+| `secret-heavy`  | Periodic redaction stretches (8-15 per window) over 10 hrs  | quiet     | quiet            | **fires** MED/HIGH | quiet              |
+| `denied-heavy`  | 12 BLOCK rows clustered in the final 15 minutes             | **fires** HIGH | quiet      | quiet              | quiet              |
+The `low-activity` profile is the canonical false-positive (FP) smoke test: any HIGH-severity alert on this profile is a regression. `test-anomaly.js` asserts this on every CI run.
+### Threshold overrides per profile
+If your own chain looks more like `bursty` or `secret-heavy` on a steady-state basis (legitimately busy team), raise the threshold with `--threshold-multiplier`:
+```bash
+occasio anomalies --threshold-multiplier 2     # halve sensitivity
+occasio anomalies --threshold-multiplier 0.5   # double sensitivity
+```
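+The multiplier currently affects `deny-rate` and `file-read-volume` (the two rate detectors with a continuous threshold). Categorical detectors (`unknown-tool-input`, `secret-redact-rate`'s "first-time-leak" branch) ignore it on purpose — those signals are not threshold-tunable. Because `secret-redact-rate` is not scaled at all, the multiplier does not currently quiet a chain that resembles `secret-heavy`; it only helps for `bursty`- or `denied-heavy`-style activity.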
 ## What this is not
 - **Not a replacement for adversarial validation.** Calibration tells us whether the threshold is too tight for normal use. It does not tell us whether the threshold is loose enough to catch genuine attacks. That is the job of `occasio demo anomalies` (which constructs a synthetic adversarial chain that must trigger all four detectors) and the [EDR demo walkthrough](edr-demo.md) (which runs a real Claude Code session against the policy and confirms the detectors fire on the resulting chain).

package/package.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "name": "@occasiolabs/occasio",
-  "version": "0.8.4",
+  "version": "0.8.5",
+  "mcpName": "io.github.occasiolabs/occasio",
   "description": "Occasio — cryptographically verifiable behavioral attestation for AI coding agents. Tool-call interception + policy enforcement + tamper-evident audit chain + Sigstore-signed in-toto attestations + windowed EDR detection. Same engine for Claude Code and MCP; Computer-Use scaffold included.",
   "main": "src/index.js",
   "files": [
@@ -14,10 +15,13 @@
     "NOTICE"
   ],
   "scripts": {
-    "test": "node test-interceptor.js && node test-audit-chain.js && node test-attest.js && node test-policy-paths.js",
-    "lint": "eslint src/audit src/attest",
+    "pretest": "npm run lint:all",
+    "test": "node test-interceptor.js && node test-native-handlers.js && node test-audit-chain.js && node test-attest.js && node test-policy-paths.js && node test-anomaly.js",
+    "lint": "eslint src/audit src/attest src/core src/executor",
     "lint:all": "eslint src bin",
     "smoke": "node test-smoke.js",
+    "fuzz": "node test-fuzz.js",
+    "test:e2e": "node test-attest-e2e.js",
     "test:mcp": "node test-mcp-server.js",
     "restart-check": "node scripts/restart-check.js",
     "check-validation": "node scripts/check-validation.js",

package/src/adapters/claude-code.js CHANGED Viewed

@@ -224,9 +224,8 @@ async function runToolLoop({
   const fs   = require('fs');
   const path = require('path');
   const {
-    classifyBlock, isInterceptable,
     blocksToContent, runOneRound,
-    scanToolResults, FALLBACK_REASONS,
+    FALLBACK_REASONS,
   } = require('../interceptor');
   const { blocks: initialBlocks, stopReason: initialStop, message: initialMessage } =

package/src/adapters/computer-use.js CHANGED Viewed

@@ -134,7 +134,7 @@ const RESERVED_SHELL_BLACKLIST = [
   /\bsudo\b/i, /\bsu\b/i,
   /\brm\s+-rf\s+\//i,
   /\bmkfs\b/i, /\bdd\s+if=/i,
-  /:\(\)\s*\{\s*:\|:\&/,   // fork bomb
+  /:\(\)\s*\{\s*:\|:&/,   // fork bomb
 ];
 // Compile a policy pattern string into a JS RegExp. PCRE/RE2-style inline

package/src/anomaly/cli.js CHANGED Viewed

@@ -51,6 +51,7 @@ function runAnomaliesCli(args = []) {
     process.stdout.write(
       'Usage:\n' +
       '  occasio anomalies [--window 15m] [--since <ISO>] [--chain <path>] [--json]\n' +
+      '                    [--threshold-multiplier <n>]   raise (>1) or lower (<1) deny/file-read thresholds\n' +
       '\n' +
       'Detectors:\n' +
       '  deny-rate           BLOCK rate spike vs historical baseline\n' +
@@ -65,8 +66,10 @@ function runAnomaliesCli(args = []) {
   const since    = flag(args, '--since');
   const chain    = flag(args, '--chain') || DEFAULT_CHAIN;
   const asJson   = bool(args, '--json');
+  const mult     = parseFloat(flag(args, '--threshold-multiplier', '1') || '1');
+  const thresholdMultiplier = Number.isFinite(mult) && mult > 0 ? mult : 1;
-  const result = runDetectors({ chainFile: chain, windowMs, now: since });
+  const result = runDetectors({ chainFile: chain, windowMs, now: since, thresholdMultiplier });
   if (asJson) {
     process.stdout.write(JSON.stringify(result, null, 2) + '\n');

package/src/anomaly/detectors/deny-rate.js CHANGED Viewed

@@ -63,7 +63,8 @@ function evaluate(windowRows, historicalRows, opts) {
   }
   const ratio = winBlocks / histRatePerWindow;
-  if (ratio < MULTIPLIER_THRESHOLD) return [];
+  const effectiveThreshold = MULTIPLIER_THRESHOLD * (opts.thresholdMultiplier || 1);
+  if (ratio < effectiveThreshold) return [];
   const severity = ratio > 10 ? 'high' : 'medium';
   return [{

package/src/anomaly/detectors/file-read-volume.js CHANGED Viewed

@@ -87,7 +87,8 @@ function evaluate(windowRows, historicalRows, opts) {
   }
   if (p95 === 0) return [];
-  if (winSet.size < p95 * P95_MULTIPLIER) return [];
+  const effectiveMult = P95_MULTIPLIER * (opts.thresholdMultiplier || 1);
+  if (winSet.size < p95 * effectiveMult) return [];
   const ratio = winSet.size / Math.max(p95, 1);
   const severity = ratio > 4 ? 'high' : 'medium';

package/src/anomaly/index.js CHANGED Viewed

@@ -110,6 +110,10 @@ function runDetectors({
   windowMs  = DEFAULT_WINDOW_MS,
   now       = null,
   detectors = null,
+  // Multiplier applied to detector internal thresholds. >1 = more permissive
+  // (fewer alerts), <1 = more sensitive. Detectors choose how to apply this;
+  // categorical detectors may ignore it.
+  thresholdMultiplier = 1,
   // For tests: pass rows directly instead of reading from disk.
   rows      = null,
 } = {}) {
@@ -123,6 +127,7 @@ function runDetectors({
     try {
       const out = d.evaluate(split.window, split.historical, {
         windowMs, windowStartMs: split.windowStartMs, windowEndMs: split.windowEndMs,
+        thresholdMultiplier,
       });
       if (!Array.isArray(out)) continue;
       for (const a of out) {

package/src/boundary.js CHANGED Viewed

@@ -116,7 +116,7 @@ function fmtBytes(b) {
   return `${(b / 1024).toFixed(1)} KB`;
 }
-function renderBoundaryView(view, opts = {}) {
+function renderBoundaryView(view, _opts = {}) {
   if (!view) return '';
   const lines = [];
   const tag = view.event_type ? `[${view.event_type}]` : '';

package/src/classifier.js CHANGED Viewed

@@ -37,7 +37,7 @@ const FEEDBACK_LOG = path.join(os.homedir(), '.occasio', 'routing-feedback.jsonl
  * @param {string} [context]  reserved for future ML use
  * @returns {{ local: boolean, confidence: number, reason: string }}
  */
-function routeLocally(toolName, command, context = '') {
+function routeLocally(toolName, command, _context = '') {
   if (toolName !== 'Bash') {
     return { local: false, confidence: 1.0, reason: 'non-bash tool' };
   }

package/src/cli/clear.js CHANGED Viewed

@@ -40,13 +40,13 @@ function run(args) {
     const blockedDir = path.join(LOG_DIR, 'blocked');
     let n = 0;
     for (const dir of [logsDir, blockedDir]) {
-      try { for (const f of fs.readdirSync(dir)) { fs.unlinkSync(path.join(dir, f)); n++; } } catch {}
+      try { for (const f of fs.readdirSync(dir)) { fs.unlinkSync(path.join(dir, f)); n++; } } catch { /* ignore */ }
     }
-    try { fs.unlinkSync(SESSION_FILE); } catch {}
+    try { fs.unlinkSync(SESSION_FILE); } catch { /* ignore */ }
     console.log(col.g(`✓ Cleared all history (${n} log files) and session data`));
   } else {
-    try { fs.unlinkSync(getLogFile()); } catch {}
-    try { fs.unlinkSync(SESSION_FILE); } catch {}
+    try { fs.unlinkSync(getLogFile()); } catch { /* ignore */ }
+    try { fs.unlinkSync(SESSION_FILE); } catch { /* ignore */ }
     console.log(col.g("✓ Cleared today's log and session data"));
     console.log(col.d('  Use --history to wipe all historical logs'));
   }