@occasiolabs/occasio 0.8.2 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NOTICE +4 -4
- package/README.md +1 -0
- package/bin/supervisor/README.md +2 -2
- package/bin/supervisor/com.occasio.proxy.plist.template +3 -3
- package/bin/supervisor/install-windows-task.ps1 +7 -7
- package/bin/supervisor/occasio.service +1 -1
- package/docs/ARCHITECTURE.md +171 -0
- package/package.json +8 -2
- package/src/attest/check-summary.js +1 -1
- package/src/attest/index.js +14 -1
- package/src/audit/jsonl-auditor.js +180 -14
- package/src/audit/repair.js +118 -0
- package/src/audit/verifier.js +36 -2
- package/src/cli/clear.js +55 -0
- package/src/cli/help.js +81 -0
- package/src/cli/register.js +90 -0
- package/src/cli/status.js +94 -0
- package/src/cost/prices.js +106 -0
- package/src/harness.js +6 -6
- package/src/index.js +17 -272
- package/src/mcp-server.js +1 -1
- package/src/policy/loader.js +2 -2
- package/src/redteam.js +1 -1
- package/docs/__pycache__/audit_walker.cpython-313.pyc +0 -0
- package/docs/__pycache__/canonicalize.cpython-313.pyc +0 -0
package/NOTICE
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
Occasio
|
|
2
2
|
Copyright 2026 Leonard Brauer
|
|
3
3
|
|
|
4
|
-
This product includes software developed by the
|
|
5
|
-
(https://github.com/
|
|
4
|
+
This product includes software developed by the Occasio project
|
|
5
|
+
(https://github.com/occasiolabs/occasio).
|
|
6
6
|
|
|
7
7
|
Licensed under the Apache License, Version 2.0 (see LICENSE).
|
|
8
8
|
|
|
9
|
-
Versions 0.6.6 and earlier of
|
|
9
|
+
Versions 0.6.6 and earlier of Occasio were released under the MIT License
|
|
10
10
|
and remain available under MIT in perpetuity for those releases.
|
package/README.md
CHANGED
|
@@ -83,6 +83,7 @@ occasio attest --run-id <uuid> # Build a behavioral attestation for one session
|
|
|
83
83
|
| `occasio distill` | Inspect distilled tool outputs |
|
|
84
84
|
| `occasio dashboard` | Live browser dashboard at http://localhost:3001 |
|
|
85
85
|
| `occasio audit verify` | Re-walk the SHA-256 audit chain end-to-end |
|
|
86
|
+
| `occasio audit repair --file <path>` | Truncate a crash-partial trailing line (writes `.bak`) |
|
|
86
87
|
| `occasio report` | Governance summary export (`--days N`, `--format csv`) |
|
|
87
88
|
| `occasio anomalies` | EDR detection over the audit chain (`--window 15m`, `--json`) |
|
|
88
89
|
| `occasio attest --run-id <uuid>` | Build a behavioral attestation predicate v1 |
|
package/bin/supervisor/README.md
CHANGED
|
@@ -38,12 +38,12 @@ rm ~/.config/systemd/user/occasio.service
|
|
|
38
38
|
|
|
39
39
|
## macOS (launchd, user scope)
|
|
40
40
|
|
|
41
|
-
The plist is a template: replace `{{
|
|
41
|
+
The plist is a template: replace `{{OCCASIO_BIN}}` with the absolute
|
|
42
42
|
path to your `occasio` binary first.
|
|
43
43
|
|
|
44
44
|
```sh
|
|
45
45
|
LF_BIN="$(command -v occasio)"
|
|
46
|
-
sed "s|{{
|
|
46
|
+
sed "s|{{OCCASIO_BIN}}|$LF_BIN|g" com.occasio.proxy.plist.template \
|
|
47
47
|
> ~/Library/LaunchAgents/ai.occasio.proxy.plist
|
|
48
48
|
launchctl bootstrap gui/$(id -u) ~/Library/LaunchAgents/ai.occasio.proxy.plist
|
|
49
49
|
launchctl print gui/$(id -u)/ai.occasio.proxy
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
2
|
<!--
|
|
3
|
-
|
|
3
|
+
Occasio launchd template (v0.6.4).
|
|
4
4
|
|
|
5
|
-
This file is a TEMPLATE: replace {{
|
|
5
|
+
This file is a TEMPLATE: replace {{OCCASIO_BIN}} with the absolute
|
|
6
6
|
path to your `occasio` executable before installing. See
|
|
7
7
|
bin/supervisor/README.md for the install command.
|
|
8
8
|
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
<key>ProgramArguments</key>
|
|
20
20
|
<array>
|
|
21
|
-
<string>{{
|
|
21
|
+
<string>{{OCCASIO_BIN}}</string>
|
|
22
22
|
<string>start</string>
|
|
23
23
|
</array>
|
|
24
24
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
the current user, restarting it within 30 seconds if it exits.
|
|
5
5
|
|
|
6
6
|
.DESCRIPTION
|
|
7
|
-
v0.6.4 of
|
|
7
|
+
v0.6.4 of Occasio aborts with exit code 1 when it cannot append to
|
|
8
8
|
its audit log. This task brings the proxy back up so the agent can
|
|
9
9
|
resume work as soon as the underlying I/O issue clears.
|
|
10
10
|
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
Manually validated on Windows 11 Pro (PowerShell 7.x).
|
|
13
13
|
Tested-on: Windows.
|
|
14
14
|
|
|
15
|
-
The task runs at user logon, not at boot, because
|
|
15
|
+
The task runs at user logon, not at boot, because Occasio's audit
|
|
16
16
|
log lives in the user profile (~/.occasio/). Run from an elevated
|
|
17
17
|
shell only if you need the task to survive logoff.
|
|
18
18
|
#>
|
|
@@ -35,14 +35,14 @@ $Principal = New-ScheduledTaskPrincipal `
|
|
|
35
35
|
-RunLevel Limited
|
|
36
36
|
|
|
37
37
|
Register-ScheduledTask `
|
|
38
|
-
-TaskName "
|
|
39
|
-
-Description "
|
|
38
|
+
-TaskName "Occasio" `
|
|
39
|
+
-Description "Occasio — local AI-agent governance proxy (v0.6.4)" `
|
|
40
40
|
-Action $Action `
|
|
41
41
|
-Trigger $Trigger `
|
|
42
42
|
-Settings $Settings `
|
|
43
43
|
-Principal $Principal `
|
|
44
44
|
-Force
|
|
45
45
|
|
|
46
|
-
Write-Host "Registered scheduled task '
|
|
47
|
-
Write-Host "It will start at next logon. To start now: Start-ScheduledTask -TaskName
|
|
48
|
-
Write-Host "To remove: Unregister-ScheduledTask -TaskName
|
|
46
|
+
Write-Host "Registered scheduled task 'Occasio'."
|
|
47
|
+
Write-Host "It will start at next logon. To start now: Start-ScheduledTask -TaskName Occasio"
|
|
48
|
+
Write-Host "To remove: Unregister-ScheduledTask -TaskName Occasio -Confirm:`$false"
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Occasio Architecture
|
|
2
|
+
|
|
3
|
+
A high-level map of the request pipeline and where each module lives.
|
|
4
|
+
Use this as the orientation document before reading individual files.
|
|
5
|
+
|
|
6
|
+
## Pipeline
|
|
7
|
+
|
|
8
|
+
Every tool call from a coding agent travels the same five-stage pipeline,
|
|
9
|
+
regardless of the upstream protocol (Anthropic SSE, MCP, computer-use).
|
|
10
|
+
Each stage produces input for the next; nothing skips the auditor.
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
agent (Claude Code, MCP client, computer-use loop)
|
|
14
|
+
|
|
|
15
|
+
v
|
|
16
|
+
+--------------+ raw events +-----------------+
|
|
17
|
+
| Adapter | -----------------> | Boundary event |
|
|
18
|
+
| src/adapters | | src/core/ |
|
|
19
|
+
+--------------+ +-----------------+
|
|
20
|
+
|
|
|
21
|
+
v
|
|
22
|
+
+-----------------+
|
|
23
|
+
| Policy |
|
|
24
|
+
| src/policy/ |
|
|
25
|
+
+-----------------+
|
|
26
|
+
|
|
|
27
|
+
Decision { action,
|
|
28
|
+
reason, transform,
|
|
29
|
+
executor }
|
|
30
|
+
|
|
|
31
|
+
v
|
|
32
|
+
+-----------------+
|
|
33
|
+
| Dispatcher |
|
|
34
|
+
| src/dispatch/ |
|
|
35
|
+
+-----------------+
|
|
36
|
+
|
|
|
37
|
+
Result { passThrough,
|
|
38
|
+
blocked, transformed,
|
|
39
|
+
exitCode, ... }
|
|
40
|
+
|
|
|
41
|
+
v
|
|
42
|
+
+-----------------+
|
|
43
|
+
| Auditor |
|
|
44
|
+
| src/audit/ |
|
|
45
|
+
+-----------------+
|
|
46
|
+
|
|
|
47
|
+
v
|
|
48
|
+
+-----------------+
|
|
49
|
+
| Attest |
|
|
50
|
+
| src/attest/ |
|
|
51
|
+
+-----------------+
|
|
52
|
+
|
|
|
53
|
+
v
|
|
54
|
+
in-toto + Sigstore bundle
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Stages
|
|
58
|
+
|
|
59
|
+
### 1. Adapter — `src/adapters/`
|
|
60
|
+
|
|
61
|
+
Each upstream protocol has its own adapter (`claude-code.js`,
|
|
62
|
+
`mcp-server.js`, `computer-use.js`). Adapters turn raw transport frames
|
|
63
|
+
(SSE deltas, JSON-RPC, screenshots) into a canonical `BoundaryEvent`
|
|
64
|
+
defined in `src/core/boundary-event.js`. Downstream stages know nothing
|
|
65
|
+
about the protocol of origin — that is the whole point of the boundary.
|
|
66
|
+
|
|
67
|
+
### 2. Policy — `src/policy/`
|
|
68
|
+
|
|
69
|
+
`engine.js` is a pure function over `(event, policy)` returning a
|
|
70
|
+
`Decision`. `loader.js` parses the YAML-subset policy file with hot
|
|
71
|
+
reload via watcher. `pattern-store.js` and `pathset.js` provide path
|
|
72
|
+
matching and deny-list semantics. The engine itself does no I/O — every
|
|
73
|
+
side effect happens in the dispatcher.
|
|
74
|
+
|
|
75
|
+
### 3. Dispatcher — `src/dispatch/`
|
|
76
|
+
|
|
77
|
+
Routes a Decision to one of three executors:
|
|
78
|
+
|
|
79
|
+
- `executors/cloud.js` — forward to the upstream LLM provider.
|
|
80
|
+
- `executors/local.js` — execute interceptable tools locally
|
|
81
|
+
(Read, Glob, Grep, TodoWrite, bounded shell reads).
|
|
82
|
+
- `executors/block.js` — return a deny response without making the call.
|
|
83
|
+
|
|
84
|
+
Transforms (redaction, distillation) run before execution and are
|
|
85
|
+
recorded as part of the Result.
|
|
86
|
+
|
|
87
|
+
### 4. Auditor — `src/audit/`
|
|
88
|
+
|
|
89
|
+
`jsonl-auditor.js` appends one tamper-evident row per
|
|
90
|
+
`(event, decision, result)` tuple to `~/.occasio/pipeline-events.jsonl`.
|
|
91
|
+
|
|
92
|
+
Key properties:
|
|
93
|
+
|
|
94
|
+
- Each row carries `prev_hash` (the previous row's `hash`)
|
|
95
|
+
and `hash` (SHA-256 of the row minus the hash field).
|
|
96
|
+
- The first row's `prev_hash` is `GENESIS` (64 zero hex digits).
|
|
97
|
+
- Field order in the row literal is canonical and load-bearing. The
|
|
98
|
+
Python walker in `docs/audit_walker.py` mirrors that order so chain
|
|
99
|
+
verification does not depend on trusting Occasio's own code.
|
|
100
|
+
- `audit_schema: 1` versions every new row. Verifier accepts legacy
|
|
101
|
+
schema-less rows; unknown future versions log a warning but do not
|
|
102
|
+
flip ok=false.
|
|
103
|
+
- `loadPrevHash()` reads only the trailing 64KB of the log so bootstrap
|
|
104
|
+
on a million-row chain stays O(window) instead of O(file).
|
|
105
|
+
- On a partial trailing line (crash mid-append) with no earlier hash-bearing row in the tail window, `loadPrevHash` fails
|
|
106
|
+
hard with `AUDIT_CORRUPT`. Use `occasio audit repair --file <path>`
|
|
107
|
+
to truncate the partial line; a `.bak` is written first.
|
|
108
|
+
|
|
109
|
+
Subcommands:
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
occasio audit verify [--file <path>]
|
|
113
|
+
occasio audit repair --file <path> [--dry-run]
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 5. Attest — `src/attest/`
|
|
117
|
+
|
|
118
|
+
At the end of a session (or on demand), the attest module builds an
|
|
119
|
+
in-toto statement covering the chain segment between two hashes and
|
|
120
|
+
signs it with Sigstore (keyless Fulcio + Rekor). The browser viewer in
|
|
121
|
+
`integrations/attest-view/` can re-verify the bundle.
|
|
122
|
+
|
|
123
|
+
## Cross-cutting concerns
|
|
124
|
+
|
|
125
|
+
- **Tool-name registry** (`src/core/tool-names.js`) holds canonical
|
|
126
|
+
tool identifiers. Adapters emit only canonical names. New tools must
|
|
127
|
+
be registered there first.
|
|
128
|
+
- **Boundary events** (`src/core/boundary-event.js`) are the only
|
|
129
|
+
cross-stage data type. If a stage needs a new field, add it to the
|
|
130
|
+
boundary event rather than passing it through a side channel.
|
|
131
|
+
- **Cost & ledger** (`src/cost/`) tracks tokens, model prices, and
|
|
132
|
+
per-session spend. The pipeline emits cost events into the same
|
|
133
|
+
audit log, chained with the rest.
|
|
134
|
+
- **Policy hot-reload** records a `policy_loaded` audit row whose hash
|
|
135
|
+
links into the same chain — a policy swap is a first-class event.
|
|
136
|
+
|
|
137
|
+
## What lives where
|
|
138
|
+
|
|
139
|
+
| Concern | Path |
|
|
140
|
+
|-----------------------|-------------------------------|
|
|
141
|
+
| HTTP proxy | `src/index.js`, `bin/` |
|
|
142
|
+
| Anthropic SSE | `src/adapters/claude-code.js` |
|
|
143
|
+
| MCP server | `src/adapters/mcp-server.js` |
|
|
144
|
+
| Policy DSL | `src/policy/loader.js` |
|
|
145
|
+
| Policy engine | `src/policy/engine.js` |
|
|
146
|
+
| Dispatcher | `src/dispatch/` |
|
|
147
|
+
| Local tool execution | `src/dispatch/executors/local.js` |
|
|
148
|
+
| Audit chain | `src/audit/jsonl-auditor.js` |
|
|
149
|
+
| Audit repair | `src/audit/repair.js` |
|
|
150
|
+
| Audit verifier | `src/audit/verifier.js` |
|
|
151
|
+
| Attestation | `src/attest/index.js` |
|
|
152
|
+
| Cost/ledger | `src/cost/` |
|
|
153
|
+
|
|
154
|
+
## What is intentionally NOT in this diagram
|
|
155
|
+
|
|
156
|
+
- Telemetry: there is none, by design.
|
|
157
|
+
- Background daemons: audit writes happen synchronously on the
|
|
158
|
+
request-handling path. A queue-based async writer is a roadmap item
|
|
159
|
+
(`src/audit/queue.js` is reserved) but has not landed.
|
|
160
|
+
- Database: every persisted artifact is an append-only file. No SQL,
|
|
161
|
+
no migrations to manage at runtime.
|
|
162
|
+
|
|
163
|
+
## Reading order for new contributors
|
|
164
|
+
|
|
165
|
+
1. `src/core/boundary-event.js` — what flows through the pipeline.
|
|
166
|
+
2. `src/policy/engine.js` — pure-function decisions.
|
|
167
|
+
3. `src/audit/jsonl-auditor.js` — the hash-chain invariant.
|
|
168
|
+
4. `src/dispatch/index.js` — how Decisions become Results.
|
|
169
|
+
5. `test-audit-chain.js` — the scenarios the auditor must survive.
|
|
170
|
+
|
|
171
|
+
After those five files the rest of the codebase reads quickly.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@occasiolabs/occasio",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.4",
|
|
4
4
|
"description": "Occasio — cryptographically verifiable behavioral attestation for AI coding agents. Tool-call interception + policy enforcement + tamper-evident audit chain + Sigstore-signed in-toto attestations + windowed EDR detection. Same engine for Claude Code and MCP; Computer-Use scaffold included.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"files": [
|
|
@@ -14,7 +14,9 @@
|
|
|
14
14
|
"NOTICE"
|
|
15
15
|
],
|
|
16
16
|
"scripts": {
|
|
17
|
-
"test": "node test-interceptor.js",
|
|
17
|
+
"test": "node test-interceptor.js && node test-audit-chain.js && node test-attest.js && node test-policy-paths.js",
|
|
18
|
+
"lint": "eslint src/audit src/attest",
|
|
19
|
+
"lint:all": "eslint src bin",
|
|
18
20
|
"smoke": "node test-smoke.js",
|
|
19
21
|
"test:mcp": "node test-mcp-server.js",
|
|
20
22
|
"restart-check": "node scripts/restart-check.js",
|
|
@@ -64,6 +66,10 @@
|
|
|
64
66
|
},
|
|
65
67
|
"license": "Apache-2.0",
|
|
66
68
|
"dependencies": {
|
|
69
|
+
"proper-lockfile": "^4.1.2",
|
|
67
70
|
"sigstore": "^3.1.0"
|
|
71
|
+
},
|
|
72
|
+
"devDependencies": {
|
|
73
|
+
"eslint": "^9.39.4"
|
|
68
74
|
}
|
|
69
75
|
}
|
package/src/attest/index.js
CHANGED
|
@@ -65,7 +65,20 @@ function offsetSeconds(start, then) {
|
|
|
65
65
|
function readPolicyRulesDigest(policyFile) {
|
|
66
66
|
let text;
|
|
67
67
|
try { text = fs.readFileSync(policyFile, 'utf8'); }
|
|
68
|
-
catch {
|
|
68
|
+
catch (e) {
|
|
69
|
+
// Loud-fail: a missing/unreadable policy file is operationally significant
|
|
70
|
+
// for an attestation (the rules_digest in the output will be defaults, not
|
|
71
|
+
// the file's real contents). Surface it on stderr; the caller still falls
|
|
72
|
+
// back to schema defaults so attestation generation does not abort.
|
|
73
|
+
// ENOENT is the common, expected case when no user policy exists; demote
|
|
74
|
+
// it to a single-line note. Anything else (EACCES, EIO, etc.) is louder.
|
|
75
|
+
if (e && e.code === 'ENOENT') {
|
|
76
|
+
process.stderr.write(`[Occasio] attest: no policy file at ${policyFile} — using schema defaults for rules_digest\n`);
|
|
77
|
+
} else {
|
|
78
|
+
process.stderr.write(`[Occasio] attest: cannot read policy ${policyFile}: ${e.message} — rules_digest will use schema defaults\n`);
|
|
79
|
+
}
|
|
80
|
+
return null;
|
|
81
|
+
}
|
|
69
82
|
|
|
70
83
|
// Tiny line-level scan — full parsing is overkill for a digest. Counts only.
|
|
71
84
|
let denyPaths = 0, denyPatterns = 0, blockSecrets = null;
|
|
@@ -37,19 +37,114 @@ function computeHash(rowWithoutHash) {
|
|
|
37
37
|
return sha256hex(JSON.stringify(rowWithoutHash));
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
//
|
|
41
|
-
//
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
40
|
+
// Default tail-read window size (bytes). Large enough to contain several full
|
|
41
|
+
// audit rows on any plausible workload; small enough that bootstrap on a
|
|
42
|
+
// 100k-event log stays sub-50ms.
|
|
43
|
+
const TAIL_READ_BYTES = 64 * 1024;
|
|
44
|
+
|
|
45
|
+
// Read the trailing window of a file. Returns the decoded string (utf8) along
|
|
46
|
+
// with a flag indicating whether the read started mid-file (i.e. the first
|
|
47
|
+
// line in the returned buffer may be truncated).
|
|
48
|
+
/**
 * Read the trailing window of a file.
 *
 * @param {string} filePath - File to read.
 * @param {number} [bytes=TAIL_READ_BYTES] - Maximum number of trailing bytes to read.
 * @returns {{content: string, truncated: boolean}} The window decoded as UTF-8,
 *   plus a flag that is true when the window starts after byte 0 (so the first
 *   line of `content` may be a fragment of a longer line).
 * @throws {Error} Whatever fs.openSync / fs.fstatSync / fs.readSync raise.
 */
function readTail(filePath, bytes = TAIL_READ_BYTES) {
  const fd = fs.openSync(filePath, 'r');
  try {
    const { size } = fs.fstatSync(fd);
    const start = Math.max(0, size - bytes);
    const len = size - start;
    const buf = Buffer.alloc(len);

    // fs.readSync reports how many bytes it actually read, which may be fewer
    // than requested. Keep reading until the window is full or we hit EOF
    // (e.g. the file shrank after fstat).
    let filled = 0;
    while (filled < len) {
      const got = fs.readSync(fd, buf, filled, len - filled, start + filled);
      if (got === 0) break;
      filled += got;
    }

    // Decode only the bytes that were read: the unread remainder of the
    // zero-filled buffer would otherwise show up as trailing NUL characters
    // and be mistaken for a partial last line.
    return { content: buf.toString('utf8', 0, filled), truncated: start > 0 };
  } finally {
    fs.closeSync(fd);
  }
}
|
|
61
|
+
|
|
62
|
+
// Outcome codes for scanning the tail. A [code, value] tuple is used rather
|
|
63
|
+
// than an object literal so that field-name tokens used by the audit row
|
|
64
|
+
// schema (see test-interceptor.js §32) cannot appear earlier in this file
|
|
65
|
+
// than the row builder in record() below.
|
|
66
|
+
// ['hash', hexString] found a valid hash-bearing row
|
|
67
|
+
// ['genesis'] file empty / legacy-only / missing
|
|
68
|
+
// ['corrupt', detailString] last line invalid JSON, no fallback row
|
|
69
|
+
/**
 * Scan the tail of an audit log for the most recent hash-bearing row.
 *
 * The scan starts with a window of `bytes` trailing bytes. If that window
 * begins mid-file and holds no usable row (for example the last row is larger
 * than the window), the window is doubled and the scan repeated, so a valid
 * chain is never mistaken for an empty one just because the window was small.
 *
 * @param {string} filePath - Audit log to scan.
 * @param {number} [bytes=TAIL_READ_BYTES] - Initial tail window size in bytes.
 * @returns {Array} A tuple whose first element is the outcome code: 'hash'
 *   (second element is the 64-character hash string), 'genesis' (no second
 *   element), or 'corrupt' (second element is a detail string).
 */
function scanTailForPrevHash(filePath, bytes = TAIL_READ_BYTES) {
  let windowBytes = bytes;

  for (;;) {
    let content;
    let truncated;
    try {
      ({ content, truncated } = readTail(filePath, windowBytes));
    } catch {
      return ['genesis'];
    }
    if (!content) return ['genesis'];

    // Split into raw lines (no filter) so a partial trailing line can be told
    // apart from intentionally-empty lines. A well-formed JSONL ends in '\n';
    // if the last element after split is non-empty, the file was cut mid-write.
    const raw = content.split('\n');
    const lastFragment = raw[raw.length - 1];
    const lines = raw.filter(Boolean);

    let trailingPartial = false;
    if (lines.length > 0) {
      // When the window starts mid-file its first line may be a fragment of a
      // longer row; drop it so a fragment is never treated as a legacy row.
      const startIdx = truncated && raw[0] === lines[0] ? 1 : 0;
      const lastNonEmptyIdx = lines.length - 1;

      // Partial trailing line: file does not end in '\n' AND the last line
      // fails to JSON.parse.
      if (lastFragment !== '') {
        try {
          JSON.parse(lines[lastNonEmptyIdx]);
        } catch {
          trailingPartial = true;
        }
      }

      // Walk backwards to the most recent valid hash-bearing row, skipping the
      // partial trailing line if there is one.
      const scanFrom = trailingPartial ? lastNonEmptyIdx - 1 : lastNonEmptyIdx;
      for (let i = scanFrom; i >= startIdx; i--) {
        try {
          const row = JSON.parse(lines[i]);
          if (typeof row.hash === 'string' && row.hash.length === 64) {
            return ['hash', row.hash];
          }
        } catch {
          // Unparseable earlier row: keep walking; the decision below applies
          // if nothing usable is found.
        }
      }
    }

    // Nothing usable in this window.
    if (trailingPartial) {
      return ['corrupt', 'partial trailing line, no recoverable prev_hash in tail window'];
    }
    // The whole file was scanned: it really contains no hash-bearing row.
    if (!truncated) return ['genesis'];

    // The window started mid-file, so earlier rows were not examined yet.
    // Widen it rather than silently restarting the chain.
    windowBytes *= 2;
  }
}
|
|
125
|
+
|
|
126
|
+
// Public: returns the most recent hash to chain from, or GENESIS.
|
|
127
|
+
// Throws an Error with code 'AUDIT_CORRUPT' when the file's last line is JSON-broken and no
|
|
128
|
+
// earlier hash-bearing row exists in the tail window — this prevents the
|
|
129
|
+
// silent tamper gap where a partial write would otherwise restart the chain.
|
|
130
|
+
/**
 * Resolve the hash that the next appended row should chain from.
 *
 * @param {string} filePath - Audit log path.
 * @param {object} [opts] - Optional settings.
 * @param {number} [opts.tailBytes=TAIL_READ_BYTES] - Tail window handed to the scanner.
 * @param {boolean} [opts.failHard=true] - When falsy, a corrupt tail is reported
 *   on stderr and GENESIS is returned instead of throwing.
 * @returns {string} The hash found by the scanner, or GENESIS.
 * @throws {Error} With `code === 'AUDIT_CORRUPT'` when the scanner reports a
 *   corrupt tail and `failHard` is truthy.
 */
function loadPrevHash(filePath, opts = {}) {
  const windowSize = opts.tailBytes === undefined ? TAIL_READ_BYTES : opts.tailBytes;
  const strict = opts.failHard === undefined ? true : opts.failHard;

  // A log that does not exist yet is the initial bootstrap case.
  if (!fs.existsSync(filePath)) return GENESIS;

  const outcome = scanTailForPrevHash(filePath, windowSize);
  const kind = outcome[0];
  const detail = outcome[1];

  switch (kind) {
    case 'hash':
      return detail;
    case 'genesis':
      return GENESIS;
    default:
      break;
  }

  // Remaining outcome is 'corrupt'.
  if (strict) {
    const err = new Error(
      `Audit log corrupt at ${filePath}: ${detail}. ` +
      `Run \`occasio audit repair --file ${filePath}\` to truncate the partial trailing line.`
    );
    err.code = 'AUDIT_CORRUPT';
    throw err;
  }

  process.stderr.write(`[occasio audit] WARNING: ${filePath}: ${detail}\n`);
  return GENESIS;
}
|
|
54
149
|
|
|
55
150
|
/**
|
|
@@ -63,17 +158,75 @@ function loadPrevHash(filePath) {
|
|
|
63
158
|
* call. prevHash is only advanced on a successful append, keeping the
|
|
64
159
|
* in-memory chain consistent with what is on disk if the proxy is restarted.
|
|
65
160
|
*/
|
|
66
|
-
function createAuditor(filePath = DEFAULT_LOG) {
|
|
67
|
-
|
|
161
|
+
function createAuditor(filePath = DEFAULT_LOG, opts = {}) {
|
|
162
|
+
// lock=true wraps each append in a proper-lockfile lockSync/unlockSync pair
|
|
163
|
+
// and re-reads prev_hash from disk inside the lock. This is the only safe
|
|
164
|
+
// way to share an audit log between two concurrent writers (e.g. proxy +
|
|
165
|
+
// MCP server on the same machine). Default off because single-writer
|
|
166
|
+
// workloads do not pay the I/O cost.
|
|
167
|
+
const { lock = false } = opts;
|
|
168
|
+
let lockfile = null;
|
|
169
|
+
if (lock) {
|
|
170
|
+
// Lazy-require so a missing proper-lockfile install does not break
|
|
171
|
+
// single-writer setups that never opt in.
|
|
172
|
+
try { lockfile = require('proper-lockfile'); }
|
|
173
|
+
catch (e) {
|
|
174
|
+
throw new Error(`createAuditor({ lock: true }) requires proper-lockfile: ${e.message}`);
|
|
175
|
+
}
|
|
176
|
+
// The lockfile target must exist before lockSync can create its companion
|
|
177
|
+
// directory marker. Touch it.
|
|
178
|
+
if (!fs.existsSync(filePath)) {
|
|
179
|
+
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
|
180
|
+
fs.writeFileSync(filePath, '');
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
try { fs.mkdirSync(path.dirname(filePath), { recursive: true }); }
|
|
185
|
+
catch { /* directory already exists, or unwritable — surface on first append */ }
|
|
68
186
|
|
|
69
187
|
let prevHash = loadPrevHash(filePath);
|
|
70
188
|
|
|
189
|
+
/**
 * Run `fn` while holding the cross-process lock on the audit log.
 *
 * When locking is disabled this is a plain call to `fn`. Otherwise the lock is
 * acquired (waiting up to 10 seconds under contention), `prevHash` is refreshed
 * from disk, `fn` is run, and the lock is released whatever happens.
 *
 * @param {Function} fn - Zero-argument callback performing the append.
 * @returns {*} Whatever `fn` returns.
 * @throws {Error} Any non-ELOCKED error from lockSync, the ELOCKED error once
 *   the 10 second wait is exhausted, or anything thrown by `fn`.
 */
function withLock(fn) {
  if (!lock) return fn();
  // proper-lockfile uses mkdir(2) for atomicity; staleness keeps the lock
  // self-healing across crashes. realpath:false avoids extra syscalls for
  // a path we already control.
  // proper-lockfile's sync API forbids `retries`, so contention is retried
  // here. Stale-cleanup stays on so a crashed writer cannot freeze siblings
  // forever.
  let release = null;
  let sleepCell = null;
  const start = Date.now();
  while (release === null) {
    try {
      release = lockfile.lockSync(filePath, { stale: 10000, realpath: false });
    } catch (e) {
      if (e.code !== 'ELOCKED') throw e;
      if (Date.now() - start > 10000) throw e;
      // Back off for ~2 ms before retrying. Atomics.wait blocks this thread
      // without burning CPU: the cell stays 0 and nobody notifies it, so the
      // call simply times out.
      if (sleepCell === null) sleepCell = new Int32Array(new SharedArrayBuffer(4));
      Atomics.wait(sleepCell, 0, 0, 2);
    }
  }
  try {
    // Inside the lock we MUST re-read prev_hash — another process may have
    // appended since we last advanced it. Without this, two concurrent
    // writers would produce two rows with the same prev_hash → chain break.
    // Done inside the try so the lock is released even if the re-read throws.
    prevHash = loadPrevHash(filePath, { failHard: false });
    return fn();
  } finally {
    try { release(); } catch { /* lock already released by stale-timeout reaper */ }
  }
}
|
|
218
|
+
|
|
71
219
|
/**
 * Append one audit row for an (event, decision, result) tuple.
 *
 * Calls without an event or without a decision are a no-op that reports
 * success; everything else is delegated to recordInner under withLock.
 *
 * @param {object} event - Event being audited.
 * @param {object} decision - Decision taken for the event.
 * @param {object} result - Outcome of acting on the decision.
 * @returns {object} `{ ok: true }` for the no-op case, otherwise whatever
 *   recordInner returns.
 */
function record(event, decision, result) {
  const hasInputs = Boolean(event) && Boolean(decision);
  if (hasInputs) {
    const appendRow = () => recordInner(event, decision, result);
    return withLock(appendRow);
  }
  return { ok: true };
}
|
|
223
|
+
|
|
224
|
+
function recordInner(event, decision, result) {
|
|
73
225
|
// Field order is explicit and must remain stable — computeHash depends on it.
|
|
74
226
|
// The Python walker in docs/audit_walker.py mirrors this order; any change
|
|
75
227
|
// here without updating that walker breaks independent verifiability.
|
|
76
228
|
const row = {
|
|
229
|
+
audit_schema: 1,
|
|
77
230
|
ts: event.timestamp,
|
|
78
231
|
event_id: event.id,
|
|
79
232
|
session_id: event.sessionId,
|
|
@@ -131,8 +284,13 @@ function createAuditor(filePath = DEFAULT_LOG) {
|
|
|
131
284
|
* append failure, mirroring record()'s contract so the caller can
|
|
132
285
|
* propagate AuditWriteError uniformly.
|
|
133
286
|
*/
|
|
134
|
-
function recordPolicyLoaded(
|
|
287
|
+
/**
 * Append a policy-loaded audit row.
 *
 * Thin wrapper: hands `args` unchanged to recordPolicyLoadedInner, run under
 * withLock.
 *
 * @param {object} args - Forwarded as-is to recordPolicyLoadedInner.
 * @returns {*} Whatever recordPolicyLoadedInner returns.
 */
function recordPolicyLoaded(args) {
  const appendPolicyRow = () => recordPolicyLoadedInner(args);
  return withLock(appendPolicyRow);
}
|
|
290
|
+
|
|
291
|
+
function recordPolicyLoadedInner({ hash, path: policyPath, version, source }) {
|
|
135
292
|
const row = {
|
|
293
|
+
audit_schema: 1,
|
|
136
294
|
ts: new Date().toISOString(),
|
|
137
295
|
event_id: crypto.randomUUID(),
|
|
138
296
|
session_id: undefined,
|
|
@@ -175,4 +333,12 @@ function createAuditor(filePath = DEFAULT_LOG) {
|
|
|
175
333
|
return { record, recordPolicyLoaded, file: filePath };
|
|
176
334
|
}
|
|
177
335
|
|
|
178
|
-
module.exports = {
|
|
336
|
+
module.exports = {
|
|
337
|
+
createAuditor,
|
|
338
|
+
DEFAULT_LOG,
|
|
339
|
+
GENESIS,
|
|
340
|
+
computeHash,
|
|
341
|
+
loadPrevHash,
|
|
342
|
+
scanTailForPrevHash,
|
|
343
|
+
TAIL_READ_BYTES,
|
|
344
|
+
};
|