@occasiolabs/occasio 0.8.3 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/docs/ARCHITECTURE.md +171 -0
- package/package.json +8 -2
- package/src/attest/check-summary.js +1 -1
- package/src/attest/index.js +14 -1
- package/src/audit/jsonl-auditor.js +180 -14
- package/src/audit/repair.js +118 -0
- package/src/audit/verifier.js +36 -2
- package/src/cli/clear.js +55 -0
- package/src/cli/help.js +81 -0
- package/src/cli/register.js +90 -0
- package/src/cli/status.js +94 -0
- package/src/cost/prices.js +106 -0
- package/src/index.js +15 -270
package/README.md
CHANGED
|
@@ -83,6 +83,7 @@ occasio attest --run-id <uuid> # Build a behavioral attestation for one session
|
|
|
83
83
|
| `occasio distill` | Inspect distilled tool outputs |
|
|
84
84
|
| `occasio dashboard` | Live browser dashboard at http://localhost:3001 |
|
|
85
85
|
| `occasio audit verify` | Re-walk the SHA-256 audit chain end-to-end |
|
|
86
|
+
| `occasio audit repair --file <path>` | Truncate a crash-partial trailing line (writes `.bak`) |
|
|
86
87
|
| `occasio report` | Governance summary export (`--days N`, `--format csv`) |
|
|
87
88
|
| `occasio anomalies` | EDR detection over the audit chain (`--window 15m`, `--json`) |
|
|
88
89
|
| `occasio attest --run-id <uuid>` | Build a behavioral attestation predicate v1 |
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Occasio Architecture
|
|
2
|
+
|
|
3
|
+
A high-level map of the request pipeline and where each module lives.
|
|
4
|
+
Use this as the orientation document before reading individual files.
|
|
5
|
+
|
|
6
|
+
## Pipeline
|
|
7
|
+
|
|
8
|
+
Every tool call from a coding agent travels the same five-stage pipeline,
|
|
9
|
+
regardless of the upstream protocol (Anthropic SSE, MCP, computer-use).
|
|
10
|
+
Each stage produces input for the next; nothing skips the auditor.
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
agent (Claude Code, MCP client, computer-use loop)
|
|
14
|
+
|
|
|
15
|
+
v
|
|
16
|
+
+--------------+ raw events +-----------------+
|
|
17
|
+
| Adapter | -----------------> | Boundary event |
|
|
18
|
+
| src/adapters | | src/core/ |
|
|
19
|
+
+--------------+ +-----------------+
|
|
20
|
+
|
|
|
21
|
+
v
|
|
22
|
+
+-----------------+
|
|
23
|
+
| Policy |
|
|
24
|
+
| src/policy/ |
|
|
25
|
+
+-----------------+
|
|
26
|
+
|
|
|
27
|
+
Decision { action,
|
|
28
|
+
reason, transform,
|
|
29
|
+
executor }
|
|
30
|
+
|
|
|
31
|
+
v
|
|
32
|
+
+-----------------+
|
|
33
|
+
| Dispatcher |
|
|
34
|
+
| src/dispatch/ |
|
|
35
|
+
+-----------------+
|
|
36
|
+
|
|
|
37
|
+
Result { passThrough,
|
|
38
|
+
blocked, transformed,
|
|
39
|
+
exitCode, ... }
|
|
40
|
+
|
|
|
41
|
+
v
|
|
42
|
+
+-----------------+
|
|
43
|
+
| Auditor |
|
|
44
|
+
| src/audit/ |
|
|
45
|
+
+-----------------+
|
|
46
|
+
|
|
|
47
|
+
v
|
|
48
|
+
+-----------------+
|
|
49
|
+
| Attest |
|
|
50
|
+
| src/attest/ |
|
|
51
|
+
+-----------------+
|
|
52
|
+
|
|
|
53
|
+
v
|
|
54
|
+
in-toto + Sigstore bundle
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Stages
|
|
58
|
+
|
|
59
|
+
### 1. Adapter — `src/adapters/`
|
|
60
|
+
|
|
61
|
+
Each upstream protocol has its own adapter (`claude-code.js`,
|
|
62
|
+
`mcp-server.js`, `computer-use.js`). Adapters turn raw transport frames
|
|
63
|
+
(SSE deltas, JSON-RPC, screenshots) into a canonical `BoundaryEvent`
|
|
64
|
+
defined in `src/core/boundary-event.js`. Downstream stages know nothing
|
|
65
|
+
about the protocol of origin — that is the whole point of the boundary.
|
|
66
|
+
|
|
67
|
+
### 2. Policy — `src/policy/`
|
|
68
|
+
|
|
69
|
+
`engine.js` is a pure function over `(event, policy)` returning a
|
|
70
|
+
`Decision`. `loader.js` parses the YAML-subset policy file with hot
|
|
71
|
+
reload via watcher. `pattern-store.js` and `pathset.js` provide path
|
|
72
|
+
matching and deny-list semantics. The engine itself does no I/O — every
|
|
73
|
+
side effect happens in the dispatcher.
|
|
74
|
+
|
|
75
|
+
### 3. Dispatcher — `src/dispatch/`
|
|
76
|
+
|
|
77
|
+
Routes a Decision to one of three executors:
|
|
78
|
+
|
|
79
|
+
- `executors/cloud.js` — forward to the upstream LLM provider.
|
|
80
|
+
- `executors/local.js` — execute interceptable tools locally
|
|
81
|
+
(Read, Glob, Grep, TodoWrite, bounded shell reads).
|
|
82
|
+
- `executors/block.js` — return a deny response without making the call.
|
|
83
|
+
|
|
84
|
+
Transforms (redaction, distillation) run before execution and are
|
|
85
|
+
recorded as part of the Result.
|
|
86
|
+
|
|
87
|
+
### 4. Auditor — `src/audit/`
|
|
88
|
+
|
|
89
|
+
`jsonl-auditor.js` appends one tamper-evident row per
|
|
90
|
+
`(event, decision, result)` tuple to `~/.occasio/pipeline-events.jsonl`.
|
|
91
|
+
|
|
92
|
+
Key properties:
|
|
93
|
+
|
|
94
|
+
- Each row carries `prev_hash` (the previous row's `hash` value, copied verbatim)
|
|
95
|
+
and `hash` (SHA-256 of the row minus the hash field).
|
|
96
|
+
- The first row's `prev_hash` is `GENESIS` (64 zero hex digits).
|
|
97
|
+
- Field order in the row literal is canonical and load-bearing. The
|
|
98
|
+
Python walker in `docs/audit_walker.py` mirrors that order so chain
|
|
99
|
+
verification does not depend on trusting Occasio's own code.
|
|
100
|
+
- `audit_schema: 1` versions every new row. Verifier accepts legacy
|
|
101
|
+
schema-less rows; unknown future versions log a warning but do not
|
|
102
|
+
flip ok=false.
|
|
103
|
+
- `loadPrevHash()` reads only the trailing 64KB of the log so bootstrap
|
|
104
|
+
on a million-row chain stays O(window) instead of O(file).
|
|
105
|
+
- On a partial trailing line (crash mid-append) with no earlier hash-bearing row in the tail window, `loadPrevHash` fails
|
|
106
|
+
hard with `AUDIT_CORRUPT`. Use `occasio audit repair --file <path>`
|
|
107
|
+
to truncate the partial line; a `.bak` is written first.
|
|
108
|
+
|
|
109
|
+
Subcommands:
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
occasio audit verify [--file <path>]
|
|
113
|
+
occasio audit repair --file <path> [--dry-run]
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 5. Attest — `src/attest/`
|
|
117
|
+
|
|
118
|
+
At the end of a session (or on demand), the attest module builds an
|
|
119
|
+
in-toto statement covering the chain segment between two hashes and
|
|
120
|
+
signs it with Sigstore (keyless Fulcio + Rekor). The browser viewer in
|
|
121
|
+
`integrations/attest-view/` can re-verify the bundle.
|
|
122
|
+
|
|
123
|
+
## Cross-cutting concerns
|
|
124
|
+
|
|
125
|
+
- **Tool-name registry** (`src/core/tool-names.js`) holds canonical
|
|
126
|
+
tool identifiers. Adapters emit only canonical names. New tools must
|
|
127
|
+
be registered there first.
|
|
128
|
+
- **Boundary events** (`src/core/boundary-event.js`) are the only
|
|
129
|
+
cross-stage data type. If a stage needs a new field, add it to the
|
|
130
|
+
boundary event rather than passing it through a side channel.
|
|
131
|
+
- **Cost & ledger** (`src/cost/`) tracks tokens, model prices, and
|
|
132
|
+
per-session spend. The pipeline emits cost events into the same
|
|
133
|
+
audit log, chained with the rest.
|
|
134
|
+
- **Policy hot-reload** records a `policy_loaded` audit row whose hash
|
|
135
|
+
links into the same chain — a policy swap is a first-class event.
|
|
136
|
+
|
|
137
|
+
## What lives where
|
|
138
|
+
|
|
139
|
+
| Concern | Path |
|
|
140
|
+
|-----------------------|-------------------------------|
|
|
141
|
+
| HTTP proxy | `src/index.js`, `bin/` |
|
|
142
|
+
| Anthropic SSE | `src/adapters/claude-code.js` |
|
|
143
|
+
| MCP server | `src/adapters/mcp-server.js` |
|
|
144
|
+
| Policy DSL | `src/policy/loader.js` |
|
|
145
|
+
| Policy engine | `src/policy/engine.js` |
|
|
146
|
+
| Dispatcher | `src/dispatch/` |
|
|
147
|
+
| Local tool execution | `src/dispatch/executors/local.js` |
|
|
148
|
+
| Audit chain | `src/audit/jsonl-auditor.js` |
|
|
149
|
+
| Audit repair | `src/audit/repair.js` |
|
|
150
|
+
| Audit verifier | `src/audit/verifier.js` |
|
|
151
|
+
| Attestation | `src/attest/index.js` |
|
|
152
|
+
| Cost/ledger | `src/cost/` |
|
|
153
|
+
|
|
154
|
+
## What is intentionally NOT in this diagram
|
|
155
|
+
|
|
156
|
+
- Telemetry: there is none, by design.
|
|
157
|
+
- Background daemons: audit writes happen synchronously on the
|
|
158
|
+
request-handling path. A queue-based async writer is a roadmap item
|
|
159
|
+
(`src/audit/queue.js` is reserved) but has not landed.
|
|
160
|
+
- Database: every persisted artifact is an append-only file. No SQL,
|
|
161
|
+
no migrations to manage at runtime.
|
|
162
|
+
|
|
163
|
+
## Reading order for new contributors
|
|
164
|
+
|
|
165
|
+
1. `src/core/boundary-event.js` — what flows through the pipeline.
|
|
166
|
+
2. `src/policy/engine.js` — pure-function decisions.
|
|
167
|
+
3. `src/audit/jsonl-auditor.js` — the hash-chain invariant.
|
|
168
|
+
4. `src/dispatch/index.js` — how Decisions become Results.
|
|
169
|
+
5. `test-audit-chain.js` — the scenarios the auditor must survive.
|
|
170
|
+
|
|
171
|
+
After those five files the rest of the codebase reads quickly.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@occasiolabs/occasio",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.4",
|
|
4
4
|
"description": "Occasio — cryptographically verifiable behavioral attestation for AI coding agents. Tool-call interception + policy enforcement + tamper-evident audit chain + Sigstore-signed in-toto attestations + windowed EDR detection. Same engine for Claude Code and MCP; Computer-Use scaffold included.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"files": [
|
|
@@ -14,7 +14,9 @@
|
|
|
14
14
|
"NOTICE"
|
|
15
15
|
],
|
|
16
16
|
"scripts": {
|
|
17
|
-
"test": "node test-interceptor.js",
|
|
17
|
+
"test": "node test-interceptor.js && node test-audit-chain.js && node test-attest.js && node test-policy-paths.js",
|
|
18
|
+
"lint": "eslint src/audit src/attest",
|
|
19
|
+
"lint:all": "eslint src bin",
|
|
18
20
|
"smoke": "node test-smoke.js",
|
|
19
21
|
"test:mcp": "node test-mcp-server.js",
|
|
20
22
|
"restart-check": "node scripts/restart-check.js",
|
|
@@ -64,6 +66,10 @@
|
|
|
64
66
|
},
|
|
65
67
|
"license": "Apache-2.0",
|
|
66
68
|
"dependencies": {
|
|
69
|
+
"proper-lockfile": "^4.1.2",
|
|
67
70
|
"sigstore": "^3.1.0"
|
|
71
|
+
},
|
|
72
|
+
"devDependencies": {
|
|
73
|
+
"eslint": "^9.39.4"
|
|
68
74
|
}
|
|
69
75
|
}
|
package/src/attest/index.js
CHANGED
|
@@ -65,7 +65,20 @@ function offsetSeconds(start, then) {
|
|
|
65
65
|
function readPolicyRulesDigest(policyFile) {
|
|
66
66
|
let text;
|
|
67
67
|
try { text = fs.readFileSync(policyFile, 'utf8'); }
|
|
68
|
-
catch {
|
|
68
|
+
catch (e) {
|
|
69
|
+
// Loud-fail: a missing/unreadable policy file is operationally significant
|
|
70
|
+
// for an attestation (the rules_digest in the output will be defaults, not
|
|
71
|
+
// the file's real contents). Surface it on stderr; the caller still falls
|
|
72
|
+
// back to schema defaults so attestation generation does not abort.
|
|
73
|
+
// ENOENT is the common, expected case when no user policy exists; demote
|
|
74
|
+
// it to a single-line note. Anything else (EACCES, EIO, etc.) is louder.
|
|
75
|
+
if (e && e.code === 'ENOENT') {
|
|
76
|
+
process.stderr.write(`[Occasio] attest: no policy file at ${policyFile} — using schema defaults for rules_digest\n`);
|
|
77
|
+
} else {
|
|
78
|
+
process.stderr.write(`[Occasio] attest: cannot read policy ${policyFile}: ${e.message} — rules_digest will use schema defaults\n`);
|
|
79
|
+
}
|
|
80
|
+
return null;
|
|
81
|
+
}
|
|
69
82
|
|
|
70
83
|
// Tiny line-level scan — full parsing is overkill for a digest. Counts only.
|
|
71
84
|
let denyPaths = 0, denyPatterns = 0, blockSecrets = null;
|
|
@@ -37,19 +37,114 @@ function computeHash(rowWithoutHash) {
|
|
|
37
37
|
return sha256hex(JSON.stringify(rowWithoutHash));
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
//
|
|
41
|
-
//
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
40
|
+
// Default tail-read window size (bytes). Large enough to contain several full
|
|
41
|
+
// audit rows on any plausible workload; small enough that bootstrap on a
|
|
42
|
+
// 100k-event log stays sub-50ms.
|
|
43
|
+
const TAIL_READ_BYTES = 64 * 1024;
|
|
44
|
+
|
|
45
|
+
// Read the trailing window of a file. Returns the decoded string (utf8) along
|
|
46
|
+
// with a flag indicating whether the read started mid-file (i.e. the first
|
|
47
|
+
// line in the returned buffer may be truncated).
|
|
48
|
+
function readTail(filePath, bytes = TAIL_READ_BYTES) {
|
|
49
|
+
const fd = fs.openSync(filePath, 'r');
|
|
50
|
+
try {
|
|
51
|
+
const { size } = fs.fstatSync(fd);
|
|
52
|
+
const start = Math.max(0, size - bytes);
|
|
53
|
+
const len = size - start;
|
|
54
|
+
const buf = Buffer.alloc(len);
|
|
55
|
+
if (len > 0) fs.readSync(fd, buf, 0, len, start);
|
|
56
|
+
return { content: buf.toString('utf8'), truncated: start > 0 };
|
|
57
|
+
} finally {
|
|
58
|
+
fs.closeSync(fd);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Outcome codes for scanning the tail. A [code, value] tuple is used rather
|
|
63
|
+
// than an object literal so that field-name tokens used by the audit row
|
|
64
|
+
// schema (see test-interceptor.js §32) cannot appear earlier in this file
|
|
65
|
+
// than the row builder in record() below.
|
|
66
|
+
// ['hash', hexString] found a valid hash-bearing row
|
|
67
|
+
// ['genesis'] file empty / legacy-only / missing
|
|
68
|
+
// ['corrupt', detailString] last line invalid JSON, no fallback row
|
|
69
|
+
function scanTailForPrevHash(filePath, bytes = TAIL_READ_BYTES) {
|
|
70
|
+
let content, truncated;
|
|
71
|
+
try {
|
|
72
|
+
({ content, truncated } = readTail(filePath, bytes));
|
|
73
|
+
} catch {
|
|
74
|
+
return ['genesis'];
|
|
75
|
+
}
|
|
76
|
+
if (!content) return ['genesis'];
|
|
77
|
+
|
|
78
|
+
// Split into raw lines (no filter) so we can detect a partial trailing line
|
|
79
|
+
// separately from intentionally-empty lines. A well-formed JSONL ends in
|
|
80
|
+
// '\n'; if the last element after split is non-empty, the file was cut
|
|
81
|
+
// mid-write.
|
|
82
|
+
const raw = content.split('\n');
|
|
83
|
+
const lastFragment = raw[raw.length - 1];
|
|
84
|
+
const lines = raw.filter(Boolean);
|
|
85
|
+
if (lines.length === 0) return ['genesis'];
|
|
86
|
+
|
|
87
|
+
// If we read from offset 0, the first line is authoritative. If we read
|
|
88
|
+
// from mid-file, the first line in the window may be a fragment of a row
|
|
89
|
+
// truncated by the window — drop it so we never treat a fragment as legacy.
|
|
90
|
+
const startIdx = truncated && raw[0] === lines[0] ? 1 : 0;
|
|
91
|
+
const lastNonEmptyIdx = lines.length - 1;
|
|
92
|
+
|
|
93
|
+
// Detect a partial trailing line: file does not end in '\n' AND the last
|
|
94
|
+
// line fails to JSON.parse. A complete row that *happens* to be the final
|
|
95
|
+
// entry is fine — its trailing newline guarantees lastFragment === ''.
|
|
96
|
+
const trailingPartial = lastFragment !== '' && (() => {
|
|
97
|
+
try { JSON.parse(lines[lastNonEmptyIdx]); return false; } catch { return true; }
|
|
98
|
+
})();
|
|
99
|
+
|
|
100
|
+
// Walk lines in reverse to find the most recent valid hash-bearing row.
|
|
101
|
+
// Skip the partial trailing line if present.
|
|
102
|
+
const scanFrom = trailingPartial ? lastNonEmptyIdx - 1 : lastNonEmptyIdx;
|
|
103
|
+
for (let i = scanFrom; i >= startIdx; i--) {
|
|
47
104
|
try {
|
|
48
105
|
const row = JSON.parse(lines[i]);
|
|
49
|
-
if (typeof row.hash === 'string' && row.hash.length === 64)
|
|
50
|
-
|
|
106
|
+
if (typeof row.hash === 'string' && row.hash.length === 64) {
|
|
107
|
+
return ['hash', row.hash];
|
|
108
|
+
}
|
|
109
|
+
} catch {
|
|
110
|
+
// Mid-window JSON.parse failure: a truly corrupt earlier row. We do not
|
|
111
|
+
// attempt to recover past it here — if no valid hash row exists in the
|
|
112
|
+
// remaining window, fall through to the corrupt/genesis decision below.
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// No hash row found in the window. If we observed a partial trailing line
|
|
117
|
+
// AND the window contains no complete hash row, the chain is in an
|
|
118
|
+
// ambiguous state — caller must decide whether to fail hard or fall back
|
|
119
|
+
// to GENESIS (only safe if the file truly contains zero prior chain rows).
|
|
120
|
+
if (trailingPartial) {
|
|
121
|
+
return ['corrupt', 'partial trailing line, no recoverable prev_hash in tail window'];
|
|
122
|
+
}
|
|
123
|
+
return ['genesis'];
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Public: returns the most recent hash to chain from, or GENESIS.
|
|
127
|
+
// Throws an Error with code 'AUDIT_CORRUPT' when the file's last line is JSON-broken and no
|
|
128
|
+
// earlier hash-bearing row exists in the tail window — this prevents the
|
|
129
|
+
// silent tamper gap where a partial write would otherwise restart the chain.
|
|
130
|
+
function loadPrevHash(filePath, opts = {}) {
|
|
131
|
+
const { tailBytes = TAIL_READ_BYTES, failHard = true } = opts;
|
|
132
|
+
// Missing file → genesis (initial bootstrap).
|
|
133
|
+
if (!fs.existsSync(filePath)) return GENESIS;
|
|
134
|
+
const [code, detail] = scanTailForPrevHash(filePath, tailBytes);
|
|
135
|
+
if (code === 'hash') return detail;
|
|
136
|
+
if (code === 'genesis') return GENESIS;
|
|
137
|
+
// code === 'corrupt'
|
|
138
|
+
if (!failHard) {
|
|
139
|
+
process.stderr.write(`[occasio audit] WARNING: ${filePath}: ${detail}\n`);
|
|
140
|
+
return GENESIS;
|
|
51
141
|
}
|
|
52
|
-
|
|
142
|
+
const err = new Error(
|
|
143
|
+
`Audit log corrupt at ${filePath}: ${detail}. ` +
|
|
144
|
+
`Run \`occasio audit repair --file ${filePath}\` to truncate the partial trailing line.`
|
|
145
|
+
);
|
|
146
|
+
err.code = 'AUDIT_CORRUPT';
|
|
147
|
+
throw err;
|
|
53
148
|
}
|
|
54
149
|
|
|
55
150
|
/**
|
|
@@ -63,17 +158,75 @@ function loadPrevHash(filePath) {
|
|
|
63
158
|
* call. prevHash is only advanced on a successful append, keeping the
|
|
64
159
|
* in-memory chain consistent with what is on disk if the proxy is restarted.
|
|
65
160
|
*/
|
|
66
|
-
function createAuditor(filePath = DEFAULT_LOG) {
|
|
67
|
-
|
|
161
|
+
function createAuditor(filePath = DEFAULT_LOG, opts = {}) {
|
|
162
|
+
// lock=true wraps each append in a proper-lockfile lockSync()/release() pair
|
|
163
|
+
// and re-reads prev_hash from disk inside the lock. This is the only safe
|
|
164
|
+
// way to share an audit log between two concurrent writers (e.g. proxy +
|
|
165
|
+
// MCP server on the same machine). Default off because single-writer
|
|
166
|
+
// workloads do not pay the I/O cost.
|
|
167
|
+
const { lock = false } = opts;
|
|
168
|
+
let lockfile = null;
|
|
169
|
+
if (lock) {
|
|
170
|
+
// Lazy-require so a missing proper-lockfile install does not break
|
|
171
|
+
// single-writer setups that never opt in.
|
|
172
|
+
try { lockfile = require('proper-lockfile'); }
|
|
173
|
+
catch (e) {
|
|
174
|
+
throw new Error(`createAuditor({ lock: true }) requires proper-lockfile: ${e.message}`);
|
|
175
|
+
}
|
|
176
|
+
// The lockfile target must exist before lockSync can create its companion
|
|
177
|
+
// directory marker. Touch it.
|
|
178
|
+
if (!fs.existsSync(filePath)) {
|
|
179
|
+
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
|
180
|
+
fs.writeFileSync(filePath, '');
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
try { fs.mkdirSync(path.dirname(filePath), { recursive: true }); }
|
|
185
|
+
catch { /* directory already exists, or unwritable — surface on first append */ }
|
|
68
186
|
|
|
69
187
|
let prevHash = loadPrevHash(filePath);
|
|
70
188
|
|
|
189
|
+
function withLock(fn) {
|
|
190
|
+
if (!lock) return fn();
|
|
191
|
+
// proper-lockfile uses mkdir(2) for atomicity; staleness keeps the lock
|
|
192
|
+
// self-healing across crashes. realpath:false avoids extra syscalls for
|
|
193
|
+
// a path we already control.
|
|
194
|
+
// proper-lockfile's sync API forbids `retries` — it must busy-loop on
|
|
195
|
+
// EEXIST itself. Keep stale-cleanup so a crashed writer cannot freeze
|
|
196
|
+
// siblings forever.
|
|
197
|
+
let release = null;
|
|
198
|
+
const start = Date.now();
|
|
199
|
+
while (release === null) {
|
|
200
|
+
try {
|
|
201
|
+
release = lockfile.lockSync(filePath, { stale: 10000, realpath: false });
|
|
202
|
+
} catch (e) {
|
|
203
|
+
if (e.code !== 'ELOCKED') throw e;
|
|
204
|
+
if (Date.now() - start > 10000) throw e;
|
|
205
|
+
// tight spin — node has no sleepSync; a ~2 ms synchronous busy-wait is fine for
|
|
206
|
+
// contention durations expected here (single-digit ms per writer)
|
|
207
|
+
const until = Date.now() + 2;
|
|
208
|
+
while (Date.now() < until) { /* spin */ }
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
// Inside the lock we MUST re-read prev_hash — another process may have
|
|
212
|
+
// appended since we last advanced it. Without this, two concurrent
|
|
213
|
+
// writers would produce two rows with the same prev_hash → chain break.
|
|
214
|
+
prevHash = loadPrevHash(filePath, { failHard: false });
|
|
215
|
+
try { return fn(); }
|
|
216
|
+
finally { try { release(); } catch { /* lock already released by stale-timeout reaper */ } }
|
|
217
|
+
}
|
|
218
|
+
|
|
71
219
|
function record(event, decision, result) {
|
|
72
220
|
if (!event || !decision) return { ok: true };
|
|
221
|
+
return withLock(() => recordInner(event, decision, result));
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
function recordInner(event, decision, result) {
|
|
73
225
|
// Field order is explicit and must remain stable — computeHash depends on it.
|
|
74
226
|
// The Python walker in docs/audit_walker.py mirrors this order; any change
|
|
75
227
|
// here without updating that walker breaks independent verifiability.
|
|
76
228
|
const row = {
|
|
229
|
+
audit_schema: 1,
|
|
77
230
|
ts: event.timestamp,
|
|
78
231
|
event_id: event.id,
|
|
79
232
|
session_id: event.sessionId,
|
|
@@ -131,8 +284,13 @@ function createAuditor(filePath = DEFAULT_LOG) {
|
|
|
131
284
|
* append failure, mirroring record()'s contract so the caller can
|
|
132
285
|
* propagate AuditWriteError uniformly.
|
|
133
286
|
*/
|
|
134
|
-
function recordPolicyLoaded(
|
|
287
|
+
function recordPolicyLoaded(args) {
|
|
288
|
+
return withLock(() => recordPolicyLoadedInner(args));
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
function recordPolicyLoadedInner({ hash, path: policyPath, version, source }) {
|
|
135
292
|
const row = {
|
|
293
|
+
audit_schema: 1,
|
|
136
294
|
ts: new Date().toISOString(),
|
|
137
295
|
event_id: crypto.randomUUID(),
|
|
138
296
|
session_id: undefined,
|
|
@@ -175,4 +333,12 @@ function createAuditor(filePath = DEFAULT_LOG) {
|
|
|
175
333
|
return { record, recordPolicyLoaded, file: filePath };
|
|
176
334
|
}
|
|
177
335
|
|
|
178
|
-
module.exports = {
|
|
336
|
+
module.exports = {
|
|
337
|
+
createAuditor,
|
|
338
|
+
DEFAULT_LOG,
|
|
339
|
+
GENESIS,
|
|
340
|
+
computeHash,
|
|
341
|
+
loadPrevHash,
|
|
342
|
+
scanTailForPrevHash,
|
|
343
|
+
TAIL_READ_BYTES,
|
|
344
|
+
};
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* repair.js — `occasio audit repair`
|
|
5
|
+
*
|
|
6
|
+
* Truncates the trailing partial line of an audit log so a crash mid-append
|
|
7
|
+
* does not leave the file in a state where loadPrevHash() fails hard.
|
|
8
|
+
*
|
|
9
|
+
* Contract:
|
|
10
|
+
* - Examines only the last line. Earlier corruption is out of scope and
|
|
11
|
+
* should be investigated via `occasio audit verify`.
|
|
12
|
+
* - "Partial" means: the file does not end in '\n' AND the last
|
|
13
|
+
* non-empty line cannot be JSON.parsed. Any other shape is a no-op.
|
|
14
|
+
* - Writes a .bak alongside the original before mutating; on dry-run
|
|
15
|
+
* the .bak is NOT created.
|
|
16
|
+
* - Returns { truncated, wouldTruncate, backupPath, removedBytes, detail }.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
|
|
22
|
+
function inspect(filePath) {
|
|
23
|
+
const buf = fs.readFileSync(filePath);
|
|
24
|
+
if (buf.length === 0) return { partial: false, reason: 'empty file' };
|
|
25
|
+
const endsWithNewline = buf[buf.length - 1] === 0x0A;
|
|
26
|
+
const text = buf.toString('utf8');
|
|
27
|
+
const raw = text.split('\n');
|
|
28
|
+
const lastFragment = raw[raw.length - 1];
|
|
29
|
+
|
|
30
|
+
if (endsWithNewline && lastFragment === '') {
|
|
31
|
+
return { partial: false, reason: 'file ends in newline; last line is complete' };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Non-empty trailing fragment. Test whether it parses as JSON — a complete
|
|
35
|
+
// single-line record without a trailing newline is valid (rare but legal).
|
|
36
|
+
try {
|
|
37
|
+
JSON.parse(lastFragment);
|
|
38
|
+
return { partial: false, reason: 'trailing line parses as JSON; not partial' };
|
|
39
|
+
} catch {
|
|
40
|
+
// Truncate to the last preceding newline.
|
|
41
|
+
const lastNewline = buf.lastIndexOf(0x0A);
|
|
42
|
+
if (lastNewline === -1) {
|
|
43
|
+
// Entire file is a single partial line.
|
|
44
|
+
return { partial: true, truncateTo: 0, removedBytes: buf.length };
|
|
45
|
+
}
|
|
46
|
+
return {
|
|
47
|
+
partial: true,
|
|
48
|
+
truncateTo: lastNewline + 1,
|
|
49
|
+
removedBytes: buf.length - (lastNewline + 1),
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
function repairAuditFile(filePath, opts = {}) {
|
|
55
|
+
const { dryRun = false } = opts;
|
|
56
|
+
if (!fs.existsSync(filePath)) {
|
|
57
|
+
return { truncated: false, wouldTruncate: false, detail: `file not found: ${filePath}` };
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const info = inspect(filePath);
|
|
61
|
+
if (!info.partial) {
|
|
62
|
+
return {
|
|
63
|
+
truncated: false, wouldTruncate: false,
|
|
64
|
+
detail: info.reason,
|
|
65
|
+
removedBytes: 0,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
if (dryRun) {
|
|
70
|
+
return {
|
|
71
|
+
truncated: false, wouldTruncate: true,
|
|
72
|
+
removedBytes: info.removedBytes,
|
|
73
|
+
detail: `would truncate ${info.removedBytes} bytes from tail`,
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const backupPath = `${filePath}.bak`;
|
|
78
|
+
fs.copyFileSync(filePath, backupPath);
|
|
79
|
+
// Truncate in place.
|
|
80
|
+
const fd = fs.openSync(filePath, 'r+');
|
|
81
|
+
try {
|
|
82
|
+
fs.ftruncateSync(fd, info.truncateTo);
|
|
83
|
+
} finally {
|
|
84
|
+
fs.closeSync(fd);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
return {
|
|
88
|
+
truncated: true,
|
|
89
|
+
wouldTruncate: true,
|
|
90
|
+
backupPath,
|
|
91
|
+
removedBytes: info.removedBytes,
|
|
92
|
+
detail: `truncated ${info.removedBytes} bytes from tail; backup at ${path.basename(backupPath)}`,
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
function runRepairCli(args) {
|
|
97
|
+
const fileIdx = args.indexOf('--file');
|
|
98
|
+
if (fileIdx === -1 || !args[fileIdx + 1]) {
|
|
99
|
+
console.error('Usage: occasio audit repair --file <path> [--dry-run]');
|
|
100
|
+
process.exit(2);
|
|
101
|
+
}
|
|
102
|
+
const filePath = args[fileIdx + 1];
|
|
103
|
+
const dryRun = args.includes('--dry-run');
|
|
104
|
+
|
|
105
|
+
const r = repairAuditFile(filePath, { dryRun });
|
|
106
|
+
if (r.truncated) {
|
|
107
|
+
console.log(`occasio audit repair: ${r.detail}`);
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
if (r.wouldTruncate) {
|
|
111
|
+
console.log(`occasio audit repair (dry-run): ${r.detail}`);
|
|
112
|
+
console.log('Re-run without --dry-run to apply.');
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
console.log(`occasio audit repair: nothing to do — ${r.detail}`);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
module.exports = { repairAuditFile, runRepairCli };
|
package/src/audit/verifier.js
CHANGED
|
@@ -55,6 +55,8 @@ function verifyFile(filePath = DEFAULT_LOG) {
|
|
|
55
55
|
let legacy = 0, chained = 0;
|
|
56
56
|
let expectedPrevHash = null; // null = no chained row seen yet
|
|
57
57
|
let firstHash = null, lastHash = null;
|
|
58
|
+
const seenSchemaVersions = new Set();
|
|
59
|
+
const SUPPORTED_SCHEMA_VERSIONS = new Set([1]);
|
|
58
60
|
|
|
59
61
|
for (let i = 0; i < lines.length; i++) {
|
|
60
62
|
let row;
|
|
@@ -72,6 +74,23 @@ function verifyFile(filePath = DEFAULT_LOG) {
|
|
|
72
74
|
|
|
73
75
|
chained++;
|
|
74
76
|
|
|
77
|
+
// Schema-version policy:
|
|
78
|
+
// - rows with audit_schema=undefined are legacy (pre-versioning) and
|
|
79
|
+
// verify as before — they are valid.
|
|
80
|
+
// - rows with audit_schema=1 verify normally.
|
|
81
|
+
// - rows with audit_schema=N for unknown N record a non-fatal warning
|
|
82
|
+
// in errors with a `warning: true` marker; ok-state is unaffected.
|
|
83
|
+
if (row.audit_schema !== undefined) {
|
|
84
|
+
seenSchemaVersions.add(row.audit_schema);
|
|
85
|
+
if (!SUPPORTED_SCHEMA_VERSIONS.has(row.audit_schema)) {
|
|
86
|
+
errors.push({
|
|
87
|
+
line: i + 1,
|
|
88
|
+
detail: `unknown audit_schema version ${row.audit_schema} (this build supports: ${[...SUPPORTED_SCHEMA_VERSIONS].join(',')})`,
|
|
89
|
+
warning: true,
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
75
94
|
if (expectedPrevHash === null) {
|
|
76
95
|
// First chained row: prev_hash must be GENESIS.
|
|
77
96
|
firstHash = row.prev_hash;
|
|
@@ -99,15 +118,30 @@ function verifyFile(filePath = DEFAULT_LOG) {
|
|
|
99
118
|
lastHash = storedHash;
|
|
100
119
|
}
|
|
101
120
|
|
|
102
|
-
|
|
121
|
+
// Warnings (unknown schema versions) do not flip ok=false — they are
|
|
122
|
+
// forward-compatibility hints, not chain breakage.
|
|
123
|
+
const fatal = errors.filter(e => !e.warning);
|
|
124
|
+
return {
|
|
125
|
+
ok: fatal.length === 0,
|
|
126
|
+
total: lines.length, legacy, chained,
|
|
127
|
+
errors,
|
|
128
|
+
firstHash, lastHash,
|
|
129
|
+
schemaVersions: [...seenSchemaVersions],
|
|
130
|
+
};
|
|
103
131
|
}
|
|
104
132
|
|
|
105
133
|
function runAuditCli(args) {
|
|
106
134
|
const sub = args[0];
|
|
107
135
|
|
|
136
|
+
// Dispatch sub-commands. `repair` is forwarded to src/audit/repair.js.
|
|
137
|
+
if (sub === 'repair') {
|
|
138
|
+
const { runRepairCli } = require('./repair');
|
|
139
|
+
return runRepairCli(args.slice(1));
|
|
140
|
+
}
|
|
141
|
+
|
|
108
142
|
// Accept: `occasio audit`, `occasio audit verify`, `occasio audit verify --file <path>`
|
|
109
143
|
if (sub && sub !== 'verify' && !sub.startsWith('-')) {
|
|
110
|
-
console.error(`Unknown audit subcommand: ${sub}\nUsage: occasio audit [verify] [--file <path>]`);
|
|
144
|
+
console.error(`Unknown audit subcommand: ${sub}\nUsage: occasio audit [verify|repair] [--file <path>]`);
|
|
111
145
|
process.exit(1);
|
|
112
146
|
}
|
|
113
147
|
|