@occasiolabs/occasio 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/NOTICE +10 -0
- package/README.md +216 -0
- package/bin/occasio-mcp.js +5 -0
- package/bin/occasio.js +2 -0
- package/bin/supervisor/README.md +90 -0
- package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
- package/bin/supervisor/install-windows-task.ps1 +48 -0
- package/bin/supervisor/occasio.service +18 -0
- package/docs/AUDIT.md +120 -0
- package/docs/attest_verify.py +283 -0
- package/docs/audit_walker.py +65 -0
- package/docs/canonicalize.py +99 -0
- package/docs/compliance-mapping.md +93 -0
- package/docs/demos/mcp-block.md +148 -0
- package/docs/edr-calibration.md +73 -0
- package/docs/edr-demo.md +83 -0
- package/docs/python-verifier.md +74 -0
- package/docs/reference-pipeline.md +140 -0
- package/package.json +69 -0
- package/policy-templates/dev-default.yml +84 -0
- package/policy-templates/finance.yml +61 -0
- package/policy-templates/strict.yml +49 -0
- package/schemas/agent-attestation-v1.json +190 -0
- package/schemas/occasio-policy.schema.json +99 -0
- package/spec/agent-attestation/v1/README.md +137 -0
- package/src/adapters/claude-code.js +518 -0
- package/src/adapters/cline.js +161 -0
- package/src/adapters/computer-use-cli.js +198 -0
- package/src/adapters/computer-use.js +227 -0
- package/src/analyzer.js +170 -0
- package/src/anomaly/cli.js +143 -0
- package/src/anomaly/detectors/deny-rate.js +84 -0
- package/src/anomaly/detectors/file-read-volume.js +109 -0
- package/src/anomaly/detectors/secret-redact-rate.js +107 -0
- package/src/anomaly/detectors/unknown-tool-input.js +83 -0
- package/src/anomaly/index.js +169 -0
- package/src/attest/canonicalize.js +97 -0
- package/src/attest/index.js +355 -0
- package/src/attest/run-slice.js +57 -0
- package/src/attest/sign.js +186 -0
- package/src/attest/verify.js +192 -0
- package/src/audit/errors.js +21 -0
- package/src/audit/input-normalizer.js +121 -0
- package/src/audit/jsonl-auditor.js +178 -0
- package/src/audit/verifier.js +152 -0
- package/src/baseline.js +507 -0
- package/src/boundary.js +238 -0
- package/src/budget.js +42 -0
- package/src/classifier.js +115 -0
- package/src/context-budget.js +77 -0
- package/src/core/boundary-event.js +75 -0
- package/src/core/decision.js +61 -0
- package/src/core/pipeline.js +66 -0
- package/src/core/tool-names.js +105 -0
- package/src/dashboard.js +892 -0
- package/src/demo/README.md +31 -0
- package/src/demo/anomalies-demo.js +211 -0
- package/src/demo/attest-demo.js +198 -0
- package/src/distiller.js +155 -0
- package/src/embeddings.json +72 -0
- package/src/executor/dispatcher.js +230 -0
- package/src/harness.js +817 -0
- package/src/index.js +1711 -0
- package/src/inspect.js +329 -0
- package/src/interceptor.js +1198 -0
- package/src/lao.js +185 -0
- package/src/lao_prep.py +119 -0
- package/src/ledger.js +209 -0
- package/src/mcp-experiment.js +140 -0
- package/src/mcp-normalize.js +139 -0
- package/src/mcp-server.js +320 -0
- package/src/outbound-policy.js +433 -0
- package/src/policy/built-in-classifiers.js +78 -0
- package/src/policy/doctor.js +226 -0
- package/src/policy/engine.js +339 -0
- package/src/policy/init.js +153 -0
- package/src/policy/loader.js +448 -0
- package/src/policy/rules-default.js +36 -0
- package/src/policy/shell-path.js +135 -0
- package/src/policy/show.js +196 -0
- package/src/policy/validate.js +310 -0
- package/src/preflight/cli.js +164 -0
- package/src/preflight/miner.js +329 -0
- package/src/proxy/agent-router.js +93 -0
- package/src/redteam.js +428 -0
- package/src/replay.js +446 -0
- package/src/report/index.js +224 -0
- package/src/runtime.js +595 -0
- package/src/scanner/index.js +49 -0
- package/src/selftest.js +192 -0
- package/src/session.js +36 -0
package/src/boundary.js
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* boundary.js — per-request "what crossed the boundary" view.
|
|
5
|
+
*
|
|
6
|
+
* Reads existing per-request JSONL entries from ~/.occasio/logs/ and
|
|
7
|
+
* projects each request into a three-column accounting:
|
|
8
|
+
*
|
|
9
|
+
* produced — raw bytes/tokens each tool emitted
|
|
10
|
+
* re-entered — bytes/tokens that actually re-entered the model's next request
|
|
11
|
+
* prevented — bytes/tokens kept out, classified by reason
|
|
12
|
+
*
|
|
13
|
+
* The data primitives (`bytes`, `kept_bytes`, `prevention_reason`) are
|
|
14
|
+
* recorded per tool call in `interceptor.js`. Older log entries that predate
|
|
15
|
+
* the new fields are handled by falling back to `bytes` (treated as "kept
|
|
16
|
+
* fully"). No new audit-chain fields are introduced.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
const os = require('os');
|
|
22
|
+
|
|
23
|
+
// Directory under the user's home that readEntries() scans for `*.jsonl` request logs.
const LOG_DIR = path.join(os.homedir(), '.occasio', 'logs');
|
|
24
|
+
|
|
25
|
+
// Approximate characters-per-token ratio (the same one the analyzer uses).
// Because it is only an estimate, the renderer prefixes every token figure
// derived from it with '~'.
const CHARS_PER_TOKEN = 4;

/**
 * Estimate the token count for a byte count.
 *
 * @param {number} bytes  byte count; any falsy value is treated as 0
 * @returns {number} bytes / CHARS_PER_TOKEN, rounded up
 */
function estTokens(bytes) {
  const size = bytes ? bytes : 0;
  return Math.ceil(size / CHARS_PER_TOKEN);
}
|
|
32
|
+
|
|
33
|
+
// Human-readable label for each known prevention reason code.
const PREVENTION_LABELS = {
  distill_clip: 'distilled',
  redact_secrets: 'redacted',
  block: 'blocked',
  context_budget: 'budget-clipped',
};

/**
 * Build a per-request boundary view from a single JSONL log entry.
 *
 * Each tool record is projected into produced / re-entered / prevented
 * figures. Records that carry no `kept_bytes` are treated as fully kept.
 * Elements of `entry.tools` that are not objects are ignored instead of
 * throwing.
 *
 * `totals.prevented_bytes` is the SUM of the per-tool prevented bytes, so the
 * totals always agree with the rows: a tool whose kept size exceeds its raw
 * size cannot cancel out bytes that were prevented for another tool.
 *
 * @param {object} entry  parsed JSONL log entry
 * @returns {object|null} { iso, model, event_type, run_id, tools: [...], totals: {...} },
 *                        or null when `entry` is not an object
 */
function buildBoundaryView(entry) {
  if (!entry || typeof entry !== 'object') return null;
  const toolRuns = Array.isArray(entry.tools) ? entry.tools : [];

  const tools = toolRuns
    .filter((t) => t && typeof t === 'object')
    .map((t) => {
      const raw = typeof t.bytes === 'number' ? t.bytes : 0;
      // Backward compat: entries written before kept_bytes existed count the
      // entire raw output as kept (no shaping was recorded).
      const kept = typeof t.kept_bytes === 'number' ? t.kept_bytes : raw;
      const prevented = Math.max(0, raw - kept);
      // Prefer the explicit reason; otherwise derive one from the boolean flags.
      const reason =
        t.prevention_reason ||
        (t.distilled ? 'distill_clip' :
          t.secretsRedacted ? 'redact_secrets' :
            t.blocked ? 'block' : null);
      // Own-property lookup so a reason such as "constructor" cannot resolve
      // to something inherited from Object.prototype.
      const knownLabel =
        reason && Object.prototype.hasOwnProperty.call(PREVENTION_LABELS, reason)
          ? PREVENTION_LABELS[reason]
          : null;

      return {
        tool: t.tool || '(unknown)',
        tool_use_id: t.tool_use_id || null,
        cmd: t.cmd || '',
        raw_bytes: raw,
        kept_bytes: kept,
        prevented_bytes: prevented,
        raw_tokens: estTokens(raw),
        kept_tokens: estTokens(kept),
        prevented_tokens: estTokens(prevented),
        prevention_reason: reason,
        prevention_label: reason ? (knownLabel || reason) : null,
        native: !!t.native,
      };
    });

  const totalRaw = tools.reduce((sum, t) => sum + t.raw_bytes, 0);
  const totalKept = tools.reduce((sum, t) => sum + t.kept_bytes, 0);
  const totalPrevented = tools.reduce((sum, t) => sum + t.prevented_bytes, 0);

  return {
    iso: entry.iso,
    model: entry.model || '',
    event_type: entry.event_type || '',
    run_id: entry.run_id || null,
    tools,
    totals: {
      raw_bytes: totalRaw,
      kept_bytes: totalKept,
      prevented_bytes: totalPrevented,
      raw_tokens: estTokens(totalRaw),
      kept_tokens: estTokens(totalKept),
      prevented_tokens: estTokens(totalPrevented),
    },
  };
}
|
|
98
|
+
|
|
99
|
+
// ── Renderer ──────────────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
// Terminal styling helpers, keyed by a one-letter name (b, d, g, y, c, r).
// Each helper wraps its argument in an ANSI SGR sequence followed by a reset.
// When NO_COLOR is set or stdout is not a TTY, every property (whatever its
// name) resolves to a pass-through function instead.
const C = (() => {
  const plain = process.env.NO_COLOR || !process.stdout.isTTY;
  if (plain) {
    return new Proxy({}, {
      get() {
        return (text) => text;
      },
    });
  }

  const sgr = (code) => (text) => `\x1b[${code}m${text}\x1b[0m`;
  return {
    b: sgr(1),
    d: sgr(2),
    g: sgr(32),
    y: sgr(33),
    c: sgr(36),
    r: sgr(31),
  };
})();
|
|
113
|
+
|
|
114
|
+
/**
 * Format a byte count for display: plain bytes below 1024, otherwise
 * kilobytes with one decimal place.
 *
 * @param {number} b  byte count
 * @returns {string} e.g. "512 B" or "1.5 KB"
 */
function fmtBytes(b) {
  return b < 1024 ? `${b} B` : `${(b / 1024).toFixed(1)} KB`;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Render a boundary view (as produced by buildBoundaryView) as a text table.
 *
 * Layout: a request header line, one row per tool with Produced / Re-entered /
 * Prevented columns, then a totals section and a footnote.
 *
 * Cells are padded to their column width BEFORE colour is applied. ANSI
 * escape sequences count towards String.prototype.padEnd but occupy no screen
 * columns, so padding a coloured string would misalign the table.
 *
 * @param {object|null} view  view object; a falsy value renders as ''
 * @param {object} [opts]     accepted for call compatibility; currently unused
 * @returns {string} the rendered block, lines joined with '\n'
 */
function renderBoundaryView(view, opts = {}) {
  if (!view) return '';

  const tag = view.event_type ? `[${view.event_type}]` : '';
  const lines = [
    '',
    `${C.b('Request')} ${C.d(view.iso || '')} ${C.d(view.model)} ${C.d(tag)}`,
    '',
  ];

  if (view.tools.length === 0) {
    lines.push(C.d(' (no tool calls in this request)'));
    return lines.join('\n');
  }

  lines.push(` ${C.b('Tool'.padEnd(12))} ${C.b('Target'.padEnd(34))} ${C.b('Produced'.padEnd(16))} ${C.b('Re-entered'.padEnd(16))} ${C.b('Prevented')}`);
  lines.push(` ${C.d('─'.repeat(12))} ${C.d('─'.repeat(34))} ${C.d('─'.repeat(16))} ${C.d('─'.repeat(16))} ${C.d('─'.repeat(20))}`);

  for (const t of view.tools) {
    const target = (t.cmd || '').slice(0, 32).padEnd(34);
    const produced = `${fmtBytes(t.raw_bytes)} ~${t.raw_tokens}t`.padEnd(16);

    // Same width as the "Re-entered" header cell; green when nothing was
    // removed, yellow when the re-entered size differs from the produced size.
    const keptText = `${fmtBytes(t.kept_bytes)} ~${t.kept_tokens}t`.padEnd(16);
    const kept = t.kept_bytes === t.raw_bytes ? C.g(keptText) : C.y(keptText);

    let prevented;
    if (t.prevented_bytes === 0) {
      prevented = C.d('—');
    } else {
      const amount = C.r(`~${t.prevented_tokens}t`);
      // Only show the parenthesised reason when one was recorded; otherwise
      // the cell would read "(null)".
      prevented = t.prevention_label
        ? `${amount} ${C.d(`(${t.prevention_label})`)}`
        : amount;
    }

    lines.push(` ${t.tool.padEnd(12)} ${target} ${produced} ${kept} ${prevented}`);
  }

  const { totals } = view;
  lines.push('');
  lines.push(` ${C.b('Totals:')}`);
  lines.push(` produced ${fmtBytes(totals.raw_bytes)} ~${totals.raw_tokens}t`);
  lines.push(` re-entered ${C.g(`${fmtBytes(totals.kept_bytes)} ~${totals.kept_tokens}t`)}`);
  lines.push(` prevented ${totals.prevented_bytes === 0
    ? C.d('0 B ~0t')
    : C.r(`${fmtBytes(totals.prevented_bytes)} ~${totals.prevented_tokens}t`)}`);
  lines.push('');
  lines.push(C.d(' Token figures are approximate (chars/4). Audit chain at') + ' ' + C.d('~/.occasio/pipeline-events.jsonl'));
  return lines.join('\n');
}
|
|
156
|
+
|
|
157
|
+
// ── Log reader ────────────────────────────────────────────────────────────────
|
|
158
|
+
|
|
159
|
+
/**
 * Load parsed log entries from the `*.jsonl` files in LOG_DIR.
 *
 * With scope 'today' (the default) only the file named after the current
 * local date (`YYYY-MM-DD.jsonl`) is read; any other scope value reads every
 * `.jsonl` file, in sorted file-name order. A missing directory, unreadable
 * files, blank lines and lines that are not valid JSON are skipped silently.
 *
 * @param {{ scope?: string }} [options]
 * @returns {Array} parsed entries, in file order then line order
 */
function readEntries({ scope = 'today' } = {}) {
  const now = new Date();
  const pad2 = (n) => String(n).padStart(2, '0');
  const today = `${now.getFullYear()}-${pad2(now.getMonth() + 1)}-${pad2(now.getDate())}`;

  let names;
  try {
    names = fs.readdirSync(LOG_DIR).filter((name) => name.endsWith('.jsonl'));
  } catch {
    names = []; // logs dir absent
  }
  if (scope === 'today') {
    const wanted = `${today}.jsonl`;
    names = names.filter((name) => name === wanted);
  }
  names.sort();

  const entries = [];
  for (const name of names) {
    let text;
    try {
      text = fs.readFileSync(path.join(LOG_DIR, name), 'utf8');
    } catch {
      continue;
    }
    const rows = text.split('\n').filter((row) => row.trim());
    for (const row of rows) {
      try {
        entries.push(JSON.parse(row));
      } catch {
        /* skip malformed */
      }
    }
  }
  return entries;
}
|
|
183
|
+
|
|
184
|
+
// ── CLI ───────────────────────────────────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
/**
 * CLI entry point for the boundary view.
 *
 * Flags:
 *   --scope <s>     passed to readEntries (defaults to 'today')
 *   --entry <N>     select the N-th entry (1-indexed)
 *   --run <prefix>  select entries whose run_id starts with <prefix>
 *   --last <N>      select the last N entries (this is the default, with N = 1)
 *   --json          write the selected views as JSON instead of a table
 *
 * Precedence when several selectors are given: --entry, then --run, then --last.
 *
 * With --json the output is always valid JSON: an empty array is written when
 * there is nothing in scope or nothing matches, so consumers never have to
 * parse a human-readable notice.
 *
 * @param {string[]} [args]  command-line arguments
 * @returns {{ ok: boolean, count: number }} ok is false only when entries
 *          exist but the selector matched none of them
 */
function runBoundaryCli(args = []) {
  const has = (flag) => args.includes(flag);
  const valueAfter = (flag) => {
    const idx = args.indexOf(flag);
    return idx >= 0 ? args[idx + 1] : undefined;
  };

  const json = has('--json');
  const scope = valueAfter('--scope') || 'today';

  // Report "nothing to show" in the active output format.
  const nothing = (message, ok) => {
    process.stdout.write(json ? '[]\n' : '\n ' + C.d(message) + '\n');
    return { ok, count: 0 };
  };

  const all = readEntries({ scope });
  if (all.length === 0) {
    return nothing('No requests in scope. Run a session first.', true);
  }

  // Selection: --entry N (1-indexed), --run <prefix>, --last N, default last 1
  let selected;
  if (has('--entry')) {
    const n = parseInt(valueAfter('--entry'), 10);
    selected = Number.isFinite(n) && n >= 1 && n <= all.length ? [all[n - 1]] : [];
  } else if (has('--run')) {
    const prefix = valueAfter('--run') || '';
    // A log line may parse to a non-object (e.g. `null`) or carry a non-string
    // run_id; treat those as having no run id rather than throwing.
    selected = all.filter((e) => String((e && e.run_id) || '').startsWith(prefix));
  } else {
    const n = has('--last') ? Math.max(1, parseInt(valueAfter('--last'), 10) || 1) : 1;
    selected = all.slice(-n);
  }

  if (selected.length === 0) {
    return nothing('No matching requests.', false);
  }

  if (json) {
    const views = selected.map(buildBoundaryView).filter(Boolean);
    process.stdout.write(JSON.stringify(views, null, 2) + '\n');
    return { ok: true, count: views.length };
  }

  process.stdout.write('\n' + C.b('Occasio Boundary') + ' ' +
    C.d(`scope: ${scope} · ${all.length} entries · showing ${selected.length}`) + '\n');
  for (const e of selected) {
    process.stdout.write(renderBoundaryView(buildBoundaryView(e)) + '\n');
  }
  return { ok: true, count: selected.length };
}
|
|
232
|
+
|
|
233
|
+
module.exports = {
|
|
234
|
+
buildBoundaryView,
|
|
235
|
+
renderBoundaryView,
|
|
236
|
+
runBoundaryCli,
|
|
237
|
+
readEntries,
|
|
238
|
+
};
|
package/src/budget.js
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* budget.js — Pure budget-enforcement helpers.
|
|
5
|
+
* No I/O. Import in index.js and test-interceptor.js.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Fraction of the budget at or above which `warnNow` is reported (80 %).
const WARN_THRESHOLD = 0.80;
// Event name exported for callers; nothing in this module emits it.
const BUDGET_EXCEEDED_EVENT = 'budget_exceeded';

/**
 * Compare accumulated spend with the configured budget and report the
 * enforcement state the caller should act on.
 *
 * The function keeps no state: `warnNow` is true on every call where the
 * ratio is at or above WARN_THRESHOLD, so emitting the warning only once is
 * left to the caller.
 *
 * @param {number}      spent   Accumulated session cost so far
 * @param {number|null} budget  Limit; null, undefined or a value <= 0 means "no budget"
 * @returns {{ exceeded: boolean, warnNow: boolean, pct: number|null }}
 *   exceeded – spent / budget >= 1
 *   warnNow  – spent / budget >= WARN_THRESHOLD
 *   pct      – spent / budget as a ratio (null when there is no budget)
 */
function budgetStatus(spent, budget) {
  const unlimited = budget == null || budget <= 0;
  if (unlimited) {
    return { exceeded: false, warnNow: false, pct: null };
  }

  const ratio = spent / budget;
  const exceeded = ratio >= 1.0;
  const warnNow = ratio >= WARN_THRESHOLD;
  return { exceeded, warnNow, pct: ratio };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Format a spend-vs-budget string for display,
 * e.g. "$0.8500 / $1.0000 (85%)".
 *
 * Returns '' when there is no budget. In line with budgetStatus(), a budget
 * of null/undefined or a value <= 0 counts as "no budget", which also avoids
 * dividing by zero and printing "NaN%". The percentage is rounded and capped
 * at 999.
 *
 * @param {number}      spent   Accumulated cost
 * @param {number|null} budget  Limit, or null when none is configured
 * @returns {string}
 */
function fmtBudget(spent, budget) {
  if (budget == null || budget <= 0) return '';
  const pct = Math.min(999, Math.round((spent / budget) * 100));
  return `$${spent.toFixed(4)} / $${budget.toFixed(4)} (${pct}%)`;
}
|
|
41
|
+
|
|
42
|
+
module.exports = { budgetStatus, fmtBudget, WARN_THRESHOLD, BUDGET_EXCEEDED_EVENT };
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* classifier.js — Semantic routing for Bash tool_use calls.
|
|
5
|
+
*
|
|
6
|
+
* Replaces the simple LOCAL_BASH_CMDS whitelist with structured class data
|
|
7
|
+
* from embeddings.json. Key improvements over the whitelist:
|
|
8
|
+
*
|
|
9
|
+
* 1. Git subcommand awareness — "git push" is rejected, "git log" is accepted.
|
|
10
|
+
* 2. Dangerous flag detection — "--force", "--hard" block interception.
|
|
11
|
+
* 3. Extensible via embeddings.json without code changes.
|
|
12
|
+
* 4. Routing feedback log for future ML training.
|
|
13
|
+
*
|
|
14
|
+
* routeLocally() is the public interface.
|
|
15
|
+
* isInterceptable() in interceptor.js delegates here.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const fs = require('fs');
|
|
19
|
+
const path = require('path');
|
|
20
|
+
const os = require('os');
|
|
21
|
+
|
|
22
|
+
const DATA = require('./embeddings.json');
|
|
23
|
+
// Lookup sets built once from the lists in embeddings.json, in this order:
// always-local commands, never-local commands, safe git subcommands,
// unsafe git subcommands, dangerous flags.
const [ALWAYS_LOCAL, NEVER_LOCAL, GIT_SAFE, GIT_UNSAFE, DANGER_FLAGS] = [
  'always_local',
  'never_local',
  'git_safe_subcommands',
  'git_unsafe_subcommands',
  'dangerous_flags',
].map((key) => new Set(DATA[key]));
|
|
28
|
+
// Characters that let one string carry more than a single simple command:
// separators / pipes / background (; & |), substitution (` $), redirection
// (< >), escapes (\) and line breaks. Bash treats a newline as a command
// terminator just like ';', so "ls\nrm x" must not be classified by its
// first word alone.
const SHELL_META = /[;&|`$<>\\\n\r]/;
|
|
29
|
+
|
|
30
|
+
// JSONL file under the user's home that logRoutingDecision() appends one line to per call.
const FEEDBACK_LOG = path.join(os.homedir(), '.occasio', 'routing-feedback.jsonl');
|
|
31
|
+
|
|
32
|
+
/**
 * Decide whether a tool call should be executed locally.
 *
 * Checks run in this order; the first one that applies decides:
 *   1. tool is not "Bash"                       → not local
 *   2. empty command                            → not local
 *   3. command contains a shell metacharacter   → not local
 *   4. any argument is a dangerous flag         → not local
 *   5. first word is in the never-local set     → not local
 *   6. git: safe subcommand → local; unsafe, bare or unknown → not local
 *   7. first word is in the always-local set    → local
 *   8. anything else                            → not local
 *
 * @param {string} toolName   e.g. "Bash"
 * @param {string} command    the command string
 * @param {string} [context]  reserved for future ML use (currently unused)
 * @returns {{ local: boolean, confidence: number, reason: string }}
 */
function routeLocally(toolName, command, context = '') {
  const verdict = (local, confidence, reason) => ({ local, confidence, reason });

  if (toolName !== 'Bash') return verdict(false, 1.0, 'non-bash tool');

  const cmd = (command || '').trim();
  if (cmd === '') return verdict(false, 1.0, 'empty command');
  if (SHELL_META.test(cmd)) return verdict(false, 1.0, 'shell metacharacter');

  const [head, ...rest] = cmd.split(/\s+/);
  const firstWord = head.toLowerCase();

  // A dangerous flag anywhere after the command name overrides everything
  // below. Each argument is checked as written (so an upper-case -D stays
  // distinct from -d) and again lower-cased (so variants like --Force are
  // still caught).
  for (const arg of rest) {
    if (DANGER_FLAGS.has(arg) || DANGER_FLAGS.has(arg.toLowerCase())) {
      return verdict(false, 0.95, 'dangerous flag');
    }
  }

  // Hard block
  if (NEVER_LOCAL.has(firstWord)) {
    return verdict(false, 0.95, `${firstWord}: write/execute command`);
  }

  if (firstWord !== 'git') {
    // Safe read commands run locally; everything unknown defaults to cloud.
    return ALWAYS_LOCAL.has(firstWord)
      ? verdict(true, 0.95, `${firstWord}: safe read command`)
      : verdict(false, 0.6, `${firstWord}: unknown command`);
  }

  // Git requires subcommand-level inspection
  const sub = (rest[0] || '').toLowerCase();
  if (GIT_SAFE.has(sub)) {
    return verdict(true, 0.95, `git ${sub}: read-only`);
  }
  if (sub === '' || GIT_UNSAFE.has(sub)) {
    return verdict(false, 0.92, `git ${sub || '(bare)'}: write/network operation`);
  }
  // Unknown git subcommand — conservative
  return verdict(false, 0.65, `git ${sub}: unknown subcommand`);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Append one routing decision to the feedback log as a JSON line.
 *
 * Logging is best-effort: any failure (bad arguments, unwritable path) is
 * swallowed so it can never affect the caller. The command text is truncated
 * to 200 characters, and the log's parent directory is created when missing.
 *
 * @param {string} command
 * @param {{ local: boolean, confidence: number, reason: string }} decision
 * @param {number} latency_ms
 * @param {'success'|'error'|'timeout'} outcome
 */
function logRoutingDecision(command, decision, latency_ms, outcome) {
  try {
    const { local, confidence, reason } = decision;
    const record = {
      ts: new Date().toISOString(),
      command: (command || '').slice(0, 200),
      local,
      confidence,
      reason,
      latency_ms,
      outcome,
    };
    fs.mkdirSync(path.dirname(FEEDBACK_LOG), { recursive: true });
    fs.appendFileSync(FEEDBACK_LOG, `${JSON.stringify(record)}\n`);
  } catch {
    /* feedback is best-effort */
  }
}
|
|
114
|
+
|
|
115
|
+
module.exports = { routeLocally, logRoutingDecision };
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* context-budget.js — per-tool context budget enforcement.
|
|
5
|
+
*
|
|
6
|
+
* Pure function. Given a string output and a token cap (positive integer),
|
|
7
|
+
* returns either the input untouched or a clipped version with a one-line
|
|
8
|
+
* marker that mirrors distill()'s convention. Token estimate is the same
|
|
9
|
+
* char/4 rule used throughout the project (see boundary.js, analyzer.js).
|
|
10
|
+
*
|
|
11
|
+
* Returns:
|
|
12
|
+
* { content: string, clipped: boolean,
|
|
13
|
+
* raw_tokens: number, kept_tokens: number, prevented_tokens: number }
|
|
14
|
+
*
|
|
15
|
+
* The caller (interceptor.runOneRound) applies this AFTER any existing
|
|
16
|
+
* transform/distill so context-budget is the final clip before the tool
|
|
17
|
+
* result re-enters the model's next request.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
// Approximate bytes-per-token ratio used for every estimate in this module.
const CHARS_PER_TOKEN = 4;

/**
 * Estimate the token count of a string from its UTF-8 byte length.
 *
 * @param {string} s  text to measure
 * @returns {number} byte length / CHARS_PER_TOKEN rounded up; 0 for an empty
 *                   string or any non-string value
 */
function estTokens(s) {
  const measurable = typeof s === 'string' && s.length > 0;
  return measurable
    ? Math.ceil(Buffer.byteLength(s, 'utf8') / CHARS_PER_TOKEN)
    : 0;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Clip a tool output so that it fits a per-tool token cap.
 *
 * Tokens are estimated from the UTF-8 byte length (see estTokens), so the cut
 * is made in bytes as well: the kept prefix is the longest run of whole code
 * points whose encoded size is at most maxTokens × CHARS_PER_TOKEN bytes.
 * Cutting in the same unit as the measurement keeps multi-byte text within
 * the cap and never splits a character. For ASCII-only output this equals a
 * plain character cut.
 *
 * When a cut happens, a one-line marker is appended. The marker counts toward
 * `kept_tokens` (it is part of what re-enters the prompt) but not toward
 * `prevented_tokens`, which only measures original tool output left out.
 *
 * @param {string} output     tool output
 * @param {number} maxTokens  cap; a missing, non-finite or non-positive value
 *                            disables clipping (never fail closed on bad config)
 * @returns {{ content: string, clipped: boolean,
 *             raw_tokens: number, kept_tokens: number, prevented_tokens: number }}
 */
function enforceContextBudget(output, maxTokens) {
  if (typeof output !== 'string' || !output) {
    return {
      content: output || '',
      clipped: false,
      raw_tokens: 0,
      kept_tokens: 0,
      prevented_tokens: 0,
    };
  }

  const rawTokens = estTokens(output);
  const capIsValid =
    typeof maxTokens === 'number' && Number.isFinite(maxTokens) && maxTokens > 0;
  if (!capIsValid || rawTokens <= maxTokens) {
    return {
      content: output,
      clipped: false,
      raw_tokens: rawTokens,
      kept_tokens: rawTokens,
      prevented_tokens: 0,
    };
  }

  const byteBudget = Math.floor(maxTokens * CHARS_PER_TOKEN);
  const encoded = Buffer.from(output, 'utf8');
  let end = Math.min(byteBudget, encoded.length);
  // If the byte just past the cut is a UTF-8 continuation byte (10xxxxxx),
  // the cut lands inside a multi-byte character; back up to its lead byte so
  // the kept prefix contains only complete characters.
  while (end > 0 && end < encoded.length && (encoded[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  const cut = encoded.toString('utf8', 0, end);
  const cutTokens = estTokens(cut);

  // "Prevented" counts ORIGINAL tool-output tokens that did not make it into
  // the model's next request; the marker below must not shrink it.
  const preventedTokens = Math.max(0, rawTokens - cutTokens);
  const clipped = cut +
    `\n\n[Occasio: ~${preventedTokens}t cut by context_budget (max ${maxTokens}t). Full output not re-sent to model.]`;

  return {
    content: clipped,
    clipped: true,
    raw_tokens: rawTokens,
    kept_tokens: estTokens(clipped),
    prevented_tokens: preventedTokens,
  };
}
|
|
76
|
+
|
|
77
|
+
module.exports = { enforceContextBudget, estTokens };
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* BoundaryEvent — the canonical record of one cross-boundary interaction
|
|
5
|
+
* between an AI agent and the user's machine.
|
|
6
|
+
*
|
|
7
|
+
* Every cross-boundary event in the system flows through this shape.
|
|
8
|
+
* Adapters produce them; policy evaluates them; executors act on them;
|
|
9
|
+
* auditors record them; observability reads them.
|
|
10
|
+
*
|
|
11
|
+
* Stage 1: the shape is minimal. Speculative fields are deliberately omitted.
|
|
12
|
+
* Add fields only when there is live evidence that a downstream layer needs them.
|
|
13
|
+
*
|
|
14
|
+
* Hard rule for the `raw` field: only the adapter touches it. The pipeline
|
|
15
|
+
* never introspects it. If a downstream layer needs a value out of `raw`,
|
|
16
|
+
* the adapter must promote it to a normalized field first.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const { randomUUID } = require('crypto');
|
|
20
|
+
|
|
21
|
+
/**
 * Build a BoundaryEvent.
 *
 * `direction`, `kind`, `agent` and `protocol` are mandatory: a missing or
 * falsy value throws, checked in that order. Every other field is optional
 * and is copied through as given (undefined when omitted). `id` is a fresh
 * random UUID and `timestamp` the current time in ISO-8601 form.
 *
 * @param {object} input
 * @param {'outbound'|'inbound'} input.direction
 * @param {'request'|'tool_call'|'tool_result'|'response'} input.kind
 * @param {string} input.agent     Stable agent identity, e.g. 'claude-code'
 * @param {string} input.protocol  Stable protocol identity, e.g. 'anthropic-http'
 * @param {string} [input.sessionId]
 * @param {string} [input.runId]
 * @param {string} [input.toolName]   Canonical tool name (mapped by adapter)
 * @param {object} [input.toolInput]  Canonical input shape
 * @param {object|string} [input.toolResult]
 * @param {object|string} [input.payload]
 * @param {unknown} [input.raw]       Adapter-private; opaque to other layers
 * @returns {object} the event record
 * @throws {Error} when one of the four mandatory fields is missing
 */
function makeBoundaryEvent(input) {
  const {
    direction,
    kind,
    agent,
    protocol,
    sessionId,
    runId,
    toolName,
    toolInput,
    toolResult,
    payload,
    raw,
  } = input;

  // Insertion order decides which missing field is reported first.
  const mandatory = { direction, kind, agent, protocol };
  for (const [field, value] of Object.entries(mandatory)) {
    if (!value) throw new Error(`BoundaryEvent.${field} is required`);
  }

  const id = randomUUID();
  const timestamp = new Date().toISOString();
  return {
    id,
    timestamp,
    sessionId,
    runId,
    agent,
    protocol,
    direction,
    kind,
    toolName,
    toolInput,
    toolResult,
    payload,
    raw,
  };
}
|
|
71
|
+
|
|
72
|
+
// Event kinds listed in the makeBoundaryEvent JSDoc. Exported for callers; makeBoundaryEvent itself does not validate against this list.
const KINDS = ['request', 'tool_call', 'tool_result', 'response'];
|
|
73
|
+
// Event directions listed in the makeBoundaryEvent JSDoc. Exported for callers; makeBoundaryEvent itself does not validate against this list.
const DIRECTIONS = ['outbound', 'inbound'];
|
|
74
|
+
|
|
75
|
+
module.exports = { makeBoundaryEvent, KINDS, DIRECTIONS };
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Decision — the result of evaluating policy against a BoundaryEvent.
|
|
5
|
+
*
|
|
6
|
+
* The policy engine produces a Decision; the dispatcher acts on it.
|
|
7
|
+
* Decisions are pure data: no functions, no closures. They can be logged
|
|
8
|
+
* and replayed.
|
|
9
|
+
*
|
|
10
|
+
* Stage 1: minimal shape. `policySource` is always 'default' until Stage 2
|
|
11
|
+
* introduces user-authored YAML rules.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Every action a Decision may carry.
const ACTIONS = ['PASS', 'LOCAL', 'TRANSFORM', 'BLOCK'];

/**
 * Build a Decision after validating it.
 *
 * Validation order: the action must be one of ACTIONS, a reason must be
 * present, then the companion field required by the action must be present.
 *
 * @param {object} input
 * @param {'PASS'|'LOCAL'|'TRANSFORM'|'BLOCK'} input.action
 * @param {string} input.reason               Stable code, e.g. 'native-handleable'
 * @param {string} [input.policySource]       Defaults to 'default'
 * @param {string} [input.executor]           Required when action === 'LOCAL'
 * @param {string} [input.transform]          Required when action === 'TRANSFORM'
 * @param {object} [input.syntheticResponse]  Required when action === 'BLOCK'
 * @returns {object} { action, reason, policySource, executor, transform, syntheticResponse }
 * @throws {Error} when any of the checks above fails
 */
function makeDecision({ action, reason, policySource = 'default', executor, transform, syntheticResponse }) {
  const known = ACTIONS.some((candidate) => candidate === action);
  if (!known) {
    throw new Error(`Decision.action must be one of ${ACTIONS.join('/')} — got ${action}`);
  }
  if (!reason) {
    throw new Error('Decision.reason is required');
  }

  switch (action) {
    case 'LOCAL':
      if (!executor) throw new Error('Decision.executor is required for LOCAL');
      break;
    case 'TRANSFORM':
      if (!transform) throw new Error('Decision.transform is required for TRANSFORM');
      break;
    case 'BLOCK':
      if (!syntheticResponse) throw new Error('Decision.syntheticResponse is required for BLOCK');
      break;
    default:
      break; // PASS needs no companion field
  }

  return { action, reason, policySource, executor, transform, syntheticResponse };
}
|
|
37
|
+
|
|
38
|
+
// Shorthand builders for the four actions. Each forwards to makeDecision, so
// the same validation applies; the action-specific value comes first.
const PASS = (reason, policySource) => {
  return makeDecision({ action: 'PASS', reason, policySource });
};
const LOCAL = (executor, reason, policySource) => {
  return makeDecision({ action: 'LOCAL', reason, policySource, executor });
};
const TRANSFORM = (transform, reason, policySource) => {
  return makeDecision({ action: 'TRANSFORM', reason, policySource, transform });
};
const BLOCK = (syntheticResponse, reason, policySource) => {
  return makeDecision({ action: 'BLOCK', reason, policySource, syntheticResponse });
};
|
|
44
|
+
|
|
45
|
+
/**
 * Build a chained TRANSFORM decision that applies multiple named transforms
 * in sequence. `transforms` must be an ordered array of at least 2 names.
 *
 * The scalar `transform` field is set to the names joined with '+', so audit
 * consumers that only read that field see a stable, readable name. The
 * `transforms` array on the decision is what carries the chain itself.
 *
 * The array is copied when the decision is built. The returned object is
 * frozen, and without the copy a caller that later mutates its own array
 * would silently change the chain while the joined `transform` name stays
 * the same.
 *
 * @param {string[]} transforms   ordered transform names (>= 2)
 * @param {string} [reason]       defaults to 'chain'
 * @param {string} [policySource] forwarded to makeDecision
 * @returns {object} frozen decision with an extra `transforms` array
 * @throws {Error} when `transforms` is not an array of at least 2 entries
 */
const TRANSFORM_CHAIN = (transforms, reason, policySource) => {
  if (!Array.isArray(transforms) || transforms.length < 2) {
    throw new Error('TRANSFORM_CHAIN requires an array of at least 2 transforms');
  }
  const chain = [...transforms]; // snapshot, detached from the caller's array
  const d = makeDecision({
    action: 'TRANSFORM',
    reason: reason || 'chain',
    policySource,
    transform: chain.join('+'),
  });
  return Object.freeze({ ...d, transforms: chain });
};
|
|
60
|
+
|
|
61
|
+
module.exports = { makeDecision, PASS, LOCAL, TRANSFORM, TRANSFORM_CHAIN, BLOCK, ACTIONS };
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Pipeline — the canonical orchestration for a single boundary event.
|
|
5
|
+
*
|
|
6
|
+
* adapter (in caller) → policy.evaluate → dispatcher.dispatch → auditor.record
|
|
7
|
+
*
|
|
8
|
+
* The pipeline does not own state. It is a pure orchestration function:
|
|
9
|
+
* given an event, the layers, and any per-call context, it returns a Result.
|
|
10
|
+
*
|
|
11
|
+
* Stage 1: this function exists and has clean contracts, but the index.js
|
|
12
|
+
* request handler does not yet call it. Stage 2 migrates the request path
|
|
13
|
+
* onto the pipeline; for now it is exercised by tests and `processToolEvent`
|
|
14
|
+
* provides a wired end-to-end demonstration with the default layers.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
 * Run one BoundaryEvent through the pipeline with caller-provided layers:
 * policy.evaluate → dispatcher.dispatch → auditor.record.
 *
 * All three layer calls are awaited, so a layer may return either a plain
 * value or a promise. This matters most for the auditor: its status is only
 * inspected once it has resolved, so a promise-returning auditor that reports
 * `{ ok: false }` still aborts the call instead of being ignored.
 *
 * A failed audit append is session-fatal: it is re-thrown as AuditWriteError
 * so the caller can abort. A missing audit row must not coexist with a
 * successful tool dispatch.
 *
 * @param {object} args
 * @param {object} args.event       BoundaryEvent
 * @param {object} args.policy      { evaluate(event) → Decision }
 * @param {object} args.dispatcher  { dispatch(event, decision, ctx) → Result }
 * @param {object} [args.auditor]   { record(event, decision, result) → { ok, error, droppedRow } }
 * @param {object} [args.ctx]       Per-call context forwarded to dispatcher
 * @returns {Promise<object>} { event, decision, result }
 * @throws {AuditWriteError} when the auditor reports `ok === false`
 */
async function process({ event, policy, dispatcher, auditor, ctx }) {
  const decision = await policy.evaluate(event);
  const result = await dispatcher.dispatch(event, decision, ctx);

  if (auditor && typeof auditor.record === 'function') {
    const status = await auditor.record(event, decision, result);
    if (status && status.ok === false) {
      // Loaded lazily: only needed on the failure path.
      const { AuditWriteError } = require('../audit/errors');
      throw new AuditWriteError(status.error, status.droppedRow);
    }
  }

  return { event, decision, result };
}
|
|
44
|
+
|
|
45
|
+
/**
 * Convenience wrapper: run one BoundaryEvent through `process` using the
 * default layers, `../policy/engine` as the policy and
 * `../executor/dispatcher` as the dispatcher. Both modules are loaded on each
 * call. The auditor and per-call context are optional and come from `opts`.
 *
 * @param {object} event   BoundaryEvent
 * @param {object} [opts]  { auditor, ctx }
 * @returns {Promise<object>} whatever `process` resolves to
 */
async function processToolEvent(event, opts = {}) {
  const layers = {
    policy: require('../policy/engine'),
    dispatcher: require('../executor/dispatcher'),
  };
  const { auditor, ctx } = opts;
  return process({ event, ...layers, auditor, ctx });
}
|
|
65
|
+
|
|
66
|
+
module.exports = { process, processToolEvent };
|