@occasiolabs/occasio 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/NOTICE +10 -0
- package/README.md +216 -0
- package/bin/occasio-mcp.js +5 -0
- package/bin/occasio.js +2 -0
- package/bin/supervisor/README.md +90 -0
- package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
- package/bin/supervisor/install-windows-task.ps1 +48 -0
- package/bin/supervisor/occasio.service +18 -0
- package/docs/AUDIT.md +120 -0
- package/docs/attest_verify.py +283 -0
- package/docs/audit_walker.py +65 -0
- package/docs/canonicalize.py +99 -0
- package/docs/compliance-mapping.md +93 -0
- package/docs/demos/mcp-block.md +148 -0
- package/docs/edr-calibration.md +73 -0
- package/docs/edr-demo.md +83 -0
- package/docs/python-verifier.md +74 -0
- package/docs/reference-pipeline.md +140 -0
- package/package.json +69 -0
- package/policy-templates/dev-default.yml +84 -0
- package/policy-templates/finance.yml +61 -0
- package/policy-templates/strict.yml +49 -0
- package/schemas/agent-attestation-v1.json +190 -0
- package/schemas/occasio-policy.schema.json +99 -0
- package/spec/agent-attestation/v1/README.md +137 -0
- package/src/adapters/claude-code.js +518 -0
- package/src/adapters/cline.js +161 -0
- package/src/adapters/computer-use-cli.js +198 -0
- package/src/adapters/computer-use.js +227 -0
- package/src/analyzer.js +170 -0
- package/src/anomaly/cli.js +143 -0
- package/src/anomaly/detectors/deny-rate.js +84 -0
- package/src/anomaly/detectors/file-read-volume.js +109 -0
- package/src/anomaly/detectors/secret-redact-rate.js +107 -0
- package/src/anomaly/detectors/unknown-tool-input.js +83 -0
- package/src/anomaly/index.js +169 -0
- package/src/attest/canonicalize.js +97 -0
- package/src/attest/index.js +355 -0
- package/src/attest/run-slice.js +57 -0
- package/src/attest/sign.js +186 -0
- package/src/attest/verify.js +192 -0
- package/src/audit/errors.js +21 -0
- package/src/audit/input-normalizer.js +121 -0
- package/src/audit/jsonl-auditor.js +178 -0
- package/src/audit/verifier.js +152 -0
- package/src/baseline.js +507 -0
- package/src/boundary.js +238 -0
- package/src/budget.js +42 -0
- package/src/classifier.js +115 -0
- package/src/context-budget.js +77 -0
- package/src/core/boundary-event.js +75 -0
- package/src/core/decision.js +61 -0
- package/src/core/pipeline.js +66 -0
- package/src/core/tool-names.js +105 -0
- package/src/dashboard.js +892 -0
- package/src/demo/README.md +31 -0
- package/src/demo/anomalies-demo.js +211 -0
- package/src/demo/attest-demo.js +198 -0
- package/src/distiller.js +155 -0
- package/src/embeddings.json +72 -0
- package/src/executor/dispatcher.js +230 -0
- package/src/harness.js +817 -0
- package/src/index.js +1711 -0
- package/src/inspect.js +329 -0
- package/src/interceptor.js +1198 -0
- package/src/lao.js +185 -0
- package/src/lao_prep.py +119 -0
- package/src/ledger.js +209 -0
- package/src/mcp-experiment.js +140 -0
- package/src/mcp-normalize.js +139 -0
- package/src/mcp-server.js +320 -0
- package/src/outbound-policy.js +433 -0
- package/src/policy/built-in-classifiers.js +78 -0
- package/src/policy/doctor.js +226 -0
- package/src/policy/engine.js +339 -0
- package/src/policy/init.js +153 -0
- package/src/policy/loader.js +448 -0
- package/src/policy/rules-default.js +36 -0
- package/src/policy/shell-path.js +135 -0
- package/src/policy/show.js +196 -0
- package/src/policy/validate.js +310 -0
- package/src/preflight/cli.js +164 -0
- package/src/preflight/miner.js +329 -0
- package/src/proxy/agent-router.js +93 -0
- package/src/redteam.js +428 -0
- package/src/replay.js +446 -0
- package/src/report/index.js +224 -0
- package/src/runtime.js +595 -0
- package/src/scanner/index.js +49 -0
- package/src/selftest.js +192 -0
- package/src/session.js +36 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* anomaly/cli.js — `occasio anomalies`
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* occasio anomalies Run all detectors over the last 15 min
|
|
8
|
+
* occasio anomalies --window 5m Override window size (s/m/h suffixes)
|
|
9
|
+
* occasio anomalies --json Machine-readable output
|
|
10
|
+
* occasio anomalies --since <ISO> Pin the "now" anchor (testing)
|
|
11
|
+
* occasio anomalies --chain <path> Override chain file
|
|
12
|
+
*
|
|
13
|
+
* Exit codes:
|
|
14
|
+
* 0 no alerts
|
|
15
|
+
* 1 alerts present (low only)
|
|
16
|
+
* 2 alerts present including medium/high
* 3 a detector crashed and no medium/high alert fired
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const { runDetectors, DEFAULT_CHAIN, DEFAULT_WINDOW_MS } = require('./index');
|
|
20
|
+
|
|
21
|
+
// Terminal styling helpers. Each entry wraps its argument in an ANSI SGR
// sequence and resets all attributes afterwards:
//   r = red (31), y = yellow (33), g = green (32), c = cyan (36),
//   d = dim (2),  b = bold (1)
const C = Object.fromEntries(
  Object.entries({ r: 31, y: 33, g: 32, c: 36, d: 2, b: 1 }).map(
    ([key, code]) => [key, s => `\x1b[${code}m${s}\x1b[0m`]));

// Colour used for the `[SEVERITY]` tag of an alert.
const SEV_COLOR = {
  high: C.r,
  medium: C.y,
  low: C.c,
};

// Ordering weight per severity; larger means worse.
const SEV_RANK = {
  high: 3,
  medium: 2,
  low: 1,
};
|
|
32
|
+
|
|
33
|
+
/**
 * Parse a `--window` value into milliseconds.
 *
 * Accepts a run of digits optionally followed by a unit suffix — `s`, `m`
 * or `h`, case-insensitive, with optional whitespace before it. A bare
 * number is interpreted as minutes.
 *
 * @param {string|number|undefined} spec  Raw flag value.
 * @returns {number} Window length in ms; always finite and > 0 when
 *   DEFAULT_WINDOW_MS is.
 *
 * Missing, malformed, zero-length and overflowing specs all fall back to
 * DEFAULT_WINDOW_MS. Zero is rejected on purpose: the rate-based detectors
 * bail out with no alerts when `windowMs` is not positive, so `--window 0`
 * would otherwise look like a clean run without anything being evaluated.
 */
function parseWindow(spec) {
  if (!spec) return DEFAULT_WINDOW_MS;

  const m = String(spec).trim().match(/^(\d+)\s*([smh]?)$/i);
  if (!m) return DEFAULT_WINDOW_MS;

  const n = Number.parseInt(m[1], 10);
  const unit = (m[2] || 'm').toLowerCase();
  const mult = unit === 's' ? 1000 : unit === 'h' ? 3600_000 : 60_000;

  const ms = n * mult;
  return Number.isFinite(ms) && ms > 0 ? ms : DEFAULT_WINDOW_MS;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Look up the value of a `--name value` style option.
 *
 * @param {string[]} args  Argument vector.
 * @param {string} name    Option name, including leading dashes.
 * @returns {string|undefined} The token right after the first occurrence
 *   of `name`, or undefined when `name` is absent or is the last token.
 */
function flag(args, name) {
  const pos = args.indexOf(name);
  if (pos < 0) return undefined;
  return args[pos + 1];
}
|
|
47
|
+
/** True when the switch `name` appears anywhere in `args`. */
function bool(args, name) {
  return args.indexOf(name) !== -1;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Entry point for `occasio anomalies`.
 *
 * Prints usage for `--help` / `-h`; otherwise runs the detectors and prints
 * the result either as JSON (`--json`) or as a human-readable report.
 *
 * @param {string[]} [args]  CLI arguments after the sub-command name.
 * @returns {number} Exit code (returned, not passed to process.exit here):
 *   0  no alerts and no detector errors (also returned for --help)
 *   1  alerts present, none above low severity, no detector errors
 *   2  at least one medium/high alert
 *   3  a detector reported an error and no medium/high alert fired
 */
function runAnomaliesCli(args = []) {
  if (bool(args, '--help') || bool(args, '-h')) {
    const usage = [
      'Usage:',
      ' occasio anomalies [--window 15m] [--since <ISO>] [--chain <path>] [--json]',
      '',
      'Detectors:',
      ' deny-rate BLOCK rate spike vs historical baseline',
      ' file-read-volume Distinct files-read burst (recon pattern)',
      ' unknown-tool-input Previously-unseen tool_inputs shape',
      ' secret-redact-rate Secret-redaction count vs historical baseline',
    ];
    process.stdout.write(usage.join('\n') + '\n');
    return 0;
  }

  const windowMs = parseWindow(flag(args, '--window'));
  const result = runDetectors({
    chainFile: flag(args, '--chain') || DEFAULT_CHAIN,
    windowMs,
    now: flag(args, '--since'),
  });

  if (bool(args, '--json')) {
    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
  } else {
    renderHuman(result, windowMs);
  }

  // Detector errors get their own exit code (3) so CI can tell apart
  // "everything OK", "an alert fired" and "the detector failed before it
  // could decide".
  const hasErrors = (result.errors || []).length > 0;
  if (result.alerts.length === 0) {
    return hasErrors ? 3 : 0;
  }

  let worst = 0;
  for (const alert of result.alerts) {
    const rank = SEV_RANK[alert.severity] || 0;
    if (rank > worst) worst = rank;
  }

  // A medium/high alert always wins. Below that, a detector error outranks
  // low-severity alerts: with a broken detector the absence of worse
  // findings cannot be trusted.
  if (worst >= 2) return 2;
  return hasErrors ? 3 : 1;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Print a human-readable anomaly report to stdout.
 *
 * Layout: a header line with the window size and row counts, then either a
 * single "no anomalies" line, or the alerts (worst severity first) followed
 * by any detector errors and a closing tip.
 *
 * Tolerates malformed alert objects: a missing `severity` is shown as
 * `[UNKNOWN]` (and sorts last, matching the `|| 0` rank fallback), and a
 * missing or non-array `rows_implicated` is treated as empty, so one odd
 * alert cannot crash the whole report.
 *
 * @param {object} result     Detector output; reads `alerts`, `errors`,
 *                            `window_rows` and `history_rows`.
 * @param {number} windowMs   Window length in ms, for the header only.
 * @returns {void}
 */
function renderHuman(result, windowMs) {
  const alerts = result.alerts || [];
  const errs = result.errors || [];

  const winLabel = humanDuration(windowMs);
  process.stdout.write(
    `${C.b('occasio anomalies')} ${C.d('window=' + winLabel + ', ' +
      result.window_rows + ' rows in window, ' +
      result.history_rows + ' historical)')}\n\n`);

  if (alerts.length === 0 && errs.length === 0) {
    process.stdout.write(` ${C.g('✓')} no anomalies in the current window\n\n`);
    return;
  }

  // Sort a copy high→low so the worst alert is visible first.
  const sorted = [...alerts].sort(
    (a, b) => (SEV_RANK[b.severity] || 0) - (SEV_RANK[a.severity] || 0));
  for (const a of sorted) {
    const color = SEV_COLOR[a.severity] || C.d;
    const label = String(a.severity ?? 'unknown').toUpperCase();
    process.stdout.write(` ${color(`[${label}]`)} ${C.b(a.detector_id)} — ${a.message}\n`);

    const implicated = Array.isArray(a.rows_implicated) ? a.rows_implicated : [];
    if (implicated.length > 0) {
      // Show a 12-character prefix of the first hash; only mention the
      // remainder when there actually is one (no "(+0 more)").
      const extra = implicated.length - 1;
      const more = extra > 0 ? ` (+${extra} more)` : '';
      process.stdout.write(
        ` ${C.d('implicated rows: ' + String(implicated[0]).slice(0, 12) + '…' + more)}\n`);
    }
  }

  // Errors render separately so reviewers know to escalate to engineering,
  // not to compliance. A crashed detector is a code defect, not a finding.
  if (errs.length > 0) {
    if (alerts.length > 0) process.stdout.write('\n');
    process.stdout.write(` ${C.r('!')} ${errs.length} detector(s) crashed — investigate as a bug, not a finding:\n`);
    for (const e of errs) {
      process.stdout.write(` ${C.b(e.detector_id)}: ${e.error}\n`);
    }
  }

  process.stdout.write('\n');
  process.stdout.write(
    ` ${C.d('Tip: `occasio replay --run <id>` to see the run that produced an implicated row.')}\n\n`);
}
|
|
136
|
+
|
|
137
|
+
/**
 * Format a millisecond duration as a short label.
 *
 * Below one minute: whole seconds (`30s`). Below one hour: whole minutes
 * (`15m`). Otherwise: hours with one decimal (`1.5h`). Seconds and minutes
 * are rounded to the nearest integer.
 *
 * @param {number} ms
 * @returns {string}
 */
function humanDuration(ms) {
  // [exclusive upper bound in ms, ms per unit, suffix], checked in order.
  const steps = [
    [60_000, 1000, 's'],
    [3600_000, 60_000, 'm'],
  ];
  for (const [limit, perUnit, suffix] of steps) {
    if (ms < limit) return `${Math.round(ms / perUnit)}${suffix}`;
  }
  return `${(ms / 3600_000).toFixed(1)}h`;
}
|
|
142
|
+
|
|
143
|
+
module.exports = { runAnomaliesCli, parseWindow, humanDuration };
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* deny-rate — alert when the BLOCK rate in the window is materially higher
|
|
5
|
+
* than the historical BLOCK rate.
|
|
6
|
+
*
|
|
7
|
+
* Window count vs. historical rate-per-equivalent-window. Cold start: if
|
|
8
|
+
* historical has < 100 rows, only fire when window has ≥ MIN_ABSOLUTE blocks that make up at least half of the window's rows,
|
|
9
|
+
* to avoid spurious alerts on a fresh chain.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// Detector identity, exported as `id` / `label`.
const ID = 'deny-rate';
const LABEL = 'Deny rate spike';

// The window must contain at least this many BLOCKs before anything fires.
const MIN_ABSOLUTE = 5;

// Fire when the window's BLOCK count is >= the historical per-window
// rate multiplied by this factor.
const MULTIPLIER_THRESHOLD = 3;

// With fewer historical rows than this the cold-start rule is used
// instead of the historical rate.
const COLD_START_MIN_HISTORY = 100;
|
|
18
|
+
|
|
19
|
+
/**
 * Count the rows whose `action` is exactly 'BLOCK'.
 *
 * @param {Iterable<object>} rows
 * @returns {number}
 */
function countBlocks(rows) {
  let blocked = 0;
  for (const { action } of rows) {
    if (action === 'BLOCK') blocked += 1;
  }
  return blocked;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Compare the window's BLOCK count with the historical BLOCK rate.
 *
 * @param {object[]} windowRows      Rows inside the current window.
 * @param {object[]} historicalRows  Rows before the window; the first and
 *                                   last entries' `ts` define the history span.
 * @param {{windowMs: number}} opts  Window length in ms.
 * @returns {object[]} Zero or one alert
 *   `{ severity, observed, baseline, message, rows_implicated }`.
 *
 * Decision order:
 *   1. fewer than MIN_ABSOLUTE BLOCKs, or a non-positive window → nothing;
 *   2. cold start (history shorter than COLD_START_MIN_HISTORY) → MEDIUM,
 *      but only if at least half of the window rows are BLOCKs;
 *   3. unusable history timestamps → nothing;
 *   4. history without any BLOCK → HIGH;
 *   5. otherwise fire when the count is >= MULTIPLIER_THRESHOLD × the
 *      historical per-window rate (HIGH above ×10, else MEDIUM).
 */
function evaluate(windowRows, historicalRows, opts) {
  const blockedNow = countBlocks(windowRows);
  if (blockedNow < MIN_ABSOLUTE) return [];

  const windowSpanMs = opts.windowMs || 0;
  if (windowSpanMs <= 0) return [];

  // Every alert shares one skeleton; only severity/baseline/message vary.
  const alert = (severity, baseline, message) => [{
    severity,
    observed: blockedNow,
    baseline,
    message,
    rows_implicated: implicatedHashes(windowRows),
  }];

  if (historicalRows.length < COLD_START_MIN_HISTORY) {
    // Cold start: no trustworthy baseline yet, so require that BLOCKs
    // dominate the window to avoid alerting on a brand-new install.
    const blockedShare = windowRows.length ? blockedNow / windowRows.length : 0;
    return blockedShare < 0.5
      ? []
      : alert('medium', 0, `BLOCK count ${blockedNow} in current window (cold-start: no historical baseline yet, but ${(blockedShare * 100).toFixed(0)}% of window rows are blocked)`);
  }

  const oldest = Date.parse(historicalRows[0]?.ts);
  const newest = Date.parse(historicalRows[historicalRows.length - 1]?.ts);
  const spanUsable = Number.isFinite(oldest) && Number.isFinite(newest) && newest > oldest;
  if (!spanUsable) return [];

  // Historical rate scaled to one window: BLOCKs / history span × window.
  const expectedPerWindow = (countBlocks(historicalRows) / (newest - oldest)) * windowSpanMs;
  if (expectedPerWindow <= 0) {
    return alert('high', 0, `${blockedNow} BLOCKs in current window; historical baseline shows none — possible new attack pattern or policy change`);
  }

  const ratio = blockedNow / expectedPerWindow;
  if (ratio < MULTIPLIER_THRESHOLD) return [];

  return alert(
    ratio > 10 ? 'high' : 'medium',
    Number(expectedPerWindow.toFixed(2)),
    `BLOCK rate ${blockedNow} in current window vs. historical ${expectedPerWindow.toFixed(2)}/window (×${ratio.toFixed(1)})`);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Collect the `hash` of the first five BLOCK rows that carry a string hash,
 * in row order.
 *
 * @param {object[]} rows
 * @returns {string[]} At most five hashes.
 */
function implicatedHashes(rows) {
  const hashes = [];
  for (const row of rows) {
    if (row.action !== 'BLOCK' || typeof row.hash !== 'string') continue;
    hashes.push(row.hash);
    if (hashes.length === 5) break;
  }
  return hashes;
}
|
|
83
|
+
|
|
84
|
+
module.exports = { id: ID, label: LABEL, evaluate };
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* file-read-volume — alert when the count of distinct files read in the
|
|
5
|
+
* window exceeds the per-window historical p95 by a configurable multiplier.
|
|
6
|
+
*
|
|
7
|
+
* Catches recon-style bursts where an agent suddenly reads dozens of files
|
|
8
|
+
* it has never touched. Uses **distinct** paths (not total reads) to avoid
|
|
9
|
+
* false positives from a single re-read loop.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// Detector identity, exported as `id` / `label`.
const ID = 'file-read-volume';
const LABEL = 'File-read volume spike';

// The window must read at least this many distinct paths before anything fires.
const MIN_ABSOLUTE = 20;

// Fire when the window's distinct-path count reaches p95 × this factor.
const P95_MULTIPLIER = 1.5;

// At least this many historical windows are needed to compute a p95.
const COLD_START_MIN_RUNS = 5;
|
|
18
|
+
|
|
19
|
+
/**
 * Whether a row is a file-read tool call: `tool_name` equals `read` or
 * `read_file`, compared case-insensitively.
 *
 * @param {object} row
 * @returns {boolean}
 */
function readTool(row) {
  return ['read', 'read_file'].includes(String(row.tool_name || '').toLowerCase());
}
|
|
23
|
+
|
|
24
|
+
/**
 * Extract the path argument of a row: `tool_inputs.path` if truthy, else
 * `tool_inputs.file_path`, else null.
 *
 * @param {object} row
 * @returns {*} The path value, or null when there is none.
 */
function pathOf(row) {
  const inputs = row.tool_inputs;
  if (!inputs) return null;
  return inputs.path || inputs.file_path || null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Set of distinct paths touched by read-tool rows.
 *
 * Rows that are not read calls, or that carry no path, are skipped.
 *
 * @param {Iterable<object>} rows
 * @returns {Set} Distinct path values.
 */
function distinctReadPaths(rows) {
  const paths = new Set();
  for (const row of rows) {
    const target = readTool(row) ? pathOf(row) : null;
    if (target) paths.add(target);
  }
  return paths;
}
|
|
37
|
+
|
|
38
|
+
/**
 * 95th percentile of "distinct read paths per window" over the history.
 *
 * History is cut into consecutive buckets of `windowMs`, anchored at the
 * first row's timestamp. Only buckets that contain at least one read with
 * a path are counted as samples.
 *
 * @param {object[]} historicalRows  The first and last rows' `ts` must
 *                                   parse, with the last strictly later.
 * @param {number} windowMs          Bucket width in ms.
 * @returns {{p95: number, samples: number}} `p95` is 0 whenever there are
 *   fewer than COLD_START_MIN_RUNS samples or the timestamps are unusable.
 */
function historicalDistinctP95(historicalRows, windowMs) {
  const none = () => ({ p95: 0, samples: 0 });
  if (!historicalRows.length) return none();

  const first = Date.parse(historicalRows[0].ts);
  const last = Date.parse(historicalRows[historicalRows.length - 1].ts);
  const spanUsable = Number.isFinite(first) && Number.isFinite(last) && last > first;
  if (!spanUsable) return none();

  // bucket index → distinct paths read in that bucket
  const buckets = new Map();
  for (const row of historicalRows) {
    if (!readTool(row)) continue;
    const target = pathOf(row);
    if (!target) continue;
    const at = Date.parse(row.ts);
    if (!Number.isFinite(at)) continue;

    const slot = Math.floor((at - first) / windowMs);
    let seen = buckets.get(slot);
    if (!seen) {
      seen = new Set();
      buckets.set(slot, seen);
    }
    seen.add(target);
  }

  const sizes = Array.from(buckets.values(), paths => paths.size).sort((a, b) => a - b);
  const samples = sizes.length;
  if (samples < COLD_START_MIN_RUNS) return { p95: 0, samples };

  // Nearest-rank index, clamped to the last element.
  const rank = Math.min(samples - 1, Math.floor(samples * 0.95));
  return { p95: sizes[rank], samples };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Alert when the window reads unusually many distinct files.
 *
 * @param {object[]} windowRows      Rows inside the current window.
 * @param {object[]} historicalRows  Rows before the window.
 * @param {{windowMs: number}} opts  Window length in ms.
 * @returns {object[]} Zero or one alert
 *   `{ severity, observed, baseline, message, rows_implicated }`.
 *
 * Nothing fires below MIN_ABSOLUTE distinct paths or with a non-positive
 * window. Without enough historical windows, only bursts of at least
 * 3 × MIN_ABSOLUTE fire (MEDIUM). Otherwise the count must reach
 * p95 × P95_MULTIPLIER; more than 4 × p95 is HIGH, else MEDIUM.
 */
function evaluate(windowRows, historicalRows, opts) {
  const distinctNow = distinctReadPaths(windowRows).size;
  if (distinctNow < MIN_ABSOLUTE) return [];

  const windowSpanMs = opts.windowMs || 0;
  if (windowSpanMs <= 0) return [];

  // Every alert shares one skeleton; only severity/baseline/message vary.
  const alert = (severity, baseline, message) => [{
    severity,
    observed: distinctNow,
    baseline,
    message,
    rows_implicated: firstReadHashes(windowRows),
  }];

  const history = historicalDistinctP95(historicalRows, windowSpanMs);

  if (history.samples < COLD_START_MIN_RUNS) {
    // Cold-start fallback: fire only on very large bursts so a normal
    // first session never raises an alarm.
    return distinctNow < MIN_ABSOLUTE * 3
      ? []
      : alert('medium', null, `${distinctNow} distinct files read in current window (cold-start: not enough historical windows yet to compute p95)`);
  }

  const { p95 } = history;
  if (p95 === 0 || distinctNow < p95 * P95_MULTIPLIER) return [];

  const ratio = distinctNow / Math.max(p95, 1);
  return alert(
    ratio > 4 ? 'high' : 'medium',
    p95,
    `${distinctNow} distinct files read in current window vs. p95 ${p95} (×${ratio.toFixed(1)})`);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Collect the `hash` of the first five read-tool rows that carry a string
 * hash, in row order.
 *
 * @param {object[]} rows
 * @returns {string[]} At most five hashes.
 */
function firstReadHashes(rows) {
  const hashes = [];
  for (const row of rows) {
    if (!readTool(row) || typeof row.hash !== 'string') continue;
    hashes.push(row.hash);
    if (hashes.length === 5) break;
  }
  return hashes;
}
|
|
108
|
+
|
|
109
|
+
module.exports = { id: ID, label: LABEL, evaluate };
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* secret-redact-rate — alert when the rate of secret-redaction events in the
|
|
5
|
+
* window is materially higher than baseline. Single redactions get a low-
|
|
6
|
+
* severity alert; spikes get medium/high. A non-zero window count when
|
|
7
|
+
* history is zero gets high severity (first-time-leak signal).
|
|
8
|
+
*
|
|
9
|
+
* `secrets_redacted` is an integer count on tool-call rows when the result
|
|
10
|
+
* carried redacted secrets. Any row with `secrets_redacted >= 1` counts.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Detector identity, exported as `id` / `label`.
const ID = 'secret-redact-rate';
const LABEL = 'Secret-redaction rate';

// Minimum number of historical rows before a historical rate is computed.
const MIN_HISTORY_FOR_RATE = 200;

// Window count / historical per-window count at or above this is a spike.
const MULTIPLIER_THRESHOLD = 5;
|
|
18
|
+
|
|
19
|
+
/**
 * Total of all positive numeric `secrets_redacted` values in `rows`.
 * Non-numeric, zero and negative values contribute nothing.
 *
 * @param {Iterable<object>} rows
 * @returns {number}
 */
function sumRedactions(rows) {
  let total = 0;
  for (const { secrets_redacted: count } of rows) {
    if (typeof count !== 'number' || !(count > 0)) continue;
    total += count;
  }
  return total;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Rows whose `secrets_redacted` is a number greater than zero, in their
 * original order.
 *
 * @param {Iterable<object>} rows
 * @returns {object[]} A new array; the input is not modified.
 */
function rowsWithRedactions(rows) {
  const hits = [];
  for (const row of rows) {
    const count = row.secrets_redacted;
    if (typeof count === 'number' && count > 0) hits.push(row);
  }
  return hits;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Alert on secret redactions in the window, graded against history.
 *
 * @param {object[]} windowRows      Rows inside the current window.
 * @param {object[]} historicalRows  Rows before the window; the first and
 *                                   last entries' `ts` define the history span.
 * @param {{windowMs: number}} opts  Window length in ms.
 * @returns {object[]} Zero or one alert
 *   `{ severity, observed, baseline, message, rows_implicated }`.
 *
 * With at least MIN_HISTORY_FOR_RATE historical rows:
 *   - history has no redactions at all → HIGH (first-leak signal);
 *   - ratio to the historical per-window count below MULTIPLIER_THRESHOLD
 *     → LOW; above 20 → HIGH; otherwise MEDIUM.
 * If history is too short, or its timestamps are unusable, the cold-start
 * rule applies: MEDIUM for 5 or more redactions, else LOW.
 */
function evaluate(windowRows, historicalRows, opts) {
  const hits = rowsWithRedactions(windowRows);
  const redactedNow = sumRedactions(hits);
  if (redactedNow === 0) return [];

  const windowSpanMs = opts.windowMs || 0;
  if (windowSpanMs <= 0) return [];

  // Every alert shares one skeleton; only severity/baseline/message vary.
  const alert = (severity, baseline, message) => [{
    severity,
    observed: redactedNow,
    baseline,
    message,
    rows_implicated: firstHashes(hits),
  }];

  if (historicalRows.length >= MIN_HISTORY_FOR_RATE) {
    const redactedBefore = sumRedactions(historicalRows);

    // First-leak signal: redactions in the window, none in history.
    if (redactedBefore === 0) {
      return alert('high', 0, `${redactedNow} secret-redaction event(s) in current window; no historical baseline of redactions — first time we are blocking leaks from this agent/policy combination`);
    }

    const oldest = Date.parse(historicalRows[0].ts);
    const newest = Date.parse(historicalRows[historicalRows.length - 1].ts);
    if (Number.isFinite(oldest) && Number.isFinite(newest) && newest > oldest) {
      const expectedPerWindow = (redactedBefore / (newest - oldest)) * windowSpanMs;
      // Floor the denominator so a tiny baseline cannot blow up the ratio.
      const ratio = redactedNow / Math.max(expectedPerWindow, 0.01);
      const baseline = Number(expectedPerWindow.toFixed(2));

      if (ratio < MULTIPLIER_THRESHOLD) {
        // Not a spike, but still surfaced as LOW so reviewers see every
        // redaction.
        return alert('low', baseline, `${redactedNow} secret-redaction event(s) in current window (baseline ${expectedPerWindow.toFixed(2)}/window, within normal range)`);
      }
      return alert(
        ratio > 20 ? 'high' : 'medium',
        baseline,
        `${redactedNow} secret-redaction event(s) in current window vs. baseline ${expectedPerWindow.toFixed(2)}/window (×${ratio.toFixed(1)})`);
    }
  }

  // Cold start: a few redactions are LOW (visible in JSON/SIEM, not loud
  // for human review); a burst is MEDIUM. Per the original calibration
  // note, MEDIUM on every cold-start redaction produced a false positive
  // in every coding session touching files that match the built-in secret
  // patterns (test fixtures, example configs).
  const COLD_START_BURST = 5;
  return alert(
    redactedNow >= COLD_START_BURST ? 'medium' : 'low',
    null,
    `${redactedNow} secret-redaction event(s) in current window (cold-start: not enough history yet to compute baseline)`);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Collect the `hash` of the first five rows that carry a string hash, in
 * row order.
 *
 * @param {object[]} rows
 * @returns {string[]} At most five hashes.
 */
function firstHashes(rows) {
  const hashes = [];
  for (const row of rows) {
    if (typeof row.hash !== 'string') continue;
    hashes.push(row.hash);
    if (hashes.length === 5) break;
  }
  return hashes;
}
|
|
106
|
+
|
|
107
|
+
module.exports = { id: ID, label: LABEL, evaluate };
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* unknown-tool-input — alert when a tool is invoked with a `tool_inputs`
|
|
5
|
+
* shape (sorted-key fingerprint) that has never appeared in history.
|
|
6
|
+
*
|
|
7
|
+
* Catches "agent now passes a flag I've never seen it pass before" — e.g.
|
|
8
|
+
* a Bash call with `--privileged`, a Read with an unexpected option, a new
|
|
9
|
+
* MCP tool variant. Cold start: warm up over COLD_START_MIN_ROWS chain
|
|
10
|
+
* rows before firing.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Detector identity, exported as `id` / `label`.
const ID = 'unknown-tool-input';
const LABEL = 'Unknown tool-input shape';

// Minimum number of historical rows before the detector fires at all.
//
// Calibration background: on a ~3-day real audit chain (2518 rows of normal
// coding usage) the untuned detector fired 27 times in 3.2 days at MEDIUM
// on benign new fingerprints (e.g. `grep` first called with only a `path`
// key). Raising this floor and dropping non-privileged novelty to LOW
// brought that to ~1/day on normal use, while privileged-key novelty still
// fires HIGH. See docs/edr-calibration.md for the run behind these numbers.
const COLD_START_MIN_ROWS = 200;

// Input keys that suggest privilege escalation or sandbox escape. Novelty
// on one of these is escalated to HIGH; everything else is LOW (logged but
// not loud).
// NOTE(review): the match is case-sensitive and whole-word only, and `_`
// counts as a word character, so keys such as `run_as_sudo` or `Env` do not
// match — confirm that is intended.
const PRIVILEGED_KEY_RE = /\b(env|privileged|sudo|raw|exec|eval|stdin|cap|caps|seccomp)\b/;
|
|
27
|
+
|
|
28
|
+
/**
 * Shape fingerprint of a tool call:
 * `<lower-cased tool_name>:<sorted, comma-joined tool_inputs keys>`.
 *
 * Only the key names matter, never the values. Keys whose value is
 * `undefined` are left out. Missing, non-object or array inputs yield an
 * empty key list, i.e. `<tool_name>:`.
 *
 * @param {object} row
 * @returns {string|null} The fingerprint, or null when the row has no
 *   `tool_name`.
 */
function fingerprint(row) {
  const { tool_name: tool, tool_inputs: inputs } = row;
  if (!tool) return null;

  const isKeyedObject = Boolean(inputs) && typeof inputs === 'object' && !Array.isArray(inputs);
  const shape = isKeyedObject
    ? Object.keys(inputs).filter(key => inputs[key] !== undefined).sort().join(',')
    : '';

  return String(tool).toLowerCase() + ':' + shape;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Alert on tool calls whose input shape (see `fingerprint`) never occurred
 * in history.
 *
 * @param {object[]} windowRows      Rows inside the current window.
 * @param {object[]} historicalRows  Rows before the window.
 * @param {object} [_opts]           Unused.
 * @returns {object[]} One alert per novel fingerprint, each
 *   `{ severity, observed, baseline, message, rows_implicated }`. Severity
 *   is HIGH when any input key matches PRIVILEGED_KEY_RE, otherwise LOW.
 *
 * Rows with a `kind` other than 'tool_call' are ignored on both sides.
 * Nothing fires until history holds at least COLD_START_MIN_ROWS rows.
 */
function evaluate(windowRows, historicalRows, _opts) {
  if (historicalRows.length < COLD_START_MIN_ROWS) return [];

  const knownFingerprints = new Set();
  for (const r of historicalRows) {
    if (r.kind && r.kind !== 'tool_call') continue;
    const fp = fingerprint(r);
    if (fp) knownFingerprints.add(fp);
  }

  // Group window rows by their (novel) fingerprint so we emit one alert per
  // new shape, not one per row.
  const byFp = new Map();
  for (const r of windowRows) {
    if (r.kind && r.kind !== 'tool_call') continue;
    const fp = fingerprint(r);
    if (!fp || knownFingerprints.has(fp)) continue;
    if (!byFp.has(fp)) byFp.set(fp, []);
    byFp.get(fp).push(r);
  }
  if (byFp.size === 0) return [];

  const alerts = [];
  for (const [fp, rows] of byFp) {
    // Recover the tool name from a grouped row rather than by splitting the
    // fingerprint on ':'. Tool names (and keys) may themselves contain a
    // colon; splitting would truncate the name and run the privileged-key
    // check against the wrong text. The fingerprint is
    // `<lower-cased tool_name>:<keys>`, so everything after the name and
    // its separator is the key list.
    const toolName = String(rows[0].tool_name).toLowerCase();
    const keysJoined = fp.slice(toolName.length + 1);
    const keysDisplay = keysJoined ? `{${keysJoined}}` : '{}';

    // Privileged-key novelty is loud (HIGH). Everything else is LOW
    // — visible in JSON/SIEM consumers but not noisy for human review.
    // Calibration showed MEDIUM on non-privileged novelty was the
    // largest source of false positives on normal usage.
    const high = PRIVILEGED_KEY_RE.test(keysJoined);
    alerts.push({
      severity: high ? 'high' : 'low',
      observed: rows.length,
      baseline: 0,
      message: `Tool '${toolName}' invoked with previously-unseen input shape ${keysDisplay} (${rows.length}× in current window)`,
      rows_implicated: rows.filter(r => typeof r.hash === 'string').slice(0, 5).map(r => r.hash),
    });
  }
  return alerts;
}
|
|
82
|
+
|
|
83
|
+
module.exports = { id: ID, label: LABEL, evaluate, _fingerprint: fingerprint };
|