@occasiolabs/occasio 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +10 -0
  3. package/README.md +216 -0
  4. package/bin/occasio-mcp.js +5 -0
  5. package/bin/occasio.js +2 -0
  6. package/bin/supervisor/README.md +90 -0
  7. package/bin/supervisor/com.occasio.proxy.plist.template +36 -0
  8. package/bin/supervisor/install-windows-task.ps1 +48 -0
  9. package/bin/supervisor/occasio.service +18 -0
  10. package/docs/AUDIT.md +120 -0
  11. package/docs/attest_verify.py +283 -0
  12. package/docs/audit_walker.py +65 -0
  13. package/docs/canonicalize.py +99 -0
  14. package/docs/compliance-mapping.md +93 -0
  15. package/docs/demos/mcp-block.md +148 -0
  16. package/docs/edr-calibration.md +73 -0
  17. package/docs/edr-demo.md +83 -0
  18. package/docs/python-verifier.md +74 -0
  19. package/docs/reference-pipeline.md +140 -0
  20. package/package.json +69 -0
  21. package/policy-templates/dev-default.yml +84 -0
  22. package/policy-templates/finance.yml +61 -0
  23. package/policy-templates/strict.yml +49 -0
  24. package/schemas/agent-attestation-v1.json +190 -0
  25. package/schemas/occasio-policy.schema.json +99 -0
  26. package/spec/agent-attestation/v1/README.md +137 -0
  27. package/src/adapters/claude-code.js +518 -0
  28. package/src/adapters/cline.js +161 -0
  29. package/src/adapters/computer-use-cli.js +198 -0
  30. package/src/adapters/computer-use.js +227 -0
  31. package/src/analyzer.js +170 -0
  32. package/src/anomaly/cli.js +143 -0
  33. package/src/anomaly/detectors/deny-rate.js +84 -0
  34. package/src/anomaly/detectors/file-read-volume.js +109 -0
  35. package/src/anomaly/detectors/secret-redact-rate.js +107 -0
  36. package/src/anomaly/detectors/unknown-tool-input.js +83 -0
  37. package/src/anomaly/index.js +169 -0
  38. package/src/attest/canonicalize.js +97 -0
  39. package/src/attest/index.js +355 -0
  40. package/src/attest/run-slice.js +57 -0
  41. package/src/attest/sign.js +186 -0
  42. package/src/attest/verify.js +192 -0
  43. package/src/audit/errors.js +21 -0
  44. package/src/audit/input-normalizer.js +121 -0
  45. package/src/audit/jsonl-auditor.js +178 -0
  46. package/src/audit/verifier.js +152 -0
  47. package/src/baseline.js +507 -0
  48. package/src/boundary.js +238 -0
  49. package/src/budget.js +42 -0
  50. package/src/classifier.js +115 -0
  51. package/src/context-budget.js +77 -0
  52. package/src/core/boundary-event.js +75 -0
  53. package/src/core/decision.js +61 -0
  54. package/src/core/pipeline.js +66 -0
  55. package/src/core/tool-names.js +105 -0
  56. package/src/dashboard.js +892 -0
  57. package/src/demo/README.md +31 -0
  58. package/src/demo/anomalies-demo.js +211 -0
  59. package/src/demo/attest-demo.js +198 -0
  60. package/src/distiller.js +155 -0
  61. package/src/embeddings.json +72 -0
  62. package/src/executor/dispatcher.js +230 -0
  63. package/src/harness.js +817 -0
  64. package/src/index.js +1711 -0
  65. package/src/inspect.js +329 -0
  66. package/src/interceptor.js +1198 -0
  67. package/src/lao.js +185 -0
  68. package/src/lao_prep.py +119 -0
  69. package/src/ledger.js +209 -0
  70. package/src/mcp-experiment.js +140 -0
  71. package/src/mcp-normalize.js +139 -0
  72. package/src/mcp-server.js +320 -0
  73. package/src/outbound-policy.js +433 -0
  74. package/src/policy/built-in-classifiers.js +78 -0
  75. package/src/policy/doctor.js +226 -0
  76. package/src/policy/engine.js +339 -0
  77. package/src/policy/init.js +153 -0
  78. package/src/policy/loader.js +448 -0
  79. package/src/policy/rules-default.js +36 -0
  80. package/src/policy/shell-path.js +135 -0
  81. package/src/policy/show.js +196 -0
  82. package/src/policy/validate.js +310 -0
  83. package/src/preflight/cli.js +164 -0
  84. package/src/preflight/miner.js +329 -0
  85. package/src/proxy/agent-router.js +93 -0
  86. package/src/redteam.js +428 -0
  87. package/src/replay.js +446 -0
  88. package/src/report/index.js +224 -0
  89. package/src/runtime.js +595 -0
  90. package/src/scanner/index.js +49 -0
  91. package/src/selftest.js +192 -0
  92. package/src/session.js +36 -0
@@ -0,0 +1,143 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * anomaly/cli.js — `occasio anomalies`
5
+ *
6
+ * Usage:
7
+ * occasio anomalies Run all detectors over the last 15 min
8
+ * occasio anomalies --window 5m Override window size (s/m/h suffixes)
9
+ * occasio anomalies --json Machine-readable output
10
+ * occasio anomalies --since <ISO> Pin the "now" anchor (testing)
11
+ * occasio anomalies --chain <path> Override chain file
12
+ *
13
+ * Exit codes:
14
+ * 0 no alerts
15
+ * 1 alerts present (low only)
16
+ * 2 alerts present including medium/high (3 = detector error(s) and no medium/high alert)
17
+ */
18
+
19
+ const { runDetectors, DEFAULT_CHAIN, DEFAULT_WINDOW_MS } = require('./index');
20
+
21
// ANSI SGR wrappers: each helper surrounds its argument with one attribute
// escape (colour / dim / bold) followed by a reset (ESC[0m).
const C = (() => {
  const wrap = code => text => `\x1b[${code}m${text}\x1b[0m`;
  return {
    r: wrap(31), // red
    y: wrap(33), // yellow
    g: wrap(32), // green
    c: wrap(36), // cyan
    d: wrap(2),  // dim
    b: wrap(1),  // bold
  };
})();

// Severity → colour helper used when printing the `[SEVERITY]` tag.
const SEV_COLOR = { high: C.r, medium: C.y, low: C.c };
// Severity → numeric rank; larger means worse. Used for sorting and exit codes.
const SEV_RANK = { high: 3, medium: 2, low: 1 };
32
+
33
/**
 * Convert a `--window` spec into milliseconds.
 *
 * Accepted form: a non-negative integer optionally followed by a unit
 * suffix `s`, `m` or `h` (case-insensitive, whitespace allowed between the
 * number and the unit). A bare number is interpreted as minutes.
 *
 * @param {string|number|undefined} spec - raw flag value
 * @returns {number} window length in ms; DEFAULT_WINDOW_MS when the spec is
 *   missing, unparsable, zero, or too large to be a safe integer
 */
function parseWindow(spec) {
  if (!spec) return DEFAULT_WINDOW_MS;
  const m = String(spec).trim().match(/^(\d+)\s*([smh]?)$/i);
  if (!m) return DEFAULT_WINDOW_MS;
  const n = parseInt(m[1], 10);
  const unit = (m[2] || 'm').toLowerCase();
  const mult = unit === 's' ? 1000 : unit === 'h' ? 3600_000 : 60_000;
  const ms = n * mult;
  // The rate-based detectors return no alerts when windowMs <= 0, so a
  // zero-length window would silently report "no anomalies". Treat it (and
  // values that overflow the safe-integer range) like any other invalid spec.
  return Number.isSafeInteger(ms) && ms > 0 ? ms : DEFAULT_WINDOW_MS;
}
42
+
43
/**
 * Look up the value that follows an option name in an argv-style array.
 *
 * @param {string[]} args - argument list
 * @param {string} name - option to look for, e.g. `--window`
 * @returns {string|undefined} the element right after the first occurrence
 *   of `name`; undefined when `name` is absent or is the last element
 */
function flag(args, name) {
  const pos = args.indexOf(name);
  if (pos < 0) return undefined;
  return args[pos + 1];
}
47
/**
 * Report whether a switch is present anywhere in the argument list.
 *
 * @param {string[]} args - argument list
 * @param {string} name - switch to look for, e.g. `--json`
 * @returns {boolean} true when `name` occurs in `args`
 */
function bool(args, name) {
  return args.includes(name);
}
48
+
49
/**
 * Entry point for `occasio anomalies`.
 *
 * Prints usage for `--help` / `-h`; otherwise runs the detectors once and
 * writes the result to stdout, as JSON with `--json` or as a human report.
 *
 * @param {string[]} [args=[]] - command-line arguments after the subcommand
 * @returns {number} exit code:
 *   0 — no alerts and no detector errors (also returned for --help);
 *   1 — alerts present, none ranked medium or higher, no detector errors;
 *   2 — at least one medium/high alert (regardless of detector errors);
 *   3 — detector error(s) and no medium/high alert.
 */
function runAnomaliesCli(args = []) {
  const wantsHelp = bool(args, '--help') || bool(args, '-h');
  if (wantsHelp) {
    const usage = [
      'Usage:',
      ' occasio anomalies [--window 15m] [--since <ISO>] [--chain <path>] [--json]',
      '',
      'Detectors:',
      ' deny-rate BLOCK rate spike vs historical baseline',
      ' file-read-volume Distinct files-read burst (recon pattern)',
      ' unknown-tool-input Previously-unseen tool_inputs shape',
      ' secret-redact-rate Secret-redaction count vs historical baseline',
    ];
    process.stdout.write(usage.join('\n') + '\n');
    return 0;
  }

  const windowMs = parseWindow(flag(args, '--window'));
  // `--since` is forwarded as the detectors' "now" anchor.
  const since = flag(args, '--since');
  const chain = flag(args, '--chain') || DEFAULT_CHAIN;
  const asJson = bool(args, '--json');

  const result = runDetectors({ chainFile: chain, windowMs, now: since });

  if (asJson) {
    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
  } else {
    renderHuman(result, windowMs);
  }

  // Exit code 3 is reserved for "a detector itself failed", so CI can tell
  // "everything OK" from "an alert fired" from "the detector broke before it
  // could decide".
  const errCount = (result.errors || []).length;
  if (result.alerts.length === 0) {
    return errCount > 0 ? 3 : 0;
  }

  // Worst severity rank among the alerts; unknown severities rank 0.
  let worst = 0;
  for (const alert of result.alerts) {
    const rank = SEV_RANK[alert.severity] || 0;
    if (rank > worst) worst = rank;
  }

  // A medium/high alert always wins. Detector errors only override results
  // whose worst alert ranks below medium.
  // NOTE(review): with errors present, a medium alert still exits 2, not 3 —
  // confirm this is the intended precedence.
  if (worst >= 2) return 2;
  return errCount > 0 ? 3 : 1;
}
93
+
94
/**
 * Write the human-readable report for one detector run to stdout.
 *
 * Layout: a header line (window size and row counts), then either a single
 * "no anomalies" line, or the alerts sorted worst-first followed by any
 * detector errors and a usage tip.
 *
 * @param {object} result - detector run result; reads `alerts`, `errors`
 *   (optional), `window_rows` and `history_rows`
 * @param {number} windowMs - window length in ms, shown in the header
 * @returns {void}
 */
function renderHuman(result, windowMs) {
  const winLabel = humanDuration(windowMs);
  process.stdout.write(
    `${C.b('occasio anomalies')} ${C.d('window=' + winLabel + ', ' +
      result.window_rows + ' rows in window, ' +
      result.history_rows + ' historical)')}\n\n`);

  const errs = result.errors || [];

  if (result.alerts.length === 0 && errs.length === 0) {
    process.stdout.write(` ${C.g('✓')} no anomalies in the current window\n\n`);
    return;
  }

  // Sort alerts high→low so worst is visible first.
  const sorted = [...result.alerts].sort(
    (a, b) => (SEV_RANK[b.severity] || 0) - (SEV_RANK[a.severity] || 0));
  for (const a of sorted) {
    const color = SEV_COLOR[a.severity] || C.d;
    // String() keeps an alert without a string severity from crashing the
    // report; the colour lookup and the sort above already tolerate it.
    const sev = color(`[${String(a.severity).toUpperCase()}]`);
    process.stdout.write(` ${sev} ${C.b(a.detector_id)} — ${a.message}\n`);
    const implicated = Array.isArray(a.rows_implicated) ? a.rows_implicated : [];
    if (implicated.length) {
      // Show a 12-character prefix of the first implicated row hash.
      const sample = String(implicated[0]).slice(0, 12);
      const extra = implicated.length - 1;
      // Only mention additional rows when there are some (no "(+0 more)").
      const more = extra > 0 ? ` (+${extra} more)` : '';
      process.stdout.write(` ${C.d('implicated rows: ' + sample + '…' + more)}\n`);
    }
  }

  // Errors render separately so reviewers know to escalate to engineering,
  // not to compliance. A crashed detector is a code defect, not a finding.
  if (errs.length > 0) {
    if (result.alerts.length > 0) process.stdout.write('\n');
    process.stdout.write(` ${C.r('!')} ${errs.length} detector(s) crashed — investigate as a bug, not a finding:\n`);
    for (const e of errs) {
      process.stdout.write(` ${C.b(e.detector_id)}: ${e.error}\n`);
    }
  }

  process.stdout.write('\n');
  process.stdout.write(
    ` ${C.d('Tip: `occasio replay --run <id>` to see the run that produced an implicated row.')}\n\n`);
}
136
+
137
/**
 * Format a millisecond duration as a short label.
 *
 * Below one minute → whole seconds (`45s`); below one hour → whole minutes
 * (`15m`); otherwise hours with one decimal (`1.5h`).
 *
 * @param {number} ms - duration in milliseconds
 * @returns {string} rounded label with unit suffix
 */
function humanDuration(ms) {
  const minute = 60_000;
  const hour = 3600_000;
  if (ms < minute) {
    const seconds = Math.round(ms / 1000);
    return seconds + 's';
  }
  if (ms < hour) {
    const minutes = Math.round(ms / minute);
    return minutes + 'm';
  }
  const hours = (ms / hour).toFixed(1);
  return hours + 'h';
}
142
+
143
+ module.exports = { runAnomaliesCli, parseWindow, humanDuration };
@@ -0,0 +1,84 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * deny-rate — alert when the BLOCK rate in the window is materially higher
5
+ * than the historical BLOCK rate.
6
+ *
7
+ * Window count vs. historical rate-per-equivalent-window. Cold start: if
8
+ * historical has < 100 rows, only fire when window has ≥ MIN_ABSOLUTE blocks,
9
+ * to avoid spurious alerts on a fresh chain.
10
+ */
11
+
12
+ const ID = 'deny-rate';
13
+ const LABEL = 'Deny rate spike';
14
+
15
+ const MIN_ABSOLUTE = 5; // window must contain at least this many BLOCKs
16
+ const MULTIPLIER_THRESHOLD = 3.0; // window rate >= baseline_rate × this
17
+ const COLD_START_MIN_HISTORY = 100;
18
+
19
/**
 * Count the rows whose `action` is exactly the string 'BLOCK'.
 *
 * @param {object[]} rows - chain rows
 * @returns {number} number of BLOCK rows
 */
function countBlocks(rows) {
  return rows.reduce(
    (total, row) => (row.action === 'BLOCK' ? total + 1 : total), 0);
}
24
+
25
/**
 * deny-rate detector: compare the number of BLOCK rows in the current
 * window with the historical BLOCK rate scaled to one window.
 *
 * Returns no alert when the window holds fewer than MIN_ABSOLUTE BLOCKs,
 * when `opts.windowMs` is not positive, or when the history's first/last
 * timestamps do not give a positive time span.
 *
 * @param {object[]} windowRows - rows inside the current window
 * @param {object[]} historicalRows - earlier rows; the first and last
 *   element's `ts` delimit the historical span
 * @param {{windowMs: number}} opts - window length in ms
 * @returns {object[]} zero or one alert with `severity`, `observed`,
 *   `baseline`, `message` and `rows_implicated`
 */
function evaluate(windowRows, historicalRows, opts) {
  const blocked = countBlocks(windowRows);
  if (blocked < MIN_ABSOLUTE) return [];

  const windowMs = opts.windowMs || 0;
  if (windowMs <= 0) return [];

  // Every outcome shares the same alert shape; only these three fields vary.
  const alert = (severity, baseline, message) => [{
    severity,
    observed: blocked,
    baseline,
    message,
    rows_implicated: implicatedHashes(windowRows),
  }];

  // Cold start: too little history for a rate, so only fire when at least
  // half of the window's rows are blocked.
  if (historicalRows.length < COLD_START_MIN_HISTORY) {
    const share = windowRows.length ? blocked / windowRows.length : 0;
    if (share < 0.5) return [];
    return alert('medium', 0,
      `BLOCK count ${blocked} in current window (cold-start: no historical baseline yet, but ${(share * 100).toFixed(0)}% of window rows are blocked)`);
  }

  const pastBlocks = countBlocks(historicalRows);
  const oldest = Date.parse(historicalRows[0]?.ts);
  const newest = Date.parse(historicalRows[historicalRows.length - 1]?.ts);
  const spanUsable = Number.isFinite(oldest) && Number.isFinite(newest) && newest > oldest;
  if (!spanUsable) return [];

  // Historical BLOCKs per millisecond, scaled to one window.
  const expected = (pastBlocks / (newest - oldest)) * windowMs;
  if (expected <= 0) {
    return alert('high', 0,
      `${blocked} BLOCKs in current window; historical baseline shows none — possible new attack pattern or policy change`);
  }

  const ratio = blocked / expected;
  if (ratio < MULTIPLIER_THRESHOLD) return [];

  return alert(
    ratio > 10 ? 'high' : 'medium',
    Number(expected.toFixed(2)),
    `BLOCK rate ${blocked} in current window vs. historical ${expected.toFixed(2)}/window (×${ratio.toFixed(1)})`);
}
77
+
78
/**
 * Collect the hashes of up to five BLOCK rows, in input order, for display.
 * Rows whose `hash` is not a string are skipped.
 *
 * @param {object[]} rows - chain rows
 * @returns {string[]} at most five hashes
 */
function implicatedHashes(rows) {
  const hashes = [];
  for (const row of rows) {
    if (row.action !== 'BLOCK' || typeof row.hash !== 'string') continue;
    hashes.push(row.hash);
    if (hashes.length === 5) break;
  }
  return hashes;
}
83
+
84
+ module.exports = { id: ID, label: LABEL, evaluate };
@@ -0,0 +1,109 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * file-read-volume — alert when the count of distinct files read in the
5
+ * window exceeds the per-window historical p95 by a configurable multiplier.
6
+ *
7
+ * Catches recon-style bursts where an agent suddenly reads dozens of files
8
+ * it has never touched. Uses **distinct** paths (not total reads) to avoid
9
+ * false positives from a single re-read loop.
10
+ */
11
+
12
+ const ID = 'file-read-volume';
13
+ const LABEL = 'File-read volume spike';
14
+
15
+ const MIN_ABSOLUTE = 20; // window must read at least this many distinct paths
16
+ const P95_MULTIPLIER = 1.5; // window > p95 × this
17
+ const COLD_START_MIN_RUNS = 5; // need ≥ this many historical windows to compute p95
18
+
19
/**
 * Decide whether a row is a file-read tool call.
 *
 * @param {object} row - chain row
 * @returns {boolean} true when `tool_name`, lower-cased, is `read` or
 *   `read_file`
 */
function readTool(row) {
  const tool = String(row.tool_name || '').toLowerCase();
  return ['read', 'read_file'].includes(tool);
}
23
+
24
/**
 * Extract the target path from a row's `tool_inputs`.
 *
 * @param {object} row - chain row
 * @returns {*} `tool_inputs.path` if truthy, else `tool_inputs.file_path`
 *   if truthy, else null (also null when `tool_inputs` is missing)
 */
function pathOf(row) {
  const inputs = row.tool_inputs;
  if (!inputs) return null;
  return inputs.path || inputs.file_path || null;
}
27
+
28
/**
 * Build the set of distinct paths touched by read-tool rows.
 * Rows that are not reads, or that carry no path, contribute nothing.
 *
 * @param {object[]} rows - chain rows
 * @returns {Set} distinct paths in first-seen order
 */
function distinctReadPaths(rows) {
  const paths = rows
    .filter(row => readTool(row))
    .map(row => pathOf(row))
    .filter(Boolean);
  return new Set(paths);
}
37
+
38
/**
 * Estimate the 95th percentile of "distinct read paths per window" from
 * history.
 *
 * Read rows are assigned to consecutive buckets of `windowMs`, counted from
 * the first historical row's timestamp; each bucket's sample is its number
 * of distinct paths. Only buckets that received at least one read row with
 * a path and a parsable timestamp produce a sample.
 *
 * @param {object[]} historicalRows - rows whose first and last `ts` delimit
 *   the historical span
 * @param {number} windowMs - bucket width in ms
 * @returns {{p95: number, samples: number}} `p95` is 0 when there are fewer
 *   than COLD_START_MIN_RUNS samples or the span is unusable
 */
function historicalDistinctP95(historicalRows, windowMs) {
  if (historicalRows.length === 0) return { p95: 0, samples: 0 };

  const origin = Date.parse(historicalRows[0].ts);
  const end = Date.parse(historicalRows[historicalRows.length - 1].ts);
  const spanUsable = Number.isFinite(origin) && Number.isFinite(end) && end > origin;
  if (!spanUsable) return { p95: 0, samples: 0 };

  // bucket index → Set of paths read in that bucket
  const pathsByBucket = new Map();
  for (const row of historicalRows) {
    if (!readTool(row)) continue;
    const path = pathOf(row);
    const when = Date.parse(row.ts);
    if (!path || !Number.isFinite(when)) continue;

    const bucket = Math.floor((when - origin) / windowMs);
    let paths = pathsByBucket.get(bucket);
    if (!paths) {
      paths = new Set();
      pathsByBucket.set(bucket, paths);
    }
    paths.add(path);
  }

  const sizes = [...pathsByBucket.values()]
    .map(paths => paths.size)
    .sort((a, b) => a - b);
  const samples = sizes.length;
  if (samples < COLD_START_MIN_RUNS) return { p95: 0, samples };

  // Index into the ascending sample list, clamped to the last element.
  const rank = Math.min(samples - 1, Math.floor(samples * 0.95));
  return { p95: sizes[rank], samples };
}
67
+
68
/**
 * file-read-volume detector: flag a window whose number of distinct read
 * paths is well above the historical per-window p95.
 *
 * Returns no alert when the window has fewer than MIN_ABSOLUTE distinct
 * paths or `opts.windowMs` is not positive.
 *
 * @param {object[]} windowRows - rows inside the current window
 * @param {object[]} historicalRows - earlier rows used for the p95 baseline
 * @param {{windowMs: number}} opts - window length in ms
 * @returns {object[]} zero or one alert with `severity`, `observed`,
 *   `baseline`, `message` and `rows_implicated`
 */
function evaluate(windowRows, historicalRows, opts) {
  const distinct = distinctReadPaths(windowRows).size;
  if (distinct < MIN_ABSOLUTE) return [];

  const windowMs = opts.windowMs || 0;
  if (windowMs <= 0) return [];

  // Every outcome shares the same alert shape; only these three fields vary.
  const alert = (severity, baseline, message) => [{
    severity,
    observed: distinct,
    baseline,
    message,
    rows_implicated: firstReadHashes(windowRows),
  }];

  const { p95, samples } = historicalDistinctP95(historicalRows, windowMs);

  // Cold start: without enough historical windows, only a burst of at least
  // three times MIN_ABSOLUTE distinct paths is reported.
  if (samples < COLD_START_MIN_RUNS) {
    if (distinct < MIN_ABSOLUTE * 3) return [];
    return alert('medium', null,
      `${distinct} distinct files read in current window (cold-start: not enough historical windows yet to compute p95)`);
  }

  if (p95 === 0) return [];
  if (distinct < p95 * P95_MULTIPLIER) return [];

  const ratio = distinct / Math.max(p95, 1);
  return alert(
    ratio > 4 ? 'high' : 'medium',
    p95,
    `${distinct} distinct files read in current window vs. p95 ${p95} (×${ratio.toFixed(1)})`);
}
102
+
103
/**
 * Collect the hashes of up to five read-tool rows, in input order.
 * Rows whose `hash` is not a string are skipped.
 *
 * @param {object[]} rows - chain rows
 * @returns {string[]} at most five hashes
 */
function firstReadHashes(rows) {
  const hashes = [];
  for (const row of rows) {
    if (!readTool(row) || typeof row.hash !== 'string') continue;
    hashes.push(row.hash);
    if (hashes.length === 5) break;
  }
  return hashes;
}
108
+
109
+ module.exports = { id: ID, label: LABEL, evaluate };
@@ -0,0 +1,107 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * secret-redact-rate — alert when the rate of secret-redaction events in the
5
+ * window is materially higher than baseline. Single redactions get a low-
6
+ * severity alert; spikes get medium/high. A non-zero window count when
7
+ * history is zero gets high severity (first-time-leak signal).
8
+ *
9
+ * `secrets_redacted` is an integer count on tool-call rows when the result
10
+ * carried redacted secrets. Any row with `secrets_redacted >= 1` counts.
11
+ */
12
+
13
+ const ID = 'secret-redact-rate';
14
+ const LABEL = 'Secret-redaction rate';
15
+
16
+ const MIN_HISTORY_FOR_RATE = 200;
17
+ const MULTIPLIER_THRESHOLD = 5.0;
18
+
19
/**
 * Total the `secrets_redacted` counts across rows.
 * Only positive numeric values contribute; anything else is ignored.
 *
 * @param {object[]} rows - chain rows
 * @returns {number} sum of positive `secrets_redacted` values
 */
function sumRedactions(rows) {
  return rows.reduce((total, row) => {
    const count = row.secrets_redacted;
    const counts = typeof count === 'number' && count > 0;
    return counts ? total + count : total;
  }, 0);
}
27
+
28
/**
 * Select the rows that carry a positive numeric `secrets_redacted` count.
 *
 * @param {object[]} rows - chain rows
 * @returns {object[]} new array holding the matching rows in input order
 */
function rowsWithRedactions(rows) {
  return rows.filter(
    row => typeof row.secrets_redacted === 'number' && row.secrets_redacted > 0);
}
35
+
36
/**
 * secret-redact-rate detector: report secret redactions seen in the current
 * window and grade them against the historical redaction rate.
 *
 * With at least MIN_HISTORY_FOR_RATE historical rows:
 *   - no redactions in history  → HIGH;
 *   - ratio below MULTIPLIER_THRESHOLD → LOW;
 *   - otherwise MEDIUM, or HIGH when the ratio exceeds 20.
 * Otherwise (or when the historical time span is unusable) the cold-start
 * rule applies: LOW, or MEDIUM for a burst of five or more.
 *
 * @param {object[]} windowRows - rows inside the current window
 * @param {object[]} historicalRows - earlier rows; the first and last
 *   element's `ts` delimit the historical span
 * @param {{windowMs: number}} opts - window length in ms
 * @returns {object[]} zero or one alert with `severity`, `observed`,
 *   `baseline`, `message` and `rows_implicated`
 */
function evaluate(windowRows, historicalRows, opts) {
  const flagged = rowsWithRedactions(windowRows);
  const observed = sumRedactions(flagged);
  if (observed === 0) return [];

  const windowMs = opts.windowMs || 0;
  if (windowMs <= 0) return [];

  // Every outcome shares the same alert shape; only these three fields vary.
  const alert = (severity, baseline, message) => [{
    severity,
    observed,
    baseline,
    message,
    rows_implicated: firstHashes(flagged),
  }];

  if (historicalRows.length >= MIN_HISTORY_FOR_RATE) {
    const past = sumRedactions(historicalRows);

    // First-leak signal: redactions in the window, none in history.
    if (past === 0) {
      return alert('high', 0,
        `${observed} secret-redaction event(s) in current window; no historical baseline of redactions — first time we are blocking leaks from this agent/policy combination`);
    }

    const oldest = Date.parse(historicalRows[0].ts);
    const newest = Date.parse(historicalRows[historicalRows.length - 1].ts);
    if (Number.isFinite(oldest) && Number.isFinite(newest) && newest > oldest) {
      // Historical redactions per millisecond, scaled to one window. The
      // 0.01 floor keeps the ratio finite for very small baselines.
      const perWindow = (past / (newest - oldest)) * windowMs;
      const ratio = observed / Math.max(perWindow, 0.01);
      const baseline = Number(perWindow.toFixed(2));

      if (ratio < MULTIPLIER_THRESHOLD) {
        // Non-spike redactions are still surfaced, at low severity.
        return alert('low', baseline,
          `${observed} secret-redaction event(s) in current window (baseline ${perWindow.toFixed(2)}/window, within normal range)`);
      }

      return alert(
        ratio > 20 ? 'high' : 'medium',
        baseline,
        `${observed} secret-redaction event(s) in current window vs. baseline ${perWindow.toFixed(2)}/window (×${ratio.toFixed(1)})`);
    }
  }

  // Cold start: a few redactions are LOW; a burst of COLD_START_BURST or
  // more is MEDIUM so real spikes still surface during warm-up.
  const COLD_START_BURST = 5;
  return alert(
    observed >= COLD_START_BURST ? 'medium' : 'low',
    null,
    `${observed} secret-redaction event(s) in current window (cold-start: not enough history yet to compute baseline)`);
}
102
+
103
/**
 * Collect up to five string hashes from the given rows, in input order.
 *
 * @param {object[]} rows - chain rows
 * @returns {string[]} at most five hashes
 */
function firstHashes(rows) {
  const hashes = [];
  for (const row of rows) {
    if (typeof row.hash !== 'string') continue;
    hashes.push(row.hash);
    if (hashes.length === 5) break;
  }
  return hashes;
}
106
+
107
+ module.exports = { id: ID, label: LABEL, evaluate };
@@ -0,0 +1,83 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * unknown-tool-input — alert when a tool is invoked with a `tool_inputs`
5
+ * shape (sorted-key fingerprint) that has never appeared in history.
6
+ *
7
+ * Catches "agent now passes a flag I've never seen it pass before" — e.g.
8
+ * a Bash call with `--privileged`, a Read with an unexpected option, a new
9
+ * MCP tool variant. Cold start: warm up over COLD_START_MIN_ROWS chain
10
+ * rows before firing.
11
+ */
12
+
13
+ const ID = 'unknown-tool-input';
14
+ const LABEL = 'Unknown tool-input shape';
15
+
16
+ // Calibrated against a ~3-day real audit chain (2518 rows of normal
17
+ // coding usage). Pre-tune the detector fired 27 times in 3.2 days at
18
+ // MEDIUM severity on benign new fingerprints (e.g. `grep` first called
19
+ // with only a `path` key). After raising the cold-start floor and
20
+ // dropping non-privileged novelty to LOW, the rate drops to ~1/day on
21
+ // normal use while privileged-key novelty still fires HIGH. See
22
+ // docs/edr-calibration.md for the run that produced these numbers.
23
+ const COLD_START_MIN_ROWS = 200;
24
+ // Keys that suggest privilege escalation or sandbox escape — novelty on
25
+ // these is escalated to HIGH. Everything else: LOW (logged but not loud).
26
+ const PRIVILEGED_KEY_RE = /\b(env|privileged|sudo|raw|exec|eval|stdin|cap|caps|seccomp)\b/;
27
+
28
/**
 * Compute the shape fingerprint of a tool call:
 * `${lower-cased tool_name}:${sorted, comma-joined tool_inputs keys}`.
 *
 * Keys whose value is `undefined` are left out. When `tool_inputs` is not a
 * plain (non-array) object the key list is empty, giving `${tool_name}:`.
 * Only key names are used, never values.
 *
 * @param {object} row - chain row
 * @returns {string|null} fingerprint, or null when the row has no tool_name
 */
function fingerprint(row) {
  if (!row.tool_name) return null;

  const tool = String(row.tool_name).toLowerCase();
  const inputs = row.tool_inputs;
  const hasKeyedInputs = Boolean(inputs) && typeof inputs === 'object' && !Array.isArray(inputs);
  const keyList = hasKeyedInputs
    ? Object.keys(inputs).filter(key => inputs[key] !== undefined).sort().join(',')
    : '';

  return `${tool}:${keyList}`;
}
40
+
41
/**
 * unknown-tool-input detector: report tool calls in the window whose input
 * shape (see `fingerprint`) never occurs in history.
 *
 * One alert is emitted per novel fingerprint, not per row. Rows whose `kind`
 * is set to something other than 'tool_call' are ignored on both sides.
 * Nothing fires until history holds at least COLD_START_MIN_ROWS rows.
 *
 * @param {object[]} windowRows - rows inside the current window
 * @param {object[]} historicalRows - earlier rows defining the known shapes
 * @param {object} [_opts] - unused
 * @returns {object[]} alerts with `severity` ('high' when the key list
 *   matches PRIVILEGED_KEY_RE, otherwise 'low'), `observed`, `baseline`,
 *   `message` and `rows_implicated`
 */
function evaluate(windowRows, historicalRows, _opts) {
  if (historicalRows.length < COLD_START_MIN_ROWS) return [];

  const knownFingerprints = new Set();
  for (const r of historicalRows) {
    if (r.kind && r.kind !== 'tool_call') continue;
    const fp = fingerprint(r);
    if (fp) knownFingerprints.add(fp);
  }

  // Group window rows by their (novel) fingerprint so we emit one alert per
  // new shape, not one per row.
  const byFp = new Map(); // fingerprint → { toolName, keysJoined, rows }
  for (const r of windowRows) {
    if (r.kind && r.kind !== 'tool_call') continue;
    const fp = fingerprint(r);
    if (!fp || knownFingerprints.has(fp)) continue;
    let group = byFp.get(fp);
    if (!group) {
      // Keep the tool name and key list with the group instead of re-parsing
      // the fingerprint later: a tool name or key that itself contains ':'
      // would make `fp.split(':')` return the wrong pieces. The fingerprint
      // is `${toolName}:${keys}`, so the keys start after the tool name and
      // its separator.
      const toolName = String(r.tool_name).toLowerCase();
      group = { toolName, keysJoined: fp.slice(toolName.length + 1), rows: [] };
      byFp.set(fp, group);
    }
    group.rows.push(r);
  }
  if (byFp.size === 0) return [];

  const alerts = [];
  for (const { toolName, keysJoined, rows } of byFp.values()) {
    const keysDisplay = keysJoined ? `{${keysJoined}}` : '{}';
    // Privileged-key novelty is loud (HIGH). Everything else is LOW
    // — visible in JSON/SIEM consumers but not noisy for human review.
    const high = PRIVILEGED_KEY_RE.test(keysJoined);
    alerts.push({
      severity: high ? 'high' : 'low',
      observed: rows.length,
      baseline: 0,
      message: `Tool '${toolName}' invoked with previously-unseen input shape ${keysDisplay} (${rows.length}× in current window)`,
      rows_implicated: rows.filter(r => typeof r.hash === 'string').slice(0, 5).map(r => r.hash),
    });
  }
  return alerts;
}
82
+
83
+ module.exports = { id: ID, label: LABEL, evaluate, _fingerprint: fingerprint };