@crewhaus/egress-classifier 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts DELETED
@@ -1,605 +0,0 @@
1
- /**
2
- * Pillar 3 sink-side chokepoint — `egress-classifier`.
3
- *
4
- * `boundary-classifier` shipped the source half of the fabric: every cross-
5
- * trust-domain ingress (MCP / sub-agent / channel / federation / skill /
6
- * compaction / tool / chain) flows through `classifyBoundary(content, …)`,
7
- * which tags the verdict with a `TrustOrigin` so downstream readers know
8
- * *where* the content came from.
9
- *
10
- * That stops a malicious string from being silently absorbed into the
11
- * model's context. It does **not** stop the agent from later transmitting
12
- * that string to an external sink — a URL fetched, a channel message sent,
13
- * a federation outbound payload, an MCP tool invocation. OpenAI's "Designing
14
- * AI agents to resist prompt injection" (2026-05-08) and SACR's "Runtime
15
- * Security for AI Agents" (2026) converge on the same conclusion:
16
- * classification at the source is necessary but not sufficient. An attacker
17
- * who controls a source AND an accessible sink can lateral-move across the
18
- * agent's permissions even when every individual permission check passes.
19
- *
20
- * The egress classifier is the symmetric companion. Every external tool
21
- * call (any tool with `scope: "external"` in the tool-catalog) routes its
22
- * payload through `classifyEgress(payload, ctx, opts)` before invocation.
23
- * The classifier looks up the run-context's `dataLineage` map (populated
24
- * by `tagContent(ctx, content, origin)` at every boundary site) and checks
25
- * whether the outbound payload contains substrings from non-`"user"`
26
- * origins. A hit produces an `EgressVerdict`:
27
- *
28
- * - `"pass"` → no tagged content found OR origin policy is permissive
29
- * - `"warn"` → tagged content found; log + emit audit event but proceed
30
- * - `"block"` → tagged content found AND origin policy is strict; deny
31
- *
32
- * The default policy is **defense-in-depth, not defense-in-perimeter**:
33
- * `"user"`-origin content always passes (the user can do whatever they want
34
- * with their own data); content tagged from any other origin defaults to
35
- * `"warn"` for sinks the user explicitly configured, and `"block"` for
36
- * sinks reached through dynamic discovery (e.g., an MCP server the agent
37
- * loaded mid-session, a federation peer it joined at runtime).
38
- *
39
- * Single-chokepoint design parity with `boundary-classifier`: the fabric
40
- * only holds if every external-tool site uses the *same* classifier with
41
- * the *same* policy. A new external tool that re-implements egress checks
42
- * inline (or skips them for "performance") is a security regression, not
43
- * a perf optimisation.
44
- *
45
- * Catalog layer: R8 (extension of §18 safety primitives, symmetric to
46
- * `boundary-classifier`). Recipe: demos/walkthroughs/55-egress-fabric.md.
47
- */
48
- import { createHash } from "node:crypto";
49
- import { CrewhausError } from "@crewhaus/errors";
50
- import type { RunContext, TrustOrigin } from "@crewhaus/run-context";
51
-
52
/**
 * Error type thrown by this module (e.g. when `classifyEgress` receives a
 * non-string payload). Forwards `"config"` as the first argument to
 * `CrewhausError` — presumably the error category; confirm against
 * `@crewhaus/errors`.
 */
export class EgressClassifierError extends CrewhausError {
  override readonly name = "EgressClassifierError";

  constructor(message: string, cause?: unknown) {
    super("config", message, cause);
  }
}
58
-
59
- /**
60
- * The classifier's three possible verdicts. Callers (runtime-core's
61
- * pre-tool-call hook) inspect `verdict` and decide whether to block the
62
- * call, log + proceed, or proceed silently.
63
- */
64
- export type EgressVerdict = "pass" | "warn" | "block";
65
-
66
- /**
67
- * Where the egress is going. `"external-configured"` means a sink the user
68
- * explicitly wired in their spec (e.g. `tools: [fetch]` listed at compile
69
- * time). `"external-dynamic"` means a sink discovered at runtime (e.g. an
70
- * MCP server an agent registered mid-session, a federation peer that
71
- * joined the swarm). Dynamic sinks default to stricter policy because the
72
- * user never explicitly trusted them.
73
- */
74
- export type SinkScope = "external-configured" | "external-dynamic";
75
-
76
- export type EgressResult = {
77
- readonly verdict: EgressVerdict;
78
- /** Origins of tagged content found in the payload, deduped. Empty when no hits. */
79
- readonly originsFound: ReadonlyArray<TrustOrigin>;
80
- /** Number of distinct tagged strings that matched. */
81
- readonly matchCount: number;
82
- /** Was this verdict served from cache? */
83
- readonly fromCache: boolean;
84
- /** Sink the egress was destined for; passed through for audit logging. */
85
- readonly sinkId: string;
86
- readonly sinkScope: SinkScope;
87
- };
88
-
89
- /**
90
- * Per-origin default severity at egress time. `"user"` content is always
91
- * pass — the user can do whatever they want with their own data. Every
92
- * other origin defaults to `"warn"` on configured sinks (the user wired
93
- * the sink in deliberately, but we still log + flag the audit trail) and
94
- * `"block"` on dynamic sinks (the agent reached the sink without explicit
95
- * spec authorisation; combining that with cross-origin data is too close
96
- * to the social-engineering exfil pattern).
97
- *
98
- * Adding a new origin? Update both rows. The §41 `crewhaus doctor`
99
- * philosophy-alignment check catches drift.
100
- */
101
- type SeverityMatrix = Record<TrustOrigin, Record<SinkScope, EgressVerdict>>;
102
-
103
- const ORIGIN_DEFAULT_POLICY: SeverityMatrix = {
104
- user: { "external-configured": "pass", "external-dynamic": "pass" },
105
- mcp: { "external-configured": "warn", "external-dynamic": "block" },
106
- subagent: { "external-configured": "warn", "external-dynamic": "block" },
107
- channel: { "external-configured": "warn", "external-dynamic": "block" },
108
- federation: { "external-configured": "warn", "external-dynamic": "block" },
109
- skill: { "external-configured": "warn", "external-dynamic": "block" },
110
- compaction: { "external-configured": "warn", "external-dynamic": "block" },
111
- tool: { "external-configured": "warn", "external-dynamic": "block" },
112
- chain: { "external-configured": "warn", "external-dynamic": "block" },
113
- };
114
-
115
- /**
116
- * Minimum length for a tagged-content match to count. This is a BACKSTOP
117
- * against pathological lineage entries, not the primary false-positive
118
- * control: insertion discipline lives in run-context's `tagContent`, which
119
- * only admits whole blobs / lines >= 16 chars and credential-shaped tokens
120
- * >= 8 chars (audit follow-up R2 — see `MIN_TOKEN_TAG_LENGTH` and
121
- * `isCredentialShaped` there). 8 matches the token floor so vetted short
122
- * secrets (sk-..., hex runs, key=value secrets) can actually match at
123
- * egress; anything shorter is indistinguishable from prose. Keep in sync
124
- * with run-context's `MIN_TOKEN_TAG_LENGTH`.
125
- */
126
- export const MIN_MATCH_LENGTH = 8;
127
-
128
- /**
129
- * FR-006 — the matching step factored behind a strategy interface. The
130
- * matcher decides *which* tagged lineage entries the outbound payload
131
- * "contains"; it never decides pass/warn/block. The verdict fold (origin
132
- * policy + `block > warn > pass` precedence) stays in `classifyEgress`, so
133
- * the three audit outcomes and their precedence are structurally
134
- * matcher-independent.
135
- *
136
- * The default `SubstringEgressMatcher` is behavior-preserving: it is the
137
- * verbatim substring scan that lived inline before the seam existed,
138
- * including the `MIN_MATCH_LENGTH` floor. An optional embedding-backed
139
- * matcher ships separately as `@crewhaus/egress-matcher-semantic`; the
140
- * default egress path never imports it (no new hard dependency).
141
- *
142
- * NOTE: the FR sketch wrote `match(payload, lineage, opts)` with
143
- * `DataLineage` / `EgressOpts` types. Those names do not exist in the
144
- * codebase (lineage is `Map<string, TrustOrigin>` on `RunContext`; there
145
- * is no `DataLineage` type). This implementation uses a single
146
- * `EgressMatchInput` bag — idiomatic with this codebase's option-bag style
147
- * — and keeps the matcher returning only raw hits, which strictly
148
- * strengthens the matcher-independence guarantee.
149
- */
150
- export type EgressMatchInput = {
151
- /** The serialized outbound payload to inspect. */
152
- readonly payload: string;
153
- /** The run-context data-lineage map: tagged content → its trust origin. */
154
- readonly lineage: ReadonlyMap<string, TrustOrigin>;
155
- /** Floor below which a tagged entry is too short to count as a match. */
156
- readonly minMatchLength: number;
157
- };
158
-
159
- /**
160
- * Raw lineage hits — origins whose tagged content the matcher considers
161
- * present in the payload, plus a count of distinct matched tagged strings.
162
- * Deliberately verdict-free: `classifyEgress` folds policy over
163
- * `originsFound`, the matcher does not.
164
- */
165
- export type EgressMatchResult = {
166
- readonly originsFound: ReadonlyArray<TrustOrigin>;
167
- readonly matchCount: number;
168
- };
169
-
170
- /**
171
- * A pluggable egress-matching strategy. `name` namespaces audit/trace
172
- * records and the verdict cache key (so a semantic-matcher verdict never
173
- * serves a substring-matcher hit from cache). `match` may be sync or
174
- * async; `classifyEgress` awaits it either way.
175
- */
176
- export interface EgressMatcher {
177
- readonly name: string;
178
- match(input: EgressMatchInput): EgressMatchResult | Promise<EgressMatchResult>;
179
- }
180
-
181
- /**
182
- * Decode-aware normalization so a tagged secret that was *re-encoded* before
183
- * egress is still detected. A pure verbatim `payload.includes(tagged)` scan is
184
- * trivially bypassed by a prompt-injectable model two ways:
185
- *
186
- * 1. JSON-escaping — runtime-core builds the egress payload as
187
- * `JSON.stringify(toolInput)`, which escapes the newlines/quotes/
188
- * backslashes that tagged MCP/sub-agent/channel content almost always
189
- * contains, so the raw tagged string is no longer a substring.
190
- * 2. base64/hex/percent re-encoding — the model emits `base64(secret)`
191
- * instead of the plaintext.
192
- *
193
- * `buildScanViews` returns the payload plus normalized views (JSON-decoded
194
- * string values, and base64/hex/percent-decoded blobs found in either form),
195
- * and the matcher tests each tagged entry against ALL of them. The decoders
196
- * mirror `@crewhaus/prompt-injection-detector` (replicated, not imported, to
197
- * keep egress-classifier dependency-free; keep the copies in sync) and are
198
- * bounded (match count + recursion depth) so this is not itself a DoS vector.
199
- */
200
/**
 * Heuristic used to decide whether a decoded blob looks like text.
 *
 * Counts UTF-16 code units that are tab, LF, CR, or printable ASCII
 * (32..126) and returns true when they make up strictly more than 85% of
 * the string. The empty string is never considered printable.
 */
function isMostlyPrintable(s: string): boolean {
  const total = s.length;
  if (total === 0) return false;

  let hits = 0;
  for (let idx = 0; idx < total; idx += 1) {
    const code = s.charCodeAt(idx);
    const isTextControl = code === 9 || code === 10 || code === 13;
    const isVisibleAscii = code >= 32 && code < 127;
    if (isTextControl || isVisibleAscii) hits += 1;
  }
  return hits / total > 0.85;
}
209
-
210
/**
 * Attempt to decode `blob` as base64 and return the UTF-8 text, or
 * `undefined` when the candidate is rejected.
 *
 * Candidates shorter than 16 chars, or whose length is 1 mod 4 (a length no
 * base64 encoding can have), are rejected up front. A decoded result is only
 * returned when `isMostlyPrintable` accepts it, which filters out random
 * alphanumeric runs that merely look like base64.
 */
function tryDecodeBase64(blob: string): string | undefined {
  const tooShort = blob.length < 16;
  const impossibleLength = blob.length % 4 === 1;
  if (tooShort || impossibleLength) return undefined;

  try {
    const text = Buffer.from(blob, "base64").toString("utf8");
    if (isMostlyPrintable(text)) return text;
    return undefined;
  } catch {
    // Best-effort decoder: any failure just means "not a usable view".
    return undefined;
  }
}
219
-
220
/**
 * Attempt to decode `blob` as hex and return the UTF-8 text, or `undefined`
 * when the candidate is rejected.
 *
 * Candidates shorter than 16 chars or of odd length are rejected up front.
 * A decoded result is only returned when `isMostlyPrintable` accepts it.
 */
function tryDecodeHex(blob: string): string | undefined {
  const tooShort = blob.length < 16;
  const oddLength = blob.length % 2 !== 0;
  if (tooShort || oddLength) return undefined;

  try {
    const text = Buffer.from(blob, "hex").toString("utf8");
    if (isMostlyPrintable(text)) return text;
    return undefined;
  } catch {
    // Best-effort decoder: any failure just means "not a usable view".
    return undefined;
  }
}
229
-
230
/**
 * Percent-decode `text`, tolerating malformed escapes.
 *
 * Each maximal run of `%XX` escapes is decoded on its own. A run that is not
 * valid UTF-8 (so `decodeURIComponent` throws `URIError`) is left verbatim
 * while every other run is still decoded. Decoding the whole text in one
 * `decodeURIComponent` call would throw on ANY stray `%` (e.g. `"100% done"`)
 * and discard the entire view — letting a single junk `%` anywhere in the
 * payload switch off percent-decoding and slip an encoded secret past the
 * scan.
 *
 * For text that `decodeURIComponent` accepts as a whole, the result is the
 * same as before: a single, non-recursive decoding pass.
 *
 * @param text - Candidate text that may contain `%XX` escapes.
 * @returns The decoded text, or `undefined` when nothing was decoded.
 */
function tryDecodePercent(text: string): string | undefined {
  const decoded = text.replace(/(?:%[0-9A-Fa-f]{2})+/g, (run) => {
    try {
      return decodeURIComponent(run);
    } catch {
      // Invalid UTF-8 byte sequence in this run — keep it as-is.
      return run;
    }
  });
  return decoded !== text ? decoded : undefined;
}
238
-
239
/**
 * Recursively decode base64 / hex / percent-encoded blobs found in `text`.
 *
 * Bounds (DoS-safety):
 *  - at most 8 base64 candidates and 8 hex candidates are examined per call;
 *  - recursion stops after `depth` levels;
 *  - at most 16 variants are returned.
 *
 * Regex matches are consumed lazily and the loops stop as soon as a cap is
 * reached, so a huge payload with thousands of candidate runs never has all
 * of its matches materialized. The returned list is the same as examining
 * the first 8 matches of each kind and truncating to 16 entries.
 *
 * @param text - Text to search for encoded blobs.
 * @param depth - Remaining recursion budget (default 2).
 * @returns Decoded views, each successful decoding followed by its own
 *   nested decodings.
 */
function decodedVariants(text: string, depth = 2): string[] {
  if (depth <= 0 || text.length === 0) return [];

  const maxCandidates = 8;
  const maxVariants = 16;
  const out: string[] = [];

  const push = (s: string | undefined): void => {
    // Anything appended past the cap would be truncated anyway; skip the work.
    if (out.length >= maxVariants) return;
    if (s !== undefined && s.length > 0) out.push(s, ...decodedVariants(s, depth - 1));
  };

  let examined = 0;
  for (const m of text.matchAll(/[A-Za-z0-9+/]{16,}={0,2}/g)) {
    if (examined >= maxCandidates) break;
    examined += 1;
    push(tryDecodeBase64(m[0]));
  }

  examined = 0;
  for (const m of text.matchAll(/(?:[0-9A-Fa-f]{2}){8,}/g)) {
    if (examined >= maxCandidates) break;
    examined += 1;
    push(tryDecodeHex(m[0]));
  }

  if (/%[0-9A-Fa-f]{2}/.test(text)) push(tryDecodePercent(text));
  return out.slice(0, maxVariants);
}
255
-
256
/**
 * Append every string value reachable from a parsed JSON value to `out`, in
 * document order (pre-order, left to right). Object keys are not collected;
 * numbers, booleans and `null` are ignored.
 *
 * The traversal is iterative on purpose. A recursive walk overflows the call
 * stack on deeply nested input, and the caller treats any exception as "not
 * JSON" — so an attacker could wrap a payload in tens of thousands of `[`
 * levels to suppress the JSON-decoded scan view entirely.
 *
 * @param value - A value produced by `JSON.parse`.
 * @param out - Accumulator that receives the string values.
 */
function collectJsonStrings(value: unknown, out: string[]): void {
  const pending: unknown[] = [value];
  while (pending.length > 0) {
    const node = pending.pop();
    if (typeof node === "string") {
      out.push(node);
      continue;
    }
    if (node === null || typeof node !== "object") continue;

    const children: unknown[] = Array.isArray(node) ? node : Object.values(node);
    // Push in reverse so children are popped (visited) in original order.
    for (let i = children.length - 1; i >= 0; i -= 1) {
      pending.push(children[i]);
    }
  }
}
270
-
271
/**
 * Produce every string a tagged lineage entry should be tested against.
 *
 * Order of the returned views:
 *   1. the raw payload;
 *   2. when the payload parses as JSON and contains at least one string
 *      value, those values joined with `"\n"` (this undoes the escaping that
 *      `JSON.stringify` applied on the way out);
 *   3. the base64/hex/percent decodings of the raw payload;
 *   4. the same decodings of the JSON view, when there is one.
 */
function buildScanViews(payload: string): string[] {
  let jsonView: string | undefined;
  try {
    const leaves: string[] = [];
    collectJsonStrings(JSON.parse(payload), leaves);
    jsonView = leaves.length > 0 ? leaves.join("\n") : undefined;
  } catch {
    // Payload is not JSON: scan the raw text and its decodings only.
  }

  const sources = jsonView === undefined ? [payload] : [payload, jsonView];
  const views = [...sources];
  for (const source of sources) {
    views.push(...decodedVariants(source));
  }
  return views;
}
294
-
295
/**
 * Built-in matcher: plain substring containment, applied to the payload and
 * to each normalized view from `buildScanViews`.
 *
 * A lineage entry is a hit when its tagged text is at least
 * `minMatchLength` characters long and occurs in any view. Because the raw
 * payload is the first view, everything a verbatim scan would catch is still
 * caught; the extra views add JSON-unescaped and base64/hex/percent-decoded
 * forms. The result lists each origin once and counts every distinct tagged
 * string that matched.
 */
export class SubstringEgressMatcher implements EgressMatcher {
  // Deliberately set in the constructor, not via a field initializer: bun's
  // coverage treats a class-field initializer as a separate function that
  // (as of bun 1.3.x) can never be marked covered. Constructor assignment
  // behaves the same at runtime and is counted normally.
  readonly name: string;

  constructor() {
    this.name = "substring";
  }

  match(input: EgressMatchInput): EgressMatchResult {
    const views = buildScanViews(input.payload);
    const origins = new Set<TrustOrigin>();
    let matchCount = 0;

    input.lineage.forEach((origin, tagged) => {
      // Entries below the floor are too short to be meaningful.
      if (tagged.length < input.minMatchLength) return;
      const present = views.some((view) => view.includes(tagged));
      if (!present) return;
      origins.add(origin);
      matchCount += 1;
    });

    return { originsFound: Array.from(origins), matchCount };
  }
}
328
-
329
- /** Shared default-matcher singleton — the built-in egress detection. */
330
- export const substringMatcher: EgressMatcher = new SubstringEgressMatcher();
331
-
332
- export type EgressPolicyOverride = Partial<Record<TrustOrigin, EgressVerdict>>;
333
-
334
- export type ClassifyEgressOptions = {
335
- /**
336
- * Stable identifier for the sink — usually `tool.name` (e.g. `"fetch"`,
337
- * `"mcp:slack:send_message"`). Goes into the audit-log record so an
338
- * incident investigator can trace which sink the egress was destined
339
- * for without needing to reconstruct the call path.
340
- */
341
- readonly sinkId: string;
342
- readonly sinkScope: SinkScope;
343
- /**
344
- * Per-origin severity override for this sink. Highest-precedence: a
345
- * tool descriptor can carry `egressOverride: { subagent: "block" }` to
346
- * tighten policy beyond defaults. Origins not listed fall back to
347
- * `ORIGIN_DEFAULT_POLICY[origin][sinkScope]`.
348
- */
349
- readonly override?: EgressPolicyOverride;
350
- /**
351
- * Per-call cache bypass. Default false — production callers should
352
- * leave caching on. Tests use `true` to assert classification fires.
353
- */
354
- readonly bypassCache?: boolean;
355
- /**
356
- * Minimum match length override. Tests and recipe demos use a smaller
357
- * value to keep fixture payloads short. Production callers should not
358
- * supply this.
359
- */
360
- readonly minMatchLength?: number;
361
- /**
362
- * FR-006 — pluggable matching strategy. Defaults to `substringMatcher`
363
- * (behavior-preserving). Supply an alternate matcher (e.g. the optional
364
- * `@crewhaus/egress-matcher-semantic`) to swap *how* lineage matches are
365
- * detected; the per-origin/per-sink policy and the three audit outcomes
366
- * are unaffected. The cache key namespaces by `matcher.name`, so
367
- * switching matchers mid-run never cross-serves a stale verdict.
368
- */
369
- readonly matcher?: EgressMatcher;
370
- };
371
-
372
- /**
373
- * In-process LRU cache. Key = sha256 over the length-prefixed fields `(matcherName, sinkScope, sinkId, payload, lineageDigest)` — see `cacheKey`.
374
- * Same cap as `boundary-classifier` so the two chokepoints have parallel
375
- * memory budgets.
376
- */
377
- const DEFAULT_CACHE_CAP = 1024;
378
-
379
/**
 * Minimal string-keyed LRU cache built on `Map` insertion order: the first
 * key in the map is the least recently used one.
 *
 * `get` refreshes recency for a hit; `set` inserts (or re-inserts) the key as
 * most recent and evicts from the oldest end while the size exceeds `cap`.
 * A stored value of `undefined` is indistinguishable from a miss.
 */
class LruCache<V> {
  private readonly store: Map<string, V> = new Map();

  constructor(private readonly cap: number) {}

  get(key: string): V | undefined {
    const found = this.store.get(key);
    if (found === undefined) return undefined;
    // Re-insert so the key moves to the most-recent end.
    this.store.delete(key);
    this.store.set(key, found);
    return found;
  }

  set(key: string, value: V): void {
    // Deleting first guarantees the key lands at the most-recent end.
    this.store.delete(key);
    this.store.set(key, value);
    for (const oldest of this.store.keys()) {
      if (!(this.store.size > this.cap)) break;
      this.store.delete(oldest);
    }
  }

  size(): number {
    return this.store.size;
  }

  clear(): void {
    this.store.clear();
  }
}
406
-
407
- type CachedVerdict = {
408
- readonly verdict: EgressVerdict;
409
- readonly originsFound: ReadonlyArray<TrustOrigin>;
410
- readonly matchCount: number;
411
- };
412
-
413
- const cache = new LruCache<CachedVerdict>(DEFAULT_CACHE_CAP);
414
-
415
/**
 * Derive the verdict-cache key: a hex SHA-256 over the fields
 * `matcherName, sinkScope, sinkId, payload, lineageDigest`, in that order.
 *
 * Every field is framed as `<utf8ByteLength>:<field>` before hashing. A
 * plain `"|"` separator would not be injective when a field may itself
 * contain `"|"` — (sinkId="tool|", payload="x") and (sinkId="tool",
 * payload="|x") would collide and let a cached verdict for one payload be
 * served for another, which matters when `sinkId` can be attacker-influenced
 * (e.g. a dynamically discovered MCP tool name). Length framing makes each
 * field self-delimiting whatever it contains.
 */
function cacheKey(
  payload: string,
  sinkScope: SinkScope,
  sinkId: string,
  matcherName: string,
  lineageDigest: string,
): string {
  const hasher = createHash("sha256");
  const orderedFields: string[] = [matcherName, sinkScope, sinkId, payload, lineageDigest];
  orderedFields.forEach((field) => {
    hasher.update(`${Buffer.byteLength(field, "utf8")}:`);
    hasher.update(field, "utf8");
  });
  return hasher.digest("hex");
}
438
-
439
/**
 * Hex SHA-256 over the CONTENT of the lineage map, used as one component of
 * the verdict-cache key.
 *
 * Why it is needed: the lineage map grows during a run, so the same
 * (payload, sink) pair can legitimately classify differently once a secret
 * inside the payload becomes tagged. Without this component a verdict cached
 * before the tag landed would keep being served — an egress-scan bypass.
 *
 * Entries are hashed in sorted key order, each as
 * `<utf8ByteLength(key)>:<key><origin>`, so the digest depends only on the
 * map's content and not on its iteration order (which changes when an entry
 * is deleted and re-inserted on re-tag).
 */
function lineageDigestOf(lineage: ReadonlyMap<string, TrustOrigin>): string {
  const hasher = createHash("sha256");
  // Map keys are unique, so the comparator never needs a tie-break; `<` on
  // strings is the same code-unit ordering the default sort uses.
  const ordered = [...lineage.entries()].sort(([left], [right]) =>
    left < right ? -1 : left > right ? 1 : 0,
  );
  for (const [tagged, origin] of ordered) {
    hasher.update(`${Buffer.byteLength(tagged, "utf8")}:`);
    hasher.update(tagged, "utf8");
    hasher.update(origin, "utf8");
  }
  return hasher.digest("hex");
}
461
-
462
/**
 * Collapse a list of per-origin verdicts into the single most severe one.
 * Severity order is `"block"` > `"warn"` > `"pass"`; an empty list folds to
 * `"pass"`.
 */
function foldVerdict(verdicts: ReadonlyArray<EgressVerdict>): EgressVerdict {
  // Probe from most to least severe and return the first level present.
  for (const level of ["block", "warn"] as const) {
    if (verdicts.includes(level)) return level;
  }
  return "pass";
}
472
-
473
/**
 * Verdict for one origin at one sink scope. An entry for the origin in
 * `override` wins; otherwise the value comes from
 * `ORIGIN_DEFAULT_POLICY[origin][sinkScope]`.
 */
function originVerdict(
  origin: TrustOrigin,
  sinkScope: SinkScope,
  override?: EgressPolicyOverride,
): EgressVerdict {
  const overridden = override === undefined ? undefined : override[origin];
  return overridden === undefined ? ORIGIN_DEFAULT_POLICY[origin][sinkScope] : overridden;
}
482
-
483
/**
 * The single egress chokepoint. Determines which tagged entries of
 * `ctx.dataLineage` the outbound `payload` contains (via the configured
 * matcher), looks up each hit origin's policy for `opts.sinkScope`
 * (`opts.override` first, defaults otherwise) and returns the most severe
 * outcome (`block > warn > pass`).
 *
 * Flow:
 *  1. A missing or empty lineage map returns `"pass"` immediately — nothing
 *     has crossed a boundary yet, so there is nothing to match.
 *  2. Unless `opts.bypassCache` is true, the cache is consulted. The cache
 *     stores raw matcher hits only; the verdict is always recomputed under
 *     the CURRENT override, so policy changes are never masked by the cache.
 *  3. On a miss the matcher runs (sync or async) and its hits are cached.
 *
 * The cache key covers the payload, sink scope, sink id, matcher name, a
 * digest of the lineage content, and the effective `minMatchLength`. The
 * floor has to be part of the key because it changes which entries can
 * match: hits computed under one floor must not be served to a call using a
 * different floor (e.g. an empty result cached under a high floor would
 * otherwise come back as a pass for a default-floor call).
 *
 * @param payload - Serialized outbound payload to inspect.
 * @param ctx - Run context whose `dataLineage` map holds the tagged content.
 * @param opts - Sink identity/scope, optional policy override, cache bypass,
 *   match-length floor and matcher.
 * @returns The verdict plus the origins found, match count, cache flag and
 *   the sink identity echoed back for audit logging.
 * @throws EgressClassifierError when `payload` is not a string.
 */
export async function classifyEgress(
  payload: string,
  ctx: RunContext,
  opts: ClassifyEgressOptions,
): Promise<EgressResult> {
  if (typeof payload !== "string") {
    throw new EgressClassifierError(
      `classifyEgress expected a string payload, got ${typeof payload}`,
    );
  }

  const { sinkId, sinkScope } = opts;

  // Single place where raw hits become a result. Folding an empty origin
  // list yields "pass", so the no-hit case needs no special handling.
  const toResult = (
    originsFound: ReadonlyArray<TrustOrigin>,
    matchCount: number,
    fromCache: boolean,
  ): EgressResult => ({
    verdict: foldVerdict(originsFound.map((o) => originVerdict(o, sinkScope, opts.override))),
    originsFound,
    matchCount,
    fromCache,
    sinkId,
    sinkScope,
  });

  const lineage = ctx.dataLineage;
  // No lineage tagging at all means nothing crossed a boundary yet — pass.
  if (lineage === undefined || lineage.size === 0) {
    return toResult([], 0, false);
  }

  const floor = opts.minMatchLength ?? MIN_MATCH_LENGTH;
  const matcher = opts.matcher ?? substringMatcher;
  const useCache = opts.bypassCache !== true;

  // Only hash when the cache is actually in play. The floor is folded into
  // the matcher-name field as `<floor>:<name>`; a stringified number never
  // contains ":", so the combined field stays unambiguous.
  const key = useCache
    ? cacheKey(payload, sinkScope, sinkId, `${floor}:${matcher.name}`, lineageDigestOf(lineage))
    : undefined;

  if (key !== undefined) {
    const hit = cache.get(key);
    if (hit !== undefined) {
      // Cache stores raw hits; the policy decision is cheap to recompute.
      return toResult(hit.originsFound, hit.matchCount, true);
    }
  }

  // The matcher decides *which* lineage entries the payload contains; the
  // policy fold is matcher-independent. `match` may be sync or async.
  const { originsFound, matchCount } = await matcher.match({
    payload,
    lineage,
    minMatchLength: floor,
  });

  if (key !== undefined) {
    // `verdict` is a placeholder required by `CachedVerdict`; it is never
    // read — the verdict is recomputed from `originsFound` on every hit.
    const cached: CachedVerdict = { verdict: "pass", originsFound, matchCount };
    cache.set(key, cached);
  }

  return toResult(originsFound, matchCount, false);
}
584
-
585
/**
 * Render a one-line, content-free description of an egress result for the
 * audit log. The payload itself is sensitive and must not be re-logged, so
 * callers put this string in `payload_summary` instead.
 *
 * @returns `clean (...)` when no tagged origins were found, otherwise
 *   `<verdict>: <n> match(es) from [<origins>] (...)`.
 */
export function summarizeEgress(result: EgressResult): string {
  const { verdict, matchCount, originsFound, sinkId, sinkScope } = result;
  const destination = `(sink=${sinkId} scope=${sinkScope})`;
  if (originsFound.length === 0) {
    return `clean ${destination}`;
  }
  const origins = originsFound.join(",");
  return `${verdict}: ${matchCount} match(es) from [${origins}] ${destination}`;
}
596
-
597
/**
 * Test/diagnostics helper: drop every entry from the module-level verdict
 * cache so one test's classifications cannot be served to the next.
 */
export function _clearEgressCache(): void {
  cache.clear();
}
601
-
602
/**
 * Test/diagnostics helper: number of entries currently held in the
 * module-level verdict cache.
 */
export function _cacheSize(): number {
  const entries = cache.size();
  return entries;
}