@crewhaus/egress-classifier 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crewhaus/egress-classifier",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "type": "module",
5
5
  "description": "Pillar-3 sink-side chokepoint — classify content leaving via external sinks (fetch / web / mcp / channel / federation / evm-tx) against the data-lineage carried in run-context",
6
6
  "main": "src/index.ts",
@@ -12,14 +12,14 @@
12
12
  "test": "bun test src"
13
13
  },
14
14
  "dependencies": {
15
- "@crewhaus/errors": "0.0.0",
16
- "@crewhaus/run-context": "0.0.0"
15
+ "@crewhaus/errors": "0.1.2",
16
+ "@crewhaus/run-context": "0.1.2"
17
17
  },
18
18
  "license": "Apache-2.0",
19
19
  "author": {
20
20
  "name": "Max Meier",
21
- "email": "max@studiomax.io",
22
- "url": "https://studiomax.io"
21
+ "email": "max@crewhaus.ai",
22
+ "url": "https://crewhaus.ai"
23
23
  },
24
24
  "repository": {
25
25
  "type": "git",
@@ -31,12 +31,7 @@
31
31
  "url": "https://github.com/crewhaus/factory/issues"
32
32
  },
33
33
  "publishConfig": {
34
- "access": "restricted"
34
+ "access": "public"
35
35
  },
36
- "files": [
37
- "src",
38
- "README.md",
39
- "LICENSE",
40
- "NOTICE"
41
- ]
36
+ "files": ["src", "README.md", "LICENSE", "NOTICE"]
42
37
  }
@@ -0,0 +1,486 @@
1
+ /**
2
+ * Supplemental coverage + hardening tests for `egress-classifier`.
3
+ *
4
+ * Companion to `index.test.ts`: the FR-006 acceptance suite there exercises
5
+ * the matcher seam and the headline pass/warn/block flows; this file drives
6
+ * the remaining branches (LRU eviction + recency, every policy-matrix cell,
7
+ * the cache-key framing regression, and the summarize/diagnostics helpers)
8
+ * to 100% and pins the security-relevant invariants.
9
+ */
10
+ import { afterEach, describe, expect, test } from "bun:test";
11
+ import { CrewhausError } from "@crewhaus/errors";
12
+ import { type TrustOrigin, createRunContext, tagContent } from "@crewhaus/run-context";
13
+ import {
14
+ EgressClassifierError,
15
+ type EgressMatcher,
16
+ type EgressResult,
17
+ MIN_MATCH_LENGTH,
18
+ type SinkScope,
19
+ SubstringEgressMatcher,
20
+ _cacheSize,
21
+ _clearEgressCache,
22
+ classifyEgress,
23
+ substringMatcher,
24
+ summarizeEgress,
25
+ } from "./index";
26
+
27
+ afterEach(() => {
28
+ _clearEgressCache();
29
+ });
30
+
31
+ // A trivially deterministic matcher that always reports the given origins.
32
+ function fixedMatcher(
33
+ name: string,
34
+ originsFound: TrustOrigin[],
35
+ matchCount = originsFound.length,
36
+ ): EgressMatcher {
37
+ return { name, match: () => ({ originsFound, matchCount }) };
38
+ }
39
+
40
+ describe("post-match (await) return paths with forced cache miss", () => {
41
+ test("bypassCache + matcher returns no hits → fresh pass after the await", async () => {
42
+ // Guarantees the cache-miss branch runs (bypassCache), the matcher is
43
+ // awaited, and the post-await `originsFound.length === 0` early return is
44
+ // taken — distinct from the no-lineage pre-await pass.
45
+ const ctx = createRunContext();
46
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything-present", "subagent"]]);
47
+ const r = await classifyEgress("outbound bytes", ctx, {
48
+ sinkId: "fetch",
49
+ sinkScope: "external-configured",
50
+ matcher: fixedMatcher("no-hits", [], 0),
51
+ bypassCache: true,
52
+ });
53
+ expect(r.verdict).toBe("pass");
54
+ expect(r.fromCache).toBe(false);
55
+ expect(r.originsFound).toEqual([]);
56
+ expect(r.matchCount).toBe(0);
57
+ expect(_cacheSize()).toBe(0); // bypassCache never wrote
58
+ });
59
+
60
+ test("bypassCache + matcher returns hits → fresh non-pass after the await", async () => {
61
+ const ctx = createRunContext();
62
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything-present", "subagent"]]);
63
+ const r = await classifyEgress("outbound bytes", ctx, {
64
+ sinkId: "fetch",
65
+ sinkScope: "external-dynamic",
66
+ matcher: fixedMatcher("has-hits", ["subagent"], 1),
67
+ bypassCache: true,
68
+ });
69
+ expect(r.verdict).toBe("block");
70
+ expect(r.fromCache).toBe(false);
71
+ expect(_cacheSize()).toBe(0);
72
+ });
73
+
74
+ test("real substring path: cache miss, await, then non-empty return", async () => {
75
+ const ctx = createRunContext();
76
+ tagContent(ctx, "subagent payload that is verbatim present", "subagent");
77
+ const r = await classifyEgress("POST subagent payload that is verbatim present now", ctx, {
78
+ sinkId: "fetch",
79
+ sinkScope: "external-configured",
80
+ bypassCache: true,
81
+ });
82
+ expect(r.verdict).toBe("warn");
83
+ expect(r.fromCache).toBe(false);
84
+ });
85
+
86
+ test("real substring path: cache miss, await, then empty return (no overlap)", async () => {
87
+ const ctx = createRunContext();
88
+ tagContent(ctx, "tagged content that will not appear", "subagent");
89
+ const r = await classifyEgress("a completely disjoint outbound string", ctx, {
90
+ sinkId: "fetch",
91
+ sinkScope: "external-configured",
92
+ bypassCache: true,
93
+ });
94
+ expect(r.verdict).toBe("pass");
95
+ expect(r.fromCache).toBe(false);
96
+ expect(r.originsFound).toEqual([]);
97
+ });
98
+ });
99
+
100
+ describe("policy matrix — every TrustOrigin × SinkScope cell", () => {
101
+ // Every non-user origin: warn on configured, block on dynamic.
102
+ const nonUser: TrustOrigin[] = [
103
+ "mcp",
104
+ "subagent",
105
+ "channel",
106
+ "federation",
107
+ "skill",
108
+ "compaction",
109
+ "tool",
110
+ "chain",
111
+ ];
112
+
113
+ for (const origin of nonUser) {
114
+ test(`${origin}: configured → warn, dynamic → block`, async () => {
115
+ const ctx = createRunContext();
116
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything-tagged", origin]]);
117
+ const m = fixedMatcher(`fixed-${origin}`, [origin], 1);
118
+ const configured = await classifyEgress("payload", ctx, {
119
+ sinkId: "fetch",
120
+ sinkScope: "external-configured",
121
+ matcher: m,
122
+ bypassCache: true,
123
+ });
124
+ const dynamic = await classifyEgress("payload", ctx, {
125
+ sinkId: "dyn",
126
+ sinkScope: "external-dynamic",
127
+ matcher: m,
128
+ bypassCache: true,
129
+ });
130
+ expect(configured.verdict).toBe("warn");
131
+ expect(dynamic.verdict).toBe("block");
132
+ });
133
+ }
134
+
135
+ test("user: pass on both configured and dynamic", async () => {
136
+ const ctx = createRunContext();
137
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything-tagged", "user"]]);
138
+ const m = fixedMatcher("fixed-user", ["user"], 1);
139
+ for (const sinkScope of ["external-configured", "external-dynamic"] as SinkScope[]) {
140
+ const r = await classifyEgress("payload", ctx, {
141
+ sinkId: "s",
142
+ sinkScope,
143
+ matcher: m,
144
+ bypassCache: true,
145
+ });
146
+ expect(r.verdict).toBe("pass");
147
+ expect(r.originsFound).toEqual(["user"]);
148
+ }
149
+ });
150
+ });
151
+
152
+ describe("foldVerdict precedence (via classifyEgress)", () => {
153
+ test("warn wins over pass when no block present", async () => {
154
+ // user (pass) + tool@configured (warn) → warn, exercising the
155
+ // `some(warn)` branch after `some(block)` evaluates to false.
156
+ const ctx = createRunContext();
157
+ ctx.dataLineage = new Map<string, TrustOrigin>([
158
+ ["one", "user"],
159
+ ["two", "tool"],
160
+ ]);
161
+ const m = fixedMatcher("multi", ["user", "tool"], 2);
162
+ const r = await classifyEgress("payload", ctx, {
163
+ sinkId: "fetch",
164
+ sinkScope: "external-configured",
165
+ matcher: m,
166
+ });
167
+ expect(r.verdict).toBe("warn");
168
+ });
169
+
170
+ test("block wins over warn", async () => {
171
+ const ctx = createRunContext();
172
+ ctx.dataLineage = new Map<string, TrustOrigin>([
173
+ ["one", "user"],
174
+ ["two", "mcp"],
175
+ ]);
176
+ // user → pass, mcp@dynamic → block; folded = block.
177
+ const m = fixedMatcher("multi2", ["user", "mcp"], 2);
178
+ const r = await classifyEgress("payload", ctx, {
179
+ sinkId: "dyn",
180
+ sinkScope: "external-dynamic",
181
+ matcher: m,
182
+ });
183
+ expect(r.verdict).toBe("block");
184
+ });
185
+
186
+ test("all-pass origins fold to pass", async () => {
187
+ // Two user hits → foldVerdict reaches the trailing `return "pass"`.
188
+ const ctx = createRunContext();
189
+ ctx.dataLineage = new Map<string, TrustOrigin>([
190
+ ["one", "user"],
191
+ ["two", "user"],
192
+ ]);
193
+ const m = fixedMatcher("two-user", ["user", "user"], 2);
194
+ const r = await classifyEgress("payload", ctx, {
195
+ sinkId: "fetch",
196
+ sinkScope: "external-dynamic",
197
+ matcher: m,
198
+ });
199
+ expect(r.verdict).toBe("pass");
200
+ });
201
+ });
202
+
203
+ describe("override semantics", () => {
204
+ test("override can loosen a default-block to pass on a dynamic sink", async () => {
205
+ const ctx = createRunContext();
206
+ tagContent(ctx, "mcp-sourced content from a server", "mcp");
207
+ const r = await classifyEgress("body: mcp-sourced content from a server", ctx, {
208
+ sinkId: "dyn-mcp",
209
+ sinkScope: "external-dynamic", // default mcp@dynamic = block
210
+ override: { mcp: "pass" },
211
+ });
212
+ expect(r.verdict).toBe("pass");
213
+ expect(r.originsFound).toEqual(["mcp"]);
214
+ });
215
+
216
+ test("override only applies to listed origins; others keep defaults", async () => {
217
+ const ctx = createRunContext();
218
+ ctx.dataLineage = new Map<string, TrustOrigin>([
219
+ ["one", "subagent"],
220
+ ["two", "channel"],
221
+ ]);
222
+ const m = fixedMatcher("two-origin", ["subagent", "channel"], 2);
223
+ // Loosen subagent to pass, leave channel at its dynamic default (block).
224
+ const r = await classifyEgress("payload", ctx, {
225
+ sinkId: "dyn",
226
+ sinkScope: "external-dynamic",
227
+ override: { subagent: "pass" },
228
+ matcher: m,
229
+ });
230
+ expect(r.verdict).toBe("block"); // channel still blocks
231
+ });
232
+
233
+ test("cached verdict is re-folded under a different override on the second call", async () => {
234
+ // First call caches the raw hit (subagent) with no override. Second call
235
+ // serves from cache but recomputes the verdict under a tightening
236
+ // override — exercising the cache-hit `.map(originVerdict)` arrow.
237
+ const ctx = createRunContext();
238
+ const tagged = "subagent content for cache reeval test";
239
+ tagContent(ctx, tagged, "subagent");
240
+ const first = await classifyEgress(`x ${tagged}`, ctx, {
241
+ sinkId: "fetch",
242
+ sinkScope: "external-configured", // warn
243
+ });
244
+ expect(first.fromCache).toBe(false);
245
+ expect(first.verdict).toBe("warn");
246
+
247
+ const second = await classifyEgress(`x ${tagged}`, ctx, {
248
+ sinkId: "fetch",
249
+ sinkScope: "external-configured",
250
+ override: { subagent: "block" }, // tighten
251
+ });
252
+ expect(second.fromCache).toBe(true);
253
+ expect(second.verdict).toBe("block");
254
+ // The cached raw hit is preserved even though the folded verdict changed.
255
+ expect(second.originsFound).toEqual(["subagent"]);
256
+ expect(second.matchCount).toBe(1);
257
+ });
258
+ });
259
+
260
+ describe("cache-key framing (regression: delimiter-collision exfil bypass)", () => {
261
+ test("shifted sinkId/payload boundary does NOT cross-serve a cached verdict", async () => {
262
+ // CONSTRUCT A TRUE COLLISION for the old bare-`|` key scheme. With
263
+ // matcher+scope held constant, these two calls byte-concatenate the same
264
+ // `sinkId|payload` stream:
265
+ // A: sinkId = "tool|", payload = P → "…|tool||P"
266
+ // B: sinkId = "tool", payload = "|" + P → "…|tool||P"
267
+ // Under the vulnerable key, B would hash-collide with A and be served A's
268
+ // cached entry (fromCache:true, cache size stays 1) — a cache-poisoning /
269
+ // egress-scan bypass when sinkId carries attacker influence (e.g. a
270
+ // dynamically discovered MCP tool name). Length-framed keys make the two
271
+ // self-delimiting and therefore distinct.
272
+ const ctx = createRunContext();
273
+ // Lineage must be non-empty so the classifier reaches the cache/match path
274
+ // (an empty lineage short-circuits to pass before any key is computed).
275
+ ctx.dataLineage = new Map<string, TrustOrigin>([["present-tag-entry", "subagent"]]);
276
+ const P = "shared-suffix outbound payload bytes";
277
+ // Use a matcher whose result is independent of payload so the two calls'
278
+ // verdicts would coincide — isolating `fromCache`/size as the sole tell.
279
+ const m = fixedMatcher("framing", ["subagent"], 1);
280
+
281
+ const a = await classifyEgress(P, ctx, {
282
+ sinkId: "tool|",
283
+ sinkScope: "external-configured",
284
+ matcher: m,
285
+ });
286
+ expect(a.fromCache).toBe(false);
287
+ expect(_cacheSize()).toBe(1);
288
+
289
+ const b = await classifyEgress(`|${P}`, ctx, {
290
+ sinkId: "tool",
291
+ sinkScope: "external-configured",
292
+ matcher: m,
293
+ });
294
+ // The discriminator: on the fixed key B is a fresh miss (its own slot);
295
+ // on the vulnerable key B would have been served A's entry.
296
+ expect(b.fromCache).toBe(false);
297
+ expect(_cacheSize()).toBe(2);
298
+ });
299
+
300
+ test("identical (matcher, scope, sinkId, payload) still hits cache", async () => {
301
+ const ctx = createRunContext();
302
+ tagContent(ctx, "subagent content stable for cache", "subagent");
303
+ const p = "POST subagent content stable for cache";
304
+ const first = await classifyEgress(p, ctx, {
305
+ sinkId: "fetch",
306
+ sinkScope: "external-configured",
307
+ });
308
+ const second = await classifyEgress(p, ctx, {
309
+ sinkId: "fetch",
310
+ sinkScope: "external-configured",
311
+ });
312
+ expect(first.fromCache).toBe(false);
313
+ expect(second.fromCache).toBe(true);
314
+ expect(_cacheSize()).toBe(1);
315
+ });
316
+
317
+ test("a literal '|' inside sinkId does not collide with a different split", async () => {
318
+ // Direct key-injectivity check at the classifier boundary: same payload,
319
+ // sinkIds "a|b" vs "a" — must be two cache slots.
320
+ const ctx = createRunContext();
321
+ tagContent(ctx, "subagent content for framing test ok", "subagent");
322
+ await classifyEgress("subagent content for framing test ok", ctx, {
323
+ sinkId: "a|b",
324
+ sinkScope: "external-configured",
325
+ });
326
+ await classifyEgress("subagent content for framing test ok", ctx, {
327
+ sinkId: "a",
328
+ sinkScope: "external-configured",
329
+ });
330
+ expect(_cacheSize()).toBe(2);
331
+ });
332
+ });
333
+
334
+ describe("LRU cache behaviour", () => {
335
+ test("distinct payloads accumulate distinct entries", async () => {
336
+ const ctx = createRunContext();
337
+ tagContent(ctx, "subagent content for lru accumulation", "subagent");
338
+ for (let i = 0; i < 5; i++) {
339
+ await classifyEgress(`payload number ${i} subagent content for lru accumulation`, ctx, {
340
+ sinkId: "fetch",
341
+ sinkScope: "external-configured",
342
+ });
343
+ }
344
+ expect(_cacheSize()).toBe(5);
345
+ });
346
+
347
+ test("re-accessing an entry refreshes its recency (get path)", async () => {
348
+ // Exercises LruCache.get's move-to-end recency bump: an entry read on the
349
+ // second call is a cache hit; eviction under newer entries is not driven here.
350
+ const ctx = createRunContext();
351
+ tagContent(ctx, "subagent recency probe content here", "subagent");
352
+ const p0 = "first subagent recency probe content here";
353
+ const r1 = await classifyEgress(p0, ctx, { sinkId: "fetch", sinkScope: "external-configured" });
354
+ expect(r1.fromCache).toBe(false);
355
+ // Touch p0 again → cache hit, recency refreshed.
356
+ const r2 = await classifyEgress(p0, ctx, { sinkId: "fetch", sinkScope: "external-configured" });
357
+ expect(r2.fromCache).toBe(true);
358
+ expect(_cacheSize()).toBe(1);
359
+ });
360
+
361
+ test("bypassCache never populates the cache (no store on miss)", async () => {
362
+ const ctx = createRunContext();
363
+ tagContent(ctx, "subagent content under bypass mode here", "subagent");
364
+ await classifyEgress("x subagent content under bypass mode here", ctx, {
365
+ sinkId: "fetch",
366
+ sinkScope: "external-configured",
367
+ bypassCache: true,
368
+ });
369
+ expect(_cacheSize()).toBe(0);
370
+ });
371
+ });
372
+
373
+ describe("summarizeEgress", () => {
374
+ const base: Omit<EgressResult, "verdict" | "originsFound" | "matchCount"> = {
375
+ fromCache: false,
376
+ sinkId: "fetch",
377
+ sinkScope: "external-configured",
378
+ };
379
+
380
+ test("clean summary when no origins matched", () => {
381
+ const s = summarizeEgress({ ...base, verdict: "pass", originsFound: [], matchCount: 0 });
382
+ expect(s).toBe("clean (sink=fetch scope=external-configured)");
383
+ });
384
+
385
+ test("warn summary lists origins and count", () => {
386
+ const s = summarizeEgress({
387
+ ...base,
388
+ verdict: "warn",
389
+ originsFound: ["subagent"],
390
+ matchCount: 1,
391
+ sinkScope: "external-configured",
392
+ });
393
+ expect(s).toBe("warn: 1 match(es) from [subagent] (sink=fetch scope=external-configured)");
394
+ });
395
+
396
+ test("block summary with multiple origins joins with commas", () => {
397
+ const s = summarizeEgress({
398
+ verdict: "block",
399
+ originsFound: ["mcp", "federation"],
400
+ matchCount: 4,
401
+ fromCache: true,
402
+ sinkId: "dyn:peer",
403
+ sinkScope: "external-dynamic",
404
+ });
405
+ expect(s).toBe(
406
+ "block: 4 match(es) from [mcp,federation] (sink=dyn:peer scope=external-dynamic)",
407
+ );
408
+ });
409
+ });
410
+
411
+ describe("SubstringEgressMatcher direct", () => {
412
+ test("empty lineage yields no hits", () => {
413
+ const r = new SubstringEgressMatcher().match({
414
+ payload: "anything at all goes here",
415
+ lineage: new Map(),
416
+ minMatchLength: MIN_MATCH_LENGTH,
417
+ });
418
+ expect(r.originsFound).toEqual([]);
419
+ expect(r.matchCount).toBe(0);
420
+ });
421
+
422
+ test("dedupes origins but counts distinct matched strings", () => {
423
+ const lineage = new Map<string, TrustOrigin>([
424
+ ["first tagged string over floor", "subagent"],
425
+ ["second tagged string over floor", "subagent"],
426
+ ]);
427
+ const r = new SubstringEgressMatcher().match({
428
+ payload: "first tagged string over floor and second tagged string over floor",
429
+ lineage,
430
+ minMatchLength: MIN_MATCH_LENGTH,
431
+ });
432
+ expect(r.originsFound).toEqual(["subagent"]); // deduped
433
+ expect(r.matchCount).toBe(2); // two distinct strings
434
+ });
435
+
436
+ test("singleton and class share the same name", () => {
437
+ expect(substringMatcher.name).toBe("substring");
438
+ });
439
+ });
440
+
441
+ describe("diagnostics helpers", () => {
442
+ test("_clearEgressCache empties the cache", async () => {
443
+ const ctx = createRunContext();
444
+ tagContent(ctx, "subagent content to populate cache now", "subagent");
445
+ await classifyEgress("x subagent content to populate cache now", ctx, {
446
+ sinkId: "fetch",
447
+ sinkScope: "external-configured",
448
+ });
449
+ expect(_cacheSize()).toBeGreaterThan(0);
450
+ _clearEgressCache();
451
+ expect(_cacheSize()).toBe(0);
452
+ });
453
+ });
454
+
455
+ describe("EgressClassifierError", () => {
456
+ test("carries the config error code, fixed name, message, and cause chain", () => {
457
+ const cause = new Error("root");
458
+ const err = new EgressClassifierError("boom", cause);
459
+ // The `name` field initializer + constructor (the class's two functions)
460
+ // are exercised directly here, independent of the internal throw site.
461
+ expect(err.name).toBe("EgressClassifierError");
462
+ expect(err.message).toBe("boom");
463
+ expect(err.code).toBe("config");
464
+ expect(err.cause).toBe(cause);
465
+ expect(err).toBeInstanceOf(EgressClassifierError);
466
+ expect(err).toBeInstanceOf(CrewhausError);
467
+ expect(err).toBeInstanceOf(Error);
468
+ });
469
+
470
+ test("cause is optional", () => {
471
+ const err = new EgressClassifierError("no cause");
472
+ expect(err.cause).toBeUndefined();
473
+ expect(err.name).toBe("EgressClassifierError");
474
+ });
475
+
476
+ test("classifyEgress throws an EgressClassifierError for a non-string payload", async () => {
477
+ const ctx = createRunContext();
478
+ await expect(
479
+ // biome-ignore lint/suspicious/noExplicitAny: exercising the runtime type guard
480
+ classifyEgress({ not: "a string" } as any, ctx, {
481
+ sinkId: "fetch",
482
+ sinkScope: "external-configured",
483
+ }),
484
+ ).rejects.toBeInstanceOf(EgressClassifierError);
485
+ });
486
+ });
package/src/index.test.ts CHANGED
@@ -1,10 +1,15 @@
1
1
  import { afterEach, describe, expect, test } from "bun:test";
2
2
  import { type TrustOrigin, createRunContext, tagContent } from "@crewhaus/run-context";
3
3
  import {
4
+ type EgressMatchInput,
5
+ type EgressMatchResult,
6
+ type EgressMatcher,
4
7
  MIN_MATCH_LENGTH,
8
+ SubstringEgressMatcher,
5
9
  _cacheSize,
6
10
  _clearEgressCache,
7
11
  classifyEgress,
12
+ substringMatcher,
8
13
  summarizeEgress,
9
14
  } from "./index";
10
15
 
@@ -63,7 +68,7 @@ describe("classifyEgress", () => {
63
68
 
64
69
  test("ignores tagged content shorter than the match floor", async () => {
65
70
  const ctx = createRunContext();
66
- tagContent(ctx, "abc", "subagent"); // way under 16-char floor
71
+ tagContent(ctx, "abc", "subagent"); // way under the 8-char match floor
67
72
  const result = await classifyEgress("https://example.com/?q=abc", ctx, {
68
73
  sinkId: "fetch",
69
74
  sinkScope: "external-configured",
@@ -74,10 +79,11 @@ describe("classifyEgress", () => {
74
79
 
75
80
  test("respects a custom minMatchLength for fixtures", async () => {
76
81
  const ctx = createRunContext();
77
- // tagContent itself enforces a 16-char floor to keep lineage clean, so
78
- // for short-fixture tests we pre-populate dataLineage directly. In
79
- // production, the classifier's floor and tagContent's floor are both
80
- // 16; minMatchLength override is intended for tests + recipes.
82
+ // tagContent enforces its own floors (16 for blob/lines, 8 for vetted
83
+ // credential tokens) to keep lineage clean, so for short-fixture tests we
84
+ // pre-populate dataLineage directly. In production the classifier's
85
+ // MIN_MATCH_LENGTH=8 backstop matches the token floor; the
86
+ // minMatchLength override is intended for tests + recipes.
81
87
  ctx.dataLineage = new Map<string, TrustOrigin>([["shortish", "subagent"]]);
82
88
  const result = await classifyEgress("payload shortish embedded", ctx, {
83
89
  sinkId: "fetch",
@@ -157,11 +163,52 @@ describe("classifyEgress", () => {
157
163
  classifyEgress(123 as any, ctx, { sinkId: "fetch", sinkScope: "external-configured" }),
158
164
  ).rejects.toThrow(/expected a string/);
159
165
  });
166
+
167
+ // SECURITY (audit R2): the cache key includes a digest of the LINEAGE
168
+ // CONTENT. The lineage map grows during a run; a verdict computed before a
169
+ // secret was tagged must not be served after the tag lands — that would be
170
+ // an egress-scan bypass for every repeated payload.
171
+ test("lineage growth invalidates a cached verdict for the same payload", async () => {
172
+ const ctx = createRunContext();
173
+ tagContent(ctx, "some early boundary content of length", "subagent");
174
+ const payload = "exfiltrating sk-LaterTagged99 now";
175
+ const first = await classifyEgress(payload, ctx, {
176
+ sinkId: "fetch",
177
+ sinkScope: "external-dynamic",
178
+ });
179
+ expect(first.verdict).toBe("pass"); // secret not tagged yet
180
+ // The secret now crosses a boundary and gets token-tagged.
181
+ tagContent(ctx, "key issued: sk-LaterTagged99 keep private", "mcp");
182
+ const second = await classifyEgress(payload, ctx, {
183
+ sinkId: "fetch",
184
+ sinkScope: "external-dynamic",
185
+ });
186
+ expect(second.fromCache).toBe(false); // NOT served stale
187
+ expect(second.verdict).toBe("block");
188
+ expect(second.originsFound).toEqual(["mcp"]);
189
+ });
190
+
191
+ // SECURITY (audit R2): end-to-end short-secret coverage — a credential-
192
+ // shaped token too short for line tagging (under 16 chars) is token-tagged
193
+ // at the boundary and caught at egress when the model extracts JUST the
194
+ // secret from its line.
195
+ test("a short credential token extracted from its line is caught at egress", async () => {
196
+ const ctx = createRunContext();
197
+ tagContent(ctx, "Stripe key for deploys: sk-Ab12Cd34 (rotate quarterly)", "mcp");
198
+ const result = await classifyEgress("posting sk-Ab12Cd34 to a webhook", ctx, {
199
+ sinkId: "fetch",
200
+ sinkScope: "external-dynamic",
201
+ bypassCache: true,
202
+ });
203
+ expect(result.verdict).toBe("block");
204
+ expect(result.matchCount).toBeGreaterThanOrEqual(1);
205
+ expect(result.originsFound).toEqual(["mcp"]);
206
+ });
160
207
  });
161
208
 
162
209
  describe("MIN_MATCH_LENGTH constant", () => {
163
- test("is 16", () => {
164
- expect(MIN_MATCH_LENGTH).toBe(16);
210
+ test("is 8 — parity with run-context's MIN_TOKEN_TAG_LENGTH (audit R2)", () => {
211
+ expect(MIN_MATCH_LENGTH).toBe(8);
165
212
  });
166
213
  });
167
214
 
@@ -195,3 +242,254 @@ describe("summarizeEgress", () => {
195
242
  expect(summary).toContain("dynamic-mcp:foo");
196
243
  });
197
244
  });
245
+
246
+ // ---------------------------------------------------------------------------
247
+ // FR-006 — the EgressMatcher seam.
248
+ // ---------------------------------------------------------------------------
249
+
250
+ describe("SubstringEgressMatcher (FR-006)", () => {
251
+ test('name is "substring" for audit + cache namespacing', () => {
252
+ expect(substringMatcher.name).toBe("substring");
253
+ expect(new SubstringEgressMatcher().name).toBe("substring");
254
+ });
255
+
256
+ test("matches identically to the legacy inline scan", () => {
257
+ // The default matcher is the verbatim pre-FR-006 loop: tagged entries
258
+ // >= floor that the payload contains, deduped origins, distinct count.
259
+ const lineage = new Map<string, TrustOrigin>([
260
+ ["mcp-sourced bearer token segment", "mcp"],
261
+ ["subagent-flagged content from worker", "subagent"],
262
+ ["short", "tool"], // under floor — must be ignored
263
+ ["user-typed sentence here visible", "user"], // not present in payload
264
+ ]);
265
+ const payload =
266
+ "POST mcp-sourced bearer token segment + subagent-flagged content from worker (short)";
267
+ const result = new SubstringEgressMatcher().match({
268
+ payload,
269
+ lineage,
270
+ minMatchLength: MIN_MATCH_LENGTH,
271
+ });
272
+ expect([...result.originsFound].sort()).toEqual(["mcp", "subagent"]);
273
+ expect(result.matchCount).toBe(2); // the two over-floor hits; "short" skipped
274
+ });
275
+
276
+ test("respects the minMatchLength floor passed in the input", () => {
277
+ // Use the concrete class so `.match` is the synchronous overload.
278
+ const m = new SubstringEgressMatcher();
279
+ const lineage = new Map<string, TrustOrigin>([["short67", "subagent"]]);
280
+ // Under default floor (8) → a 7-char tag never matches.
281
+ expect(
282
+ m.match({
283
+ payload: "carries short67 inside",
284
+ lineage,
285
+ minMatchLength: MIN_MATCH_LENGTH,
286
+ }).matchCount,
287
+ ).toBe(0);
288
+ // With a low floor → hit.
289
+ expect(
290
+ m.match({
291
+ payload: "carries short67 inside",
292
+ lineage,
293
+ minMatchLength: 4,
294
+ }).matchCount,
295
+ ).toBe(1);
296
+ });
297
+
298
+ // SECURITY: a prompt-injectable model can re-encode a tagged secret before
299
+ // egress. A verbatim substring scan misses these; the decode-aware views do
300
+ // not. The raw tagged content is the lineage key in every case.
301
+ const TAGGED = "mcp-sourced secret value that exceeds the floor length";
302
+
303
+ test("detects raw tagged content hidden by JSON.stringify escaping (#5)", () => {
304
+ // runtime-core builds the egress payload as JSON.stringify(toolInput). A
305
+ // multi-line tagged string is escaped (\\n, \\\") inside it, so the raw
306
+ // string is NOT a verbatim substring — but the JSON-decoded view recovers it.
307
+ const tagged = `${TAGGED}\nsecond "quoted" line`;
308
+ const lineage = new Map<string, TrustOrigin>([[tagged, "mcp"]]);
309
+ const payload = JSON.stringify({ url: "https://evil.test", body: tagged });
310
+ expect(payload.includes(tagged)).toBe(false); // escaped — verbatim scan misses it
311
+ const result = new SubstringEgressMatcher().match({
312
+ payload,
313
+ lineage,
314
+ minMatchLength: MIN_MATCH_LENGTH,
315
+ });
316
+ expect(result.originsFound).toEqual(["mcp"]);
317
+ expect(result.matchCount).toBe(1);
318
+ });
319
+
320
+ test("detects base64-re-encoded tagged content (#6)", () => {
321
+ const lineage = new Map<string, TrustOrigin>([[TAGGED, "subagent"]]);
322
+ const b64 = Buffer.from(TAGGED, "utf8").toString("base64");
323
+ const payload = JSON.stringify({ note: `exfil: ${b64}` });
324
+ expect(payload.includes(TAGGED)).toBe(false);
325
+ const result = new SubstringEgressMatcher().match({
326
+ payload,
327
+ lineage,
328
+ minMatchLength: MIN_MATCH_LENGTH,
329
+ });
330
+ expect(result.originsFound).toEqual(["subagent"]);
331
+ });
332
+
333
+ test("detects hex-re-encoded tagged content (#6)", () => {
334
+ const lineage = new Map<string, TrustOrigin>([[TAGGED, "channel"]]);
335
+ const hex = Buffer.from(TAGGED, "utf8").toString("hex");
336
+ const result = new SubstringEgressMatcher().match({
337
+ payload: `prefix ${hex} suffix`,
338
+ lineage,
339
+ minMatchLength: MIN_MATCH_LENGTH,
340
+ });
341
+ expect(result.originsFound).toEqual(["channel"]);
342
+ });
343
+
344
+ test("detects percent-encoded tagged content (#6)", () => {
345
+ const lineage = new Map<string, TrustOrigin>([[TAGGED, "federation"]]);
346
+ const result = new SubstringEgressMatcher().match({
347
+ payload: `q=${encodeURIComponent(TAGGED)}`,
348
+ lineage,
349
+ minMatchLength: MIN_MATCH_LENGTH,
350
+ });
351
+ expect(result.originsFound).toEqual(["federation"]);
352
+ });
353
+
354
+ test("does not flag unrelated content (no false positive from decoding)", () => {
355
+ const lineage = new Map<string, TrustOrigin>([[TAGGED, "mcp"]]);
356
+ const payload = JSON.stringify({
357
+ note: Buffer.from("totally unrelated bytes here", "utf8").toString("base64"),
358
+ });
359
+ const result = new SubstringEgressMatcher().match({
360
+ payload,
361
+ lineage,
362
+ minMatchLength: MIN_MATCH_LENGTH,
363
+ });
364
+ expect(result.matchCount).toBe(0);
365
+ });
366
+ });
367
+
368
+ describe("classifyEgress with an injected matcher (FR-006)", () => {
369
+ test("uses the injected matcher's hits and folds policy over them", async () => {
370
+ const ctx = createRunContext();
371
+ // Populate lineage with content the SUBSTRING matcher would NOT find in
372
+ // the payload, proving the verdict came from the injected matcher.
373
+ ctx.dataLineage = new Map<string, TrustOrigin>([
374
+ ["paraphrased-and-reencoded original text", "subagent"],
375
+ ]);
376
+ const fakeMatcher: EgressMatcher = {
377
+ name: "fake-fixed",
378
+ match: (_input: EgressMatchInput): EgressMatchResult => ({
379
+ originsFound: ["subagent"],
380
+ matchCount: 1,
381
+ }),
382
+ };
383
+ const result = await classifyEgress("totally unrelated outbound bytes", ctx, {
384
+ sinkId: "fetch",
385
+ sinkScope: "external-configured", // subagent on configured → warn
386
+ matcher: fakeMatcher,
387
+ });
388
+ // The substring matcher would have returned pass (no verbatim overlap);
389
+ // the injected matcher's hit drives the warn verdict. This proves the
390
+ // policy fold is matcher-independent (acceptance #3).
391
+ expect(result.verdict).toBe("warn");
392
+ expect(result.originsFound).toEqual(["subagent"]);
393
+ expect(result.matchCount).toBe(1);
394
+ });
395
+
396
+ test("custom-matcher hits still respect per-origin/per-sink policy", async () => {
397
+ const ctx = createRunContext();
398
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "subagent"]]);
399
+ const subagentHit: EgressMatcher = {
400
+ name: "subagent-hit",
401
+ match: () => ({ originsFound: ["subagent"], matchCount: 1 }),
402
+ };
403
+ // Same matcher, same hit — warn on configured, block on dynamic. The
404
+ // outcome difference comes purely from sinkScope policy, not the matcher.
405
+ const configured = await classifyEgress("payload", ctx, {
406
+ sinkId: "fetch",
407
+ sinkScope: "external-configured",
408
+ matcher: subagentHit,
409
+ bypassCache: true,
410
+ });
411
+ const dynamic = await classifyEgress("payload", ctx, {
412
+ sinkId: "dyn",
413
+ sinkScope: "external-dynamic",
414
+ matcher: subagentHit,
415
+ bypassCache: true,
416
+ });
417
+ expect(configured.verdict).toBe("warn");
418
+ expect(dynamic.verdict).toBe("block");
419
+ });
420
+
421
+ test("an injected matcher may be async", async () => {
422
+ const ctx = createRunContext();
423
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "mcp"]]);
424
+ const asyncMatcher: EgressMatcher = {
425
+ name: "async-hit",
426
+ match: async () => {
427
+ await Promise.resolve();
428
+ return { originsFound: ["mcp"], matchCount: 2 };
429
+ },
430
+ };
431
+ const result = await classifyEgress("payload", ctx, {
432
+ sinkId: "fetch",
433
+ sinkScope: "external-dynamic", // mcp on dynamic → block
434
+ matcher: asyncMatcher,
435
+ });
436
+ expect(result.verdict).toBe("block");
437
+ expect(result.matchCount).toBe(2);
438
+ });
439
+
440
+ test("cache key namespaces by matcher name (no cross-serve)", async () => {
441
+ const ctx = createRunContext();
442
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "subagent"]]);
443
+ _clearEgressCache();
444
+ // Matcher A finds a hit → warn, and caches under name "matcher-A".
445
+ const matcherA: EgressMatcher = {
446
+ name: "matcher-A",
447
+ match: () => ({ originsFound: ["subagent"], matchCount: 1 }),
448
+ };
449
+ // Matcher B finds nothing → pass, under name "matcher-B". Same payload/sink.
450
+ const matcherB: EgressMatcher = {
451
+ name: "matcher-B",
452
+ match: () => ({ originsFound: [], matchCount: 0 }),
453
+ };
454
+ const a = await classifyEgress("same payload", ctx, {
455
+ sinkId: "fetch",
456
+ sinkScope: "external-configured",
457
+ matcher: matcherA,
458
+ });
459
+ const b = await classifyEgress("same payload", ctx, {
460
+ sinkId: "fetch",
461
+ sinkScope: "external-configured",
462
+ matcher: matcherB,
463
+ });
464
+ expect(a.verdict).toBe("warn");
465
+ expect(a.fromCache).toBe(false);
466
+ // If the cache did NOT namespace by matcher name, B would have served
467
+ // A's cached warn-hit. It must compute its own (pass) verdict instead.
468
+ expect(b.verdict).toBe("pass");
469
+ expect(b.fromCache).toBe(false);
470
+ expect(_cacheSize()).toBe(2); // two distinct keys, not one
471
+ });
472
+
473
+ test("re-running the same matcher does serve from cache", async () => {
474
+ const ctx = createRunContext();
475
+ ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "subagent"]]);
476
+ _clearEgressCache();
477
+ const m: EgressMatcher = {
478
+ name: "stable",
479
+ match: () => ({ originsFound: ["subagent"], matchCount: 1 }),
480
+ };
481
+ const first = await classifyEgress("p", ctx, {
482
+ sinkId: "fetch",
483
+ sinkScope: "external-configured",
484
+ matcher: m,
485
+ });
486
+ const second = await classifyEgress("p", ctx, {
487
+ sinkId: "fetch",
488
+ sinkScope: "external-configured",
489
+ matcher: m,
490
+ });
491
+ expect(first.fromCache).toBe(false);
492
+ expect(second.fromCache).toBe(true);
493
+ expect(second.verdict).toBe("warn");
494
+ });
495
+ });
package/src/index.ts CHANGED
@@ -43,7 +43,7 @@
43
43
  * a perf optimisation.
44
44
  *
45
45
  * Catalog layer: R8 (extension of §18 safety primitives, symmetric to
46
- * `boundary-classifier`). Recipe: demos/walkthroughs/51-egress-fabric.md.
46
+ * `boundary-classifier`). Recipe: demos/walkthroughs/55-egress-fabric.md.
47
47
  */
48
48
  import { createHash } from "node:crypto";
49
49
  import { CrewhausError } from "@crewhaus/errors";
@@ -113,13 +113,221 @@ const ORIGIN_DEFAULT_POLICY: SeverityMatrix = {
113
113
  };
114
114
 
115
115
  /**
116
- * Minimum length for a tagged-content match to count. Short common
117
- * strings (whitespace, single words, IDs ≤8 chars) produce too many
118
- * false positives. 16 chars is the floor that empirically lets through
119
- * benign overlap (`"the"`, `"https"`, short identifiers) while still
120
- * catching meaningful exfil (URLs, tokens, sentences).
116
+ * Minimum length for a tagged-content match to count. This is a BACKSTOP
117
+ * against pathological lineage entries, not the primary false-positive
118
+ * control: insertion discipline lives in run-context's `tagContent`, which
119
+ * only admits whole blobs / lines >= 16 chars and credential-shaped tokens
120
+ * >= 8 chars (audit follow-up R2 — see `MIN_TOKEN_TAG_LENGTH` and
121
+ * `isCredentialShaped` there). 8 matches the token floor so vetted short
122
+ * secrets (sk-..., hex runs, key=value secrets) can actually match at
123
+ * egress; anything shorter is indistinguishable from prose. Keep in sync
124
+ * with run-context's `MIN_TOKEN_TAG_LENGTH`.
121
125
  */
122
- export const MIN_MATCH_LENGTH = 16;
126
+ export const MIN_MATCH_LENGTH = 8;
127
+
128
+ /**
129
+ * FR-006 — the matching step factored behind a strategy interface. The
130
+ * matcher decides *which* tagged lineage entries the outbound payload
131
+ * "contains"; it never decides pass/warn/block. The verdict fold (origin
132
+ * policy + `block > warn > pass` precedence) stays in `classifyEgress`, so
133
+ * the three audit outcomes and their precedence are structurally
134
+ * matcher-independent.
135
+ *
136
+ * The default `SubstringEgressMatcher` is a superset of the verbatim
137
+ * substring scan that lived inline before the seam existed: same
138
+ * `MIN_MATCH_LENGTH` floor, plus decoded views of the payload. An optional
139
+ * embedding-backed matcher ships as `@crewhaus/egress-matcher-semantic`;
140
+ * the default egress path never imports it (no new hard dependency).
141
+ *
142
+ * NOTE: the FR sketch wrote `match(payload, lineage, opts)` with
143
+ * `DataLineage` / `EgressOpts` types. Those names do not exist in the
144
+ * codebase (lineage is `Map<string, TrustOrigin>` on `RunContext`; there
145
+ * is no `DataLineage` type). This implementation uses a single
146
+ * `EgressMatchInput` bag — idiomatic with this codebase's option-bag style
147
+ * — and keeps the matcher returning only raw hits, which strictly
148
+ * strengthens the matcher-independence guarantee.
149
+ */
150
export type EgressMatchInput = Readonly<{
  /** Serialized outbound payload that the matcher inspects. */
  payload: string;
  /** Run-context data lineage: each tagged string mapped to its trust origin. */
  lineage: ReadonlyMap<string, TrustOrigin>;
  /** Tagged entries shorter than this many characters never count as a match. */
  minMatchLength: number;
}>;
158
+
159
+ /**
160
+ * Raw lineage hits — origins whose tagged content the matcher considers
161
+ * present in the payload, plus a count of distinct matched tagged strings.
162
+ * Deliberately verdict-free: `classifyEgress` folds policy over
163
+ * `originsFound`, the matcher does not.
164
+ */
165
export type EgressMatchResult = {
  /** Trust origins whose tagged content the matcher found in the payload. */
  readonly originsFound: readonly TrustOrigin[];
  /** How many tagged lineage strings matched. */
  readonly matchCount: number;
};
169
+
170
+ /**
171
+ * A pluggable egress-matching strategy. `name` namespaces audit/trace
172
+ * records and the verdict cache key (so a semantic-matcher verdict never
173
+ * serves a substring-matcher hit from cache). `match` may be sync or
174
+ * async; `classifyEgress` awaits it either way.
175
+ */
176
export interface EgressMatcher {
  /** Identifier for this strategy; `classifyEgress` feeds it into the verdict cache key. */
  readonly name: string;
  /**
   * Report which lineage entries the payload contains. The result may be
   * returned directly or as a promise; `classifyEgress` awaits it either way.
   */
  match(input: EgressMatchInput): EgressMatchResult | Promise<EgressMatchResult>;
}
180
+
181
+ /**
182
+ * Decode-aware normalization so a tagged secret that was *re-encoded* before
183
+ * egress is still detected. A pure verbatim `payload.includes(tagged)` scan is
184
+ * trivially bypassed by a prompt-injectable model two ways:
185
+ *
186
+ * 1. JSON-escaping — runtime-core builds the egress payload as
187
+ * `JSON.stringify(toolInput)`, which escapes the newlines/quotes/
188
+ * backslashes that tagged MCP/sub-agent/channel content almost always
189
+ * contains, so the raw tagged string is no longer a substring.
190
+ * 2. base64/hex/percent re-encoding — the model emits `base64(secret)`
191
+ * instead of the plaintext.
192
+ *
193
+ * `buildScanViews` returns the payload plus normalized views (JSON-decoded
194
+ * string values, and base64/hex/percent-decoded blobs found in either form),
195
+ * and the matcher tests each tagged entry against ALL of them. The decoders
196
+ * mirror `@crewhaus/prompt-injection-detector` (replicated, not imported, to
197
+ * keep egress-classifier dependency-free; keep the copies in sync) and are
198
+ * bounded (match count + recursion depth) so this is not itself a DoS vector.
199
+ */
200
/**
 * Heuristic used to tell decoded text from decoded binary noise.
 *
 * @param s Candidate decoded string.
 * @returns `true` when strictly more than 85% of the UTF-16 code units are
 *   tab, LF, CR, or visible ASCII (32..126); `false` for the empty string.
 */
function isMostlyPrintable(s: string): boolean {
  const total = s.length;
  if (total === 0) return false;
  let hits = 0;
  let idx = 0;
  while (idx < total) {
    const code = s.charCodeAt(idx++);
    const isTextControl = code === 9 || code === 10 || code === 13;
    const isVisibleAscii = code >= 32 && code < 127;
    if (isTextControl || isVisibleAscii) hits += 1;
  }
  return hits / total > 0.85;
}
209
+
210
/**
 * Attempt to read `blob` as base64-encoded UTF-8 text.
 *
 * @param blob Candidate base64 run.
 * @returns The decoded text when `isMostlyPrintable` accepts it; `undefined`
 *   when the blob is shorter than 16 chars, has a length of 1 modulo 4, or
 *   decodes to something that is not mostly printable.
 */
function tryDecodeBase64(blob: string): string | undefined {
  const tooShort = blob.length < 16;
  const impossibleLength = blob.length % 4 === 1;
  if (tooShort || impossibleLength) return undefined;
  try {
    const text = Buffer.from(blob, "base64").toString("utf8");
    if (isMostlyPrintable(text)) return text;
  } catch {
    // Any decoding failure is treated the same as "not text".
  }
  return undefined;
}
219
+
220
/**
 * Attempt to read `blob` as hex-encoded UTF-8 text.
 *
 * @param blob Candidate hex run.
 * @returns The decoded text when `isMostlyPrintable` accepts it; `undefined`
 *   when the blob is shorter than 16 chars, has odd length, or decodes to
 *   something that is not mostly printable.
 */
function tryDecodeHex(blob: string): string | undefined {
  const tooShort = blob.length < 16;
  const oddLength = blob.length % 2 !== 0;
  if (tooShort || oddLength) return undefined;
  try {
    const text = Buffer.from(blob, "hex").toString("utf8");
    if (isMostlyPrintable(text)) return text;
  } catch {
    // Any decoding failure is treated the same as "not text".
  }
  return undefined;
}
229
+
230
/**
 * Percent-decode `text`, tolerating malformed escapes.
 *
 * `decodeURIComponent` throws a `URIError` for the WHOLE input as soon as it
 * meets one malformed escape, so decoding the text in a single call lets one
 * stray `%` (e.g. "100% done") switch off percent-decoding for everything
 * else in the payload. Instead, each maximal run of well-formed `%XX`
 * escapes is decoded on its own; a run that is not valid UTF-8 is left
 * untouched. For input that the single-call form could decode, the result is
 * identical.
 *
 * @param text Text that may contain percent-escapes.
 * @returns The decoded text, or `undefined` when nothing could be decoded
 *   (so callers do not rescan an identical view).
 */
function tryDecodePercent(text: string): string | undefined {
  const decoded = text.replace(/(?:%[0-9A-Fa-f]{2})+/g, (run) => {
    try {
      return decodeURIComponent(run);
    } catch {
      // Well-formed escapes but an invalid UTF-8 byte sequence: keep as-is.
      return run;
    }
  });
  return decoded !== text ? decoded : undefined;
}
238
+
239
/**
 * Decode the base64, hex and percent-encoded content found in `text`,
 * recursing into every decoded result.
 *
 * Work is bounded: at most 8 base64 candidates and 8 hex candidates are
 * tried per call, recursion stops when `depth` reaches 0, and at most 16
 * variants are returned.
 *
 * @param text Text to search for encoded runs.
 * @param depth Remaining recursion budget (defaults to 2).
 * @returns Decoded strings, each followed by its own nested decodings.
 */
function decodedVariants(text: string, depth = 2): string[] {
  if (depth <= 0 || text.length === 0) return [];

  const collected: string[] = [];
  // Keep a successful, non-empty decoding and whatever it decodes to in turn.
  const record = (decoded: string | undefined): void => {
    if (decoded === undefined || decoded.length === 0) return;
    collected.push(decoded);
    for (const nested of decodedVariants(decoded, depth - 1)) collected.push(nested);
  };
  // First eight matches of `pattern` in `text`, as plain strings.
  const firstMatches = (pattern: RegExp): string[] =>
    Array.from(text.matchAll(pattern), (hit) => hit[0]).slice(0, 8);

  firstMatches(/[A-Za-z0-9+/]{16,}={0,2}/g).forEach((blob) => record(tryDecodeBase64(blob)));
  firstMatches(/(?:[0-9A-Fa-f]{2}){8,}/g).forEach((blob) => record(tryDecodeHex(blob)));
  if (/%[0-9A-Fa-f]{2}/.test(text)) record(tryDecodePercent(text));

  return collected.slice(0, 16);
}
255
+
256
/**
 * Collect every string leaf of a parsed JSON value, in document order.
 *
 * The walk is iterative (explicit stack) rather than recursive, so the
 * nesting depth of the input cannot overflow the call stack. Non-string
 * primitives and `null` are ignored.
 *
 * @param value Parsed JSON value (any shape).
 * @param out Array that receives the string leaves; appended to in place.
 */
function collectJsonStrings(value: unknown, out: string[]): void {
  const pending: unknown[] = [value];
  while (pending.length > 0) {
    const current = pending.pop();
    if (typeof current === "string") {
      out.push(current);
      continue;
    }
    if (current === null || typeof current !== "object") continue;
    const children: unknown[] = Array.isArray(current) ? current : Object.values(current);
    // Push in reverse so children are popped, and thus visited, first-to-last.
    for (let i = children.length - 1; i >= 0; i--) pending.push(children[i]);
  }
}
270
+
271
/**
 * Build the list of strings a tagged entry is scanned against.
 *
 * Order of the result: the raw payload; then, when the payload parses as
 * JSON and contains at least one string value, those values joined by
 * newlines; then the `decodedVariants` of the payload; then the
 * `decodedVariants` of the joined JSON strings.
 *
 * @param payload Serialized outbound payload.
 * @returns The scan views, raw payload first.
 */
function buildScanViews(payload: string): string[] {
  let jsonView: string | undefined;
  try {
    const leaves: string[] = [];
    collectJsonStrings(JSON.parse(payload), leaves);
    jsonView = leaves.length > 0 ? leaves.join("\n") : undefined;
  } catch {
    // Payload is not JSON: only the raw payload and its decodings are scanned.
  }

  const sources = jsonView === undefined ? [payload] : [payload, jsonView];
  const decoded = sources.flatMap((source) => decodedVariants(source));
  return [...sources, ...decoded];
}
294
+
295
+ /**
296
+ * The default egress matcher. A tagged entry counts when it is at least
297
+ * `minMatchLength` chars and appears in the payload OR in any of its
298
+ * normalized views (see `buildScanViews`) — so JSON-escaping and
299
+ * base64/hex/percent re-encoding can no longer slip a tagged secret past the
300
+ * sink-side fabric. The raw payload is always scanned first, so every match
301
+ * the old verbatim scan caught is still caught. `originsFound` is deduped;
302
+ * `matchCount` counts distinct matched tagged strings.
303
+ */
304
export class SubstringEgressMatcher implements EgressMatcher {
  // `name` is set in the constructor on purpose. As a class-field initializer
  // it would be instrumented by bun's coverage as a separate function that
  // (as of bun 1.3.x) is never marked covered, leaving a permanent gap in the
  // function-coverage count. The runtime result is the same either way.
  readonly name: string;

  constructor() {
    this.name = "substring";
  }

  /**
   * Find the lineage entries whose tagged text occurs in the payload or in
   * any view produced by `buildScanViews`.
   *
   * @param input Payload, lineage map, and minimum tagged-entry length.
   * @returns The distinct origins of the matching entries plus the number of
   *   matching entries.
   */
  match(input: EgressMatchInput): EgressMatchResult {
    const { payload, lineage, minMatchLength } = input;
    const views = buildScanViews(payload);
    const origins = new Set<TrustOrigin>();
    let matchCount = 0;

    lineage.forEach((origin, tagged) => {
      // Entries below the floor are never considered.
      if (tagged.length < minMatchLength) return;
      const present = views.some((view) => view.includes(tagged));
      if (!present) return;
      origins.add(origin);
      matchCount += 1;
    });

    return { originsFound: Array.from(origins), matchCount };
  }
}
328
+
329
+ /** Shared default-matcher singleton — the built-in egress detection. */
330
+ export const substringMatcher: EgressMatcher = new SubstringEgressMatcher();
123
331
 
124
332
  export type EgressPolicyOverride = Partial<Record<TrustOrigin, EgressVerdict>>;
125
333
 
@@ -150,6 +358,15 @@ export type ClassifyEgressOptions = {
150
358
  * supply this.
151
359
  */
152
360
  readonly minMatchLength?: number;
361
+ /**
362
+ * FR-006 — pluggable matching strategy. Defaults to `substringMatcher`
363
+ * (behavior-preserving). Supply an alternate matcher (e.g. the optional
364
+ * `@crewhaus/egress-matcher-semantic`) to swap *how* lineage matches are
365
+ * detected; the per-origin/per-sink policy and the three audit outcomes
366
+ * are unaffected. The cache key namespaces by `matcher.name`, so
367
+ * switching matchers mid-run never cross-serves a stale verdict.
368
+ */
369
+ readonly matcher?: EgressMatcher;
153
370
  };
154
371
 
155
372
  /**
@@ -179,9 +396,6 @@ class LruCache<V> {
179
396
  this.map.delete(oldest);
180
397
  }
181
398
  }
182
- has(key: string): boolean {
183
- return this.map.has(key);
184
- }
185
399
  size(): number {
186
400
  return this.map.size;
187
401
  }
@@ -198,15 +412,51 @@ type CachedVerdict = {
198
412
 
199
413
  const cache = new LruCache<CachedVerdict>(DEFAULT_CACHE_CAP);
200
414
 
201
- function cacheKey(payload: string, sinkScope: SinkScope, sinkId: string): string {
202
- const h = createHash("sha256")
203
- .update(sinkScope)
204
- .update("|")
205
- .update(sinkId)
206
- .update("|")
207
- .update(payload, "utf8")
208
- .digest("hex");
209
- return h;
415
/**
 * Derive the verdict-cache key for one classification request.
 *
 * Each field is hashed as `<utf8 byte length>:<field>`, which makes every
 * field self-delimiting. A plain `"|"` separator would not be injective when
 * a field may itself contain `"|"`: (sinkId="tool|", payload="x") and
 * (sinkId="tool", payload="|x") would collide and a cached verdict could be
 * served for a different payload — a real risk when `sinkId` can be
 * attacker-influenced (e.g. a dynamically discovered MCP tool name).
 *
 * @returns Hex-encoded SHA-256 over the framed fields, in the order
 *   matcherName, sinkScope, sinkId, payload, lineageDigest.
 */
function cacheKey(
  payload: string,
  sinkScope: SinkScope,
  sinkId: string,
  matcherName: string,
  lineageDigest: string,
): string {
  const digest = createHash("sha256");
  // Feed one field into the hash, preceded by its byte length and a colon.
  const absorb = (part: string): void => {
    digest.update(`${Buffer.byteLength(part, "utf8")}:`);
    digest.update(part, "utf8");
  };
  [matcherName, sinkScope, sinkId, payload, lineageDigest].forEach((part) => absorb(part));
  return digest.digest("hex");
}
438
+
439
/**
 * Stable digest of the lineage map's content, used as a cache-key component.
 *
 * The lineage map grows during a run, so the same (payload, sink) pair can
 * legitimately classify differently once content inside the payload gets
 * tagged; keying the cache on this digest stops a verdict computed against an
 * older lineage from being served afterwards.
 *
 * Entries are sorted by key, so the digest does not depend on Map iteration
 * order (which changes when an entry is deleted and re-inserted). Both the
 * key and the origin are hashed as `<utf8 byte length>:<value>`. Framing only
 * the key leaves the stream ambiguous: `{ab→"x", cde→"y"}` and
 * `{ab→"x3:cdey"}` would produce identical bytes.
 *
 * @param lineage Tagged content → trust origin.
 * @returns Hex-encoded SHA-256 of the sorted, length-framed entries.
 */
function lineageDigestOf(lineage: ReadonlyMap<string, TrustOrigin>): string {
  const h = createHash("sha256");
  // Feed one self-delimiting field into the hash.
  const frame = (field: string): void => {
    h.update(`${Buffer.byteLength(field, "utf8")}:`);
    h.update(field, "utf8");
  };
  // Code-unit ordering, the same order a default string sort of the keys gives.
  const entries = [...lineage.entries()].sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
  for (const [tagged, origin] of entries) {
    frame(tagged);
    frame(origin);
  }
  return h.digest("hex");
}
211
461
 
212
462
  /**
@@ -265,7 +515,22 @@ export async function classifyEgress(
265
515
  };
266
516
  }
267
517
 
268
- const key = cacheKey(payload, opts.sinkScope, opts.sinkId);
518
+ const floor = opts.minMatchLength ?? MIN_MATCH_LENGTH;
519
+ const matcher = opts.matcher ?? substringMatcher;
520
+
521
+ // Namespace the cache by matcher name so a verdict produced by one
522
+ // matcher (e.g. semantic) is never served to a call using another
523
+ // (e.g. substring) over the same (sinkScope, sinkId, payload) — and by a
524
+ // digest of the lineage content so a verdict computed against an OLDER,
525
+ // smaller lineage is never served after new tags land (see
526
+ // `lineageDigestOf`).
527
+ const key = cacheKey(
528
+ payload,
529
+ opts.sinkScope,
530
+ opts.sinkId,
531
+ matcher.name,
532
+ lineageDigestOf(lineage),
533
+ );
269
534
  if (opts.bypassCache !== true) {
270
535
  const hit = cache.get(key);
271
536
  if (hit !== undefined) {
@@ -283,19 +548,13 @@ export async function classifyEgress(
283
548
  }
284
549
  }
285
550
 
286
- const floor = opts.minMatchLength ?? MIN_MATCH_LENGTH;
287
- const seen = new Set<TrustOrigin>();
288
- let matchCount = 0;
289
-
290
- for (const [tagged, origin] of lineage.entries()) {
291
- if (tagged.length < floor) continue;
292
- if (payload.includes(tagged)) {
293
- seen.add(origin);
294
- matchCount += 1;
295
- }
296
- }
297
-
298
- const originsFound: ReadonlyArray<TrustOrigin> = [...seen];
551
+ // The matcher decides *which* lineage entries the payload contains; the
552
+ // policy fold below is matcher-independent. `match` may be sync or async.
553
+ const { originsFound, matchCount } = await matcher.match({
554
+ payload,
555
+ lineage,
556
+ minMatchLength: floor,
557
+ });
299
558
  const cached: CachedVerdict = { verdict: "pass", originsFound, matchCount };
300
559
  if (opts.bypassCache !== true) {
301
560
  cache.set(key, cached);