@crewhaus/egress-classifier 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.test.ts DELETED
@@ -1,495 +0,0 @@
1
- import { afterEach, describe, expect, test } from "bun:test";
2
- import { type TrustOrigin, createRunContext, tagContent } from "@crewhaus/run-context";
3
- import {
4
- type EgressMatchInput,
5
- type EgressMatchResult,
6
- type EgressMatcher,
7
- MIN_MATCH_LENGTH,
8
- SubstringEgressMatcher,
9
- _cacheSize,
10
- _clearEgressCache,
11
- classifyEgress,
12
- substringMatcher,
13
- summarizeEgress,
14
- } from "./index";
15
-
16
- afterEach(() => {
17
- _clearEgressCache();
18
- });
19
-
20
- describe("classifyEgress", () => {
21
- test("returns pass when run-context has no dataLineage", async () => {
22
- const ctx = createRunContext();
23
- const result = await classifyEgress("any outbound payload", ctx, {
24
- sinkId: "fetch",
25
- sinkScope: "external-configured",
26
- });
27
- expect(result.verdict).toBe("pass");
28
- expect(result.originsFound).toEqual([]);
29
- expect(result.matchCount).toBe(0);
30
- });
31
-
32
- test("returns pass for user-origin content even at strict sink", async () => {
33
- const ctx = createRunContext();
34
- const tagged = "this is user-typed CLI input string";
35
- tagContent(ctx, tagged, "user");
36
- const result = await classifyEgress(`prefix ${tagged} suffix`, ctx, {
37
- sinkId: "fetch",
38
- sinkScope: "external-dynamic",
39
- });
40
- expect(result.verdict).toBe("pass");
41
- expect(result.originsFound).toEqual(["user"]);
42
- expect(result.matchCount).toBe(1);
43
- });
44
-
45
- test("warns when subagent content reaches a configured external sink", async () => {
46
- const ctx = createRunContext();
47
- const tagged = "API_KEY=sleeper-token-12345";
48
- tagContent(ctx, tagged, "subagent");
49
- const result = await classifyEgress(`POST body: ${tagged}`, ctx, {
50
- sinkId: "fetch",
51
- sinkScope: "external-configured",
52
- });
53
- expect(result.verdict).toBe("warn");
54
- expect(result.originsFound).toEqual(["subagent"]);
55
- });
56
-
57
- test("blocks when subagent content reaches a dynamic external sink", async () => {
58
- const ctx = createRunContext();
59
- const tagged = "API_KEY=sleeper-token-12345";
60
- tagContent(ctx, tagged, "subagent");
61
- const result = await classifyEgress(`Bearer ${tagged}`, ctx, {
62
- sinkId: "dynamic-mcp:foo",
63
- sinkScope: "external-dynamic",
64
- });
65
- expect(result.verdict).toBe("block");
66
- expect(result.originsFound).toEqual(["subagent"]);
67
- });
68
-
69
- test("ignores tagged content shorter than the match floor", async () => {
70
- const ctx = createRunContext();
71
- tagContent(ctx, "abc", "subagent"); // way under the 8-char match floor
72
- const result = await classifyEgress("https://example.com/?q=abc", ctx, {
73
- sinkId: "fetch",
74
- sinkScope: "external-configured",
75
- });
76
- expect(result.verdict).toBe("pass");
77
- expect(result.matchCount).toBe(0);
78
- });
79
-
80
- test("respects a custom minMatchLength for fixtures", async () => {
81
- const ctx = createRunContext();
82
- // tagContent enforces its own floors (16 for blob/lines, 8 for vetted
83
- // credential tokens) to keep lineage clean, so for short-fixture tests we
84
- // pre-populate dataLineage directly. In production the classifier's
85
- // MIN_MATCH_LENGTH=8 backstop matches the token floor; the
86
- // minMatchLength override is intended for tests + recipes.
87
- ctx.dataLineage = new Map<string, TrustOrigin>([["shortish", "subagent"]]);
88
- const result = await classifyEgress("payload shortish embedded", ctx, {
89
- sinkId: "fetch",
90
- sinkScope: "external-configured",
91
- minMatchLength: 4,
92
- });
93
- expect(result.verdict).toBe("warn");
94
- expect(result.matchCount).toBe(1);
95
- });
96
-
97
- test("folds to the most severe origin across multiple matches", async () => {
98
- const ctx = createRunContext();
99
- tagContent(ctx, "user-typed sentence here visible", "user");
100
- tagContent(ctx, "mcp-sourced bearer token segment", "mcp");
101
- const result = await classifyEgress(
102
- "user-typed sentence here visible + mcp-sourced bearer token segment",
103
- ctx,
104
- {
105
- sinkId: "dynamic-fetch",
106
- sinkScope: "external-dynamic",
107
- },
108
- );
109
- expect(result.verdict).toBe("block"); // mcp on dynamic-sink → block
110
- expect(result.originsFound).toContain("user");
111
- expect(result.originsFound).toContain("mcp");
112
- });
113
-
114
- test("override tightens policy beyond default", async () => {
115
- const ctx = createRunContext();
116
- tagContent(ctx, "subagent-flagged content from worker", "subagent");
117
- const result = await classifyEgress("POST: subagent-flagged content from worker", ctx, {
118
- sinkId: "fetch",
119
- sinkScope: "external-configured", // default = warn
120
- override: { subagent: "block" },
121
- });
122
- expect(result.verdict).toBe("block");
123
- });
124
-
125
- test("caches verdicts by (sinkScope, sinkId, payload)", async () => {
126
- const ctx = createRunContext();
127
- tagContent(ctx, "content tagged by subagent boundary", "subagent");
128
- _clearEgressCache();
129
- const first = await classifyEgress("POST content tagged by subagent boundary", ctx, {
130
- sinkId: "fetch",
131
- sinkScope: "external-configured",
132
- });
133
- expect(first.fromCache).toBe(false);
134
- expect(_cacheSize()).toBe(1);
135
-
136
- const second = await classifyEgress("POST content tagged by subagent boundary", ctx, {
137
- sinkId: "fetch",
138
- sinkScope: "external-configured",
139
- });
140
- expect(second.fromCache).toBe(true);
141
- expect(second.verdict).toBe("warn");
142
- });
143
-
144
- test("cache bypass forces re-evaluation", async () => {
145
- const ctx = createRunContext();
146
- tagContent(ctx, "content tagged by subagent boundary", "subagent");
147
- await classifyEgress("POST content tagged by subagent boundary", ctx, {
148
- sinkId: "fetch",
149
- sinkScope: "external-configured",
150
- });
151
- const re = await classifyEgress("POST content tagged by subagent boundary", ctx, {
152
- sinkId: "fetch",
153
- sinkScope: "external-configured",
154
- bypassCache: true,
155
- });
156
- expect(re.fromCache).toBe(false);
157
- });
158
-
159
- test("rejects non-string payloads", async () => {
160
- const ctx = createRunContext();
161
- await expect(
162
- // biome-ignore lint/suspicious/noExplicitAny: testing runtime guard
163
- classifyEgress(123 as any, ctx, { sinkId: "fetch", sinkScope: "external-configured" }),
164
- ).rejects.toThrow(/expected a string/);
165
- });
166
-
167
- // SECURITY (audit R2): the cache key includes a digest of the LINEAGE
168
- // CONTENT. The lineage map grows during a run; a verdict computed before a
169
- // secret was tagged must not be served after the tag lands — that would be
170
- // an egress-scan bypass for every repeated payload.
171
- test("lineage growth invalidates a cached verdict for the same payload", async () => {
172
- const ctx = createRunContext();
173
- tagContent(ctx, "some early boundary content of length", "subagent");
174
- const payload = "exfiltrating sk-LaterTagged99 now";
175
- const first = await classifyEgress(payload, ctx, {
176
- sinkId: "fetch",
177
- sinkScope: "external-dynamic",
178
- });
179
- expect(first.verdict).toBe("pass"); // secret not tagged yet
180
- // The secret now crosses a boundary and gets token-tagged.
181
- tagContent(ctx, "key issued: sk-LaterTagged99 keep private", "mcp");
182
- const second = await classifyEgress(payload, ctx, {
183
- sinkId: "fetch",
184
- sinkScope: "external-dynamic",
185
- });
186
- expect(second.fromCache).toBe(false); // NOT served stale
187
- expect(second.verdict).toBe("block");
188
- expect(second.originsFound).toEqual(["mcp"]);
189
- });
190
-
191
- // SECURITY (audit R2): end-to-end short-secret coverage — a credential-
192
- // shaped token too short for line tagging (under 16 chars) is token-tagged
193
- // at the boundary and caught at egress when the model extracts JUST the
194
- // secret from its line.
195
- test("a short credential token extracted from its line is caught at egress", async () => {
196
- const ctx = createRunContext();
197
- tagContent(ctx, "Stripe key for deploys: sk-Ab12Cd34 (rotate quarterly)", "mcp");
198
- const result = await classifyEgress("posting sk-Ab12Cd34 to a webhook", ctx, {
199
- sinkId: "fetch",
200
- sinkScope: "external-dynamic",
201
- bypassCache: true,
202
- });
203
- expect(result.verdict).toBe("block");
204
- expect(result.matchCount).toBeGreaterThanOrEqual(1);
205
- expect(result.originsFound).toEqual(["mcp"]);
206
- });
207
- });
208
-
209
- describe("MIN_MATCH_LENGTH constant", () => {
210
- test("is 8 — parity with run-context's MIN_TOKEN_TAG_LENGTH (audit R2)", () => {
211
- expect(MIN_MATCH_LENGTH).toBe(8);
212
- });
213
- });
214
-
215
- describe("summarizeEgress", () => {
216
- test("formats a clean verdict for audit logs", () => {
217
- const summary = summarizeEgress({
218
- verdict: "pass",
219
- originsFound: [],
220
- matchCount: 0,
221
- fromCache: false,
222
- sinkId: "fetch",
223
- sinkScope: "external-configured",
224
- });
225
- expect(summary).toContain("clean");
226
- expect(summary).toContain("fetch");
227
- expect(summary).toContain("external-configured");
228
- });
229
-
230
- test("formats a block verdict with origin list", () => {
231
- const summary = summarizeEgress({
232
- verdict: "block",
233
- originsFound: ["mcp", "subagent"],
234
- matchCount: 3,
235
- fromCache: false,
236
- sinkId: "dynamic-mcp:foo",
237
- sinkScope: "external-dynamic",
238
- });
239
- expect(summary).toContain("block");
240
- expect(summary).toContain("3");
241
- expect(summary).toContain("mcp,subagent");
242
- expect(summary).toContain("dynamic-mcp:foo");
243
- });
244
- });
245
-
246
- // ---------------------------------------------------------------------------
247
- // FR-006 — the EgressMatcher seam.
248
- // ---------------------------------------------------------------------------
249
-
250
- describe("SubstringEgressMatcher (FR-006)", () => {
251
- test('name is "substring" for audit + cache namespacing', () => {
252
- expect(substringMatcher.name).toBe("substring");
253
- expect(new SubstringEgressMatcher().name).toBe("substring");
254
- });
255
-
256
- test("matches identically to the legacy inline scan", () => {
257
- // The default matcher is the verbatim pre-FR-006 loop: tagged entries
258
- // >= floor that the payload contains, deduped origins, distinct count.
259
- const lineage = new Map<string, TrustOrigin>([
260
- ["mcp-sourced bearer token segment", "mcp"],
261
- ["subagent-flagged content from worker", "subagent"],
262
- ["short", "tool"], // under floor — must be ignored
263
- ["user-typed sentence here visible", "user"], // not present in payload
264
- ]);
265
- const payload =
266
- "POST mcp-sourced bearer token segment + subagent-flagged content from worker (short)";
267
- const result = new SubstringEgressMatcher().match({
268
- payload,
269
- lineage,
270
- minMatchLength: MIN_MATCH_LENGTH,
271
- });
272
- expect([...result.originsFound].sort()).toEqual(["mcp", "subagent"]);
273
- expect(result.matchCount).toBe(2); // the two over-floor hits; "short" skipped
274
- });
275
-
276
- test("respects the minMatchLength floor passed in the input", () => {
277
- // Use the concrete class so `.match` is the synchronous overload.
278
- const m = new SubstringEgressMatcher();
279
- const lineage = new Map<string, TrustOrigin>([["short67", "subagent"]]);
280
- // Under default floor (8) → a 7-char tag never matches.
281
- expect(
282
- m.match({
283
- payload: "carries short67 inside",
284
- lineage,
285
- minMatchLength: MIN_MATCH_LENGTH,
286
- }).matchCount,
287
- ).toBe(0);
288
- // With a low floor → hit.
289
- expect(
290
- m.match({
291
- payload: "carries short67 inside",
292
- lineage,
293
- minMatchLength: 4,
294
- }).matchCount,
295
- ).toBe(1);
296
- });
297
-
298
- // SECURITY: a prompt-injectable model can re-encode a tagged secret before
299
- // egress. A verbatim substring scan misses these; the decode-aware views do
300
- // not. The raw tagged content is the lineage key in every case.
301
- const TAGGED = "mcp-sourced secret value that exceeds the floor length";
302
-
303
- test("detects raw tagged content hidden by JSON.stringify escaping (#5)", () => {
304
- // runtime-core builds the egress payload as JSON.stringify(toolInput). A
305
- // multi-line tagged string is escaped (\\n, \\\") inside it, so the raw
306
- // string is NOT a verbatim substring — but the JSON-decoded view recovers it.
307
- const tagged = `${TAGGED}\nsecond "quoted" line`;
308
- const lineage = new Map<string, TrustOrigin>([[tagged, "mcp"]]);
309
- const payload = JSON.stringify({ url: "https://evil.test", body: tagged });
310
- expect(payload.includes(tagged)).toBe(false); // escaped — verbatim scan misses it
311
- const result = new SubstringEgressMatcher().match({
312
- payload,
313
- lineage,
314
- minMatchLength: MIN_MATCH_LENGTH,
315
- });
316
- expect(result.originsFound).toEqual(["mcp"]);
317
- expect(result.matchCount).toBe(1);
318
- });
319
-
320
- test("detects base64-re-encoded tagged content (#6)", () => {
321
- const lineage = new Map<string, TrustOrigin>([[TAGGED, "subagent"]]);
322
- const b64 = Buffer.from(TAGGED, "utf8").toString("base64");
323
- const payload = JSON.stringify({ note: `exfil: ${b64}` });
324
- expect(payload.includes(TAGGED)).toBe(false);
325
- const result = new SubstringEgressMatcher().match({
326
- payload,
327
- lineage,
328
- minMatchLength: MIN_MATCH_LENGTH,
329
- });
330
- expect(result.originsFound).toEqual(["subagent"]);
331
- });
332
-
333
- test("detects hex-re-encoded tagged content (#6)", () => {
334
- const lineage = new Map<string, TrustOrigin>([[TAGGED, "channel"]]);
335
- const hex = Buffer.from(TAGGED, "utf8").toString("hex");
336
- const result = new SubstringEgressMatcher().match({
337
- payload: `prefix ${hex} suffix`,
338
- lineage,
339
- minMatchLength: MIN_MATCH_LENGTH,
340
- });
341
- expect(result.originsFound).toEqual(["channel"]);
342
- });
343
-
344
- test("detects percent-encoded tagged content (#6)", () => {
345
- const lineage = new Map<string, TrustOrigin>([[TAGGED, "federation"]]);
346
- const result = new SubstringEgressMatcher().match({
347
- payload: `q=${encodeURIComponent(TAGGED)}`,
348
- lineage,
349
- minMatchLength: MIN_MATCH_LENGTH,
350
- });
351
- expect(result.originsFound).toEqual(["federation"]);
352
- });
353
-
354
- test("does not flag unrelated content (no false positive from decoding)", () => {
355
- const lineage = new Map<string, TrustOrigin>([[TAGGED, "mcp"]]);
356
- const payload = JSON.stringify({
357
- note: Buffer.from("totally unrelated bytes here", "utf8").toString("base64"),
358
- });
359
- const result = new SubstringEgressMatcher().match({
360
- payload,
361
- lineage,
362
- minMatchLength: MIN_MATCH_LENGTH,
363
- });
364
- expect(result.matchCount).toBe(0);
365
- });
366
- });
367
-
368
- describe("classifyEgress with an injected matcher (FR-006)", () => {
369
- test("uses the injected matcher's hits and folds policy over them", async () => {
370
- const ctx = createRunContext();
371
- // Populate lineage with content the SUBSTRING matcher would NOT find in
372
- // the payload, proving the verdict came from the injected matcher.
373
- ctx.dataLineage = new Map<string, TrustOrigin>([
374
- ["paraphrased-and-reencoded original text", "subagent"],
375
- ]);
376
- const fakeMatcher: EgressMatcher = {
377
- name: "fake-fixed",
378
- match: (_input: EgressMatchInput): EgressMatchResult => ({
379
- originsFound: ["subagent"],
380
- matchCount: 1,
381
- }),
382
- };
383
- const result = await classifyEgress("totally unrelated outbound bytes", ctx, {
384
- sinkId: "fetch",
385
- sinkScope: "external-configured", // subagent on configured → warn
386
- matcher: fakeMatcher,
387
- });
388
- // The substring matcher would have returned pass (no verbatim overlap);
389
- // the injected matcher's hit drives the warn verdict. This proves the
390
- // policy fold is matcher-independent (acceptance #3).
391
- expect(result.verdict).toBe("warn");
392
- expect(result.originsFound).toEqual(["subagent"]);
393
- expect(result.matchCount).toBe(1);
394
- });
395
-
396
- test("custom-matcher hits still respect per-origin/per-sink policy", async () => {
397
- const ctx = createRunContext();
398
- ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "subagent"]]);
399
- const subagentHit: EgressMatcher = {
400
- name: "subagent-hit",
401
- match: () => ({ originsFound: ["subagent"], matchCount: 1 }),
402
- };
403
- // Same matcher, same hit — warn on configured, block on dynamic. The
404
- // outcome difference comes purely from sinkScope policy, not the matcher.
405
- const configured = await classifyEgress("payload", ctx, {
406
- sinkId: "fetch",
407
- sinkScope: "external-configured",
408
- matcher: subagentHit,
409
- bypassCache: true,
410
- });
411
- const dynamic = await classifyEgress("payload", ctx, {
412
- sinkId: "dyn",
413
- sinkScope: "external-dynamic",
414
- matcher: subagentHit,
415
- bypassCache: true,
416
- });
417
- expect(configured.verdict).toBe("warn");
418
- expect(dynamic.verdict).toBe("block");
419
- });
420
-
421
- test("an injected matcher may be async", async () => {
422
- const ctx = createRunContext();
423
- ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "mcp"]]);
424
- const asyncMatcher: EgressMatcher = {
425
- name: "async-hit",
426
- match: async () => {
427
- await Promise.resolve();
428
- return { originsFound: ["mcp"], matchCount: 2 };
429
- },
430
- };
431
- const result = await classifyEgress("payload", ctx, {
432
- sinkId: "fetch",
433
- sinkScope: "external-dynamic", // mcp on dynamic → block
434
- matcher: asyncMatcher,
435
- });
436
- expect(result.verdict).toBe("block");
437
- expect(result.matchCount).toBe(2);
438
- });
439
-
440
- test("cache key namespaces by matcher name (no cross-serve)", async () => {
441
- const ctx = createRunContext();
442
- ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "subagent"]]);
443
- _clearEgressCache();
444
- // Matcher A finds a hit → warn, and caches under name "A".
445
- const matcherA: EgressMatcher = {
446
- name: "matcher-A",
447
- match: () => ({ originsFound: ["subagent"], matchCount: 1 }),
448
- };
449
- // Matcher B finds nothing → pass, under name "B". Same payload/sink.
450
- const matcherB: EgressMatcher = {
451
- name: "matcher-B",
452
- match: () => ({ originsFound: [], matchCount: 0 }),
453
- };
454
- const a = await classifyEgress("same payload", ctx, {
455
- sinkId: "fetch",
456
- sinkScope: "external-configured",
457
- matcher: matcherA,
458
- });
459
- const b = await classifyEgress("same payload", ctx, {
460
- sinkId: "fetch",
461
- sinkScope: "external-configured",
462
- matcher: matcherB,
463
- });
464
- expect(a.verdict).toBe("warn");
465
- expect(a.fromCache).toBe(false);
466
- // If the cache did NOT namespace by matcher name, B would have served
467
- // A's cached warn-hit. It must compute its own (pass) verdict instead.
468
- expect(b.verdict).toBe("pass");
469
- expect(b.fromCache).toBe(false);
470
- expect(_cacheSize()).toBe(2); // two distinct keys, not one
471
- });
472
-
473
- test("re-running the same matcher does serve from cache", async () => {
474
- const ctx = createRunContext();
475
- ctx.dataLineage = new Map<string, TrustOrigin>([["anything", "subagent"]]);
476
- _clearEgressCache();
477
- const m: EgressMatcher = {
478
- name: "stable",
479
- match: () => ({ originsFound: ["subagent"], matchCount: 1 }),
480
- };
481
- const first = await classifyEgress("p", ctx, {
482
- sinkId: "fetch",
483
- sinkScope: "external-configured",
484
- matcher: m,
485
- });
486
- const second = await classifyEgress("p", ctx, {
487
- sinkId: "fetch",
488
- sinkScope: "external-configured",
489
- matcher: m,
490
- });
491
- expect(first.fromCache).toBe(false);
492
- expect(second.fromCache).toBe(true);
493
- expect(second.verdict).toBe("warn");
494
- });
495
- });