@bookedsolid/rea 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.husky/pre-push +15 -18
  2. package/README.md +41 -1
  3. package/THREAT_MODEL.md +100 -29
  4. package/dist/audit/append.d.ts +21 -8
  5. package/dist/audit/append.js +48 -83
  6. package/dist/audit/fs.d.ts +68 -0
  7. package/dist/audit/fs.js +171 -0
  8. package/dist/cli/audit.d.ts +40 -0
  9. package/dist/cli/audit.js +205 -0
  10. package/dist/cli/doctor.d.ts +19 -4
  11. package/dist/cli/doctor.js +172 -5
  12. package/dist/cli/index.js +26 -1
  13. package/dist/cli/init.js +93 -7
  14. package/dist/cli/install/pre-push.d.ts +335 -0
  15. package/dist/cli/install/pre-push.js +2818 -0
  16. package/dist/cli/serve.d.ts +64 -0
  17. package/dist/cli/serve.js +270 -2
  18. package/dist/cli/status.d.ts +90 -0
  19. package/dist/cli/status.js +399 -0
  20. package/dist/cli/utils.d.ts +4 -0
  21. package/dist/cli/utils.js +4 -0
  22. package/dist/gateway/audit/rotator.d.ts +116 -0
  23. package/dist/gateway/audit/rotator.js +289 -0
  24. package/dist/gateway/circuit-breaker.d.ts +17 -0
  25. package/dist/gateway/circuit-breaker.js +32 -3
  26. package/dist/gateway/downstream-pool.d.ts +2 -1
  27. package/dist/gateway/downstream-pool.js +2 -2
  28. package/dist/gateway/downstream.d.ts +39 -3
  29. package/dist/gateway/downstream.js +73 -14
  30. package/dist/gateway/log.d.ts +122 -0
  31. package/dist/gateway/log.js +334 -0
  32. package/dist/gateway/middleware/audit.d.ts +24 -1
  33. package/dist/gateway/middleware/audit.js +103 -58
  34. package/dist/gateway/middleware/blocked-paths.d.ts +0 -9
  35. package/dist/gateway/middleware/blocked-paths.js +439 -67
  36. package/dist/gateway/middleware/injection.d.ts +218 -13
  37. package/dist/gateway/middleware/injection.js +433 -51
  38. package/dist/gateway/middleware/kill-switch.d.ts +10 -1
  39. package/dist/gateway/middleware/kill-switch.js +20 -1
  40. package/dist/gateway/observability/metrics.d.ts +125 -0
  41. package/dist/gateway/observability/metrics.js +321 -0
  42. package/dist/gateway/server.d.ts +19 -0
  43. package/dist/gateway/server.js +99 -15
  44. package/dist/policy/loader.d.ts +47 -0
  45. package/dist/policy/loader.js +47 -0
  46. package/dist/policy/profiles.d.ts +13 -0
  47. package/dist/policy/profiles.js +12 -0
  48. package/dist/policy/types.d.ts +52 -0
  49. package/dist/registry/fingerprint.d.ts +73 -0
  50. package/dist/registry/fingerprint.js +81 -0
  51. package/dist/registry/fingerprints-store.d.ts +62 -0
  52. package/dist/registry/fingerprints-store.js +111 -0
  53. package/dist/registry/interpolate.d.ts +58 -0
  54. package/dist/registry/interpolate.js +121 -0
  55. package/dist/registry/loader.d.ts +2 -2
  56. package/dist/registry/loader.js +22 -1
  57. package/dist/registry/tofu-gate.d.ts +41 -0
  58. package/dist/registry/tofu-gate.js +189 -0
  59. package/dist/registry/tofu.d.ts +111 -0
  60. package/dist/registry/tofu.js +173 -0
  61. package/dist/registry/types.d.ts +9 -1
  62. package/package.json +3 -1
  63. package/profiles/bst-internal-no-codex.yaml +5 -0
  64. package/profiles/bst-internal.yaml +7 -0
  65. package/scripts/tarball-smoke.sh +197 -0
@@ -1,9 +1,21 @@
1
- import { InvocationStatus } from '../../policy/types.js';
1
+ import { z } from 'zod';
2
+ import { InvocationStatus, Tier } from '../../policy/types.js';
2
3
  import { wrapRegex } from '../redact-safe/match-timeout.js';
3
4
  /**
4
5
  * Known prompt injection phrases (lowercase for case-insensitive matching).
5
6
  * These patterns are commonly used to override system instructions in tool
6
7
  * descriptions or resource content returned by downstream MCP servers.
8
+ *
9
+ * SECURITY (G9 follow-up): inputs are NFKC-normalized, whitespace-collapsed,
10
+ * and zero-width-stripped before matching (see `normalizeForMatch`). That
11
+ * means the phrases below can safely be written with plain ASCII spaces —
12
+ * the normalizer will fold NBSP, en-space, em-space, zero-width joiners,
13
+ * etc. into the same form so crafted Unicode variants cannot bypass.
14
+ *
15
+ * The pattern library is intentionally terse. Extending it is follow-up
16
+ * work (G9.1): pattern-set extensibility via policy is out of scope for
17
+ * this patch. Phrases added here must be short, lowercase, and tolerate
18
+ * the normalization pipeline (no Unicode, no non-ASCII punctuation).
7
19
  */
8
20
  export const INJECTION_PHRASES = [
9
21
  'ignore previous instructions',
@@ -16,7 +28,65 @@ export const INJECTION_PHRASES = [
16
28
  // is what distinguishes injection from ordinary status messages.
17
29
  'you are now a ',
18
30
  'you are now an ',
31
+ // G9 follow-up: additional role-reassignment / persona-swap vectors.
32
+ // Intentionally NARROW: broader phrases like "act as a" / "act as an"
33
+ // were considered and dropped because at read-tier any literal hit
34
+ // escalates to `likely_injection`, which would deny benign prose such
35
+ // as "this proxy can act as a bridge" or "the service can act as an
36
+ // intermediary." The phrases below all contain a direct second-person
37
+ // address ("you") or an explicit roleplay framing ("roleplay as"),
38
+ // which is rare in ordinary documentation/chat content.
39
+ 'pretend you are ',
40
+ 'roleplay as ',
19
41
  ];
42
+ /**
43
+ * G9 follow-up — Unicode whitespace class covering the most common bypass
44
+ * vectors for literal-phrase matchers: NBSP (U+00A0), OGHAM SPACE MARK
45
+ * (U+1680), the EN/EM/THIN/HAIR space family (U+2000..U+200A), NARROW NBSP
46
+ * (U+202F), MEDIUM MATHEMATICAL SPACE (U+205F), IDEOGRAPHIC SPACE (U+3000).
47
+ * Collapsed to a single ASCII space before matching.
48
+ */
49
+ const UNICODE_WHITESPACE_RE = /[\s\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]+/g;
50
+ /**
51
+ * G9 follow-up (Codex round-2, finding #1) — strip all Default_Ignorable_Code_Point
52
+ * characters before matching. The Unicode property `Default_Ignorable_Code_Point`
53
+ * covers every codepoint that is invisible and has no glyph in standard rendering:
54
+ * soft hyphen (U+00AD), combining grapheme joiner (U+034F), Arabic letter mark
55
+ * (U+061C), Mongolian vowel separator (U+180E), zero-width space/non-joiner/joiner
56
+ * (U+200B–U+200D), word joiner (U+2060), invisible times/separator/plus
57
+ * (U+2062–U+2064), BIDI isolation controls (U+2066–U+2069), variation selector-16
58
+ * (U+FE0F), zero-width no-break space / BOM (U+FEFF), and others.
59
+ *
60
+ * Using `\p{Default_Ignorable_Code_Point}` (requires the `u` flag, Node 22+)
61
+ * is future-proof: new Default_Ignorable codepoints added to Unicode are
62
+ * automatically covered without updating this regex.
63
+ */
64
+ const IGNORABLE_CP_RE = /\p{Default_Ignorable_Code_Point}/gu;
65
+ /**
66
+ * G9 follow-up — normalize an input string to a canonical form for literal
67
+ * phrase matching.
68
+ *
69
+ * 1. NFKC Unicode normalization — folds compatibility forms (fullwidth
70
+ * letters, mathematical alphanumerics) into ASCII equivalents.
71
+ * 2. Strip all Default_Ignorable_Code_Point characters — invisible codepoints
72
+ * that have no rendering and are used only to visually split or obscure
73
+ * injection keywords (soft hyphen, zero-width joiners/non-joiners/spaces,
74
+ * BIDI isolation controls, variation selectors, BOM, etc.).
75
+ * 3. Collapse any run of Unicode whitespace (including NBSP, en/em space)
76
+ * to a single ASCII space.
77
+ * 4. Lowercase — matches the case-insensitive contract of INJECTION_PHRASES.
78
+ *
79
+ * NEVER logs or exports the normalized text; it is used only for match-time
80
+ * comparison. The audit record still surfaces the PHRASE that matched, not
81
+ * the normalized input.
82
+ */
83
+ export function normalizeForMatch(input) {
84
+ return input
85
+ .normalize('NFKC')
86
+ .replace(IGNORABLE_CP_RE, '')
87
+ .replace(UNICODE_WHITESPACE_RE, ' ')
88
+ .toLowerCase();
89
+ }
20
90
  /**
21
91
  * Base64-token scanner regex. The only regex the injection middleware runs
22
92
  * against untrusted payloads; wrapped in `SafeRegex` at middleware creation
@@ -35,6 +105,33 @@ export const INJECTION_BASE64_SHAPE = /^[A-Za-z0-9+/]+=*$/;
35
105
  * one invocation append to an array under this key.
36
106
  */
37
107
  export const INJECTION_TIMEOUT_METADATA_KEY = 'injection.regex_timeout';
108
+ /**
109
+ * Audit metadata key for the classifier verdict. The value is an
110
+ * `InjectionClassifierMetadata` object.
111
+ */
112
+ export const INJECTION_METADATA_KEY = 'injection';
113
+ /**
114
+ * G9 follow-up — zod schema for the `ctx.metadata.injection` record the
115
+ * middleware emits. Every emitted record has a `verdict` field; the schema
116
+ * exists so internal test code (and a follow-up public surface, once we
117
+ * decide how to expose audit-record types) can catch shape regressions —
118
+ * notably the pre-fix behavior where a regex-timeout emitted timing
119
+ * metadata under a different key without ever writing a verdict.
120
+ *
121
+ * INTERNAL today. Not reachable via the published package `exports` map
122
+ * (only `.`, `./policy`, `./middleware`, and `./audit` are public). If
123
+ * downstream consumers (e.g. Helix) need to validate audit records they
124
+ * read off `.rea/audit.jsonl`, we will promote this to a public entrypoint
125
+ * in a follow-up (filed as G9.2). Do not rely on this symbol from outside
126
+ * the rea repo yet.
127
+ */
128
+ export const InjectionMetadataSchema = z
129
+ .object({
130
+ verdict: z.enum(['suspicious', 'likely_injection', 'error']),
131
+ matched_patterns: z.array(z.string()),
132
+ base64_decoded: z.boolean(),
133
+ })
134
+ .strict();
38
135
  /**
39
136
  * Decode a base64 string, returning the decoded text or null if decoding fails.
40
137
  * Only decodes if the input looks like base64 (64-char alphabet, length divisible by 4 or padded).
@@ -53,6 +150,108 @@ function tryDecodeBase64(input, safe) {
53
150
  return null;
54
151
  }
55
152
  }
153
+ /**
154
+ * Minimum token length considered for standalone base64 probing via
155
+ * `decodeBase64Strings`. Below this, the decoded payload is too short to
156
+ * plausibly carry an injection phrase plus payload: 24 base64 chars decode to
157
+ * 18 bytes. NOTE(review): the shortest phrase visible in `INJECTION_PHRASES`
158
+ * ('roleplay as ') is only 12 chars (16 base64 chars) — confirm this floor.
159
+ */
160
+ const MIN_BASE64_PROBE_LENGTH = 24;
161
+ /**
162
+ * Maximum token length considered for standalone base64 probing via
163
+ * `decodeBase64Strings`. Strings longer than this are skipped — base64
164
+ * payloads this large are unlikely to be valid whole-string injection
165
+ * vectors (they would need padding-aligned framing) and decoding them
166
+ * unboundedly can force significant CPU/memory. 16 KiB gives ample room
167
+ * for any plausible injection phrase.
168
+ */
169
+ const MAX_BASE64_PROBE_LENGTH = 16384; // 16 KiB — beyond this, base64 strings are truncated or padding-invalid
170
+ /**
171
+ * G9 — printable-ASCII ratio threshold for accepting a base64 decode as a
172
+ * potential injection payload. The spec requires ≥95% printable characters
173
+ * and no null bytes; stricter than the inline decoder used by
174
+ * `scanForInjection` (which accepts any successful UTF-8 decode) because this
175
+ * probe is used to FORCE-escalate to `likely_injection`, and we want the
176
+ * probe's positive signal to be near-certain.
177
+ */
178
+ const BASE64_PRINTABLE_RATIO = 0.95;
179
+ /**
180
+ * Return true when `s` is printable-enough to plausibly be an injection
181
+ * payload. Printable = ASCII 0x20..0x7E, plus tab/newline/CR. Null bytes
182
+ * (often used for payload truncation games) disqualify the string outright.
183
+ */
184
+ function isPrintableDecoded(s) {
185
+ if (s.length === 0)
186
+ return false;
187
+ let printable = 0;
188
+ for (let i = 0; i < s.length; i++) {
189
+ const code = s.charCodeAt(i);
190
+ if (code === 0)
191
+ return false; // null byte → fail closed
192
+ if ((code >= 0x20 && code <= 0x7e) ||
193
+ code === 0x09 /* tab */ ||
194
+ code === 0x0a /* LF */ ||
195
+ code === 0x0d /* CR */) {
196
+ printable++;
197
+ }
198
+ }
199
+ return printable / s.length >= BASE64_PRINTABLE_RATIO;
200
+ }
201
+ /**
202
+ * G9 — pure helper that walks an arbitrary `unknown` value and returns every
203
+ * successfully decoded base64-looking string. Decoding is attempted only for
204
+ * strings that:
205
+ * - are ≥ `MIN_BASE64_PROBE_LENGTH` (24) chars
206
+ * - have length divisible by 4 (base64 framing)
207
+ * - match the `INJECTION_BASE64_SHAPE` (`^[A-Za-z0-9+/]+=*$`)
208
+ * - decode to a UTF-8 string that is ≥95% printable and contains no null bytes
209
+ *
210
+ * NOTE: This function is NOT called from the middleware body. The inline base64
211
+ * probe in `scanStringForInjection` (via `INJECTION_BASE64_PATTERN`) already
212
+ * covers embedded base64 token detection. Calling `decodeBase64Strings` as a
213
+ * second full-tree pass would duplicate that work and add an avoidable DoS
214
+ * amplification surface (full tree traversal + decoded-string allocation for
215
+ * every base64-shaped leaf). This function is exported for testing and external
216
+ * use only.
217
+ */
218
+ export function decodeBase64Strings(input) {
219
+ const out = [];
220
+ const visit = (v) => {
221
+ if (typeof v === 'string') {
222
+ if (v.length < MIN_BASE64_PROBE_LENGTH)
223
+ return;
224
+ if (v.length > MAX_BASE64_PROBE_LENGTH)
225
+ return;
226
+ if (v.length % 4 !== 0)
227
+ return;
228
+ if (!INJECTION_BASE64_SHAPE.test(v))
229
+ return;
230
+ let decoded;
231
+ try {
232
+ decoded = Buffer.from(v, 'base64').toString('utf8');
233
+ }
234
+ catch {
235
+ return;
236
+ }
237
+ if (!isPrintableDecoded(decoded))
238
+ return;
239
+ out.push(decoded);
240
+ return;
241
+ }
242
+ if (Array.isArray(v)) {
243
+ for (const item of v)
244
+ visit(item);
245
+ return;
246
+ }
247
+ if (v !== null && typeof v === 'object') {
248
+ for (const val of Object.values(v))
249
+ visit(val);
250
+ }
251
+ };
252
+ visit(input);
253
+ return out;
254
+ }
56
255
  /**
57
256
  * Build compiled injection patterns with the provided timeout. Precompiled at
58
257
  * middleware creation so the worker spawn is the only per-call overhead.
@@ -74,64 +273,144 @@ export function compileInjectionPatterns(timeoutMs, onTimeout) {
74
273
  };
75
274
  }
76
275
  /**
77
- * Scan a string for known prompt injection phrases.
78
- * Also decodes base64 tokens and checks the decoded content.
79
- * Returns an array of matched phrase descriptions, empty if clean.
276
+ * Scan a single string and record hits into the provided `InjectionScanResult`
277
+ * buckets. Exported for test surface and for callers who want to scan a known
278
+ * string without walking a tree.
279
+ *
280
+ * - Literal matches (case-insensitive substring) go into `literalMatches`.
281
+ * - Base64-decoded matches (tokens extracted via `INJECTION_BASE64_PATTERN`,
282
+ * decoded, then re-scanned for literals) go into `base64DecodedMatches`.
80
283
  *
81
- * The `safe` parameter carries precompiled SafeRegex wrappers; callers build
82
- * it once via `compileInjectionPatterns`.
284
+ * Set semantics dedupe by phrase: the same phrase matched five times in one
285
+ * string counts as one distinct pattern, which is intentional for the
286
+ * classifier's "≥2 distinct patterns → likely" rule.
83
287
  */
84
- export function scanForInjection(input, safe) {
288
+ export function scanStringForInjection(input, result, safe) {
85
289
  if (!input || typeof input !== 'string')
86
- return [];
87
- const lower = input.toLowerCase();
88
- const matches = [];
89
- // Check literal phrases (indexOf no regex, no ReDoS surface).
290
+ return;
291
+ // G9 follow-up: normalize before matching so NBSP / zero-width / fullwidth
292
+ // variants of injection phrases cannot bypass the literal check. The raw
293
+ // input is still scanned by the base64 tokenizer (SafeRegex expects the
294
+ // pre-normalization bytes).
295
+ const normalized = normalizeForMatch(input);
296
+ // Literal phrases (indexOf — no regex, no ReDoS surface).
90
297
  for (const phrase of INJECTION_PHRASES) {
91
- if (lower.includes(phrase)) {
92
- matches.push(`literal: "${phrase}"`);
298
+ if (normalized.includes(phrase)) {
299
+ result.literalMatches.add(phrase);
93
300
  }
94
301
  }
95
- // Check base64-encoded variants scan word-like tokens that look like
96
- // base64. The regex match is bounded via SafeRegex (timeout + hard worker
97
- // kill).
302
+ // Embedded base64 tokens. SafeRegex wraps the scan so a pathological input
303
+ // cannot hang the event loop.
98
304
  const tokenResult = safe.base64Token.matchAll(input);
99
305
  const base64Tokens = tokenResult.matches;
100
306
  for (const token of base64Tokens) {
101
307
  const decoded = tryDecodeBase64(token, safe);
102
308
  if (!decoded)
103
309
  continue;
104
- const decodedLower = decoded.toLowerCase();
310
+ const decodedNormalized = normalizeForMatch(decoded);
105
311
  for (const phrase of INJECTION_PHRASES) {
106
- if (decodedLower.includes(phrase)) {
107
- matches.push(`base64-encoded: "${phrase}"`);
108
- break; // One report per token is enough
312
+ if (decodedNormalized.includes(phrase)) {
313
+ result.base64DecodedMatches.add(phrase);
109
314
  }
110
315
  }
111
316
  }
112
- return matches;
113
317
  }
114
318
  /**
115
- * Scan an unknown value recursively, collecting all injection matches.
116
- * Walks strings, arrays, and plain objects.
319
+ * Back-compat wrapper: legacy callers (and the old audit-metadata consumer)
320
+ * received a flat `string[]` of "literal: …" / "base64-encoded: …" descriptions.
321
+ * Kept as an exported helper so `scripts/lint-safe-regex.mjs` and any external
322
+ * consumer that imported it continue to work. New code should call
323
+ * `scanStringForInjection` directly.
117
324
  */
118
- function scanValue(value, matches, safe) {
325
+ export function scanForInjection(input, safe) {
326
+ const result = {
327
+ literalMatches: new Set(),
328
+ base64DecodedMatches: new Set(),
329
+ };
330
+ scanStringForInjection(input, result, safe);
331
+ const out = [];
332
+ for (const p of result.literalMatches)
333
+ out.push(`literal: "${p}"`);
334
+ for (const p of result.base64DecodedMatches)
335
+ out.push(`base64-encoded: "${p}"`);
336
+ return out;
337
+ }
338
+ /**
339
+ * Recursively scan an unknown value (string, array, or plain object) and
340
+ * accumulate matches into the supplied `InjectionScanResult` buckets.
341
+ */
342
+ export function scanValueForInjection(value, result, safe) {
119
343
  if (typeof value === 'string') {
120
- matches.push(...scanForInjection(value, safe));
344
+ scanStringForInjection(value, result, safe);
121
345
  return;
122
346
  }
123
347
  if (Array.isArray(value)) {
124
- for (const item of value) {
125
- scanValue(item, matches, safe);
126
- }
348
+ for (const item of value)
349
+ scanValueForInjection(item, result, safe);
127
350
  return;
128
351
  }
129
352
  if (value !== null && typeof value === 'object') {
130
353
  for (const v of Object.values(value)) {
131
- scanValue(v, matches, safe);
354
+ scanValueForInjection(v, result, safe);
132
355
  }
133
356
  }
134
357
  }
358
+ export function classifyInjection(scan, tier) {
359
+ const literalCount = scan.literalMatches.size;
360
+ const base64Count = scan.base64DecodedMatches.size;
361
+ if (literalCount === 0 && base64Count === 0) {
362
+ return { verdict: 'clean' };
363
+ }
364
+ // Dedupe: a phrase that appears both literally AND in a base64-decoded
365
+ // payload in the same input counts once in `matched_patterns`. Union via
366
+ // Set before sorting.
367
+ const matched = [
368
+ ...new Set([...scan.literalMatches, ...scan.base64DecodedMatches]),
369
+ ].sort();
370
+ // Rule 2 — base64 always escalates, regardless of count or tier.
371
+ if (base64Count > 0) {
372
+ return {
373
+ verdict: 'likely_injection',
374
+ matched_patterns: matched,
375
+ base64_decoded: true,
376
+ };
377
+ }
378
+ // Rule 3 — multi-literal (distinct patterns) always escalates.
379
+ if (literalCount >= 2) {
380
+ return {
381
+ verdict: 'likely_injection',
382
+ matched_patterns: matched,
383
+ base64_decoded: false,
384
+ };
385
+ }
386
+ // Rule 4 — any match at read-tier, or unknown tier, is anomalous enough
387
+ // to treat as likely. Tier middleware runs before injection middleware,
388
+ // so an undefined tier here means tier-classification failed; fail closed.
389
+ if (tier === Tier.Read || tier === undefined) {
390
+ return {
391
+ verdict: 'likely_injection',
392
+ matched_patterns: matched,
393
+ base64_decoded: false,
394
+ };
395
+ }
396
+ // TODO (G9-follow-up): per-pattern "deny-tag" metadata can force this
397
+ // branch to `likely_injection` even for a single literal at write tier.
398
+ // Not shipped in this PR; pattern list is unchanged.
399
+ // Rule 5 — exactly 1 literal at write/destructive.
400
+ return {
401
+ verdict: 'suspicious',
402
+ matched_patterns: matched,
403
+ base64_decoded: false,
404
+ };
405
+ }
406
+ /**
407
+ * Maximum result size (in UTF-8 bytes) that the injection scanner will attempt
408
+ * to scan. Payloads larger than this cannot be scanned within the 100ms timeout
409
+ * budget before result-size-cap (which runs later in the chain) has had a
410
+ * chance to truncate them. Treat oversized payloads the same as a scan timeout:
411
+ * deny in block mode, pass in warn mode.
412
+ */
413
+ const MAX_RESULT_SCAN_BYTES = 2 * 1024 * 1024; // 2 MiB
135
414
  /**
136
415
  * Record a regex-timeout event on `ctx.metadata`. Array-valued so multiple
137
416
  * timeouts in one invocation are all recorded.
@@ -155,50 +434,153 @@ function recordInjectionTimeout(ctx, patternId, inputBytes, timeoutMs) {
155
434
  }
156
435
  }
157
436
  /**
158
- * PostToolUse middleware: scans tool results for prompt injection patterns.
437
+ * PostToolUse middleware: classifies tool results for prompt injection.
438
+ *
439
+ * G9 tiered classifier:
440
+ * - `clean` → allow, no log
441
+ * - `suspicious` → warn (stderr + audit metadata `injection` with `verdict: 'suspicious'`).
442
+ * Denies only when `suspiciousBlocksWrites: true`.
443
+ * - `likely_injection` → always deny, always log.
159
444
  *
160
445
  * Operates on tool output (ctx.result) returned from downstream MCP servers.
161
- * On detection:
162
- * - Always logs to audit metadata and emits a warning to stderr.
163
- * - If action is 'block' (default), sets ctx.status to Denied and blocks the result.
164
- * - If action is 'warn', allows the result through with a warning only.
165
446
  *
166
- * SECURITY: Checking PostToolUse (after downstream execution, before the result
167
- * reaches the LLM) is the correct place to catch injection in tool descriptions
168
- * and resource content coming from potentially untrusted downstream servers.
447
+ * SECURITY: Checking PostToolUse (after downstream execution, before the
448
+ * result reaches the LLM) is the correct place to catch injection in tool
449
+ * descriptions and resource content coming from potentially untrusted
450
+ * downstream servers.
169
451
  *
170
452
  * SECURITY (G3): The only regexes this middleware runs are wrapped in
171
453
  * `SafeRegex` with a 100ms default per-call timeout. On timeout the scanner
172
454
  * records an audit event; under `action: 'block'` the call is then denied
173
455
  * (fail closed), otherwise the verdict comes from the matches collected so far.
456
+ *
457
+ * The legacy `action` parameter (`'block' | 'warn'`) gates `suspicious`
458
+ * verdicts: `'warn'` never denies them, even when `suspiciousBlocksWrites` is
459
+ * set — preserving 0.2.x `injection_detection: 'warn'` semantics. It also
460
+ * decides timeout/oversize handling; `likely_injection` denies regardless.
174
461
  */
175
462
  export function createInjectionMiddleware(action = 'block', opts = {}) {
176
463
  const timeoutMs = opts.matchTimeoutMs ?? 100;
464
+ // Default `suspiciousBlocksWrites` to `false` when unset to preserve 0.3.x
465
+ // behavior for existing installs that omit the `injection:` policy block.
466
+ // A consumer who had `injection_detection: block` in 0.3.x without the new
467
+ // field would otherwise silently start hard-failing benign tool writes that
468
+ // contain a single matching phrase on upgrade — a breaking change disguised
469
+ // as a default. The tighter posture (single literal hit → deny) must be
470
+ // opted into explicitly via `injection.suspicious_blocks_writes: true`, or
471
+ // by using a profile (e.g. bst-internal) that already sets it.
472
+ //
473
+ // Fail-closed-on-timeout (Finding 1 fix) already tightens security for
474
+ // incomplete scans; this default preserves parity for complete scans.
475
+ const denyOnSuspicious = action === 'warn'
476
+ ? false // warn mode hard-overrides suspicious deny — 0.2.x parity with `injection_detection: warn`
477
+ : (opts.suspiciousBlocksWrites ?? false); // block mode: default false (0.3.x default preserved)
177
478
  return async (ctx, next) => {
178
479
  await next();
179
480
  // Only scan if we have a result to inspect
180
481
  if (ctx.result == null)
181
482
  return;
483
+ // Pre-scan size check: if the result is too large to scan within the timeout
484
+ // budget, treat as a timeout. Result-size-cap runs later in the chain, so we
485
+ // bound the scan here rather than relying on downstream truncation.
486
+ const resultBytes = Buffer.byteLength(JSON.stringify(ctx.result), 'utf8');
487
+ if (resultBytes > MAX_RESULT_SCAN_BYTES) {
488
+ if (action === 'block') {
489
+ const errorMeta = {
490
+ verdict: 'error',
491
+ matched_patterns: [],
492
+ base64_decoded: false,
493
+ };
494
+ ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
495
+ ctx.status = InvocationStatus.Denied;
496
+ ctx.error = `injection scan skipped — result exceeds ${MAX_RESULT_SCAN_BYTES} bytes; failing closed under block policy`;
497
+ return;
498
+ }
499
+ // warn mode: let through — result-size-cap will truncate downstream.
500
+ return;
501
+ }
502
+ // G9 follow-up (finding #4): track scanner timeout via a closure flag so
503
+ // we can emit a stable `verdict: 'error'` metadata record alongside the
504
+ // existing `injection.regex_timeout` event. Downstream audit consumers
505
+ // that key off `metadata.injection.verdict` no longer see a bare timing
506
+ // record with no verdict shape.
507
+ let scanTimedOut = false;
182
508
  const safe = compileInjectionPatterns(timeoutMs, (patternId, input) => {
509
+ scanTimedOut = true;
183
510
  recordInjectionTimeout(ctx, patternId, Buffer.byteLength(input, 'utf8'), timeoutMs);
184
511
  });
185
- const matches = [];
186
- scanValue(ctx.result, matches, safe);
187
- if (matches.length === 0)
512
+ const scan = {
513
+ literalMatches: new Set(),
514
+ base64DecodedMatches: new Set(),
515
+ };
516
+ scanValueForInjection(ctx.result, scan, safe);
517
+ // Fail closed: in block mode, ANY timeout denies — regardless of what the
518
+ // partial scan found. An incomplete scan cannot prove the unscanned suffix
519
+ // is safe. If the provisional classification were `suspicious` (one early
520
+ // literal hit before the timeout), falling through to the normal policy
521
+ // path could still allow the call under `suspiciousBlocksWrites: false`,
522
+ // even though the unscanned suffix might contain a second phrase that
523
+ // would have escalated to `likely_injection`. Hoisting this check before
524
+ // `classifyInjection` closes that gap.
525
+ if (scanTimedOut && action === 'block') {
526
+ const errorMeta = {
527
+ verdict: 'error',
528
+ matched_patterns: [],
529
+ base64_decoded: false,
530
+ };
531
+ ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
532
+ process.stderr.write(`[rea] INJECTION-GUARD (error): regex-timeout during scan of tool "${ctx.tool_name}" result; verdict inconclusive\n`);
533
+ ctx.status = InvocationStatus.Denied;
534
+ ctx.error = 'injection scan timed out — failing closed under block policy';
535
+ return; // do NOT call next()
536
+ }
537
+ const classification = classifyInjection(scan, ctx.tier);
538
+ // warn/log mode + timeout: fail-open — emit a verdict:'error' metadata
539
+ // record alongside the existing injection.regex_timeout event so
540
+ // downstream audit consumers see a stable verdict shape, then allow
541
+ // through. This branch only fires in warn mode (block mode was handled
542
+ // above) when no actionable signal was collected before the timeout.
543
+ if (scanTimedOut && classification.verdict === 'clean') {
544
+ const errorMeta = {
545
+ verdict: 'error',
546
+ matched_patterns: [],
547
+ base64_decoded: false,
548
+ };
549
+ ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
550
+ process.stderr.write(`[rea] INJECTION-GUARD (error): regex-timeout during scan of tool "${ctx.tool_name}" result; verdict inconclusive\n`);
551
+ // warn/log mode: let through but record — verdict:'error' is written above.
552
+ return;
553
+ }
554
+ if (classification.verdict === 'clean')
188
555
  return;
189
- // Deduplicate matches
190
- const unique = [...new Set(matches)];
191
- // Always log to audit metadata
192
- ctx.metadata.injection_matches = unique;
193
- // Always emit warning to stderr
194
- process.stderr.write(`[rea] INJECTION-GUARD: Prompt injection pattern detected in tool "${ctx.tool_name}" result\n`);
195
- for (const match of unique) {
196
- process.stderr.write(` Pattern: ${match}\n`);
556
+ // Write audit metadata. Export verdict + distinct matched phrases +
557
+ // base64 flag. NEVER export the input text.
558
+ const auditMeta = {
559
+ verdict: classification.verdict,
560
+ matched_patterns: classification.matched_patterns,
561
+ base64_decoded: classification.base64_decoded,
562
+ };
563
+ ctx.metadata[INJECTION_METADATA_KEY] = auditMeta;
564
+ // Always emit a stderr warning. Operators rely on this as the live signal.
565
+ process.stderr.write(`[rea] INJECTION-GUARD (${classification.verdict}): pattern(s) detected in tool "${ctx.tool_name}" result\n`);
566
+ for (const p of classification.matched_patterns) {
567
+ process.stderr.write(` Pattern: ${p}\n`);
568
+ }
569
+ if (classification.base64_decoded) {
570
+ process.stderr.write(` Base64-decoded match detected\n`);
197
571
  }
198
- process.stderr.write(` Action: ${action} — review the downstream server "${ctx.server_name}" for compromise.\n`);
199
- if (action === 'block') {
572
+ process.stderr.write(` Action: review the downstream server "${ctx.server_name}" for compromise.\n`);
573
+ // Deny policy:
574
+ // likely_injection → always deny
575
+ // suspicious → deny iff denyOnSuspicious (constructed above)
576
+ const shouldDeny = classification.verdict === 'likely_injection' ||
577
+ (classification.verdict === 'suspicious' && denyOnSuspicious);
578
+ if (shouldDeny) {
200
579
  ctx.status = InvocationStatus.Denied;
201
- ctx.error = `Prompt injection detected in tool result (${unique.length} pattern(s) matched). Result blocked.`;
580
+ ctx.error =
581
+ classification.verdict === 'likely_injection'
582
+ ? `Likely prompt injection detected in tool result (${classification.matched_patterns.length} pattern(s), base64=${classification.base64_decoded}). Result blocked.`
583
+ : `Suspicious prompt injection pattern in tool result (1 pattern at ${String(ctx.tier)} tier). Result blocked by policy.`;
202
584
  ctx.result = undefined;
203
585
  }
204
586
  };
@@ -1,4 +1,5 @@
1
1
  import type { Middleware } from './chain.js';
2
+ import type { MetricsRegistry } from '../observability/metrics.js';
2
3
  /**
3
4
  * HALT semantic guarantee:
4
5
  * - HALT is read exactly once per invocation, at the top of this middleware layer.
@@ -22,4 +23,12 @@ import type { Middleware } from './chain.js';
22
23
  * - The decision is recorded on `ctx.metadata.halt_decision` for audit and is
23
24
  * never re-consulted by downstream middleware.
24
25
  */
25
- export declare function createKillSwitchMiddleware(baseDir: string): Middleware;
26
+ export declare function createKillSwitchMiddleware(baseDir: string,
27
+ /**
28
+ * Optional metrics registry. When supplied, every invocation marks the
29
+ * `rea_seconds_since_last_halt_check` gauge with a fresh timestamp so the
30
+ * exposed gauge reflects real per-call check cadence rather than the
31
+ * startup-time mark `rea serve` sets once. When omitted, no metric is
32
+ * emitted.
33
+ */
34
+ metrics?: MetricsRegistry): Middleware;
@@ -28,9 +28,28 @@ const HALT_FILE = 'HALT';
28
28
  * - The decision is recorded on `ctx.metadata.halt_decision` for audit and is
29
29
  * never re-consulted by downstream middleware.
30
30
  */
31
- export function createKillSwitchMiddleware(baseDir) {
31
+ export function createKillSwitchMiddleware(baseDir,
32
+ /**
33
+ * Optional metrics registry. When supplied, every invocation marks the
34
+ * `rea_seconds_since_last_halt_check` gauge with a fresh timestamp so the
35
+ * exposed gauge reflects real per-call check cadence rather than the
36
+ * startup-time mark `rea serve` sets once. When omitted, no metric is
37
+ * emitted.
38
+ */
39
+ metrics) {
32
40
  return async (ctx, next) => {
33
41
  const haltPath = path.join(baseDir, REA_DIR, HALT_FILE);
42
+ // Record the HALT-check attempt BEFORE we probe the filesystem so the
43
+ // gauge reflects "how long since we last looked", regardless of whether
44
+ // this check succeeds or fails. Fresh on every invocation; failure to
45
+ // update metrics must not crash the gateway.
46
+ try {
47
+ metrics?.markHaltCheck();
48
+ }
49
+ catch {
50
+ // Metrics registry implementations are expected to be infallible,
51
+ // but we refuse to let them take down the chain in any case.
52
+ }
34
53
  let fh;
35
54
  try {
36
55
  fh = await fs.open(haltPath, fsConstants.O_RDONLY);