@bookedsolid/rea 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/pre-push +15 -18
- package/README.md +41 -1
- package/THREAT_MODEL.md +100 -29
- package/dist/audit/append.d.ts +21 -8
- package/dist/audit/append.js +48 -83
- package/dist/audit/fs.d.ts +68 -0
- package/dist/audit/fs.js +171 -0
- package/dist/cli/audit.d.ts +40 -0
- package/dist/cli/audit.js +205 -0
- package/dist/cli/doctor.d.ts +19 -4
- package/dist/cli/doctor.js +172 -5
- package/dist/cli/index.js +26 -1
- package/dist/cli/init.js +93 -7
- package/dist/cli/install/pre-push.d.ts +335 -0
- package/dist/cli/install/pre-push.js +2818 -0
- package/dist/cli/serve.d.ts +64 -0
- package/dist/cli/serve.js +270 -2
- package/dist/cli/status.d.ts +90 -0
- package/dist/cli/status.js +399 -0
- package/dist/cli/utils.d.ts +4 -0
- package/dist/cli/utils.js +4 -0
- package/dist/gateway/audit/rotator.d.ts +116 -0
- package/dist/gateway/audit/rotator.js +289 -0
- package/dist/gateway/circuit-breaker.d.ts +17 -0
- package/dist/gateway/circuit-breaker.js +32 -3
- package/dist/gateway/downstream-pool.d.ts +2 -1
- package/dist/gateway/downstream-pool.js +2 -2
- package/dist/gateway/downstream.d.ts +39 -3
- package/dist/gateway/downstream.js +73 -14
- package/dist/gateway/log.d.ts +122 -0
- package/dist/gateway/log.js +334 -0
- package/dist/gateway/middleware/audit.d.ts +24 -1
- package/dist/gateway/middleware/audit.js +103 -58
- package/dist/gateway/middleware/blocked-paths.d.ts +0 -9
- package/dist/gateway/middleware/blocked-paths.js +439 -67
- package/dist/gateway/middleware/injection.d.ts +218 -13
- package/dist/gateway/middleware/injection.js +433 -51
- package/dist/gateway/middleware/kill-switch.d.ts +10 -1
- package/dist/gateway/middleware/kill-switch.js +20 -1
- package/dist/gateway/observability/metrics.d.ts +125 -0
- package/dist/gateway/observability/metrics.js +321 -0
- package/dist/gateway/server.d.ts +19 -0
- package/dist/gateway/server.js +99 -15
- package/dist/policy/loader.d.ts +47 -0
- package/dist/policy/loader.js +47 -0
- package/dist/policy/profiles.d.ts +13 -0
- package/dist/policy/profiles.js +12 -0
- package/dist/policy/types.d.ts +52 -0
- package/dist/registry/fingerprint.d.ts +73 -0
- package/dist/registry/fingerprint.js +81 -0
- package/dist/registry/fingerprints-store.d.ts +62 -0
- package/dist/registry/fingerprints-store.js +111 -0
- package/dist/registry/interpolate.d.ts +58 -0
- package/dist/registry/interpolate.js +121 -0
- package/dist/registry/loader.d.ts +2 -2
- package/dist/registry/loader.js +22 -1
- package/dist/registry/tofu-gate.d.ts +41 -0
- package/dist/registry/tofu-gate.js +189 -0
- package/dist/registry/tofu.d.ts +111 -0
- package/dist/registry/tofu.js +173 -0
- package/dist/registry/types.d.ts +9 -1
- package/package.json +3 -1
- package/profiles/bst-internal-no-codex.yaml +5 -0
- package/profiles/bst-internal.yaml +7 -0
- package/scripts/tarball-smoke.sh +197 -0
|
@@ -1,9 +1,21 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { InvocationStatus, Tier } from '../../policy/types.js';
|
|
2
3
|
import { wrapRegex } from '../redact-safe/match-timeout.js';
|
|
3
4
|
/**
|
|
4
5
|
* Known prompt injection phrases (lowercase for case-insensitive matching).
|
|
5
6
|
* These patterns are commonly used to override system instructions in tool
|
|
6
7
|
* descriptions or resource content returned by downstream MCP servers.
|
|
8
|
+
*
|
|
9
|
+
* SECURITY (G9 follow-up): inputs are NFKC-normalized, whitespace-collapsed,
|
|
10
|
+
* and zero-width-stripped before matching (see `normalizeForMatch`). That
|
|
11
|
+
* means the phrases below can safely be written with plain ASCII spaces —
|
|
12
|
+
* the normalizer will fold NBSP, en-space, em-space, zero-width joiners,
|
|
13
|
+
* etc. into the same form so crafted Unicode variants cannot bypass.
|
|
14
|
+
*
|
|
15
|
+
* The pattern library is intentionally terse. Extending it is follow-up
|
|
16
|
+
* work (G9.1): pattern-set extensibility via policy is out of scope for
|
|
17
|
+
* this patch. Phrases added here must be short, lowercase, and tolerate
|
|
18
|
+
* the normalization pipeline (no Unicode, no non-ASCII punctuation).
|
|
7
19
|
*/
|
|
8
20
|
export const INJECTION_PHRASES = [
|
|
9
21
|
'ignore previous instructions',
|
|
@@ -16,7 +28,65 @@ export const INJECTION_PHRASES = [
|
|
|
16
28
|
// is what distinguishes injection from ordinary status messages.
|
|
17
29
|
'you are now a ',
|
|
18
30
|
'you are now an ',
|
|
31
|
+
// G9 follow-up: additional role-reassignment / persona-swap vectors.
|
|
32
|
+
// Intentionally NARROW: broader phrases like "act as a" / "act as an"
|
|
33
|
+
// were considered and dropped because at read-tier any literal hit
|
|
34
|
+
// escalates to `likely_injection`, which would deny benign prose such
|
|
35
|
+
// as "this proxy can act as a bridge" or "the service can act as an
|
|
36
|
+
// intermediary." The phrases below all contain a direct second-person
|
|
37
|
+
// address ("you") or an explicit roleplay framing ("roleplay as"),
|
|
38
|
+
// which is rare in ordinary documentation/chat content.
|
|
39
|
+
'pretend you are ',
|
|
40
|
+
'roleplay as ',
|
|
19
41
|
];
|
|
42
|
+
/**
 * G9 follow-up — whitespace class used to fold "spacing" bypass vectors for
 * literal-phrase matchers into a single ASCII space before matching.
 *
 * The class is the union of two sets:
 *  - `\s`, the ECMAScript whitespace set: ASCII whitespace, NBSP (U+00A0),
 *    OGHAM SPACE MARK (U+1680), the EN/EM/THIN/HAIR space family
 *    (U+2000..U+200A), LINE/PARAGRAPH SEPARATOR (U+2028/U+2029), NARROW NBSP
 *    (U+202F), MEDIUM MATHEMATICAL SPACE (U+205F), IDEOGRAPHIC SPACE (U+3000)
 *    and U+FEFF.
 *  - `\p{White_Space}`, the Unicode White_Space property. This contributes
 *    NEXT LINE (U+0085), which Unicode classifies as whitespace but `\s` does
 *    not match; without it `ignore\u0085previous\u0085instructions` is never
 *    collapsed and slips past the literal matcher.
 *
 * Property escapes require the `u` flag.
 */
const UNICODE_WHITESPACE_RE = /[\s\p{White_Space}]+/gu;
/**
 * G9 follow-up (Codex round-2, finding #1) — matches every
 * Default_Ignorable_Code_Point character so it can be stripped before
 * matching. Examples covered by the property: soft hyphen (U+00AD), combining
 * grapheme joiner (U+034F), Arabic letter mark (U+061C), Mongolian vowel
 * separator (U+180E), zero-width space/non-joiner/joiner (U+200B–U+200D),
 * word joiner (U+2060), invisible times/separator/plus (U+2062–U+2064), BIDI
 * isolation controls (U+2066–U+2069), variation selector-16 (U+FE0F) and
 * zero-width no-break space / BOM (U+FEFF).
 *
 * Using the property escape (requires the `u` flag) instead of an explicit
 * list means code points added to the property by later Unicode versions are
 * picked up by the engine without editing this regex.
 */
const IGNORABLE_CP_RE = /\p{Default_Ignorable_Code_Point}/gu;
/**
 * G9 follow-up — normalize an input string to a canonical form for literal
 * phrase matching.
 *
 * Pipeline, in order:
 *  1. NFKC normalization — folds compatibility forms (fullwidth letters,
 *     mathematical alphanumerics) into their plain equivalents.
 *  2. Remove every Default_Ignorable_Code_Point character (invisible code
 *     points used to split or obscure keywords).
 *  3. Collapse each run of whitespace (ECMAScript `\s` plus Unicode
 *     White_Space, so NEL U+0085 is included) to one ASCII space.
 *  4. Lowercase — INJECTION_PHRASES entries are lowercase.
 *
 * The result is intended for match-time comparison only; this function does
 * not log or store it.
 *
 * @param input string to normalize (callers must pass a string).
 * @returns the normalized, lowercased string.
 */
export function normalizeForMatch(input) {
    return input
        .normalize('NFKC')
        .replace(IGNORABLE_CP_RE, '')
        .replace(UNICODE_WHITESPACE_RE, ' ')
        .toLowerCase();
}
|
|
20
90
|
/**
|
|
21
91
|
* Base64-token scanner regex. The only regex the injection middleware runs
|
|
22
92
|
* against untrusted payloads; wrapped in `SafeRegex` at middleware creation
|
|
@@ -35,6 +105,33 @@ export const INJECTION_BASE64_SHAPE = /^[A-Za-z0-9+/]+=*$/;
|
|
|
35
105
|
* one invocation append to an array under this key.
|
|
36
106
|
*/
|
|
37
107
|
export const INJECTION_TIMEOUT_METADATA_KEY = 'injection.regex_timeout';
|
|
108
|
+
/**
|
|
109
|
+
* Audit metadata key for the classifier verdict. The value is an
|
|
110
|
+
* `InjectionClassifierMetadata` object.
|
|
111
|
+
*/
|
|
112
|
+
export const INJECTION_METADATA_KEY = 'injection';
|
|
113
|
+
/**
|
|
114
|
+
* G9 follow-up — zod schema for the `ctx.metadata.injection` record the
|
|
115
|
+
* middleware emits. Every emitted record has a `verdict` field; the schema
|
|
116
|
+
* exists so internal test code (and a follow-up public surface, once we
|
|
117
|
+
* decide how to expose audit-record types) can catch shape regressions —
|
|
118
|
+
* notably the pre-fix behavior where a regex-timeout emitted timing
|
|
119
|
+
* metadata under a different key without ever writing a verdict.
|
|
120
|
+
*
|
|
121
|
+
* INTERNAL today. Not reachable via the published package `exports` map
|
|
122
|
+
* (only `.`, `./policy`, `./middleware`, and `./audit` are public). If
|
|
123
|
+
* downstream consumers (e.g. Helix) need to validate audit records they
|
|
124
|
+
* read off `.rea/audit.jsonl`, we will promote this to a public entrypoint
|
|
125
|
+
* in a follow-up (filed as G9.2). Do not rely on this symbol from outside
|
|
126
|
+
* the rea repo yet.
|
|
127
|
+
*/
|
|
128
|
+
export const InjectionMetadataSchema = z
|
|
129
|
+
.object({
|
|
130
|
+
verdict: z.enum(['suspicious', 'likely_injection', 'error']),
|
|
131
|
+
matched_patterns: z.array(z.string()),
|
|
132
|
+
base64_decoded: z.boolean(),
|
|
133
|
+
})
|
|
134
|
+
.strict();
|
|
38
135
|
/**
|
|
39
136
|
* Decode a base64 string, returning the decoded text or null if decoding fails.
|
|
40
137
|
* Only decodes if the input looks like base64 (64-char alphabet, length divisible by 4 or padded).
|
|
@@ -53,6 +150,108 @@ function tryDecodeBase64(input, safe) {
|
|
|
53
150
|
return null;
|
|
54
151
|
}
|
|
55
152
|
}
|
|
153
|
+
/**
|
|
154
|
+
* Minimum token length considered for standalone base64 probing via
|
|
155
|
+
* `decodeBase64Strings`. Below this, the decoded payload is too short to
|
|
156
|
+
* plausibly contain an injection phrase (the shortest phrase in
|
|
157
|
+
* `INJECTION_PHRASES` is assumed to be 16 chars — NOTE(review): 'roleplay as ' is only 12, so re-derive this floor; 24 base64 chars → 18 decoded chars, with
|
|
158
|
+
* some slack for leading/trailing noise).
|
|
159
|
+
*/
|
|
160
|
+
const MIN_BASE64_PROBE_LENGTH = 24;
|
|
161
|
+
/**
|
|
162
|
+
* Maximum token length considered for standalone base64 probing via
|
|
163
|
+
* `decodeBase64Strings`. Strings longer than this are skipped — base64
|
|
164
|
+
* payloads this large are unlikely to be valid whole-string injection
|
|
165
|
+
* vectors (they would need padding-aligned framing) and decoding them
|
|
166
|
+
* unboundedly can force significant CPU/memory. 16 KiB gives ample room
|
|
167
|
+
* for any plausible injection phrase.
|
|
168
|
+
*/
|
|
169
|
+
const MAX_BASE64_PROBE_LENGTH = 16384; // 16,384 chars (16 KiB of base64 text) — longer strings are skipped by `decodeBase64Strings`, not decoded
|
|
170
|
+
/**
|
|
171
|
+
* G9 — printable-ASCII ratio threshold for accepting a base64 decode as a
|
|
172
|
+
* potential injection payload. The spec requires ≥95% printable characters
|
|
173
|
+
* and no null bytes; stricter than the inline decoder used by
|
|
174
|
+
* `scanForInjection` (which accepts any successful UTF-8 decode) because this
|
|
175
|
+
* probe is used to FORCE-escalate to `likely_injection`, and we want the
|
|
176
|
+
* probe's positive signal to be near-certain.
|
|
177
|
+
*/
|
|
178
|
+
const BASE64_PRINTABLE_RATIO = 0.95;
|
|
179
|
+
/**
|
|
180
|
+
* Return true when `s` is printable-enough to plausibly be an injection
|
|
181
|
+
* payload. Printable = ASCII 0x20..0x7E, plus tab/newline/CR. Null bytes
|
|
182
|
+
* (often used for payload truncation games) disqualify the string outright.
|
|
183
|
+
*/
|
|
184
|
+
function isPrintableDecoded(s) {
    const total = s.length;
    // An empty decode carries no payload, so it never qualifies.
    if (total === 0) {
        return false;
    }
    let printableCount = 0;
    for (let idx = 0; idx < total; idx += 1) {
        const unit = s.charCodeAt(idx);
        // A single NUL disqualifies the whole string (fail closed).
        if (unit === 0) {
            return false;
        }
        const isVisibleAscii = unit >= 0x20 && unit <= 0x7e;
        // Tab, line feed and carriage return also count as printable.
        const isLayoutControl = unit === 0x09 || unit === 0x0a || unit === 0x0d;
        if (isVisibleAscii || isLayoutControl) {
            printableCount += 1;
        }
    }
    // Accept only when the printable share meets the configured ratio.
    return printableCount / total >= BASE64_PRINTABLE_RATIO;
}
|
|
201
|
+
/**
|
|
202
|
+
* G9 — pure helper that walks an arbitrary `unknown` value and returns every
|
|
203
|
+
* successfully decoded base64-looking string. Decoding is attempted only for
|
|
204
|
+
* strings that:
|
|
205
|
+
* - are ≥ `MIN_BASE64_PROBE_LENGTH` (24) chars
|
|
206
|
+
* - have length divisible by 4 (base64 framing)
|
|
207
|
+
* - match the `INJECTION_BASE64_SHAPE` (`^[A-Za-z0-9+/]+=*$`)
|
|
208
|
+
* - decode to a UTF-8 string that is ≥95% printable and contains no null bytes
|
|
209
|
+
*
|
|
210
|
+
* NOTE: This function is NOT called from the middleware body. The inline base64
|
|
211
|
+
* probe in `scanStringForInjection` (via `INJECTION_BASE64_PATTERN`) already
|
|
212
|
+
* covers embedded base64 token detection. Calling `decodeBase64Strings` as a
|
|
213
|
+
* second full-tree pass would duplicate that work and add an avoidable DoS
|
|
214
|
+
* amplification surface (full tree traversal + decoded-string allocation for
|
|
215
|
+
* every base64-shaped leaf). This function is exported for testing and external
|
|
216
|
+
* use only.
|
|
217
|
+
*/
|
|
218
|
+
export function decodeBase64Strings(input) {
    const out = [];
    // Containers on the current root-to-node path. A container that is already
    // on the path is a cycle; skipping it keeps a self-referential input from
    // recursing until the stack overflows. The entry is removed on the way
    // back up, so a container that is merely shared by two siblings (no
    // cycle) is still visited once per reference, exactly as before.
    const ancestors = new Set();
    const visit = (v) => {
        if (typeof v === 'string') {
            // Cheap structural filters first: length window, 4-char framing,
            // then the base64 alphabet shape.
            if (v.length < MIN_BASE64_PROBE_LENGTH)
                return;
            if (v.length > MAX_BASE64_PROBE_LENGTH)
                return;
            if (v.length % 4 !== 0)
                return;
            if (!INJECTION_BASE64_SHAPE.test(v))
                return;
            let decoded;
            try {
                decoded = Buffer.from(v, 'base64').toString('utf8');
            }
            catch {
                // Defensive: any decode failure just means "not a usable payload".
                return;
            }
            // Keep only decodes that look like text (printable ratio, no NUL).
            if (!isPrintableDecoded(decoded))
                return;
            out.push(decoded);
            return;
        }
        // Numbers, booleans, null, undefined, functions, ... carry no strings.
        if (v === null || typeof v !== 'object')
            return;
        if (ancestors.has(v))
            return; // cycle — this container is already being walked
        ancestors.add(v);
        // Arrays are walked element by element; other objects by their own
        // enumerable property values (property names are not inspected).
        const children = Array.isArray(v) ? v : Object.values(v);
        for (const child of children)
            visit(child);
        ancestors.delete(v);
    };
    visit(input);
    return out;
}
|
|
56
255
|
/**
|
|
57
256
|
* Build compiled injection patterns with the provided timeout. Precompiled at
|
|
58
257
|
* middleware creation so the worker spawn is the only per-call overhead.
|
|
@@ -74,64 +273,144 @@ export function compileInjectionPatterns(timeoutMs, onTimeout) {
|
|
|
74
273
|
};
|
|
75
274
|
}
|
|
76
275
|
/**
|
|
77
|
-
* Scan a string
|
|
78
|
-
*
|
|
79
|
-
*
|
|
276
|
+
* Scan a single string and record hits into the provided `InjectionScanResult`
|
|
277
|
+
* buckets. Exported for test surface and for callers who want to scan a known
|
|
278
|
+
* string without walking a tree.
|
|
279
|
+
*
|
|
280
|
+
* - Literal matches (case-insensitive substring) go into `literalMatches`.
|
|
281
|
+
* - Base64-decoded matches (tokens extracted via `INJECTION_BASE64_PATTERN`,
|
|
282
|
+
* decoded, then re-scanned for literals) go into `base64DecodedMatches`.
|
|
80
283
|
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
284
|
+
* Set semantics dedupe by phrase: the same phrase matched five times in one
|
|
285
|
+
* string counts as one distinct pattern, which is intentional for the
|
|
286
|
+
* classifier's "≥2 distinct patterns → likely" rule.
|
|
83
287
|
*/
|
|
84
|
-
export function
|
|
288
|
+
export function scanStringForInjection(input, result, safe) {
    // Nothing to do for non-strings and for the empty string.
    if (typeof input !== 'string' || input.length === 0) {
        return;
    }
    // Literal pass: compare against the normalized form so NBSP / zero-width /
    // fullwidth variants of a phrase cannot dodge the substring check. Plain
    // `includes` — no regex, so no ReDoS surface here.
    const canonical = normalizeForMatch(input);
    INJECTION_PHRASES
        .filter((phrase) => canonical.includes(phrase))
        .forEach((phrase) => result.literalMatches.add(phrase));
    // Base64 pass: the tokenizer runs on the RAW input (pre-normalization)
    // through the SafeRegex wrapper. Each token that decodes is normalized
    // and checked against the same phrase list.
    const { matches: candidateTokens } = safe.base64Token.matchAll(input);
    for (const candidate of candidateTokens) {
        const plain = tryDecodeBase64(candidate, safe);
        if (plain) {
            const plainCanonical = normalizeForMatch(plain);
            INJECTION_PHRASES
                .filter((phrase) => plainCanonical.includes(phrase))
                .forEach((phrase) => result.base64DecodedMatches.add(phrase));
        }
    }
}
|
|
114
318
|
/**
|
|
115
|
-
*
|
|
116
|
-
*
|
|
319
|
+
* Back-compat wrapper: legacy callers (and the old audit-metadata consumer)
|
|
320
|
+
* received a flat `string[]` of "literal: …" / "base64-encoded: …" descriptions.
|
|
321
|
+
* Kept as an exported helper so `scripts/lint-safe-regex.mjs` and any external
|
|
322
|
+
* consumer that imported it continue to work. New code should call
|
|
323
|
+
* `scanStringForInjection` directly.
|
|
117
324
|
*/
|
|
118
|
-
function
|
|
325
|
+
export function scanForInjection(input, safe) {
    // Scan into fresh buckets, then flatten them into the legacy string form:
    // literal hits first, base64-decoded hits second, each in Set order.
    const buckets = {
        literalMatches: new Set(),
        base64DecodedMatches: new Set(),
    };
    scanStringForInjection(input, buckets, safe);
    return [
        ...Array.from(buckets.literalMatches, (phrase) => `literal: "${phrase}"`),
        ...Array.from(buckets.base64DecodedMatches, (phrase) => `base64-encoded: "${phrase}"`),
    ];
}
|
|
338
|
+
/**
|
|
339
|
+
* Recursively scan an unknown value (string, array, or plain object) and
|
|
340
|
+
* accumulate matches into the supplied `InjectionScanResult` buckets.
|
|
341
|
+
*/
|
|
342
|
+
export function scanValueForInjection(value, result, safe) {
    // Containers on the current root-to-node path. Meeting one again means the
    // value is self-referential; skipping it prevents unbounded recursion.
    // Entries are removed on the way back up, so a container shared by two
    // siblings (not a cycle) is still scanned once per reference.
    const ancestors = new Set();
    const walk = (node) => {
        if (typeof node === 'string') {
            scanStringForInjection(node, result, safe);
            return;
        }
        // Non-string primitives, null and functions contain nothing to scan.
        if (node === null || typeof node !== 'object')
            return;
        if (ancestors.has(node))
            return; // cycle
        ancestors.add(node);
        // NOTE(review): only property VALUES are scanned; property names are
        // not. Confirm that is acceptable for downstream results whose keys
        // are attacker-controlled.
        const children = Array.isArray(node) ? node : Object.values(node);
        for (const child of children)
            walk(child);
        ancestors.delete(node);
    };
    walk(value);
}
|
|
358
|
+
/**
 * Turn accumulated scan hits into a classifier verdict.
 *
 * Rules, evaluated in order:
 *  1. No literal and no base64-decoded hits → `{ verdict: 'clean' }`.
 *  2. Any base64-decoded hit → `likely_injection` (`base64_decoded: true`).
 *  3. Two or more distinct literal phrases → `likely_injection`.
 *  4. A single literal at read tier, or with no tier at all (`undefined` or
 *     `null`) → `likely_injection` (fail closed).
 *  5. Otherwise (a single literal at any other tier) → `suspicious`.
 *
 * @param scan object with `literalMatches` and `base64DecodedMatches` Sets of
 *   matched phrases.
 * @param tier tier of the invocation; `undefined`/`null` is treated as
 *   "classification unavailable".
 * @returns `{ verdict: 'clean' }`, or `{ verdict, matched_patterns,
 *   base64_decoded }` where `matched_patterns` is the sorted, de-duplicated
 *   union of both Sets.
 */
export function classifyInjection(scan, tier) {
    const literalCount = scan.literalMatches.size;
    const base64Count = scan.base64DecodedMatches.size;
    if (literalCount === 0 && base64Count === 0) {
        return { verdict: 'clean' };
    }
    // A phrase seen both literally and inside a decoded payload is reported
    // once: union through a Set, then sort for a stable order.
    const matched = [
        ...new Set([...scan.literalMatches, ...scan.base64DecodedMatches]),
    ].sort();
    // Rule 2 — base64 always escalates, regardless of count or tier.
    if (base64Count > 0) {
        return {
            verdict: 'likely_injection',
            matched_patterns: matched,
            base64_decoded: true,
        };
    }
    // Rule 3 — multiple distinct literal phrases always escalate.
    if (literalCount >= 2) {
        return {
            verdict: 'likely_injection',
            matched_patterns: matched,
            base64_decoded: false,
        };
    }
    // Rule 4 — a hit at read tier, or with no tier, is treated as likely.
    // `== null` covers both `undefined` and `null`, so a missing tier in
    // either form fails closed instead of dropping to `suspicious`.
    if (tier == null || tier === Tier.Read) {
        return {
            verdict: 'likely_injection',
            matched_patterns: matched,
            base64_decoded: false,
        };
    }
    // TODO (G9-follow-up): per-pattern "deny-tag" metadata can force this
    // branch to `likely_injection` even for a single literal at write tier.
    // Rule 5 — exactly one literal at a non-read tier.
    return {
        verdict: 'suspicious',
        matched_patterns: matched,
        base64_decoded: false,
    };
}
|
|
406
|
+
/**
|
|
407
|
+
* Maximum result size (in UTF-8 bytes) that the injection scanner will attempt
|
|
408
|
+
* to scan. Payloads larger than this cannot be scanned within the 100ms timeout
|
|
409
|
+
* budget before result-size-cap (which runs later in the chain) has had a
|
|
410
|
+
* chance to truncate them. Treat oversized payloads the same as a scan timeout:
|
|
411
|
+
* deny in block mode, pass in warn mode.
|
|
412
|
+
*/
|
|
413
|
+
const MAX_RESULT_SCAN_BYTES = 2 * 1024 * 1024; // 2 MiB
|
|
135
414
|
/**
|
|
136
415
|
* Record a regex-timeout event on `ctx.metadata`. Array-valued so multiple
|
|
137
416
|
* timeouts in one invocation are all recorded.
|
|
@@ -155,50 +434,153 @@ function recordInjectionTimeout(ctx, patternId, inputBytes, timeoutMs) {
|
|
|
155
434
|
}
|
|
156
435
|
}
|
|
157
436
|
/**
|
|
158
|
-
* PostToolUse middleware:
|
|
437
|
+
* PostToolUse middleware: classifies tool results for prompt injection.
|
|
438
|
+
*
|
|
439
|
+
* G9 tiered classifier:
|
|
440
|
+
* - `clean` → allow, no log
|
|
441
|
+
* - `suspicious` → warn (stderr + audit metadata under `injection` with `verdict: 'suspicious'`).
|
|
442
|
+
* Denies only when `suspiciousBlocksWrites: true`.
|
|
443
|
+
* - `likely_injection` → always deny, always log.
|
|
159
444
|
*
|
|
160
445
|
* Operates on tool output (ctx.result) returned from downstream MCP servers.
|
|
161
|
-
* On detection:
|
|
162
|
-
* - Always logs to audit metadata and emits a warning to stderr.
|
|
163
|
-
* - If action is 'block' (default), sets ctx.status to Denied and blocks the result.
|
|
164
|
-
* - If action is 'warn', allows the result through with a warning only.
|
|
165
446
|
*
|
|
166
|
-
* SECURITY: Checking PostToolUse (after downstream execution, before the
|
|
167
|
-
* reaches the LLM) is the correct place to catch injection in tool
|
|
168
|
-
* and resource content coming from potentially untrusted
|
|
447
|
+
* SECURITY: Checking PostToolUse (after downstream execution, before the
|
|
448
|
+
* result reaches the LLM) is the correct place to catch injection in tool
|
|
449
|
+
* descriptions and resource content coming from potentially untrusted
|
|
450
|
+
* downstream servers.
|
|
169
451
|
*
|
|
170
452
|
* SECURITY (G3): The only regexes this middleware runs are wrapped in
|
|
171
453
|
* `SafeRegex` with a 100ms default per-call timeout. On timeout the scanner
|
|
172
454
|
* records an audit event; in block mode the call is then denied (fail closed),
|
|
173
455
|
* while in warn mode it proceeds on whatever the literal substring checks found.
|
|
456
|
+
*
|
|
457
|
+
* The legacy `action` parameter (`'block' | 'warn'`) selects the fallback
|
|
458
|
+
* behavior for `suspicious` verdicts when the G9 flag is unset — preserving
|
|
459
|
+
* 0.2.x `injection_detection: 'warn'` semantics for operators who pinned it.
|
|
460
|
+
* `likely_injection` ignores this parameter.
|
|
174
461
|
*/
|
|
175
462
|
export function createInjectionMiddleware(action = 'block', opts = {}) {
|
|
176
463
|
const timeoutMs = opts.matchTimeoutMs ?? 100;
|
|
464
|
+
// Default `suspiciousBlocksWrites` to `false` when unset to preserve 0.3.x
|
|
465
|
+
// behavior for existing installs that omit the `injection:` policy block.
|
|
466
|
+
// A consumer who had `injection_detection: block` in 0.3.x without the new
|
|
467
|
+
// field would otherwise silently start hard-failing benign tool writes that
|
|
468
|
+
// contain a single matching phrase on upgrade — a breaking change disguised
|
|
469
|
+
// as a default. The tighter posture (single literal hit → deny) must be
|
|
470
|
+
// opted into explicitly via `injection.suspicious_blocks_writes: true`, or
|
|
471
|
+
// by using a profile (e.g. bst-internal) that already sets it.
|
|
472
|
+
//
|
|
473
|
+
// Fail-closed-on-timeout (Finding 1 fix) already tightens security for
|
|
474
|
+
// incomplete scans; this default preserves parity for complete scans.
|
|
475
|
+
const denyOnSuspicious = action === 'warn'
|
|
476
|
+
? false // warn mode hard-overrides suspicious deny — 0.2.x parity with `injection_detection: warn`
|
|
477
|
+
: (opts.suspiciousBlocksWrites ?? false); // block mode: default false (0.3.x default preserved)
|
|
177
478
|
return async (ctx, next) => {
|
|
178
479
|
await next();
|
|
179
480
|
// Only scan if we have a result to inspect
|
|
180
481
|
if (ctx.result == null)
|
|
181
482
|
return;
|
|
483
|
+
// Pre-scan size check: if the result is too large to scan within the timeout
|
|
484
|
+
// budget, treat as a timeout. Result-size-cap runs later in the chain, so we
|
|
485
|
+
// bound the scan here rather than relying on downstream truncation.
|
|
486
|
+
const resultBytes = Buffer.byteLength(JSON.stringify(ctx.result), 'utf8');
|
|
487
|
+
if (resultBytes > MAX_RESULT_SCAN_BYTES) {
|
|
488
|
+
if (action === 'block') {
|
|
489
|
+
const errorMeta = {
|
|
490
|
+
verdict: 'error',
|
|
491
|
+
matched_patterns: [],
|
|
492
|
+
base64_decoded: false,
|
|
493
|
+
};
|
|
494
|
+
ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
|
|
495
|
+
ctx.status = InvocationStatus.Denied;
|
|
496
|
+
ctx.error = `injection scan skipped — result exceeds ${MAX_RESULT_SCAN_BYTES} bytes; failing closed under block policy`;
|
|
497
|
+
return;
|
|
498
|
+
}
|
|
499
|
+
// warn mode: let through — result-size-cap will truncate downstream.
|
|
500
|
+
return;
|
|
501
|
+
}
|
|
502
|
+
// G9 follow-up (finding #4): track scanner timeout via a closure flag so
|
|
503
|
+
// we can emit a stable `verdict: 'error'` metadata record alongside the
|
|
504
|
+
// existing `injection.regex_timeout` event. Downstream audit consumers
|
|
505
|
+
// that key off `metadata.injection.verdict` no longer see a bare timing
|
|
506
|
+
// record with no verdict shape.
|
|
507
|
+
let scanTimedOut = false;
|
|
182
508
|
const safe = compileInjectionPatterns(timeoutMs, (patternId, input) => {
|
|
509
|
+
scanTimedOut = true;
|
|
183
510
|
recordInjectionTimeout(ctx, patternId, Buffer.byteLength(input, 'utf8'), timeoutMs);
|
|
184
511
|
});
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
|
|
512
|
+
const scan = {
|
|
513
|
+
literalMatches: new Set(),
|
|
514
|
+
base64DecodedMatches: new Set(),
|
|
515
|
+
};
|
|
516
|
+
scanValueForInjection(ctx.result, scan, safe);
|
|
517
|
+
// Fail closed: in block mode, ANY timeout denies — regardless of what the
|
|
518
|
+
// partial scan found. An incomplete scan cannot prove the unscanned suffix
|
|
519
|
+
// is safe. If the provisional classification were `suspicious` (one early
|
|
520
|
+
// literal hit before the timeout), falling through to the normal policy
|
|
521
|
+
// path could still allow the call under `suspiciousBlocksWrites: false`,
|
|
522
|
+
// even though the unscanned suffix might contain a second phrase that
|
|
523
|
+
// would have escalated to `likely_injection`. Hoisting this check before
|
|
524
|
+
// `classifyInjection` closes that gap.
|
|
525
|
+
if (scanTimedOut && action === 'block') {
|
|
526
|
+
const errorMeta = {
|
|
527
|
+
verdict: 'error',
|
|
528
|
+
matched_patterns: [],
|
|
529
|
+
base64_decoded: false,
|
|
530
|
+
};
|
|
531
|
+
ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
|
|
532
|
+
process.stderr.write(`[rea] INJECTION-GUARD (error): regex-timeout during scan of tool "${ctx.tool_name}" result; verdict inconclusive\n`);
|
|
533
|
+
ctx.status = InvocationStatus.Denied;
|
|
534
|
+
ctx.error = 'injection scan timed out — failing closed under block policy';
|
|
535
|
+
return; // next() already ran above — stop here so the timed-out scan is never classified
|
|
536
|
+
}
|
|
537
|
+
const classification = classifyInjection(scan, ctx.tier);
|
|
538
|
+
// warn/log mode + timeout: fail-open — emit a verdict:'error' metadata
|
|
539
|
+
// record alongside the existing injection.regex_timeout event so
|
|
540
|
+
// downstream audit consumers see a stable verdict shape, then allow
|
|
541
|
+
// through. This branch only fires in warn mode (block mode was handled
|
|
542
|
+
// above) when no actionable signal was collected before the timeout.
|
|
543
|
+
if (scanTimedOut && classification.verdict === 'clean') {
|
|
544
|
+
const errorMeta = {
|
|
545
|
+
verdict: 'error',
|
|
546
|
+
matched_patterns: [],
|
|
547
|
+
base64_decoded: false,
|
|
548
|
+
};
|
|
549
|
+
ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
|
|
550
|
+
process.stderr.write(`[rea] INJECTION-GUARD (error): regex-timeout during scan of tool "${ctx.tool_name}" result; verdict inconclusive\n`);
|
|
551
|
+
// warn/log mode: let through but record — verdict:'error' is written above.
|
|
552
|
+
return;
|
|
553
|
+
}
|
|
554
|
+
if (classification.verdict === 'clean')
|
|
188
555
|
return;
|
|
189
|
-
//
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
556
|
+
// Write audit metadata. Export verdict + distinct matched phrases +
|
|
557
|
+
// base64 flag. NEVER export the input text.
|
|
558
|
+
const auditMeta = {
|
|
559
|
+
verdict: classification.verdict,
|
|
560
|
+
matched_patterns: classification.matched_patterns,
|
|
561
|
+
base64_decoded: classification.base64_decoded,
|
|
562
|
+
};
|
|
563
|
+
ctx.metadata[INJECTION_METADATA_KEY] = auditMeta;
|
|
564
|
+
// Always emit a stderr warning. Operators rely on this as the live signal.
|
|
565
|
+
process.stderr.write(`[rea] INJECTION-GUARD (${classification.verdict}): pattern(s) detected in tool "${ctx.tool_name}" result\n`);
|
|
566
|
+
for (const p of classification.matched_patterns) {
|
|
567
|
+
process.stderr.write(` Pattern: ${p}\n`);
|
|
568
|
+
}
|
|
569
|
+
if (classification.base64_decoded) {
|
|
570
|
+
process.stderr.write(` Base64-decoded match detected\n`);
|
|
197
571
|
}
|
|
198
|
-
process.stderr.write(` Action:
|
|
199
|
-
|
|
572
|
+
process.stderr.write(` Action: review the downstream server "${ctx.server_name}" for compromise.\n`);
|
|
573
|
+
// Deny policy:
|
|
574
|
+
// likely_injection → always deny
|
|
575
|
+
// suspicious → deny iff denyOnSuspicious (constructed above)
|
|
576
|
+
const shouldDeny = classification.verdict === 'likely_injection' ||
|
|
577
|
+
(classification.verdict === 'suspicious' && denyOnSuspicious);
|
|
578
|
+
if (shouldDeny) {
|
|
200
579
|
ctx.status = InvocationStatus.Denied;
|
|
201
|
-
ctx.error =
|
|
580
|
+
ctx.error =
|
|
581
|
+
classification.verdict === 'likely_injection'
|
|
582
|
+
? `Likely prompt injection detected in tool result (${classification.matched_patterns.length} pattern(s), base64=${classification.base64_decoded}). Result blocked.`
|
|
583
|
+
: `Suspicious prompt injection pattern in tool result (1 pattern at ${String(ctx.tier)} tier). Result blocked by policy.`;
|
|
202
584
|
ctx.result = undefined;
|
|
203
585
|
}
|
|
204
586
|
};
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { Middleware } from './chain.js';
|
|
2
|
+
import type { MetricsRegistry } from '../observability/metrics.js';
|
|
2
3
|
/**
|
|
3
4
|
* HALT semantic guarantee:
|
|
4
5
|
* - HALT is read exactly once per invocation, at the top of this middleware layer.
|
|
@@ -22,4 +23,12 @@ import type { Middleware } from './chain.js';
|
|
|
22
23
|
* - The decision is recorded on `ctx.metadata.halt_decision` for audit and is
|
|
23
24
|
* never re-consulted by downstream middleware.
|
|
24
25
|
*/
|
|
25
|
-
export declare function createKillSwitchMiddleware(baseDir: string
|
|
26
|
+
export declare function createKillSwitchMiddleware(baseDir: string,
|
|
27
|
+
/**
|
|
28
|
+
* Optional metrics registry. When supplied, every invocation marks the
|
|
29
|
+
* `rea_seconds_since_last_halt_check` gauge with a fresh timestamp so the
|
|
30
|
+
* exposed gauge reflects real per-call check cadence rather than the
|
|
31
|
+
* startup-time mark `rea serve` sets once. When omitted, no metric is
|
|
32
|
+
* emitted.
|
|
33
|
+
*/
|
|
34
|
+
metrics?: MetricsRegistry): Middleware;
|
|
@@ -28,9 +28,28 @@ const HALT_FILE = 'HALT';
|
|
|
28
28
|
* - The decision is recorded on `ctx.metadata.halt_decision` for audit and is
|
|
29
29
|
* never re-consulted by downstream middleware.
|
|
30
30
|
*/
|
|
31
|
-
export function createKillSwitchMiddleware(baseDir
|
|
31
|
+
export function createKillSwitchMiddleware(baseDir,
|
|
32
|
+
/**
|
|
33
|
+
* Optional metrics registry. When supplied, every invocation marks the
|
|
34
|
+
* `rea_seconds_since_last_halt_check` gauge with a fresh timestamp so the
|
|
35
|
+
* exposed gauge reflects real per-call check cadence rather than the
|
|
36
|
+
* startup-time mark `rea serve` sets once. When omitted, no metric is
|
|
37
|
+
* emitted.
|
|
38
|
+
*/
|
|
39
|
+
metrics) {
|
|
32
40
|
return async (ctx, next) => {
|
|
33
41
|
const haltPath = path.join(baseDir, REA_DIR, HALT_FILE);
|
|
42
|
+
// Record the HALT-check attempt BEFORE we probe the filesystem so the
|
|
43
|
+
// gauge reflects "how long since we last looked", regardless of whether
|
|
44
|
+
// this check succeeds or fails. Fresh on every invocation; failure to
|
|
45
|
+
// update metrics must not crash the gateway.
|
|
46
|
+
try {
|
|
47
|
+
metrics?.markHaltCheck();
|
|
48
|
+
}
|
|
49
|
+
catch {
|
|
50
|
+
// Metrics registry implementations are expected to be infallible,
|
|
51
|
+
// but we refuse to let them take down the chain in any case.
|
|
52
|
+
}
|
|
34
53
|
let fh;
|
|
35
54
|
try {
|
|
36
55
|
fh = await fs.open(haltPath, fsConstants.O_RDONLY);
|