npm - @bookedsolid/rea - Versions diffs - 0.2.1 → 0.4.0 - Mend

@bookedsolid/rea 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/.husky/pre-push +15 -18
package/README.md +41 -1
package/THREAT_MODEL.md +100 -29
package/dist/audit/append.d.ts +21 -8
package/dist/audit/append.js +48 -83
package/dist/audit/fs.d.ts +68 -0
package/dist/audit/fs.js +171 -0
package/dist/cli/audit.d.ts +40 -0
package/dist/cli/audit.js +205 -0
package/dist/cli/doctor.d.ts +19 -4
package/dist/cli/doctor.js +172 -5
package/dist/cli/index.js +26 -1
package/dist/cli/init.js +93 -7
package/dist/cli/install/pre-push.d.ts +335 -0
package/dist/cli/install/pre-push.js +2818 -0
package/dist/cli/serve.d.ts +64 -0
package/dist/cli/serve.js +270 -2
package/dist/cli/status.d.ts +90 -0
package/dist/cli/status.js +399 -0
package/dist/cli/utils.d.ts +4 -0
package/dist/cli/utils.js +4 -0
package/dist/gateway/audit/rotator.d.ts +116 -0
package/dist/gateway/audit/rotator.js +289 -0
package/dist/gateway/circuit-breaker.d.ts +17 -0
package/dist/gateway/circuit-breaker.js +32 -3
package/dist/gateway/downstream-pool.d.ts +2 -1
package/dist/gateway/downstream-pool.js +2 -2
package/dist/gateway/downstream.d.ts +39 -3
package/dist/gateway/downstream.js +73 -14
package/dist/gateway/log.d.ts +122 -0
package/dist/gateway/log.js +334 -0
package/dist/gateway/middleware/audit.d.ts +24 -1
package/dist/gateway/middleware/audit.js +103 -58
package/dist/gateway/middleware/blocked-paths.d.ts +0 -9
package/dist/gateway/middleware/blocked-paths.js +439 -67
package/dist/gateway/middleware/injection.d.ts +218 -13
package/dist/gateway/middleware/injection.js +433 -51
package/dist/gateway/middleware/kill-switch.d.ts +10 -1
package/dist/gateway/middleware/kill-switch.js +20 -1
package/dist/gateway/observability/metrics.d.ts +125 -0
package/dist/gateway/observability/metrics.js +321 -0
package/dist/gateway/server.d.ts +19 -0
package/dist/gateway/server.js +99 -15
package/dist/policy/loader.d.ts +47 -0
package/dist/policy/loader.js +47 -0
package/dist/policy/profiles.d.ts +13 -0
package/dist/policy/profiles.js +12 -0
package/dist/policy/types.d.ts +52 -0
package/dist/registry/fingerprint.d.ts +73 -0
package/dist/registry/fingerprint.js +81 -0
package/dist/registry/fingerprints-store.d.ts +62 -0
package/dist/registry/fingerprints-store.js +111 -0
package/dist/registry/interpolate.d.ts +58 -0
package/dist/registry/interpolate.js +121 -0
package/dist/registry/loader.d.ts +2 -2
package/dist/registry/loader.js +22 -1
package/dist/registry/tofu-gate.d.ts +41 -0
package/dist/registry/tofu-gate.js +189 -0
package/dist/registry/tofu.d.ts +111 -0
package/dist/registry/tofu.js +173 -0
package/dist/registry/types.d.ts +9 -1
package/package.json +3 -1
package/profiles/bst-internal-no-codex.yaml +5 -0
package/profiles/bst-internal.yaml +7 -0
package/scripts/tarball-smoke.sh +197 -0

package/dist/gateway/middleware/injection.js CHANGED Viewed

@@ -1,9 +1,21 @@
-import { InvocationStatus } from '../../policy/types.js';
+import { z } from 'zod';
+import { InvocationStatus, Tier } from '../../policy/types.js';
 import { wrapRegex } from '../redact-safe/match-timeout.js';
 /**
  * Known prompt injection phrases (lowercase for case-insensitive matching).
  * These patterns are commonly used to override system instructions in tool
  * descriptions or resource content returned by downstream MCP servers.
+ *
+ * SECURITY (G9 follow-up): inputs are NFKC-normalized, whitespace-collapsed,
+ * and zero-width-stripped before matching (see `normalizeForMatch`). That
+ * means the phrases below can safely be written with plain ASCII spaces —
+ * the normalizer will fold NBSP, en-space, em-space, zero-width joiners,
+ * etc. into the same form so crafted Unicode variants cannot bypass.
+ *
+ * The pattern library is intentionally terse. Extending it is follow-up
+ * work (G9.1): pattern-set extensibility via policy is out of scope for
+ * this patch. Phrases added here must be short, lowercase, and tolerate
+ * the normalization pipeline (no Unicode, no non-ASCII punctuation).
  */
 export const INJECTION_PHRASES = [
     'ignore previous instructions',
@@ -16,7 +28,65 @@ export const INJECTION_PHRASES = [
     // is what distinguishes injection from ordinary status messages.
     'you are now a ',
     'you are now an ',
+    // G9 follow-up: additional role-reassignment / persona-swap vectors.
+    // Intentionally NARROW: broader phrases like "act as a" / "act as an"
+    // were considered and dropped because at read-tier any literal hit
+    // escalates to `likely_injection`, which would deny benign prose such
+    // as "this proxy can act as a bridge" or "the service can act as an
+    // intermediary." The phrases below all contain a direct second-person
+    // address ("you") or an explicit roleplay framing ("roleplay as"),
+    // which is rare in ordinary documentation/chat content.
+    'pretend you are ',
+    'roleplay as ',
 ];
+/**
+ * G9 follow-up — Unicode whitespace class covering the most common bypass
+ * vectors for literal-phrase matchers: NBSP (U+00A0), OGHAM SPACE MARK
+ * (U+1680), the EN/EM/THIN/HAIR space family (U+2000..U+200A), NARROW NBSP
+ * (U+202F), MEDIUM MATHEMATICAL SPACE (U+205F), IDEOGRAPHIC SPACE (U+3000).
+ * Collapsed to a single ASCII space before matching.
+ */
+const UNICODE_WHITESPACE_RE = /[\s\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]+/g;
+/**
+ * G9 follow-up (Codex round-2, finding #1) — strip all Default_Ignorable_Code_Point
+ * characters before matching. The Unicode property `Default_Ignorable_Code_Point`
+ * covers every codepoint that is invisible and has no glyph in standard rendering:
+ * soft hyphen (U+00AD), combining grapheme joiner (U+034F), Arabic letter mark
+ * (U+061C), Mongolian vowel separator (U+180E), zero-width space/non-joiner/joiner
+ * (U+200B–U+200D), word joiner (U+2060), invisible times/separator/plus
+ * (U+2062–U+2064), BIDI isolation controls (U+2066–U+2069), variation selector-16
+ * (U+FE0F), zero-width no-break space / BOM (U+FEFF), and others.
+ *
+ * Using `\p{Default_Ignorable_Code_Point}` (requires the `u` flag, Node 22+)
+ * is future-proof: new Default_Ignorable codepoints added to Unicode are
+ * automatically covered without updating this regex.
+ */
+const IGNORABLE_CP_RE = /\p{Default_Ignorable_Code_Point}/gu;
+/**
+ * G9 follow-up — normalize an input string to a canonical form for literal
+ * phrase matching.
+ *
+ *   1. NFKC Unicode normalization — folds compatibility forms (fullwidth
+ *      letters, mathematical alphanumerics) into ASCII equivalents.
+ *   2. Strip all Default_Ignorable_Code_Point characters — invisible codepoints
+ *      that have no rendering and are used only to visually split or obscure
+ *      injection keywords (soft hyphen, zero-width joiners/non-joiners/spaces,
+ *      BIDI isolation controls, variation selectors, BOM, etc.).
+ *   3. Collapse any run of Unicode whitespace (including NBSP, en/em space)
+ *      to a single ASCII space.
+ *   4. Lowercase — matches the case-insensitive contract of INJECTION_PHRASES.
+ *
+ * NEVER logs or exports the normalized text; it is used only for match-time
+ * comparison. The audit record still surfaces the PHRASE that matched, not
+ * the normalized input.
+ */
+export function normalizeForMatch(input) {
+    return input
+        .normalize('NFKC')
+        .replace(IGNORABLE_CP_RE, '')
+        .replace(UNICODE_WHITESPACE_RE, ' ')
+        .toLowerCase();
+}
 /**
  * Base64-token scanner regex. The only regex the injection middleware runs
  * against untrusted payloads; wrapped in `SafeRegex` at middleware creation
@@ -35,6 +105,33 @@ export const INJECTION_BASE64_SHAPE = /^[A-Za-z0-9+/]+=*$/;
  * one invocation append to an array under this key.
  */
 export const INJECTION_TIMEOUT_METADATA_KEY = 'injection.regex_timeout';
+/**
+ * Audit metadata key for the classifier verdict. The value is an
+ * `InjectionClassifierMetadata` object.
+ */
+export const INJECTION_METADATA_KEY = 'injection';
+/**
+ * G9 follow-up — zod schema for the `ctx.metadata.injection` record the
+ * middleware emits. Every emitted record has a `verdict` field; the schema
+ * exists so internal test code (and a follow-up public surface, once we
+ * decide how to expose audit-record types) can catch shape regressions —
+ * notably the pre-fix behavior where a regex-timeout emitted timing
+ * metadata under a different key without ever writing a verdict.
+ *
+ * INTERNAL today. Not reachable via the published package `exports` map
+ * (only `.`, `./policy`, `./middleware`, and `./audit` are public). If
+ * downstream consumers (e.g. Helix) need to validate audit records they
+ * read off `.rea/audit.jsonl`, we will promote this to a public entrypoint
+ * in a follow-up (filed as G9.2). Do not rely on this symbol from outside
+ * the rea repo yet.
+ */
+export const InjectionMetadataSchema = z
+    .object({
+    verdict: z.enum(['suspicious', 'likely_injection', 'error']),
+    matched_patterns: z.array(z.string()),
+    base64_decoded: z.boolean(),
+})
+    .strict();
 /**
  * Decode a base64 string, returning the decoded text or null if decoding fails.
  * Only decodes if the input looks like base64 (64-char alphabet, length divisible by 4 or padded).
@@ -53,6 +150,108 @@ function tryDecodeBase64(input, safe) {
         return null;
     }
 }
+/**
+ * Minimum token length considered for standalone base64 probing via
+ * `decodeBase64Strings`. Below this, the decoded payload is too short to
+ * plausibly contain an injection phrase (the shortest phrase in
+ * `INJECTION_PHRASES` is 16 chars; 24 base64 chars → 18 decoded chars, with
+ * some slack for leading/trailing noise).
+ */
+const MIN_BASE64_PROBE_LENGTH = 24;
+/**
+ * Maximum token length considered for standalone base64 probing via
+ * `decodeBase64Strings`. Strings longer than this are skipped — base64
+ * payloads this large are unlikely to be valid whole-string injection
+ * vectors (they would need padding-aligned framing) and decoding them
+ * unboundedly can force significant CPU/memory. 16 KiB gives ample room
+ * for any plausible injection phrase.
+ */
+const MAX_BASE64_PROBE_LENGTH = 16384; // 16 KiB — longer strings are skipped without decoding
+/**
+ * G9 — printable-ASCII ratio threshold for accepting a base64 decode as a
+ * potential injection payload. The spec requires ≥95% printable characters
+ * and no null bytes; stricter than the inline decoder used by
+ * `scanForInjection` (which accepts any successful UTF-8 decode) because this
+ * probe is used to FORCE-escalate to `likely_injection`, and we want the
+ * probe's positive signal to be near-certain.
+ */
+const BASE64_PRINTABLE_RATIO = 0.95;
+/**
+ * Return true when `s` is printable-enough to plausibly be an injection
+ * payload. Printable = ASCII 0x20..0x7E, plus tab/newline/CR. Null bytes
+ * (often used for payload truncation games) disqualify the string outright.
+ */
+function isPrintableDecoded(s) {
+    if (s.length === 0)
+        return false;
+    let printable = 0;
+    for (let i = 0; i < s.length; i++) {
+        const code = s.charCodeAt(i);
+        if (code === 0)
+            return false; // null byte → fail closed
+        if ((code >= 0x20 && code <= 0x7e) ||
+            code === 0x09 /* tab */ ||
+            code === 0x0a /* LF */ ||
+            code === 0x0d /* CR */) {
+            printable++;
+        }
+    }
+    return printable / s.length >= BASE64_PRINTABLE_RATIO;
+}
+/**
+ * G9 — pure helper that walks an arbitrary `unknown` value and returns every
+ * successfully decoded base64-looking string. Decoding is attempted only for
+ * strings that:
+ *   - are ≥ `MIN_BASE64_PROBE_LENGTH` (24) chars
+ *   - have length divisible by 4 (base64 framing)
+ *   - match the `INJECTION_BASE64_SHAPE` (`^[A-Za-z0-9+/]+=*$`)
+ *   - decode to a UTF-8 string that is ≥95% printable and contains no null bytes
+ *
+ * NOTE: This function is NOT called from the middleware body. The inline base64
+ * probe in `scanStringForInjection` (via `INJECTION_BASE64_PATTERN`) already
+ * covers embedded base64 token detection. Calling `decodeBase64Strings` as a
+ * second full-tree pass would duplicate that work and add an avoidable DoS
+ * amplification surface (full tree traversal + decoded-string allocation for
+ * every base64-shaped leaf). This function is exported for testing and external
+ * use only.
+ */
+export function decodeBase64Strings(input) {
+    const out = [];
+    const visit = (v) => {
+        if (typeof v === 'string') {
+            if (v.length < MIN_BASE64_PROBE_LENGTH)
+                return;
+            if (v.length > MAX_BASE64_PROBE_LENGTH)
+                return;
+            if (v.length % 4 !== 0)
+                return;
+            if (!INJECTION_BASE64_SHAPE.test(v))
+                return;
+            let decoded;
+            try {
+                decoded = Buffer.from(v, 'base64').toString('utf8');
+            }
+            catch {
+                return;
+            }
+            if (!isPrintableDecoded(decoded))
+                return;
+            out.push(decoded);
+            return;
+        }
+        if (Array.isArray(v)) {
+            for (const item of v)
+                visit(item);
+            return;
+        }
+        if (v !== null && typeof v === 'object') {
+            for (const val of Object.values(v))
+                visit(val);
+        }
+    };
+    visit(input);
+    return out;
+}
 /**
  * Build compiled injection patterns with the provided timeout. Precompiled at
  * middleware creation so the worker spawn is the only per-call overhead.
@@ -74,64 +273,144 @@ export function compileInjectionPatterns(timeoutMs, onTimeout) {
     };
 }
 /**
- * Scan a string for known prompt injection phrases.
- * Also decodes base64 tokens and checks the decoded content.
- * Returns an array of matched phrase descriptions, empty if clean.
+ * Scan a single string and record hits into the provided `InjectionScanResult`
+ * buckets. Exported for test surface and for callers who want to scan a known
+ * string without walking a tree.
+ *
+ * - Literal matches (case-insensitive substring) go into `literalMatches`.
+ * - Base64-decoded matches (tokens extracted via `INJECTION_BASE64_PATTERN`,
+ *   decoded, then re-scanned for literals) go into `base64DecodedMatches`.
  *
- * The `safe` parameter carries precompiled SafeRegex wrappers; callers build
- * it once via `compileInjectionPatterns`.
+ * Set semantics dedupe by phrase: the same phrase matched five times in one
+ * string counts as one distinct pattern, which is intentional for the
+ * classifier's "≥2 distinct patterns → likely" rule.
  */
-export function scanForInjection(input, safe) {
+export function scanStringForInjection(input, result, safe) {
     if (!input || typeof input !== 'string')
-        return [];
-    const lower = input.toLowerCase();
-    const matches = [];
-    // Check literal phrases (indexOf — no regex, no ReDoS surface).
+        return;
+    // G9 follow-up: normalize before matching so NBSP / zero-width / fullwidth
+    // variants of injection phrases cannot bypass the literal check. The raw
+    // input is still scanned by the base64 tokenizer (SafeRegex expects the
+    // pre-normalization bytes).
+    const normalized = normalizeForMatch(input);
+    // Literal phrases (indexOf — no regex, no ReDoS surface).
     for (const phrase of INJECTION_PHRASES) {
-        if (lower.includes(phrase)) {
-            matches.push(`literal: "${phrase}"`);
+        if (normalized.includes(phrase)) {
+            result.literalMatches.add(phrase);
         }
     }
-    // Check base64-encoded variants — scan word-like tokens that look like
-    // base64. The regex match is bounded via SafeRegex (timeout + hard worker
-    // kill).
+    // Embedded base64 tokens. SafeRegex wraps the scan so a pathological input
+    // cannot hang the event loop.
     const tokenResult = safe.base64Token.matchAll(input);
     const base64Tokens = tokenResult.matches;
     for (const token of base64Tokens) {
         const decoded = tryDecodeBase64(token, safe);
         if (!decoded)
             continue;
-        const decodedLower = decoded.toLowerCase();
+        const decodedNormalized = normalizeForMatch(decoded);
         for (const phrase of INJECTION_PHRASES) {
-            if (decodedLower.includes(phrase)) {
-                matches.push(`base64-encoded: "${phrase}"`);
-                break; // One report per token is enough
+            if (decodedNormalized.includes(phrase)) {
+                result.base64DecodedMatches.add(phrase);
             }
         }
     }
-    return matches;
 }
 /**
- * Scan an unknown value recursively, collecting all injection matches.
- * Walks strings, arrays, and plain objects.
+ * Back-compat wrapper: legacy callers (and the old audit-metadata consumer)
+ * received a flat `string[]` of "literal: …" / "base64-encoded: …" descriptions.
+ * Kept as an exported helper so `scripts/lint-safe-regex.mjs` and any external
+ * consumer that imported it continue to work. New code should call
+ * `scanStringForInjection` directly.
  */
-function scanValue(value, matches, safe) {
+export function scanForInjection(input, safe) {
+    const result = {
+        literalMatches: new Set(),
+        base64DecodedMatches: new Set(),
+    };
+    scanStringForInjection(input, result, safe);
+    const out = [];
+    for (const p of result.literalMatches)
+        out.push(`literal: "${p}"`);
+    for (const p of result.base64DecodedMatches)
+        out.push(`base64-encoded: "${p}"`);
+    return out;
+}
+/**
+ * Recursively scan an unknown value (string, array, or plain object) and
+ * accumulate matches into the supplied `InjectionScanResult` buckets.
+ */
+export function scanValueForInjection(value, result, safe) {
     if (typeof value === 'string') {
-        matches.push(...scanForInjection(value, safe));
+        scanStringForInjection(value, result, safe);
         return;
     }
     if (Array.isArray(value)) {
-        for (const item of value) {
-            scanValue(item, matches, safe);
-        }
+        for (const item of value)
+            scanValueForInjection(item, result, safe);
         return;
     }
     if (value !== null && typeof value === 'object') {
         for (const v of Object.values(value)) {
-            scanValue(v, matches, safe);
+            scanValueForInjection(v, result, safe);
         }
     }
 }
+export function classifyInjection(scan, tier) {
+    const literalCount = scan.literalMatches.size;
+    const base64Count = scan.base64DecodedMatches.size;
+    if (literalCount === 0 && base64Count === 0) {
+        return { verdict: 'clean' };
+    }
+    // Dedupe: a phrase that appears both literally AND in a base64-decoded
+    // payload in the same input counts once in `matched_patterns`. Union via
+    // Set before sorting.
+    const matched = [
+        ...new Set([...scan.literalMatches, ...scan.base64DecodedMatches]),
+    ].sort();
+    // Rule 2 — base64 always escalates, regardless of count or tier.
+    if (base64Count > 0) {
+        return {
+            verdict: 'likely_injection',
+            matched_patterns: matched,
+            base64_decoded: true,
+        };
+    }
+    // Rule 3 — multi-literal (distinct patterns) always escalates.
+    if (literalCount >= 2) {
+        return {
+            verdict: 'likely_injection',
+            matched_patterns: matched,
+            base64_decoded: false,
+        };
+    }
+    // Rule 4 — any match at read-tier, or unknown tier, is anomalous enough
+    // to treat as likely. Tier middleware runs before injection middleware,
+    // so an undefined tier here means tier-classification failed; fail closed.
+    if (tier === Tier.Read || tier === undefined) {
+        return {
+            verdict: 'likely_injection',
+            matched_patterns: matched,
+            base64_decoded: false,
+        };
+    }
+    // TODO (G9-follow-up): per-pattern "deny-tag" metadata can force this
+    // branch to `likely_injection` even for a single literal at write tier.
+    // Not shipped in this PR; pattern list is unchanged.
+    // Rule 5 — exactly 1 literal at write/destructive.
+    return {
+        verdict: 'suspicious',
+        matched_patterns: matched,
+        base64_decoded: false,
+    };
+}
+/**
+ * Maximum result size (in UTF-8 bytes) that the injection scanner will attempt
+ * to scan. Payloads larger than this cannot be scanned within the 100ms timeout
+ * budget before result-size-cap (which runs later in the chain) has had a
+ * chance to truncate them. Treat oversized payloads the same as a scan timeout:
+ * deny in block mode, pass in warn mode.
+ */
+const MAX_RESULT_SCAN_BYTES = 2 * 1024 * 1024; // 2 MiB
 /**
  * Record a regex-timeout event on `ctx.metadata`. Array-valued so multiple
  * timeouts in one invocation are all recorded.
@@ -155,50 +434,153 @@ function recordInjectionTimeout(ctx, patternId, inputBytes, timeoutMs) {
     }
 }
 /**
- * PostToolUse middleware: scans tool results for prompt injection patterns.
+ * PostToolUse middleware: classifies tool results for prompt injection.
+ *
+ * G9 tiered classifier:
+ *   - `clean` → allow, no log
+ *   - `suspicious` → warn (stderr + audit metadata `injection.verdict`).
+ *     Denies only in block mode with `suspiciousBlocksWrites: true`.
+ *   - `likely_injection` → always deny, always log.
  *
  * Operates on tool output (ctx.result) returned from downstream MCP servers.
- * On detection:
- *   - Always logs to audit metadata and emits a warning to stderr.
- *   - If action is 'block' (default), sets ctx.status to Denied and blocks the result.
- *   - If action is 'warn', allows the result through with a warning only.
  *
- * SECURITY: Checking PostToolUse (after downstream execution, before the result
- * reaches the LLM) is the correct place to catch injection in tool descriptions
- * and resource content coming from potentially untrusted downstream servers.
+ * SECURITY: Checking PostToolUse (after downstream execution, before the
+ * result reaches the LLM) is the correct place to catch injection in tool
+ * descriptions and resource content coming from potentially untrusted
+ * downstream servers.
  *
  * SECURITY (G3): The only regexes this middleware runs are wrapped in
  * `SafeRegex` with a 100ms default per-call timeout. On timeout the scanner
  * records an audit event; block mode then denies the call (fail closed),
  * while warn mode classifies whatever the partial scan found.
+ *
+ * The legacy `action` parameter (`'block' | 'warn'`) does two things: `'warn'`
+ * keeps `suspicious` verdicts non-blocking even when `suspiciousBlocksWrites`
+ * is true (0.2.x `injection_detection: 'warn'` parity), and only `'block'`
+ * fails closed on a scan timeout or oversized result. `likely_injection` ignores it.
  */
 export function createInjectionMiddleware(action = 'block', opts = {}) {
     const timeoutMs = opts.matchTimeoutMs ?? 100;
+    // Default `suspiciousBlocksWrites` to `false` when unset to preserve 0.3.x
+    // behavior for existing installs that omit the `injection:` policy block.
+    // A consumer who had `injection_detection: block` in 0.3.x without the new
+    // field would otherwise silently start hard-failing benign tool writes that
+    // contain a single matching phrase on upgrade — a breaking change disguised
+    // as a default. The tighter posture (single literal hit → deny) must be
+    // opted into explicitly via `injection.suspicious_blocks_writes: true`, or
+    // by using a profile (e.g. bst-internal) that already sets it.
+    //
+    // Fail-closed-on-timeout (Finding 1 fix) already tightens security for
+    // incomplete scans; this default preserves parity for complete scans.
+    const denyOnSuspicious = action === 'warn'
+        ? false // warn mode hard-overrides suspicious deny — 0.2.x parity with `injection_detection: warn`
+        : (opts.suspiciousBlocksWrites ?? false); // block mode: default false (0.3.x default preserved)
     return async (ctx, next) => {
         await next();
         // Only scan if we have a result to inspect
         if (ctx.result == null)
             return;
+        // Pre-scan size check: if the result is too large to scan within the timeout
+        // budget, treat as a timeout. Result-size-cap runs later in the chain, so we
+        // bound the scan here rather than relying on downstream truncation.
+        const resultBytes = Buffer.byteLength(JSON.stringify(ctx.result), 'utf8');
+        if (resultBytes > MAX_RESULT_SCAN_BYTES) {
+            if (action === 'block') {
+                const errorMeta = {
+                    verdict: 'error',
+                    matched_patterns: [],
+                    base64_decoded: false,
+                };
+                ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
+                ctx.status = InvocationStatus.Denied;
+                ctx.error = `injection scan skipped — result exceeds ${MAX_RESULT_SCAN_BYTES} bytes; failing closed under block policy`;
+                return;
+            }
+            // warn mode: let through — result-size-cap will truncate downstream.
+            return;
+        }
+        // G9 follow-up (finding #4): track scanner timeout via a closure flag so
+        // we can emit a stable `verdict: 'error'` metadata record alongside the
+        // existing `injection.regex_timeout` event. Downstream audit consumers
+        // that key off `metadata.injection.verdict` no longer see a bare timing
+        // record with no verdict shape.
+        let scanTimedOut = false;
         const safe = compileInjectionPatterns(timeoutMs, (patternId, input) => {
+            scanTimedOut = true;
             recordInjectionTimeout(ctx, patternId, Buffer.byteLength(input, 'utf8'), timeoutMs);
         });
-        const matches = [];
-        scanValue(ctx.result, matches, safe);
-        if (matches.length === 0)
+        const scan = {
+            literalMatches: new Set(),
+            base64DecodedMatches: new Set(),
+        };
+        scanValueForInjection(ctx.result, scan, safe);
+        // Fail closed: in block mode, ANY timeout denies — regardless of what the
+        // partial scan found. An incomplete scan cannot prove the unscanned suffix
+        // is safe. If the provisional classification were `suspicious` (one early
+        // literal hit before the timeout), falling through to the normal policy
+        // path could still allow the call under `suspiciousBlocksWrites: false`,
+        // even though the unscanned suffix might contain a second phrase that
+        // would have escalated to `likely_injection`. Hoisting this check before
+        // `classifyInjection` closes that gap.
+        if (scanTimedOut && action === 'block') {
+            const errorMeta = {
+                verdict: 'error',
+                matched_patterns: [],
+                base64_decoded: false,
+            };
+            ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
+            process.stderr.write(`[rea] INJECTION-GUARD (error): regex-timeout during scan of tool "${ctx.tool_name}" result; verdict inconclusive\n`);
+            ctx.status = InvocationStatus.Denied;
+            ctx.error = 'injection scan timed out — failing closed under block policy';
+            return; // do NOT call next()
+        }
+        const classification = classifyInjection(scan, ctx.tier);
+        // warn/log mode + timeout: fail-open — emit a verdict:'error' metadata
+        // record alongside the existing injection.regex_timeout event so
+        // downstream audit consumers see a stable verdict shape, then allow
+        // through. This branch only fires in warn mode (block mode was handled
+        // above) when no actionable signal was collected before the timeout.
+        if (scanTimedOut && classification.verdict === 'clean') {
+            const errorMeta = {
+                verdict: 'error',
+                matched_patterns: [],
+                base64_decoded: false,
+            };
+            ctx.metadata[INJECTION_METADATA_KEY] = errorMeta;
+            process.stderr.write(`[rea] INJECTION-GUARD (error): regex-timeout during scan of tool "${ctx.tool_name}" result; verdict inconclusive\n`);
+            // warn/log mode: let through but record — verdict:'error' is written above.
+            return;
+        }
+        if (classification.verdict === 'clean')
             return;
-        // Deduplicate matches
-        const unique = [...new Set(matches)];
-        // Always log to audit metadata
-        ctx.metadata.injection_matches = unique;
-        // Always emit warning to stderr
-        process.stderr.write(`[rea] INJECTION-GUARD: Prompt injection pattern detected in tool "${ctx.tool_name}" result\n`);
-        for (const match of unique) {
-            process.stderr.write(`  Pattern: ${match}\n`);
+        // Write audit metadata. Export verdict + distinct matched phrases +
+        // base64 flag. NEVER export the input text.
+        const auditMeta = {
+            verdict: classification.verdict,
+            matched_patterns: classification.matched_patterns,
+            base64_decoded: classification.base64_decoded,
+        };
+        ctx.metadata[INJECTION_METADATA_KEY] = auditMeta;
+        // Always emit a stderr warning. Operators rely on this as the live signal.
+        process.stderr.write(`[rea] INJECTION-GUARD (${classification.verdict}): pattern(s) detected in tool "${ctx.tool_name}" result\n`);
+        for (const p of classification.matched_patterns) {
+            process.stderr.write(`  Pattern: ${p}\n`);
+        }
+        if (classification.base64_decoded) {
+            process.stderr.write(`  Base64-decoded match detected\n`);
         }
-        process.stderr.write(`  Action: ${action} — review the downstream server "${ctx.server_name}" for compromise.\n`);
-        if (action === 'block') {
+        process.stderr.write(`  Action: review the downstream server "${ctx.server_name}" for compromise.\n`);
+        // Deny policy:
+        //   likely_injection → always deny
+        //   suspicious       → deny iff denyOnSuspicious (constructed above)
+        const shouldDeny = classification.verdict === 'likely_injection' ||
+            (classification.verdict === 'suspicious' && denyOnSuspicious);
+        if (shouldDeny) {
             ctx.status = InvocationStatus.Denied;
-            ctx.error = `Prompt injection detected in tool result (${unique.length} pattern(s) matched). Result blocked.`;
+            ctx.error =
+                classification.verdict === 'likely_injection'
+                    ? `Likely prompt injection detected in tool result (${classification.matched_patterns.length} pattern(s), base64=${classification.base64_decoded}). Result blocked.`
+                    : `Suspicious prompt injection pattern in tool result (1 pattern at ${String(ctx.tier)} tier). Result blocked by policy.`;
             ctx.result = undefined;
         }
     };

package/dist/gateway/middleware/kill-switch.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { Middleware } from './chain.js';
+import type { MetricsRegistry } from '../observability/metrics.js';
 /**
  * HALT semantic guarantee:
  *   - HALT is read exactly once per invocation, at the top of this middleware layer.
@@ -22,4 +23,12 @@ import type { Middleware } from './chain.js';
  *   - The decision is recorded on `ctx.metadata.halt_decision` for audit and is
  *     never re-consulted by downstream middleware.
  */
-export declare function createKillSwitchMiddleware(baseDir: string): Middleware;
+export declare function createKillSwitchMiddleware(baseDir: string,
+/**
+ * Optional metrics registry. When supplied, every invocation marks the
+ * `rea_seconds_since_last_halt_check` gauge with a fresh timestamp so the
+ * exposed gauge reflects real per-call check cadence rather than the
+ * startup-time mark `rea serve` sets once. When omitted, no metric is
+ * emitted.
+ */
+metrics?: MetricsRegistry): Middleware;

package/dist/gateway/middleware/kill-switch.js CHANGED Viewed

@@ -28,9 +28,28 @@ const HALT_FILE = 'HALT';
  *   - The decision is recorded on `ctx.metadata.halt_decision` for audit and is
  *     never re-consulted by downstream middleware.
  */
-export function createKillSwitchMiddleware(baseDir) {
+export function createKillSwitchMiddleware(baseDir,
+/**
+ * Optional metrics registry. When supplied, every invocation marks the
+ * `rea_seconds_since_last_halt_check` gauge with a fresh timestamp so the
+ * exposed gauge reflects real per-call check cadence rather than the
+ * startup-time mark `rea serve` sets once. When omitted, no metric is
+ * emitted.
+ */
+metrics) {
     return async (ctx, next) => {
         const haltPath = path.join(baseDir, REA_DIR, HALT_FILE);
+        // Record the HALT-check attempt BEFORE we probe the filesystem so the
+        // gauge reflects "how long since we last looked", regardless of whether
+        // this check succeeds or fails. Fresh on every invocation; failure to
+        // update metrics must not crash the gateway.
+        try {
+            metrics?.markHaltCheck();
+        }
+        catch {
+            // Metrics registry implementations are expected to be infallible,
+            // but we refuse to let them take down the chain in any case.
+        }
         let fh;
         try {
             fh = await fs.open(haltPath, fsConstants.O_RDONLY);