npm - haechi - Versions diffs - 0.3.2 → 0.5.0 - Mend

haechi 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.ko.md +227 -0
package/README.md +126 -1
package/docs/README.md +3 -6
package/docs/current/api-stability.ko.md +11 -4
package/docs/current/api-stability.md +11 -4
package/docs/current/configuration.ko.md +210 -0
package/docs/current/configuration.md +210 -0
package/docs/current/release-0.3.2-hardening-scope.ko.md +2 -1
package/docs/current/release-0.3.2-hardening-scope.md +2 -1
package/docs/current/release-0.4-implementation-scope.ko.md +2 -1
package/docs/current/release-0.4-implementation-scope.md +2 -1
package/docs/current/release-0.5-implementation-scope.ko.md +69 -0
package/docs/current/release-0.5-implementation-scope.md +69 -0
package/docs/current/release-process.ko.md +14 -4
package/docs/current/release-process.md +14 -4
package/docs/current/risk-register-release-gate.ko.md +11 -11
package/docs/current/risk-register-release-gate.md +12 -12
package/docs/current/threat-model.ko.md +8 -4
package/docs/current/threat-model.md +8 -4
package/haechi.config.example.json +7 -2
package/package.json +8 -2
package/packages/audit/index.mjs +3 -1
package/packages/cli/bin/haechi.mjs +310 -21
package/packages/cli/runtime.mjs +28 -3
package/packages/core/index.mjs +128 -10
package/packages/crypto/index.mjs +13 -1
package/packages/filter/index.mjs +52 -3
package/packages/mcp-stdio/index.mjs +103 -22
package/packages/policy/index.mjs +6 -0
package/packages/protocol-adapters/index.mjs +33 -14
package/packages/proxy/index.mjs +149 -4
package/packages/stream-filter/index.mjs +194 -0
package/packages/token-vault/index.mjs +70 -2

package/packages/crypto/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { createCipheriv, createDecipheriv, createHash, randomBytes } from "node:crypto";
+import { createCipheriv, createDecipheriv, createHash, createHmac, randomBytes } from "node:crypto";
 import { dirname } from "node:path";
 import { mkdir, readFile, writeFile } from "node:fs/promises";
@@ -60,6 +60,18 @@ export function createLocalCryptoProvider({ keyFile }) {
         aadHash: sha256(aadBytes)
       };
     },
+    // Keyed hash over a domain-separated derived key. The raw stored key is an
+    // AES-256-GCM key and must never be used for HMAC directly; every use case
+    // gets its own versioned domain string (e.g. deterministic tokenization,
+    // identity hashing). Uses the active key, so rotation changes outputs.
+    async hmac({ data, domain }) { // resolves to the hex-encoded HMAC-SHA256 of `data` under a per-domain subkey
+      if (!domain || typeof domain !== "string") { // rejects undefined, "" and any non-string domain
+        throw new Error("hmac requires a non-empty domain string");
+      }
+      const { active: { key } } = await loadKeys(); // only the active key is read; other loaded keys are ignored here
+      const derived = createHmac("sha256", key).update(domain).digest(); // subkey = HMAC-SHA256(key, domain), kept as raw bytes
+      return createHmac("sha256", derived).update(data).digest("hex"); // HMAC-SHA256(subkey, data)
+    },
     async decrypt({ envelope, aad }) {
       const { active, byKid } = await loadKeys();
       if (envelope.alg && envelope.alg !== ALG) {

package/packages/filter/index.mjs CHANGED Viewed

@@ -51,6 +51,50 @@ const DEFAULT_RULES = [
     pattern: "(?<=\\b(?:api[_-]?key|secret|token|password)\\s*[:=]\\s*['\\\"]?)[A-Za-z0-9._~+/-]{12,}",
     flags: "gi",
     confidence: 0.85
+  },
+  // Indirect prompt injection heuristics. Response/tool-result direction only,
+  // and the policy default for the injection type is `allow` (report-only):
+  // detections are audited regardless of action, and false-positive blocks
+  // would erode trust faster than missed detections.
+  {
+    id: "injection-instruction-override",
+    type: "injection",
+    pattern: "\\b(?:ignore|disregard|forget)\\s+(?:all\\s+|any\\s+|the\\s+|your\\s+)?(?:previous|prior|earlier|above|system)\\s+(?:instructions?|rules?|prompts?|guidelines)",
+    flags: "gi",
+    confidence: 0.7,
+    direction: "response"
+  },
+  {
+    id: "injection-role-reassignment",
+    type: "injection",
+    pattern: "\\b(?:you are now|act as)\\s+(?:an?\\s+)?(?:unrestricted|jailbroken|uncensored|developer mode|dan\\b)|\\bnew (?:system )?instructions?\\s*:",
+    flags: "gi",
+    confidence: 0.65,
+    direction: "response"
+  },
+  {
+    id: "injection-prompt-markers",
+    type: "injection",
+    pattern: "<\\|im_start\\|>|<<SYS>>|\\[\\[?system\\]\\]?\\s*:",
+    flags: "gi",
+    confidence: 0.7,
+    direction: "response"
+  },
+  {
+    id: "injection-conceal-from-user",
+    type: "injection",
+    pattern: "\\bdo not (?:tell|inform|mention|reveal|show)(?:\\s+this)?(?:\\s+to)?\\s+the user\\b",
+    flags: "gi",
+    confidence: 0.7,
+    direction: "response"
+  },
+  {
+    id: "injection-tool-induction",
+    type: "injection",
+    pattern: "\\b(?:silently|secretly|immediately)\\s+(?:call|invoke|run|execute)\\s+(?:the\\s+)?[\\w.-]+\\s+tool\\b",
+    flags: "gi",
+    confidence: 0.6,
+    direction: "response"
   }
 ];
@@ -64,16 +108,21 @@ export function createDefaultFilterEngine({ customRules = [] } = {}) {
       readsPlaintext: true,
       networkEgress: false
     },
-    async detect({ entries }) {
-      return entries.flatMap((entry) => detectEntry(entry, rules));
+    async detect({ entries, context }) {
+      return entries.flatMap((entry) => detectEntry(entry, rules, context));
     }
   };
 }
-export function detectEntry(entry, rules) {
+export function detectEntry(entry, rules, context = {}) {
   const detections = [];
   for (const rule of rules) {
+    // Direction-scoped rules (e.g. injection heuristics) only run on the
+    // matching traffic direction; rules without a direction run everywhere.
+    if (rule.direction && rule.direction !== context?.direction) {
+      continue;
+    }
     const regex = new RegExp(rule.pattern, rule.flags.includes("g") ? rule.flags : `${rule.flags}g`);
     for (const match of entry.value.matchAll(regex)) {
       const value = match[0];

package/packages/mcp-stdio/index.mjs CHANGED Viewed

@@ -1,25 +1,35 @@
 import { createInterface } from "node:readline";
-export async function protectMcpJsonRpcMessage(message, runtime) {
+// Tagged core used by both the one-direction line filter and mcp-wrap.
+// kinds: "forward" (deliver the protected message), "reject" (send the error
+// back to the CLIENT instead of delivering), "drop" (notification — deliver
+// nothing, per JSON-RPC).
+async function protectTagged(message, runtime, { enforceMethodAllowlist = true } = {}) {
   if (!message || typeof message !== "object" || Array.isArray(message)) {
     throw new Error(Array.isArray(message)
       ? "JSON-RPC batch messages are not supported by the MCP stdio filter"
       : "MCP message must be a JSON object");
   }
   const policy = runtime.config.mcp;
-  // JSON-RPC notifications (method, no id) must not receive responses; a
-  // rejected or blocked notification is dropped (returns null) instead.
   const isNotification = message.method !== undefined
     && !Object.prototype.hasOwnProperty.call(message, "id");
+  function reject(error) {
+    return isNotification ? { kind: "drop" } : { kind: "reject", message: error };
+  }
   if (policy.requireJsonRpc && message.jsonrpc !== "2.0") {
-    return isNotification ? null : errorJsonRpc(message.id, -32002, "haechi_mcp_invalid_jsonrpc", {
+    return reject(errorJsonRpc(message.id, -32002, "haechi_mcp_invalid_jsonrpc", {
       reason: "MCP messages must use JSON-RPC 2.0"
-    });
+    }));
   }
-  if (message.method && !methodAllowed(message.method, policy.allowedMethods)) {
-    return isNotification ? null : errorJsonRpc(message.id, -32003, "haechi_mcp_method_not_allowed", {
+  // The allowlist describes CLIENT-callable methods. Server-initiated requests
+  // (e.g. sampling/createMessage) are exempted by the caller via
+  // enforceMethodAllowlist: false, but their params are still protected.
+  if (enforceMethodAllowlist && message.method && !methodAllowed(message.method, policy.allowedMethods)) {
+    return reject(errorJsonRpc(message.id, -32003, "haechi_mcp_method_not_allowed", {
       method: message.method
-    });
+    }));
   }
   const next = structuredClone(message);
@@ -28,10 +38,11 @@ export async function protectMcpJsonRpcMessage(message, runtime) {
     const result = await runtime.haechi.protectJson(next.params, {
       protocol: "mcp-stdio",
       operation: next.method ?? "params",
+      direction: "request",
       mode: runtime.config.policy.mode ?? runtime.config.mode
     });
     if (result.blocked) {
-      return isNotification ? null : blockedJsonRpc(next.id, result);
+      return reject(blockedJsonRpc(next.id, result));
     }
     next.params = result.payload;
   }
@@ -40,15 +51,21 @@ export async function protectMcpJsonRpcMessage(message, runtime) {
     const result = await runtime.haechi.protectJson(next.result, {
       protocol: "mcp-stdio",
       operation: "result",
+      direction: "response",
       mode: runtime.config.policy.mode ?? runtime.config.mode
     });
     if (result.blocked) {
-      return blockedJsonRpc(next.id, result);
+      return { kind: "reject", message: blockedJsonRpc(next.id, result) };
     }
     next.result = result.payload;
   }
-  return next;
+  return { kind: "forward", message: next };
+}
+export async function protectMcpJsonRpcMessage(message, runtime, options = {}) { // untagged wrapper around protectTagged; options are passed through as-is
+  const tagged = await protectTagged(message, runtime, options); // errors thrown by protectTagged propagate to the caller
+  return tagged.kind === "drop" ? null : tagged.message; // "drop" becomes null; "forward" and "reject" both return their message object
 }
 export async function runMcpStdioFilter({ input = process.stdin, output = process.stdout, runtime }) {
@@ -66,21 +83,85 @@ export async function runMcpStdioFilter({ input = process.stdin, output = proces
       }
       output.write(`${JSON.stringify(protectedMessage)}\n`);
     } catch (error) {
-      output.write(`${JSON.stringify({
-        jsonrpc: "2.0",
-        error: {
-          code: -32000,
-          message: "haechi_mcp_stdio_error",
-          data: {
-            reason: error.message
-          }
-        },
-        id: null
-      })}\n`);
+      output.write(`${JSON.stringify(stdioError(error))}\n`);
     }
   }
 }
+// Bidirectional wrap around a spawned MCP server child process:
+//   client → (allowlist + params protection) → child stdin
+//   child stdout → (params/result protection, no client allowlist) → client
+// Rejections in BOTH directions are answered to the client; nothing reaches
+// the child for a rejected client message. Resolves with { code, signal }.
+export function wrapMcpChild({ runtime, child, input = process.stdin, output = process.stdout }) {
+  const clientLines = createInterface({ input, crlfDelay: Infinity }); // client side, read one line at a time
+  const serverLines = createInterface({ input: child.stdout, crlfDelay: Infinity }); // child side, read one line at a time
+  const clientPump = (async () => {
+    for await (const line of clientLines) {
+      if (!line.trim()) { // whitespace-only lines are skipped rather than parsed
+        continue;
+      }
+      try {
+        const tagged = await protectTagged(JSON.parse(line), runtime, { enforceMethodAllowlist: true });
+        if (tagged.kind === "forward" && child.stdin.writable) {
+          child.stdin.write(`${JSON.stringify(tagged.message)}\n`);
+        } else if (tagged.kind === "reject") { // "drop", and "forward" while child stdin is not writable, emit nothing
+          output.write(`${JSON.stringify(tagged.message)}\n`);
+        }
+      } catch (error) { // covers JSON.parse failures and anything thrown by protectTagged
+        output.write(`${JSON.stringify(stdioError(error))}\n`);
+      }
+    }
+    if (child.stdin.writable) { // client input has ended: close the child's stdin
+      child.stdin.end();
+    }
+  })();
+  const serverPump = (async () => {
+    for await (const line of serverLines) {
+      if (!line.trim()) {
+        continue;
+      }
+      try {
+        const tagged = await protectTagged(JSON.parse(line), runtime, { enforceMethodAllowlist: false });
+        if (tagged.kind !== "drop") { // both "forward" and "reject" messages are written to the client
+          output.write(`${JSON.stringify(tagged.message)}\n`);
+        }
+      } catch (error) {
+        output.write(`${JSON.stringify(stdioError(error))}\n`);
+      }
+    }
+  })();
+  return new Promise((resolve, reject) => {
+    child.once("error", reject); // a child "error" event rejects the returned promise
+    child.once("exit", (code, signal) => {
+      // The child is gone: stop consuming client input so the pumps can
+      // settle even when the caller's input stream stays open.
+      clientLines.close();
+      serverLines.close();
+      Promise.allSettled([clientPump, serverPump]).then(() => { // allSettled: a failed pump does not prevent resolution
+        resolve({ code: code ?? (signal ? 1 : 0), signal }); // a null/undefined code becomes 1 when a signal is set, otherwise 0
+      });
+    });
+  });
+}
+function stdioError(error) { // builds the JSON-RPC 2.0 error object emitted when a line cannot be processed
+  return {
+    jsonrpc: "2.0",
+    error: {
+      code: -32000,
+      message: "haechi_mcp_stdio_error",
+      data: {
+        reason: error.message // only the message text is exposed, not the stack
+      }
+    },
+    id: null // always null: the id of the failing message is not passed to this helper
+  };
+}
 function blockedJsonRpc(id, result) {
   return errorJsonRpc(id, -32001, "haechi_policy_block", {
     auditId: result.auditEvent.id,

package/packages/policy/index.mjs CHANGED Viewed

@@ -95,6 +95,12 @@ export function buildPolicy({
       allowUnsafeOverrides
     });
   }
+  // Injection heuristics ship report-only: unless a preset or the user sets an
+  // explicit action, injection detections are audited but never transform or
+  // block. This intentionally bypasses defaultAction.
+  if (!merged.actions.injection) {
+    merged.actions.injection = "allow";
+  }
   validatePolicy(merged);
   return merged;
 }

package/packages/protocol-adapters/index.mjs CHANGED Viewed

@@ -1,11 +1,22 @@
+// Streaming descriptors: `format` is the wire framing, `deltaPath` is the
+// primary incremental-text channel (index 0 of choices for OpenAI-style).
+// A null deltaPath means "no known channel" — frames still get within-frame
+// protection but no cross-frame buffering.
+const SSE_CHAT = { format: "sse", deltaPath: ["choices", 0, "delta", "content"] };
+const SSE_COMPLETION = { format: "sse", deltaPath: ["choices", 0, "text"] };
+const SSE_RESPONSES = { format: "sse", deltaPath: null };
+const SSE_LLAMA_LEGACY = { format: "sse", deltaPath: ["content"] };
+const NDJSON_OLLAMA_CHAT = { format: "ndjson", deltaPath: ["message", "content"] };
+const NDJSON_OLLAMA_GENERATE = { format: "ndjson", deltaPath: ["response"] };
 const ADAPTERS = {
   "openai-compatible": {
     id: "openai-compatible",
     protocol: "llm-http",
     routes: [
-      route("/v1/chat/completions", "chat-completions"),
-      route("/v1/completions", "completions"),
-      route("/v1/responses", "responses"),
+      route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
+      route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
+      route("/v1/responses", "responses", { streaming: SSE_RESPONSES }),
       route("/v1/embeddings", "embeddings")
     ]
   },
@@ -13,9 +24,9 @@ const ADAPTERS = {
     id: "vllm-openai",
     protocol: "vllm-openai",
     routes: [
-      route("/v1/chat/completions", "chat-completions"),
-      route("/v1/completions", "completions"),
-      route("/v1/responses", "responses"),
+      route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
+      route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
+      route("/v1/responses", "responses", { streaming: SSE_RESPONSES }),
       route("/v1/embeddings", "embeddings")
     ]
   },
@@ -23,10 +34,10 @@ const ADAPTERS = {
     id: "llama-cpp",
     protocol: "llama-cpp",
     routes: [
-      route("/v1/chat/completions", "chat-completions"),
-      route("/v1/completions", "completions"),
+      route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
+      route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
       route("/v1/embeddings", "embeddings"),
-      route("/completion", "legacy-completion")
+      route("/completion", "legacy-completion", { streaming: SSE_LLAMA_LEGACY })
     ]
   },
   "ollama": {
@@ -34,8 +45,8 @@ const ADAPTERS = {
     protocol: "ollama",
     routes: [
       // Ollama streams /api/chat and /api/generate unless the request sets stream:false.
-      route("/api/chat", "chat", { streamingDefault: true }),
-      route("/api/generate", "generate", { streamingDefault: true }),
+      route("/api/chat", "chat", { streamingDefault: true, streaming: NDJSON_OLLAMA_CHAT }),
+      route("/api/generate", "generate", { streamingDefault: true, streaming: NDJSON_OLLAMA_GENERATE }),
       route("/api/embed", "embed"),
       route("/api/embeddings", "embeddings")
     ]
@@ -47,7 +58,13 @@ const TARGET_TYPE_ALIASES = {
 };
 export function createProtocolAdapter(target = {}) {
-  const adapterId = target.adapter ?? adapterFromTargetType(target.type);
+  // A specific target.type (vllm-openai, ollama, llama-cpp) names its own
+  // adapter and wins over a generic/default target.adapter — otherwise the
+  // default config's adapter ("openai-compatible") would shadow the type after
+  // a deep merge and silently route an Ollama target to OpenAI paths.
+  const adapterId = ADAPTERS[target.type]
+    ? target.type
+    : (target.adapter ?? adapterFromTargetType(target.type));
   const adapter = ADAPTERS[adapterId];
   if (!adapter) {
     throw new Error(`Unknown protocol adapter: ${adapterId}`);
@@ -71,7 +88,8 @@ export function createProtocolAdapter(target = {}) {
         operation,
         protectRequest: matched?.protectRequest ?? true,
         protectResponse: matched?.protectResponse ?? true,
-        streamingByDefault: matched?.streamingDefault ?? false
+        streamingByDefault: matched?.streamingDefault ?? false,
+        streaming: matched?.streaming ?? null
       };
     }
   };
@@ -98,7 +116,8 @@ function route(path, operation, options = {}) {
     operation,
     protectRequest: options.protectRequest ?? true,
     protectResponse: options.protectResponse ?? true,
-    streamingDefault: options.streamingDefault ?? false
+    streamingDefault: options.streamingDefault ?? false,
+    streaming: options.streaming ?? null
   };
 }

package/packages/proxy/index.mjs CHANGED Viewed

@@ -1,5 +1,6 @@
 import { createServer } from "node:http";
 import { createHash, randomUUID } from "node:crypto";
+import { inspectResponseStream } from "../stream-filter/index.mjs";
 export const DEFAULT_PROXY_PORT = 1016;
@@ -22,6 +23,11 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
       const json = parseJsonBody(body);
       if (isStreamingRequest(json, routeContext)) {
+        if (config.streaming.requestMode === "inspect") {
+          await handleInspectedStream({ runtime, request, response, routeContext, json });
+          return;
+        }
         if (config.streaming.requestMode === "pass-through") {
           await recordProxyDecision({
             runtime,
@@ -45,7 +51,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
         writeJson(response, 501, {
           error: "haechi_streaming_unsupported",
-          message: "Streaming requests are blocked unless streaming.requestMode is explicitly set to pass-through"
+          message: "Streaming requests are blocked unless streaming.requestMode is set to pass-through or inspect"
         });
         return;
       }
@@ -54,6 +60,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
         ? await haechi.protectJson(json, {
           ...routeContext,
           operation: `request:${routeContext.operation}`,
+          direction: "request",
           mode: config.policy.mode ?? config.mode
         })
         : { payload: json, blocked: false };
@@ -77,7 +84,8 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
       const forwarded = await maybeProtectResponse({
         upstreamResponse,
         routeContext,
-        runtime
+        runtime,
+        issuedTokens: result.issuedTokens ?? []
       });
       response.writeHead(forwarded.status, forwarded.headers);
@@ -112,7 +120,108 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
   };
 }
-async function maybeProtectResponse({ upstreamResponse, routeContext, runtime }) {
+async function handleInspectedStream({ runtime, request, response, routeContext, json }) { // protects the request, forwards it, then inspects the streamed response
+  const { haechi, config } = runtime;
+  // Inspection needs to know the wire format and delta channel for this route.
+  if (!routeContext.streaming) {
+    writeJson(response, 501, {
+      error: "haechi_streaming_uninspectable_route",
+      message: `streaming.requestMode is "inspect" but route ${routeContext.routeId} has no known streaming format`
+    });
+    return; // returns before forward() is called
+  }
+  // The request body is ordinary JSON even when the response streams, so it is
+  // protected like any other request.
+  const requestResult = routeContext.protectRequest
+    ? await haechi.protectJson(json, {
+      ...routeContext,
+      operation: `request:${routeContext.operation}`,
+      direction: "request",
+      mode: config.policy.mode ?? config.mode
+    })
+    : { payload: json, blocked: false }; // protection disabled for this route: body is forwarded unchanged
+  if (requestResult.blocked) {
+    writeJson(response, 403, {
+      error: "haechi_policy_block",
+      summary: requestResult.summary,
+      auditId: requestResult.auditEvent.id
+    });
+    return; // blocked requests return before forward() is called
+  }
+  const upstreamResponse = await forward({
+    upstream: config.target.upstream,
+    request,
+    body: JSON.stringify(requestResult.payload), // the protected payload, not the original body
+    timeoutMs: config.limits.upstreamTimeoutMs
+  });
+  const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode; // first non-nullish setting wins
+  const protector = haechi.createStreamProtector({
+    ...routeContext,
+    operation: `response-stream:${routeContext.operation}`,
+    direction: "response",
+    mode: streamMode,
+    maxMatchBytes: config.streaming.maxMatchBytes
+  });
+  response.writeHead(upstreamResponse.status, streamingResponseHeaders(upstreamResponse)); // status and headers go out before any body is inspected
+  const { blocked, summary } = await inspectResponseStream({
+    source: upstreamResponse.body ?? emptyAsyncIterable(), // a missing upstream body is treated as an empty stream
+    sink: nodeResponseSink(response),
+    streaming: routeContext.streaming,
+    protector
+  });
+  await recordStreamDecision({ runtime, routeContext, blocked, summary, mode: streamMode }); // recorded after inspectResponseStream resolves
+  response.end();
+}
+function streamingResponseHeaders(upstreamResponse) { // copies the upstream headers into a plain object, minus two body-describing headers
+  const headers = Object.fromEntries(upstreamResponse.headers.entries());
+  delete headers["content-length"]; // NOTE(review): presumably because the inspected body can differ in size from upstream — confirm
+  delete headers["content-encoding"]; // NOTE(review): presumably because the sink writes text, not the upstream encoding — confirm
+  return headers;
+}
+function nodeResponseSink(response) { // adapts a response object to the { write(text) } sink passed to inspectResponseStream
+  return {
+    write(text) {
+      response.write(text); // the return value of response.write is not passed back to the caller
+    }
+  };
+}
+async function* emptyAsyncIterable() { // async generator that completes without yielding anything
+  // No upstream body to inspect.
+}
+async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode }) { // writes one audit event describing an inspected stream
+  if (typeof runtime.auditSink?.record !== "function") { // no usable audit sink: nothing is recorded
+    return;
+  }
+  await runtime.auditSink.record({
+    id: randomUUID(),
+    timestamp: new Date().toISOString(),
+    protocol: routeContext?.protocol ?? "proxy",
+    operation: `response-stream:${routeContext?.operation ?? "unknown"}`,
+    mode,
+    identity: null,
+    enforced: !["dry-run", "report-only"].includes(mode), // false only for the "dry-run" and "report-only" modes
+    blocked,
+    decision: blocked ? "stream_blocked" : "stream_inspected",
+    reason: blocked ? "stream_policy_block" : "stream_inspected",
+    routeId: routeContext?.routeId ?? "unknown",
+    pathHash: routeContext?.path ? shortHash(routeContext.path) : null, // the path is passed through shortHash rather than recorded verbatim
+    summary
+  });
+}
+async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, issuedTokens = [] }) {
   const headers = Object.fromEntries(upstreamResponse.headers.entries());
   if (!runtime.config.responseProtection.enabled || !routeContext.protectResponse) {
@@ -203,6 +312,7 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime })
   const result = await runtime.haechi.protectJson(json, {
     ...routeContext,
     operation: `response:${routeContext.operation}`,
+    direction: "response",
     mode: runtime.config.responseProtection.mode ?? runtime.config.policy.mode ?? runtime.config.mode
   });
@@ -218,13 +328,47 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime })
     };
   }
+  let responsePayload = result.payload;
+  // Request-scoped token round-trip: restore ONLY tokens issued/reused while
+  // protecting this request, so the model sees tokens but the caller sees
+  // plaintext. Explicit opt-in; runs after response protection, so an opt-in
+  // here intentionally overrides response-direction transforms for values the
+  // caller already sent.
+  if (runtime.config.tokenVault.detokenizeResponses
+    && issuedTokens.length > 0
+    && typeof runtime.tokenVault?.detokenize === "function") {
+    const values = await runtime.tokenVault.detokenize({ tokens: issuedTokens });
+    if (values.size > 0) {
+      responsePayload = restoreTokens(responsePayload, values);
+    }
+  }
   return {
     status: upstreamResponse.status,
     headers: transformedJsonHeaders(headers),
-    body: Buffer.from(`${JSON.stringify(result.payload)}\n`)
+    body: Buffer.from(`${JSON.stringify(responsePayload)}\n`)
   };
 }
+function restoreTokens(value, tokenValues) { // recursively replaces "[TOKEN:<token>]" markers in strings with their plaintext
+  if (typeof value === "string") {
+    let output = value;
+    for (const [token, plaintext] of tokenValues) { // iterates [token, plaintext] pairs — presumably a Map (the caller reads .size); confirm
+      output = output.split(`[TOKEN:${token}]`).join(plaintext); // literal (non-regex) replacement of every occurrence
+    }
+    return output;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => restoreTokens(item, tokenValues)); // returns a new array; the input array is not mutated
+  }
+  if (value && typeof value === "object") {
+    return Object.fromEntries(Object.entries(value)
+      .map(([key, item]) => [restoreTokens(key, tokenValues), restoreTokens(item, tokenValues)])); // object keys are restored as well as values
+  }
+  return value; // numbers, booleans, null and undefined pass through unchanged
+}
 async function forward({ upstream, request, body, timeoutMs = null }) {
   const target = buildUpstreamUrl({ upstream, requestUrl: request.url });
   try {
@@ -462,6 +606,7 @@ async function recordProxyDecision({ runtime, routeContext, decision, reason, en
     protocol: routeContext?.protocol ?? "proxy",
     operation: routeContext ? `proxy:${routeContext.protocol}:${routeContext.routeId ?? "unknown"}` : "proxy",
     mode: runtime.config.policy.mode ?? runtime.config.mode,
+    identity: null,
     enforced,
     blocked,
     decision,