haechi 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/README.ko.md +227 -0
  2. package/README.md +126 -1
  3. package/docs/README.md +3 -6
  4. package/docs/current/api-stability.ko.md +11 -4
  5. package/docs/current/api-stability.md +11 -4
  6. package/docs/current/configuration.ko.md +210 -0
  7. package/docs/current/configuration.md +210 -0
  8. package/docs/current/release-0.3.2-hardening-scope.ko.md +2 -1
  9. package/docs/current/release-0.3.2-hardening-scope.md +2 -1
  10. package/docs/current/release-0.4-implementation-scope.ko.md +2 -1
  11. package/docs/current/release-0.4-implementation-scope.md +2 -1
  12. package/docs/current/release-0.5-implementation-scope.ko.md +69 -0
  13. package/docs/current/release-0.5-implementation-scope.md +69 -0
  14. package/docs/current/release-process.ko.md +14 -4
  15. package/docs/current/release-process.md +14 -4
  16. package/docs/current/risk-register-release-gate.ko.md +11 -11
  17. package/docs/current/risk-register-release-gate.md +12 -12
  18. package/docs/current/threat-model.ko.md +8 -4
  19. package/docs/current/threat-model.md +8 -4
  20. package/haechi.config.example.json +7 -2
  21. package/package.json +8 -2
  22. package/packages/audit/index.mjs +3 -1
  23. package/packages/cli/bin/haechi.mjs +310 -21
  24. package/packages/cli/runtime.mjs +28 -3
  25. package/packages/core/index.mjs +128 -10
  26. package/packages/crypto/index.mjs +13 -1
  27. package/packages/filter/index.mjs +52 -3
  28. package/packages/mcp-stdio/index.mjs +103 -22
  29. package/packages/policy/index.mjs +6 -0
  30. package/packages/protocol-adapters/index.mjs +33 -14
  31. package/packages/proxy/index.mjs +149 -4
  32. package/packages/stream-filter/index.mjs +194 -0
  33. package/packages/token-vault/index.mjs +70 -2
@@ -1,4 +1,4 @@
1
- import { createCipheriv, createDecipheriv, createHash, randomBytes } from "node:crypto";
1
+ import { createCipheriv, createDecipheriv, createHash, createHmac, randomBytes } from "node:crypto";
2
2
  import { dirname } from "node:path";
3
3
  import { mkdir, readFile, writeFile } from "node:fs/promises";
4
4
 
@@ -60,6 +60,18 @@ export function createLocalCryptoProvider({ keyFile }) {
60
60
  aadHash: sha256(aadBytes)
61
61
  };
62
62
  },
63
+ // Keyed hash over a domain-separated derived key. The raw stored key is an
64
+ // AES-256-GCM key and must never be used for HMAC directly; every use case
65
+ // gets its own versioned domain string (e.g. deterministic tokenization,
66
+ // identity hashing). Uses the active key, so rotation changes outputs.
67
+ async hmac({ data, domain }) {
68
+ if (!domain || typeof domain !== "string") {
69
+ throw new Error("hmac requires a non-empty domain string");
70
+ }
71
+ const { active: { key } } = await loadKeys();
72
+ const derived = createHmac("sha256", key).update(domain).digest();
73
+ return createHmac("sha256", derived).update(data).digest("hex");
74
+ },
63
75
  async decrypt({ envelope, aad }) {
64
76
  const { active, byKid } = await loadKeys();
65
77
  if (envelope.alg && envelope.alg !== ALG) {
@@ -51,6 +51,50 @@ const DEFAULT_RULES = [
51
51
  pattern: "(?<=\\b(?:api[_-]?key|secret|token|password)\\s*[:=]\\s*['\\\"]?)[A-Za-z0-9._~+/-]{12,}",
52
52
  flags: "gi",
53
53
  confidence: 0.85
54
+ },
55
+ // Indirect prompt injection heuristics. Response/tool-result direction only,
56
+ // and the policy default for the injection type is `allow` (report-only):
57
+ // detections are audited regardless of action, and false-positive blocks
58
+ // would erode trust faster than missed detections.
59
+ {
60
+ id: "injection-instruction-override",
61
+ type: "injection",
62
+ pattern: "\\b(?:ignore|disregard|forget)\\s+(?:all\\s+|any\\s+|the\\s+|your\\s+)?(?:previous|prior|earlier|above|system)\\s+(?:instructions?|rules?|prompts?|guidelines)",
63
+ flags: "gi",
64
+ confidence: 0.7,
65
+ direction: "response"
66
+ },
67
+ {
68
+ id: "injection-role-reassignment",
69
+ type: "injection",
70
+ pattern: "\\b(?:you are now|act as)\\s+(?:an?\\s+)?(?:unrestricted|jailbroken|uncensored|developer mode|dan\\b)|\\bnew (?:system )?instructions?\\s*:",
71
+ flags: "gi",
72
+ confidence: 0.65,
73
+ direction: "response"
74
+ },
75
+ {
76
+ id: "injection-prompt-markers",
77
+ type: "injection",
78
+ pattern: "<\\|im_start\\|>|<<SYS>>|\\[\\[?system\\]\\]?\\s*:",
79
+ flags: "gi",
80
+ confidence: 0.7,
81
+ direction: "response"
82
+ },
83
+ {
84
+ id: "injection-conceal-from-user",
85
+ type: "injection",
86
+ pattern: "\\bdo not (?:tell|inform|mention|reveal|show)(?:\\s+this)?(?:\\s+to)?\\s+the user\\b",
87
+ flags: "gi",
88
+ confidence: 0.7,
89
+ direction: "response"
90
+ },
91
+ {
92
+ id: "injection-tool-induction",
93
+ type: "injection",
94
+ pattern: "\\b(?:silently|secretly|immediately)\\s+(?:call|invoke|run|execute)\\s+(?:the\\s+)?[\\w.-]+\\s+tool\\b",
95
+ flags: "gi",
96
+ confidence: 0.6,
97
+ direction: "response"
54
98
  }
55
99
  ];
56
100
 
@@ -64,16 +108,21 @@ export function createDefaultFilterEngine({ customRules = [] } = {}) {
64
108
  readsPlaintext: true,
65
109
  networkEgress: false
66
110
  },
67
- async detect({ entries }) {
68
- return entries.flatMap((entry) => detectEntry(entry, rules));
111
+ async detect({ entries, context }) {
112
+ return entries.flatMap((entry) => detectEntry(entry, rules, context));
69
113
  }
70
114
  };
71
115
  }
72
116
 
73
- export function detectEntry(entry, rules) {
117
+ export function detectEntry(entry, rules, context = {}) {
74
118
  const detections = [];
75
119
 
76
120
  for (const rule of rules) {
121
+ // Direction-scoped rules (e.g. injection heuristics) only run on the
122
+ // matching traffic direction; rules without a direction run everywhere.
123
+ if (rule.direction && rule.direction !== context?.direction) {
124
+ continue;
125
+ }
77
126
  const regex = new RegExp(rule.pattern, rule.flags.includes("g") ? rule.flags : `${rule.flags}g`);
78
127
  for (const match of entry.value.matchAll(regex)) {
79
128
  const value = match[0];
@@ -1,25 +1,35 @@
1
1
  import { createInterface } from "node:readline";
2
2
 
3
- export async function protectMcpJsonRpcMessage(message, runtime) {
3
+ // Tagged core used by both the one-direction line filter and mcp-wrap.
4
+ // kinds: "forward" (deliver the protected message), "reject" (send the error
5
+ // back to the CLIENT instead of delivering), "drop" (notification — deliver
6
+ // nothing, per JSON-RPC).
7
+ async function protectTagged(message, runtime, { enforceMethodAllowlist = true } = {}) {
4
8
  if (!message || typeof message !== "object" || Array.isArray(message)) {
5
9
  throw new Error(Array.isArray(message)
6
10
  ? "JSON-RPC batch messages are not supported by the MCP stdio filter"
7
11
  : "MCP message must be a JSON object");
8
12
  }
9
13
  const policy = runtime.config.mcp;
10
- // JSON-RPC notifications (method, no id) must not receive responses; a
11
- // rejected or blocked notification is dropped (returns null) instead.
12
14
  const isNotification = message.method !== undefined
13
15
  && !Object.prototype.hasOwnProperty.call(message, "id");
16
+
17
+ function reject(error) {
18
+ return isNotification ? { kind: "drop" } : { kind: "reject", message: error };
19
+ }
20
+
14
21
  if (policy.requireJsonRpc && message.jsonrpc !== "2.0") {
15
- return isNotification ? null : errorJsonRpc(message.id, -32002, "haechi_mcp_invalid_jsonrpc", {
22
+ return reject(errorJsonRpc(message.id, -32002, "haechi_mcp_invalid_jsonrpc", {
16
23
  reason: "MCP messages must use JSON-RPC 2.0"
17
- });
24
+ }));
18
25
  }
19
- if (message.method && !methodAllowed(message.method, policy.allowedMethods)) {
20
- return isNotification ? null : errorJsonRpc(message.id, -32003, "haechi_mcp_method_not_allowed", {
26
+ // The allowlist describes CLIENT-callable methods. Server-initiated requests
27
+ // (e.g. sampling/createMessage) are exempted by the caller via
28
+ // enforceMethodAllowlist: false, but their params are still protected.
29
+ if (enforceMethodAllowlist && message.method && !methodAllowed(message.method, policy.allowedMethods)) {
30
+ return reject(errorJsonRpc(message.id, -32003, "haechi_mcp_method_not_allowed", {
21
31
  method: message.method
22
- });
32
+ }));
23
33
  }
24
34
 
25
35
  const next = structuredClone(message);
@@ -28,10 +38,11 @@ export async function protectMcpJsonRpcMessage(message, runtime) {
28
38
  const result = await runtime.haechi.protectJson(next.params, {
29
39
  protocol: "mcp-stdio",
30
40
  operation: next.method ?? "params",
41
+ direction: "request",
31
42
  mode: runtime.config.policy.mode ?? runtime.config.mode
32
43
  });
33
44
  if (result.blocked) {
34
- return isNotification ? null : blockedJsonRpc(next.id, result);
45
+ return reject(blockedJsonRpc(next.id, result));
35
46
  }
36
47
  next.params = result.payload;
37
48
  }
@@ -40,15 +51,21 @@ export async function protectMcpJsonRpcMessage(message, runtime) {
40
51
  const result = await runtime.haechi.protectJson(next.result, {
41
52
  protocol: "mcp-stdio",
42
53
  operation: "result",
54
+ direction: "response",
43
55
  mode: runtime.config.policy.mode ?? runtime.config.mode
44
56
  });
45
57
  if (result.blocked) {
46
- return blockedJsonRpc(next.id, result);
58
+ return { kind: "reject", message: blockedJsonRpc(next.id, result) };
47
59
  }
48
60
  next.result = result.payload;
49
61
  }
50
62
 
51
- return next;
63
+ return { kind: "forward", message: next };
64
+ }
65
+
66
/**
 * Protects a single MCP JSON-RPC message and flattens the tagged outcome
 * produced by protectTagged into the value a one-direction filter writes out.
 *
 * @param {object} message - Parsed JSON-RPC message.
 * @param {object} runtime - Runtime handed through to protectTagged.
 * @param {object} [options] - Forwarded unchanged to protectTagged
 *   (e.g. `enforceMethodAllowlist`).
 * @returns {Promise<object|null>} `null` when the outcome is "drop";
 *   otherwise the `message` carried by the tagged outcome.
 */
export async function protectMcpJsonRpcMessage(message, runtime, options = {}) {
  const { kind, message: outgoing } = await protectTagged(message, runtime, options);
  if (kind === "drop") {
    return null;
  }
  return outgoing;
}
53
70
 
54
71
  export async function runMcpStdioFilter({ input = process.stdin, output = process.stdout, runtime }) {
@@ -66,21 +83,85 @@ export async function runMcpStdioFilter({ input = process.stdin, output = proces
66
83
  }
67
84
  output.write(`${JSON.stringify(protectedMessage)}\n`);
68
85
  } catch (error) {
69
- output.write(`${JSON.stringify({
70
- jsonrpc: "2.0",
71
- error: {
72
- code: -32000,
73
- message: "haechi_mcp_stdio_error",
74
- data: {
75
- reason: error.message
76
- }
77
- },
78
- id: null
79
- })}\n`);
86
+ output.write(`${JSON.stringify(stdioError(error))}\n`);
80
87
  }
81
88
  }
82
89
  }
83
90
 
91
/**
 * Bidirectional wrap around a spawned MCP server child process:
 *   client → (allowlist + params protection) → child stdin
 *   child stdout → (params/result protection, no client allowlist) → client
 * Rejections in BOTH directions are answered to the client; nothing reaches
 * the child for a rejected client message.
 *
 * @param {object} options
 * @param {object} options.runtime - Handed through to protectTagged.
 * @param {object} options.child - Spawned process exposing `stdin`, `stdout`
 *   and the 'exit' / 'error' events.
 * @param {object} [options.input=process.stdin] - Newline-delimited JSON from the client.
 * @param {object} [options.output=process.stdout] - Receives one JSON document per line.
 * @returns {Promise<{code: number, signal: *}>} Resolves after the child's
 *   'exit' event once both pumps have settled; rejects on the child's 'error' event.
 */
export function wrapMcpChild({ runtime, child, input = process.stdin, output = process.stdout }) {
  const clientLines = createInterface({ input, crlfDelay: Infinity });
  const serverLines = createInterface({ input: child.stdout, crlfDelay: Infinity });

  const sendToClient = (message) => {
    output.write(`${JSON.stringify(message)}\n`);
  };

  // A write that races with the child's death surfaces as an 'error' event
  // (EPIPE) on child.stdin; without a listener that becomes an uncaught
  // exception. The stream stops being writable afterwards, so later requests
  // are answered by the undeliverable branch below.
  child.stdin.on("error", () => {});

  const clientPump = (async () => {
    for await (const line of clientLines) {
      if (!line.trim()) {
        continue;
      }
      try {
        const tagged = await protectTagged(JSON.parse(line), runtime, { enforceMethodAllowlist: true });
        if (tagged.kind === "reject") {
          sendToClient(tagged.message);
        } else if (tagged.kind === "forward") {
          if (child.stdin.writable) {
            child.stdin.write(`${JSON.stringify(tagged.message)}\n`);
          } else if (tagged.message.method !== undefined && Object.hasOwn(tagged.message, "id")) {
            // A request (method + id) that can no longer be delivered must be
            // answered, otherwise the client waits forever. Notifications and
            // client responses get no reply, per JSON-RPC.
            sendToClient({
              ...stdioError(new Error("MCP server process is not accepting input")),
              id: tagged.message.id
            });
          }
        }
      } catch (error) {
        sendToClient(stdioError(error));
      }
    }
    if (child.stdin.writable) {
      child.stdin.end();
    }
  })();

  const serverPump = (async () => {
    for await (const line of serverLines) {
      if (!line.trim()) {
        continue;
      }
      try {
        const tagged = await protectTagged(JSON.parse(line), runtime, { enforceMethodAllowlist: false });
        if (tagged.kind !== "drop") {
          sendToClient(tagged.message);
        }
      } catch (error) {
        sendToClient(stdioError(error));
      }
    }
  })();

  return new Promise((resolve, reject) => {
    const stopReading = () => {
      clientLines.close();
      serverLines.close();
    };
    child.once("error", (error) => {
      // 'exit' is not guaranteed to follow 'error' (e.g. a failed spawn), so
      // release the readline interfaces here as well.
      stopReading();
      reject(error);
    });
    child.once("exit", (code, signal) => {
      // The child is gone: stop consuming client input so the pumps can
      // settle even when the caller's input stream stays open.
      // NOTE(review): server lines still unread when 'exit' fires may be cut
      // off by closing serverLines here — confirm whether waiting for the
      // child's stdout to end is preferable.
      stopReading();
      Promise.allSettled([clientPump, serverPump]).then(() => {
        resolve({ code: code ?? (signal ? 1 : 0), signal });
      });
    });
  });
}
150
+
151
/**
 * Builds the generic JSON-RPC 2.0 error envelope written when a stdio line
 * cannot be parsed or protected. The id is always `null` because the failing
 * message's id may not be known.
 *
 * @param {*} error - Whatever the `catch` clause received; usually an Error.
 * @returns {{jsonrpc: string, error: {code: number, message: string, data: {reason: *}}, id: null}}
 */
function stdioError(error) {
  // `catch` can receive any thrown value. Reading `.message` off null or
  // undefined would throw inside the caller's catch block, and a thrown string
  // has no `.message`, so fall back to the value's string form.
  // NOTE(review): the message is forwarded verbatim; JSON.parse failures may
  // embed a fragment of the offending line on newer engines — confirm that is
  // acceptable for unfiltered input.
  const reason = error?.message ?? String(error);
  return {
    jsonrpc: "2.0",
    error: {
      code: -32000,
      message: "haechi_mcp_stdio_error",
      data: {
        reason
      }
    },
    id: null
  };
}
164
+
84
165
  function blockedJsonRpc(id, result) {
85
166
  return errorJsonRpc(id, -32001, "haechi_policy_block", {
86
167
  auditId: result.auditEvent.id,
@@ -95,6 +95,12 @@ export function buildPolicy({
95
95
  allowUnsafeOverrides
96
96
  });
97
97
  }
98
+ // Injection heuristics ship report-only: unless a preset or the user sets an
99
+ // explicit action, injection detections are audited but never transform or
100
+ // block. This intentionally bypasses defaultAction.
101
+ if (!merged.actions.injection) {
102
+ merged.actions.injection = "allow";
103
+ }
98
104
  validatePolicy(merged);
99
105
  return merged;
100
106
  }
@@ -1,11 +1,22 @@
1
// Streaming descriptors: `format` is the wire framing, `deltaPath` is the
// primary incremental-text channel (index 0 of choices for OpenAI-style).
// A null deltaPath means "no known channel" — frames still get within-frame
// protection but no cross-frame buffering.
//
// One descriptor object is shared by every route that references it, so the
// descriptor and its path array are frozen: an accidental mutation by a
// consumer throws instead of silently changing other routes and requests.
const streamingDescriptor = (format, deltaPath) => Object.freeze({
  format,
  deltaPath: deltaPath === null ? null : Object.freeze([...deltaPath])
});

const SSE_CHAT = streamingDescriptor("sse", ["choices", 0, "delta", "content"]);
const SSE_COMPLETION = streamingDescriptor("sse", ["choices", 0, "text"]);
const SSE_RESPONSES = streamingDescriptor("sse", null);
const SSE_LLAMA_LEGACY = streamingDescriptor("sse", ["content"]);
const NDJSON_OLLAMA_CHAT = streamingDescriptor("ndjson", ["message", "content"]);
const NDJSON_OLLAMA_GENERATE = streamingDescriptor("ndjson", ["response"]);
11
+
1
12
  const ADAPTERS = {
2
13
  "openai-compatible": {
3
14
  id: "openai-compatible",
4
15
  protocol: "llm-http",
5
16
  routes: [
6
- route("/v1/chat/completions", "chat-completions"),
7
- route("/v1/completions", "completions"),
8
- route("/v1/responses", "responses"),
17
+ route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
18
+ route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
19
+ route("/v1/responses", "responses", { streaming: SSE_RESPONSES }),
9
20
  route("/v1/embeddings", "embeddings")
10
21
  ]
11
22
  },
@@ -13,9 +24,9 @@ const ADAPTERS = {
13
24
  id: "vllm-openai",
14
25
  protocol: "vllm-openai",
15
26
  routes: [
16
- route("/v1/chat/completions", "chat-completions"),
17
- route("/v1/completions", "completions"),
18
- route("/v1/responses", "responses"),
27
+ route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
28
+ route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
29
+ route("/v1/responses", "responses", { streaming: SSE_RESPONSES }),
19
30
  route("/v1/embeddings", "embeddings")
20
31
  ]
21
32
  },
@@ -23,10 +34,10 @@ const ADAPTERS = {
23
34
  id: "llama-cpp",
24
35
  protocol: "llama-cpp",
25
36
  routes: [
26
- route("/v1/chat/completions", "chat-completions"),
27
- route("/v1/completions", "completions"),
37
+ route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
38
+ route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
28
39
  route("/v1/embeddings", "embeddings"),
29
- route("/completion", "legacy-completion")
40
+ route("/completion", "legacy-completion", { streaming: SSE_LLAMA_LEGACY })
30
41
  ]
31
42
  },
32
43
  "ollama": {
@@ -34,8 +45,8 @@ const ADAPTERS = {
34
45
  protocol: "ollama",
35
46
  routes: [
36
47
  // Ollama streams /api/chat and /api/generate unless the request sets stream:false.
37
- route("/api/chat", "chat", { streamingDefault: true }),
38
- route("/api/generate", "generate", { streamingDefault: true }),
48
+ route("/api/chat", "chat", { streamingDefault: true, streaming: NDJSON_OLLAMA_CHAT }),
49
+ route("/api/generate", "generate", { streamingDefault: true, streaming: NDJSON_OLLAMA_GENERATE }),
39
50
  route("/api/embed", "embed"),
40
51
  route("/api/embeddings", "embeddings")
41
52
  ]
@@ -47,7 +58,13 @@ const TARGET_TYPE_ALIASES = {
47
58
  };
48
59
 
49
60
  export function createProtocolAdapter(target = {}) {
50
- const adapterId = target.adapter ?? adapterFromTargetType(target.type);
61
+ // A specific target.type (vllm-openai, ollama, llama-cpp) names its own
62
+ // adapter and wins over a generic/default target.adapter — otherwise the
63
+ // default config's adapter ("openai-compatible") would shadow the type after
64
+ // a deep merge and silently route an Ollama target to OpenAI paths.
65
+ const adapterId = ADAPTERS[target.type]
66
+ ? target.type
67
+ : (target.adapter ?? adapterFromTargetType(target.type));
51
68
  const adapter = ADAPTERS[adapterId];
52
69
  if (!adapter) {
53
70
  throw new Error(`Unknown protocol adapter: ${adapterId}`);
@@ -71,7 +88,8 @@ export function createProtocolAdapter(target = {}) {
71
88
  operation,
72
89
  protectRequest: matched?.protectRequest ?? true,
73
90
  protectResponse: matched?.protectResponse ?? true,
74
- streamingByDefault: matched?.streamingDefault ?? false
91
+ streamingByDefault: matched?.streamingDefault ?? false,
92
+ streaming: matched?.streaming ?? null
75
93
  };
76
94
  }
77
95
  };
@@ -98,7 +116,8 @@ function route(path, operation, options = {}) {
98
116
  operation,
99
117
  protectRequest: options.protectRequest ?? true,
100
118
  protectResponse: options.protectResponse ?? true,
101
- streamingDefault: options.streamingDefault ?? false
119
+ streamingDefault: options.streamingDefault ?? false,
120
+ streaming: options.streaming ?? null
102
121
  };
103
122
  }
104
123
 
@@ -1,5 +1,6 @@
1
1
  import { createServer } from "node:http";
2
2
  import { createHash, randomUUID } from "node:crypto";
3
+ import { inspectResponseStream } from "../stream-filter/index.mjs";
3
4
 
4
5
  export const DEFAULT_PROXY_PORT = 1016;
5
6
 
@@ -22,6 +23,11 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
22
23
  const json = parseJsonBody(body);
23
24
 
24
25
  if (isStreamingRequest(json, routeContext)) {
26
+ if (config.streaming.requestMode === "inspect") {
27
+ await handleInspectedStream({ runtime, request, response, routeContext, json });
28
+ return;
29
+ }
30
+
25
31
  if (config.streaming.requestMode === "pass-through") {
26
32
  await recordProxyDecision({
27
33
  runtime,
@@ -45,7 +51,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
45
51
 
46
52
  writeJson(response, 501, {
47
53
  error: "haechi_streaming_unsupported",
48
- message: "Streaming requests are blocked unless streaming.requestMode is explicitly set to pass-through"
54
+ message: "Streaming requests are blocked unless streaming.requestMode is set to pass-through or inspect"
49
55
  });
50
56
  return;
51
57
  }
@@ -54,6 +60,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
54
60
  ? await haechi.protectJson(json, {
55
61
  ...routeContext,
56
62
  operation: `request:${routeContext.operation}`,
63
+ direction: "request",
57
64
  mode: config.policy.mode ?? config.mode
58
65
  })
59
66
  : { payload: json, blocked: false };
@@ -77,7 +84,8 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
77
84
  const forwarded = await maybeProtectResponse({
78
85
  upstreamResponse,
79
86
  routeContext,
80
- runtime
87
+ runtime,
88
+ issuedTokens: result.issuedTokens ?? []
81
89
  });
82
90
 
83
91
  response.writeHead(forwarded.status, forwarded.headers);
@@ -112,7 +120,108 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
112
120
  };
113
121
  }
114
122
 
115
- async function maybeProtectResponse({ upstreamResponse, routeContext, runtime }) {
123
/**
 * Serves a streaming request when streaming.requestMode is "inspect": the
 * JSON request body is protected, forwarded upstream, and the upstream
 * response stream is piped through a stream protector on its way to the client.
 *
 * Outcomes:
 *  - 501 JSON error when the matched route has no streaming descriptor;
 *  - 403 JSON error when request protection blocks the payload;
 *  - otherwise the upstream status and filtered headers are written, the body
 *    is inspected frame by frame, and an audit record is emitted.
 *
 * @param {object} args
 * @param {object} args.runtime - Provides `haechi` and `config`.
 * @param {object} args.request - Incoming request, handed to `forward`.
 * @param {object} args.response - Outgoing HTTP response.
 * @param {object} args.routeContext - Matched route, including `streaming`.
 * @param {*} args.json - Parsed request body.
 * @returns {Promise<void>}
 */
async function handleInspectedStream({ runtime, request, response, routeContext, json }) {
  const { haechi, config } = runtime;

  // Inspection needs to know the wire format and delta channel for this route.
  if (!routeContext.streaming) {
    writeJson(response, 501, {
      error: "haechi_streaming_uninspectable_route",
      message: `streaming.requestMode is "inspect" but route ${routeContext.routeId} has no known streaming format`
    });
    return;
  }

  // The request body is ordinary JSON even when the response streams, so it is
  // protected like any other request.
  const requestResult = routeContext.protectRequest
    ? await haechi.protectJson(json, {
      ...routeContext,
      operation: `request:${routeContext.operation}`,
      direction: "request",
      mode: config.policy.mode ?? config.mode
    })
    : { payload: json, blocked: false };

  if (requestResult.blocked) {
    writeJson(response, 403, {
      error: "haechi_policy_block",
      summary: requestResult.summary,
      auditId: requestResult.auditEvent.id
    });
    return;
  }

  const upstreamResponse = await forward({
    upstream: config.target.upstream,
    request,
    body: JSON.stringify(requestResult.payload),
    timeoutMs: config.limits.upstreamTimeoutMs
  });

  const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode;
  const protector = haechi.createStreamProtector({
    ...routeContext,
    operation: `response-stream:${routeContext.operation}`,
    direction: "response",
    mode: streamMode,
    maxMatchBytes: config.streaming.maxMatchBytes
  });

  response.writeHead(upstreamResponse.status, streamingResponseHeaders(upstreamResponse));

  // Headers are written at this point, so a failure below can no longer be
  // turned into a JSON error response. Always end the response so the client
  // is not left waiting on an open stream; the error still propagates to the
  // caller exactly as before.
  try {
    const { blocked, summary } = await inspectResponseStream({
      source: upstreamResponse.body ?? emptyAsyncIterable(),
      sink: nodeResponseSink(response),
      streaming: routeContext.streaming,
      protector
    });

    await recordStreamDecision({ runtime, routeContext, blocked, summary, mode: streamMode });
  } finally {
    response.end();
  }
}
183
+
184
/**
 * Copies the upstream response headers into a plain object, leaving out
 * `content-length` and `content-encoding`.
 * NOTE(review): presumably those two are dropped because the proxy re-emits
 * the inspected body itself, so upstream length/encoding no longer describe
 * it — confirm.
 *
 * @param {object} upstreamResponse - Response whose `headers.entries()` yields [name, value] pairs.
 * @returns {object} Header name → value map for the client response.
 */
function streamingResponseHeaders(upstreamResponse) {
  const omitted = new Set(["content-length", "content-encoding"]);
  const kept = [...upstreamResponse.headers.entries()]
    .filter(([name]) => !omitted.has(name));
  return Object.fromEntries(kept);
}
190
+
191
/**
 * Adapts an HTTP response to the `{ write(text) }` sink shape handed to
 * inspectResponseStream. The return value of `response.write` is discarded.
 *
 * @param {object} response - Object exposing `write(text)`.
 * @returns {{write: function(string): void}}
 */
function nodeResponseSink(response) {
  const write = (text) => {
    response.write(text);
  };
  return { write };
}
198
+
199
// Async generator that finishes immediately without yielding a value.
// handleInspectedStream passes it as the stream source when the upstream
// response has no body (`upstreamResponse.body ?? emptyAsyncIterable()`).
+ async function* emptyAsyncIterable() {
200
+ // No upstream body to inspect.
201
+ }
202
+
203
/**
 * Writes one audit record describing the outcome of an inspected response
 * stream. Does nothing when the runtime has no audit sink with a `record`
 * function.
 *
 * @param {object} args
 * @param {object} args.runtime - May carry `auditSink.record`.
 * @param {object} [args.routeContext] - Matched route; missing fields fall back to defaults.
 * @param {boolean} args.blocked - Whether the stream was blocked.
 * @param {*} args.summary - Summary returned by the stream inspection.
 * @param {string} args.mode - Mode the stream was protected under.
 * @returns {Promise<void>}
 */
async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode }) {
  const sink = runtime.auditSink;
  if (typeof sink?.record !== "function") {
    return;
  }

  // dry-run and report-only observe without enforcing.
  const observeOnly = mode === "dry-run" || mode === "report-only";
  const outcome = blocked
    ? { decision: "stream_blocked", reason: "stream_policy_block" }
    : { decision: "stream_inspected", reason: "stream_inspected" };
  const operationName = routeContext?.operation ?? "unknown";

  const event = {
    id: randomUUID(),
    timestamp: new Date().toISOString(),
    protocol: routeContext?.protocol ?? "proxy",
    operation: `response-stream:${operationName}`,
    mode,
    identity: null,
    enforced: !observeOnly,
    blocked,
    decision: outcome.decision,
    reason: outcome.reason,
    routeId: routeContext?.routeId ?? "unknown",
    // Only a hash of the path is recorded, never the path itself.
    pathHash: routeContext?.path ? shortHash(routeContext.path) : null,
    summary
  };
  await sink.record(event);
}
223
+
224
+ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, issuedTokens = [] }) {
116
225
  const headers = Object.fromEntries(upstreamResponse.headers.entries());
117
226
 
118
227
  if (!runtime.config.responseProtection.enabled || !routeContext.protectResponse) {
@@ -203,6 +312,7 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime })
203
312
  const result = await runtime.haechi.protectJson(json, {
204
313
  ...routeContext,
205
314
  operation: `response:${routeContext.operation}`,
315
+ direction: "response",
206
316
  mode: runtime.config.responseProtection.mode ?? runtime.config.policy.mode ?? runtime.config.mode
207
317
  });
208
318
 
@@ -218,13 +328,47 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime })
218
328
  };
219
329
  }
220
330
 
331
+ let responsePayload = result.payload;
332
+
333
+ // Request-scoped token round-trip: restore ONLY tokens issued/reused while
334
+ // protecting this request, so the model sees tokens but the caller sees
335
+ // plaintext. Explicit opt-in; runs after response protection, so an opt-in
336
+ // here intentionally overrides response-direction transforms for values the
337
+ // caller already sent.
338
+ if (runtime.config.tokenVault.detokenizeResponses
339
+ && issuedTokens.length > 0
340
+ && typeof runtime.tokenVault?.detokenize === "function") {
341
+ const values = await runtime.tokenVault.detokenize({ tokens: issuedTokens });
342
+ if (values.size > 0) {
343
+ responsePayload = restoreTokens(responsePayload, values);
344
+ }
345
+ }
346
+
221
347
  return {
222
348
  status: upstreamResponse.status,
223
349
  headers: transformedJsonHeaders(headers),
224
- body: Buffer.from(`${JSON.stringify(result.payload)}\n`)
350
+ body: Buffer.from(`${JSON.stringify(responsePayload)}\n`)
225
351
  };
226
352
  }
227
353
 
354
/**
 * Recursively replaces `[TOKEN:<id>]` placeholders with their plaintext in
 * every string of a JSON-like value, including object keys. Arrays and
 * objects are rebuilt; other values are returned as-is.
 *
 * All placeholders in a string are substituted in a single pass. Replacing
 * token by token would re-scan text that was just restored, so a plaintext
 * that itself contains something shaped like `[TOKEN:x]` would be expanded a
 * second time; here restored text is never re-examined.
 *
 * @param {*} value - Response payload (string, array, object, or primitive).
 * @param {Iterable<[string, string]>} tokenValues - Token id → plaintext pairs (e.g. a Map).
 * @returns {*} Copy of `value` with known placeholders restored; unknown
 *   placeholders are left untouched.
 */
function restoreTokens(value, tokenValues) {
  const plaintextById = new Map(
    [...tokenValues].map(([token, plaintext]) => [String(token), plaintext])
  );

  // Token ids are matched literally, so regex metacharacters are escaped.
  const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const alternatives = [...plaintextById.keys()]
    .sort((a, b) => b.length - a.length)
    .map(escapeForRegExp);
  // With no tokens an empty alternation would match `[TOKEN:]`, so skip matching.
  const placeholder = alternatives.length > 0
    ? new RegExp(`\\[TOKEN:(${alternatives.join("|")})\\]`, "g")
    : null;

  const restore = (node) => {
    if (typeof node === "string") {
      if (placeholder === null) {
        return node;
      }
      // A replacer function inserts the plaintext literally (no `$` patterns).
      return node.replace(placeholder, (_match, id) => String(plaintextById.get(id)));
    }
    if (Array.isArray(node)) {
      return node.map((item) => restore(item));
    }
    if (node && typeof node === "object") {
      return Object.fromEntries(
        Object.entries(node).map(([key, item]) => [restore(key), restore(item)])
      );
    }
    return node;
  };

  return restore(value);
}
371
+
228
372
  async function forward({ upstream, request, body, timeoutMs = null }) {
229
373
  const target = buildUpstreamUrl({ upstream, requestUrl: request.url });
230
374
  try {
@@ -462,6 +606,7 @@ async function recordProxyDecision({ runtime, routeContext, decision, reason, en
462
606
  protocol: routeContext?.protocol ?? "proxy",
463
607
  operation: routeContext ? `proxy:${routeContext.protocol}:${routeContext.routeId ?? "unknown"}` : "proxy",
464
608
  mode: runtime.config.policy.mode ?? runtime.config.mode,
609
+ identity: null,
465
610
  enforced,
466
611
  blocked,
467
612
  decision,