haechi 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.ko.md +57 -11
  2. package/README.md +57 -11
  3. package/docs/current/code-review-risk-register-2026-06-16.ko.md +377 -0
  4. package/docs/current/code-review-risk-register-2026-06-16.md +377 -0
  5. package/docs/current/config-version.ko.md +2 -2
  6. package/docs/current/config-version.md +2 -2
  7. package/docs/current/configuration.ko.md +28 -11
  8. package/docs/current/configuration.md +28 -11
  9. package/docs/current/operations-runbook.ko.md +36 -2
  10. package/docs/current/operations-runbook.md +39 -2
  11. package/docs/current/release-process.ko.md +5 -1
  12. package/docs/current/release-process.md +5 -1
  13. package/docs/current/risk-register-release-gate.ko.md +34 -8
  14. package/docs/current/risk-register-release-gate.md +34 -8
  15. package/docs/current/shared-responsibility.ko.md +12 -3
  16. package/docs/current/shared-responsibility.md +12 -3
  17. package/docs/current/threat-model.ko.md +7 -3
  18. package/docs/current/threat-model.md +7 -3
  19. package/examples/local-proxy-demo/README.md +51 -0
  20. package/examples/local-proxy-demo/demo.mjs +144 -0
  21. package/examples/local-proxy-demo/demo.tape +19 -0
  22. package/examples/local-proxy-demo/live-demo.mjs +121 -0
  23. package/examples/local-proxy-demo/live-demo.tape +25 -0
  24. package/haechi.config.example.json +2 -1
  25. package/package.json +3 -1
  26. package/packages/cli/bin/haechi.mjs +95 -5
  27. package/packages/cli/runtime.mjs +61 -1
  28. package/packages/core/index.mjs +15 -0
  29. package/packages/crypto/index.mjs +42 -20
  30. package/packages/filter/index.mjs +679 -6
  31. package/packages/privacy-profiles/index.mjs +72 -3
  32. package/packages/protocol-adapters/index.mjs +99 -1
  33. package/packages/proxy/index.mjs +270 -29
  34. package/packages/ssrf/index.mjs +60 -4
  35. package/packages/stream-filter/index.mjs +194 -17
@@ -12,7 +12,11 @@ const PROFILES = {
12
12
  email: "redact",
13
13
  card: "block",
14
14
  api_key: "block",
15
- secret: "block"
15
+ secret: "block",
16
+ // A Japan My Number leak is as sensitive as a national ID and is a
17
+ // checksummed true-positive — block it in every profile so a non-JP
18
+ // deployment that happens to process JP data is still covered.
19
+ jp_mynumber: "block"
16
20
  }
17
21
  },
18
22
  transfer: {
@@ -31,7 +35,17 @@ const PROFILES = {
31
35
  card: "block",
32
36
  api_key: "block",
33
37
  secret: "block",
34
- kr_rrn: "block"
38
+ kr_rrn: "block",
39
+ // EU national IDs — France NIR, Spain DNI/NIE, UK National Insurance
40
+ // Number, Italy codice fiscale, Germany tax ID, Netherlands BSN — are
41
+ // GDPR special-category-adjacent identifiers; block them.
42
+ fr_nir: "block",
43
+ es_dni: "block",
44
+ uk_nino: "block",
45
+ it_codice_fiscale: "block",
46
+ de_steuer_id: "block",
47
+ nl_bsn: "block",
48
+ jp_mynumber: "block"
35
49
  }
36
50
  },
37
51
  transfer: {
@@ -39,6 +53,37 @@ const PROFILES = {
39
53
  note: "Treat model/tool transfer as processor/subprocessor transfer and document SCC/TIA evidence outside Haechi."
40
54
  }
41
55
  },
56
+ "asia-pdpa": {
57
+ id: "asia-pdpa",
58
+ region: "ASIA",
59
+ regulations: ["Singapore PDPA", "India DPDP Act"],
60
+ policy: {
61
+ actions: {
62
+ // Asia national IDs — Singapore NRIC/FIN and India Aadhaar — are sensitive
63
+ // identifiers under the Singapore PDPA / India DPDP Act; block them. The
64
+ // other checksummed national IDs are also blocked so a mixed-region payload
65
+ // is covered, matching the cross-profile convention.
66
+ sg_nric: "block",
67
+ in_aadhaar: "block",
68
+ jp_mynumber: "block",
69
+ kr_rrn: "block",
70
+ fr_nir: "block",
71
+ es_dni: "block",
72
+ it_codice_fiscale: "block",
73
+ de_steuer_id: "block",
74
+ nl_bsn: "block",
75
+ phone: "mask",
76
+ email: "redact",
77
+ card: "block",
78
+ api_key: "block",
79
+ secret: "block"
80
+ }
81
+ },
82
+ transfer: {
83
+ requiresAssessment: true,
84
+ note: "Document the PDPA/DPDP handling basis, purpose limitation, and cross-border transfer notice before production use."
85
+ }
86
+ },
42
87
  "us-general": {
43
88
  id: "us-general",
44
89
  region: "US",
@@ -49,13 +94,37 @@ const PROFILES = {
49
94
  phone: "mask",
50
95
  card: "block",
51
96
  api_key: "block",
52
- secret: "block"
97
+ secret: "block",
98
+ jp_mynumber: "block"
53
99
  }
54
100
  },
55
101
  transfer: {
56
102
  requiresAssessment: false,
57
103
  note: "Classify sector rules separately before using protected health, payment, or children's data."
58
104
  }
105
+ },
106
+ "jp-appi": {
107
+ id: "jp-appi",
108
+ region: "JP",
109
+ regulations: ["APPI"],
110
+ policy: {
111
+ actions: {
112
+ // My Number (個人番号) is a special-care personal-information identifier
113
+ // under the My Number Act; block it. Of the other regional IDs only the KR
114
+ // RRN is blocked here; the EU IDs are not listed (NOTE(review): confirm intent).
115
+ jp_mynumber: "block",
116
+ phone: "mask",
117
+ email: "redact",
118
+ card: "block",
119
+ api_key: "block",
120
+ secret: "block",
121
+ kr_rrn: "block"
122
+ }
123
+ },
124
+ transfer: {
125
+ requiresAssessment: true,
126
+ note: "Document the My Number Act handling basis, purpose limitation, and cross-border transfer notice before production use."
127
+ }
59
128
  }
60
129
  };
61
130
 
@@ -8,6 +8,28 @@ const SSE_RESPONSES = { format: "sse", deltaPath: null };
8
8
  const SSE_LLAMA_LEGACY = { format: "sse", deltaPath: ["content"] };
9
9
  const NDJSON_OLLAMA_CHAT = { format: "ndjson", deltaPath: ["message", "content"] };
10
10
  const NDJSON_OLLAMA_GENERATE = { format: "ndjson", deltaPath: ["response"] };
11
+ // Anthropic Messages API streams event-typed SSE frames; the incremental text
12
+ // channel is `delta.text` inside a `content_block_delta` frame. Other frame
13
+ // types (message_start, ping, etc.) don't carry deltaPath, so they get
14
+ // within-frame protection but no cross-frame buffering. The stream-filter
15
+ // preserves each frame's `event:` line on re-serialize. `flushOnType` lists the
16
+ // frame types that END a delta sequence: before one of them the held cross-frame
17
+ // buffer tail is flushed as a valid `content_block_delta`, so the residual lands
18
+ // IN ORDER (before content_block_stop/message_stop) rather than after the stream
19
+ // terminates. `ping` is intentionally absent — a match split across a keepalive
20
+ // must still be caught by the sliding buffer. Legacy /v1/complete streams a
21
+ // `completion` delta (no block framing, so no flushOnType needed).
22
+ const SSE_ANTHROPIC_MESSAGES = {
23
+ format: "sse",
24
+ deltaPath: ["delta", "text"],
25
+ flushOnType: { path: ["type"], values: ["content_block_stop", "message_delta", "message_stop"] }
26
+ };
27
+ const SSE_ANTHROPIC_COMPLETE = { format: "sse", deltaPath: ["completion"] };
28
+ // Google Gemini streams :streamGenerateContent as DATA-ONLY SSE (no `event:`
29
+ // lines, like OpenAI). Each `data:` frame is a FULL GenerateContentResponse;
30
+ // the incremental text channel is just deeper. Because frames are data-only,
31
+ // the held cross-frame tail can flush at end-of-stream — no flushOnType needed.
32
+ const SSE_GEMINI = { format: "sse", deltaPath: ["candidates", 0, "content", "parts", 0, "text"] };
11
33
 
12
34
  const ADAPTERS = {
13
35
  "openai-compatible": {
@@ -50,6 +72,47 @@ const ADAPTERS = {
50
72
  route("/api/embed", "embed"),
51
73
  route("/api/embeddings", "embeddings")
52
74
  ]
75
+ },
76
+ "anthropic": {
77
+ id: "anthropic",
78
+ protocol: "anthropic",
79
+ routes: [
80
+ // Anthropic Messages API. PII can sit in the top-level `system` string/blocks
81
+ // or any `messages[].content` string or content-block text/input — the core
82
+ // tree walk (collectStringEntries) covers every string leaf, so no custom
83
+ // extraction is needed. Streams via content_block_delta `delta.text`.
84
+ route("/v1/messages", "messages", { streaming: SSE_ANTHROPIC_MESSAGES }),
85
+ // count_tokens is a utility, but it carries prompt content, so protect it.
86
+ route("/v1/messages/count_tokens", "count-tokens", { protectRequest: true }),
87
+ // Legacy text completions: `prompt` is a top-level string; streams a `completion` delta.
88
+ route("/v1/complete", "complete", { streaming: SSE_ANTHROPIC_COMPLETE })
89
+ ]
90
+ },
91
+ "gemini": {
92
+ id: "gemini",
93
+ protocol: "gemini",
94
+ routes: [
95
+ // Google Gemini API. Endpoints are MODEL-IN-PATH with a `:method` suffix:
96
+ // POST /v1beta/models/{model}:generateContent (and /v1, and arbitrary
97
+ // model names like gemini-2.0-flash). The route key is therefore the
98
+ // `:method` SUFFIX, not a fixed path — declared via `methodSuffix`, which
99
+ // matchRoute checks only AFTER exact-path matches (so existing adapters
100
+ // are unaffected). PII can sit in systemInstruction.parts[].text and any
101
+ // contents[].parts[].text; the core tree walk (collectStringEntries)
102
+ // covers every string leaf, so no custom extraction is needed.
103
+ suffixRoute("generateContent", "generate-content"),
104
+ // Streaming variant: data-only SSE, full GenerateContentResponse per frame;
105
+ // delta text lives at candidates[0].content.parts[0].text. The :stream*
106
+ // endpoint ALWAYS streams (the intent is in the path, not a body flag), so
107
+ // mark it streamingDefault — there is no `stream:false` body field for
108
+ // Gemini, so isStreamingRequest always classifies it as streaming.
109
+ suffixRoute("streamGenerateContent", "stream-generate-content", { streamingDefault: true, streaming: SSE_GEMINI }),
110
+ // countTokens carries prompt content (contents/systemInstruction), so protect it.
111
+ suffixRoute("countTokens", "count-tokens", { protectRequest: true }),
112
+ // Embedding endpoints: request carries text to embed; protect it.
113
+ suffixRoute("embedContent", "embed", { protectRequest: true }),
114
+ suffixRoute("batchEmbedContents", "batch-embed", { protectRequest: true })
115
+ ]
53
116
  }
54
117
  };
55
118
 
@@ -121,10 +184,45 @@ function route(path, operation, options = {}) {
121
184
  };
122
185
  }
123
186
 
187
+ // A SUFFIX route matches by a `:method` suffix instead of an exact pathname —
188
+ // for model-in-path APIs (Gemini) where the path embeds an arbitrary model name
189
+ // and a version prefix (e.g. /v1beta/models/gemini-2.0-flash:generateContent).
190
+ // `path` stays null (there is no fixed path); `methodSuffix` carries the bare
191
+ // method name and matchRoute matches when the pathname ends with `:${suffix}`.
192
+ // matchRoute tries exact-path routes FIRST, so this never changes existing
193
+ // exact-match behavior for openai/anthropic/ollama/llama-cpp.
194
+ function suffixRoute(methodSuffix, operation, options = {}) {
195
+ return {
196
+ id: operation,
197
+ path: null,
198
+ methodSuffix,
199
+ operation,
200
+ protectRequest: options.protectRequest ?? true,
201
+ protectResponse: options.protectResponse ?? true,
202
+ streamingDefault: options.streamingDefault ?? false,
203
+ streaming: options.streaming ?? null
204
+ };
205
+ }
206
+
124
207
  function pathFromRequestUrl(url) {
125
208
  return new URL(url, "http://haechi.local").pathname;
126
209
  }
127
210
 
128
211
  function matchRoute(routes, pathname) {
129
- return routes.find((candidate) => candidate.path === pathname);
212
+ // 1) EXACT pathname match (unchanged) — the only matcher for openai/anthropic/
213
+ // ollama/llama-cpp, so their classification is byte-for-byte identical.
214
+ const exact = routes.find((candidate) => candidate.path === pathname);
215
+ if (exact) {
216
+ return exact;
217
+ }
218
+ // 2) ADDITIVE: `:method`-SUFFIX match for model-in-path APIs (Gemini). A path
219
+ // like /v1beta/models/gemini-2.0-flash:generateContent matches the route
220
+ // whose methodSuffix the pathname ends with (`...:generateContent`). The
221
+ // `:` guard prevents a bare substring (e.g. a model literally named
222
+ // "generateContent") from matching without the method delimiter.
223
+ return routes.find(
224
+ (candidate) =>
225
+ typeof candidate.methodSuffix === "string" &&
226
+ pathname.endsWith(`:${candidate.methodSuffix}`)
227
+ );
130
228
  }
@@ -4,6 +4,7 @@ import { createHash, randomUUID } from "node:crypto";
4
4
  import { isUtf8 } from "node:buffer";
5
5
  import { readFileSync } from "node:fs";
6
6
  import { fileURLToPath } from "node:url";
7
+ import { once } from "node:events";
7
8
  import { inspectResponseStream } from "../stream-filter/index.mjs";
8
9
 
9
10
  export const DEFAULT_PROXY_PORT = 11016;
@@ -107,6 +108,19 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
107
108
  const metrics = runtime.metrics ?? noopMetrics();
108
109
  const logger = createLogger(config.logging?.format ?? "text");
109
110
 
111
+ // P0-CR-001 — the upstream header forward policy, derived ONCE from config.
112
+ // gatewayConsumedAuthorization is true whenever the gateway authenticates the
113
+ // CLIENT (auth.provider !== "none"): the request's Authorization is then the
114
+ // gateway credential Haechi consumed and must NOT be forwarded to the model
115
+ // upstream. With auth.provider "none" the client's Authorization is the
116
+ // upstream provider key and IS forwarded. extraHeaders is the operator's
117
+ // additive target.forwardHeaders allowlist (validated lowercase in
118
+ // normalizeConfig); it can only widen, never override the always-drop set.
119
+ const forwardPolicy = {
120
+ gatewayConsumedAuthorization: (config.auth?.provider ?? "none") !== "none",
121
+ extraHeaders: new Set(config.target?.forwardHeaders ?? [])
122
+ };
123
+
110
124
  // WS4-B backpressure: a configurable global max-in-flight ceiling. 0 (default)
111
125
  // disables it, preserving 1.1 behavior. When > 0 and the live count is at the
112
126
  // ceiling, a NEW non-exempt request is rejected 503 + Retry-After BEFORE auth
@@ -237,7 +251,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
237
251
 
238
252
  if (isStreamingRequest(json, routeContext)) {
239
253
  if (config.streaming.requestMode === "inspect") {
240
- await handleInspectedStream({ runtime, request, response, routeContext, json, authContext, metrics });
254
+ await handleInspectedStream({ runtime, request, response, routeContext, json, authContext, metrics, forwardPolicy });
241
255
  return;
242
256
  }
243
257
 
@@ -259,11 +273,24 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
259
273
  request,
260
274
  body,
261
275
  timeoutMs: config.limits.upstreamTimeoutMs,
262
- metrics
276
+ metrics,
277
+ forwardPolicy
278
+ });
279
+ // P1-CR-003 — sanitize response headers (strip the upstream's
280
+ // content-encoding/content-length/transfer/hop-by-hop) on this path
281
+ // too: Node fetch() auto-decompressed the body, so the original
282
+ // compressed headers would now be wrong. P1-CR-004 — TRUE bounded
283
+ // streaming pass-through: pipe the upstream body to the client with a
284
+ // running byte cap instead of buffering the whole response.
285
+ response.writeHead(upstreamResponse.status, sanitizeResponseHeaders(upstreamResponse));
286
+ await pipeUpstreamBodyBounded({
287
+ upstreamResponse,
288
+ response,
289
+ maxBytes: streamingPassThroughMaxBytes(config),
290
+ logger,
291
+ metrics,
292
+ correlationId
263
293
  });
264
- const { body: rawBody } = await readUpstreamBody(upstreamResponse);
265
- response.writeHead(upstreamResponse.status, Object.fromEntries(upstreamResponse.headers.entries()));
266
- response.end(rawBody);
267
294
  return;
268
295
  }
269
296
 
@@ -301,7 +328,8 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
301
328
  request,
302
329
  body: JSON.stringify(result.payload),
303
330
  timeoutMs: config.limits.upstreamTimeoutMs,
304
- metrics
331
+ metrics,
332
+ forwardPolicy
305
333
  });
306
334
 
307
335
  const forwarded = await maybeProtectResponse({
@@ -545,7 +573,13 @@ async function authorizeRequest({ runtime, request, routeContext, rateLimiter, m
545
573
 
546
574
  if (resolved.rate && resolved.rate.requestsPerMinute) {
547
575
  const key = identity?.id ?? "anonymous";
548
- if (!rateLimiter.allow(key, resolved.rate.requestsPerMinute)) {
576
+ // allow() may return a boolean OR a Promise<boolean>: the built-in default is
577
+ // synchronous, but a shared-store (e.g. Redis-backed) limiter is inherently
578
+ // async. We await unconditionally — `await <boolean>` returns the boolean
579
+ // unchanged, so the sync default keeps working, while `!somePromise` (always
580
+ // false, because a Promise is truthy) can no longer let an async limiter
581
+ // silently fail open. See haechi-ratelimit-redis (shared-store satellite).
582
+ if (!(await rateLimiter.allow(key, resolved.rate.requestsPerMinute))) {
549
583
  await recordProxyDecision({
550
584
  runtime, routeContext, identity, profile: resolved.profile, correlationId,
551
585
  decision: "rate_limited",
@@ -627,7 +661,7 @@ async function recordAuthDenied({ runtime, routeContext, reason, correlationId =
627
661
  });
628
662
  }
629
663
 
630
- async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {}, metrics = null }) {
664
+ async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {}, metrics = null, forwardPolicy = {} }) {
631
665
  const { haechi, config } = runtime;
632
666
  const requestMode = config.policy.mode ?? config.mode;
633
667
 
@@ -668,7 +702,8 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
668
702
  request,
669
703
  body: JSON.stringify(requestResult.payload),
670
704
  timeoutMs: config.limits.upstreamTimeoutMs,
671
- metrics
705
+ metrics,
706
+ forwardPolicy
672
707
  });
673
708
 
674
709
  const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode;
@@ -681,7 +716,7 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
681
716
  maxMatchBytes: config.streaming.maxMatchBytes
682
717
  });
683
718
 
684
- response.writeHead(upstreamResponse.status, streamingResponseHeaders(upstreamResponse));
719
+ response.writeHead(upstreamResponse.status, sanitizeResponseHeaders(upstreamResponse));
685
720
 
686
721
  const { blocked, summary } = await inspectResponseStream({
687
722
  source: upstreamResponse.body ?? emptyAsyncIterable(),
@@ -702,13 +737,97 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
702
737
  response.end();
703
738
  }
704
739
 
705
- function streamingResponseHeaders(upstreamResponse) {
740
+ // P1-CR-003 — the SINGLE centralized response-header sanitizer used on EVERY
741
+ // response path (pass-through, forwarded/unprotected, protected, streaming).
742
+ // Node fetch() auto-decompresses gzip/br/deflate, so the upstream's original
743
+ // content-encoding/content-length now describe the WIRE bytes Haechi no longer
744
+ // emits — forwarding them makes a downstream client see "content-encoding: gzip"
745
+ // on plain bytes and fail with "incorrect header check". transfer-encoding and
746
+ // the hop-by-hop control headers (RFC 7230 §6.1) likewise describe the upstream
747
+ // hop, not Haechi's connection to the client, so they are stripped too. A
748
+ // correct content-length is re-set ONLY by a caller that emits a fully-buffered
749
+ // body (transformedJsonHeaders / the buffered-body helper below); a streamed or
750
+ // raw-piped body intentionally carries no content-length.
751
+ const RESPONSE_HOP_BY_HOP_HEADERS = [
752
+ "content-encoding",
753
+ "content-length",
754
+ "transfer-encoding",
755
+ "connection",
756
+ "keep-alive",
757
+ "te",
758
+ "trailer",
759
+ "upgrade",
760
+ "proxy-authenticate"
761
+ ];
762
+
763
+ function sanitizeResponseHeaders(upstreamResponse) {
706
764
  const headers = Object.fromEntries(upstreamResponse.headers.entries());
707
- delete headers["content-length"];
708
- delete headers["content-encoding"];
765
+ for (const name of RESPONSE_HOP_BY_HOP_HEADERS) {
766
+ delete headers[name];
767
+ }
709
768
  return headers;
710
769
  }
711
770
 
771
+ // P1-CR-004 — the byte cap for the streaming pass-through path. Reuse
772
+ // responseProtection.maxBytes (the existing hard response-size cap) so a single
773
+ // dial governs all raw upstream-body reads; falls back to a 1 MiB default for a
774
+ // hand-built config without responseProtection.
775
+ function streamingPassThroughMaxBytes(config) {
776
+ const cap = config.responseProtection?.maxBytes;
777
+ return typeof cap === "number" && cap > 0 ? cap : 1048576;
778
+ }
779
+
780
+ // P1-CR-004 — TRUE bounded streaming pass-through. Pipe the upstream body to the
781
+ // client response as it arrives (real streaming) while counting bytes; if the
782
+ // running total exceeds maxBytes, abort: cancel the upstream reader and destroy
783
+ // the client response so a long-lived or malicious stream cannot hold memory or
784
+ // the connection open unbounded. Bytes already written cannot be retracted, so
785
+ // this caps total memory/throughput, not the already-flushed prefix.
786
+ async function pipeUpstreamBodyBounded({ upstreamResponse, response, maxBytes, logger = null, metrics = null, correlationId = null }) {
787
+ if (!upstreamResponse.body) {
788
+ response.end();
789
+ return;
790
+ }
791
+
792
+ const reader = upstreamResponse.body.getReader();
793
+ let received = 0;
794
+ try {
795
+ while (true) {
796
+ const { done, value } = await reader.read();
797
+ if (done) {
798
+ break;
799
+ }
800
+ received += value.byteLength;
801
+ if (maxBytes && received > maxBytes) {
802
+ // Over the cap: stop reading upstream and tear down the client write so
803
+ // the oversize stream is bounded (fail-closed on size).
804
+ void cancelReader(reader);
805
+ metrics?.increment("haechi_response_stream_truncated_total");
806
+ logger?.error("proxy_stream_pass_through_too_large", {
807
+ correlationId,
808
+ maxBytes
809
+ });
810
+ if (!response.writableEnded) {
811
+ response.destroy();
812
+ }
813
+ return;
814
+ }
815
+ // Respect downstream backpressure: stop pulling upstream until the client
816
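+ // socket has drained. NOTE(review): once() settles only on "drain" or "error" — confirm a client disconnect ("close" without error) cannot leave this await pending.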
817
+ const ok = response.write(Buffer.from(value));
818
+ if (!ok) {
819
+ await once(response, "drain");
820
+ }
821
+ }
822
+ response.end();
823
+ } catch (error) {
824
+ void cancelReader(reader);
825
+ if (!response.writableEnded) {
826
+ response.destroy();
827
+ }
828
+ }
829
+ }
830
+
712
831
  function nodeResponseSink(response) {
713
832
  return {
714
833
  write(text) {
@@ -745,20 +864,42 @@ async function recordStreamDecision({ runtime, routeContext, blocked, summary, m
745
864
  }
746
865
 
747
866
  async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, authContext = {}, issuedTokens = [], metrics = null }) {
748
- const headers = Object.fromEntries(upstreamResponse.headers.entries());
867
+ // P1-CR-003 — content-encoding is read off the RAW upstream headers (before
868
+ // sanitation) for the compressed-response gate; the headers RETURNED to the
869
+ // client are always the sanitized set (no stale compression/length metadata).
870
+ const rawHeaders = Object.fromEntries(upstreamResponse.headers.entries());
871
+ const headers = sanitizeResponseHeaders(upstreamResponse);
749
872
 
750
873
  if (!runtime.config.responseProtection.enabled || !routeContext.protectResponse) {
751
- const { body: rawBody } = await readUpstreamBody(upstreamResponse);
874
+ // P1-CR-004 — apply the same byte cap to this raw upstream-body read so an
875
+ // unprotected/forwarded response cannot be buffered unbounded. Fail closed
876
+ // (502) when the upstream body exceeds the cap.
877
+ const passThroughMax = streamingPassThroughMaxBytes(runtime.config);
878
+ const { body: rawBody, tooLarge } = await readUpstreamBody(upstreamResponse, { maxBytes: passThroughMax });
879
+ if (tooLarge) {
880
+ metrics?.increment("haechi_response_stream_truncated_total");
881
+ return {
882
+ decision: "response_unprotected_blocked",
883
+ status: 502,
884
+ headers: { "content-type": "application/json" },
885
+ body: Buffer.from(`${JSON.stringify({
886
+ error: "haechi_response_too_large",
887
+ reason: "response_body_too_large",
888
+ message: `Response body exceeds responseProtection.maxBytes (${passThroughMax})`
889
+ }, null, 2)}\n`)
890
+ };
891
+ }
752
892
  return {
753
893
  status: upstreamResponse.status,
754
- headers,
894
+ // Re-set a correct content-length: this is a fully-buffered body.
895
+ headers: { ...headers, "content-length": String(rawBody.byteLength) },
755
896
  body: rawBody,
756
897
  decision: "forwarded"
757
898
  };
758
899
  }
759
900
 
760
901
  const responsePolicy = runtime.config.responseProtection;
761
- const contentEncoding = headers["content-encoding"] ?? "";
902
+ const contentEncoding = rawHeaders["content-encoding"] ?? "";
762
903
  const bodyRead = await readUpstreamBody(upstreamResponse, { maxBytes: responsePolicy.maxBytes });
763
904
 
764
905
  if (bodyRead.tooLarge) {
@@ -910,12 +1051,12 @@ function restoreTokens(value, tokenValues) {
910
1051
  return value;
911
1052
  }
912
1053
 
913
- async function forward({ upstream, request, body, timeoutMs = null, metrics = null }) {
1054
+ async function forward({ upstream, request, body, timeoutMs = null, metrics = null, forwardPolicy = {} }) {
914
1055
  const target = buildUpstreamUrl({ upstream, requestUrl: request.url });
915
1056
  try {
916
1057
  return await fetch(target, {
917
1058
  method: request.method,
918
- headers: filteredHeaders(request.headers),
1059
+ headers: filteredHeaders(request.headers, forwardPolicy),
919
1060
  body: request.method === "GET" || request.method === "HEAD" ? undefined : body,
920
1061
  signal: timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined
921
1062
  });
@@ -943,24 +1084,117 @@ function buildUpstreamUrl({ upstream, requestUrl }) {
943
1084
  return new URL(`${parsed.pathname}${parsed.search}`, upstream.endsWith("/") ? upstream : `${upstream}/`);
944
1085
  }
945
1086
 
946
- function filteredHeaders(headers) {
1087
+ // P0-CR-001 — DEFAULT-DROP upstream header allowlist. The client's request
1088
+ // headers cross from the local gateway trust boundary into the MODEL PROVIDER
1089
+ // boundary, so the policy is: forward ONLY a known-safe set; everything else
1090
+ // (including ambient client credentials — Cookie, Proxy-Authorization, and the
1091
+ // client's gateway Authorization) is dropped. The conditional `authorization`
1092
+ // rule is handled in filteredHeaders against the forward policy. An operator can
1093
+ // additively widen the set with `target.forwardHeaders` for an unusual upstream.
1094
+ //
1095
+ // The forwarded set is exactly the headers the OpenAI-compatible / Anthropic /
1096
+ // Gemini adapters need: the provider key/organization headers (x-api-key,
1097
+ // x-goog-api-key, openai-organization), provider version/feature pins
1098
+ // (openai-beta, anthropic-version, anthropic-beta), and benign request
1099
+ // metadata (accept, user-agent, accept-language). content-type is NOT in
1100
+ // this set: filteredHeaders skips the client's value and always sets it to
1101
+ // application/json.
1102
+ const FORWARD_HEADER_ALLOWLIST = new Set([
1103
+ "x-api-key",
1104
+ "anthropic-version",
1105
+ "anthropic-beta",
1106
+ "x-goog-api-key",
1107
+ "openai-organization",
1108
+ "openai-beta",
1109
+ "accept",
1110
+ "user-agent",
1111
+ "accept-language"
1112
+ ]);
1113
+
1114
+ // ALWAYS-DROP: ambient client credentials + hop-by-hop control headers. These
1115
+ // must NEVER reach the upstream regardless of the allowlist or the operator's
1116
+ // target.forwardHeaders extension (a fail-closed denylist that wins over both).
1117
+ // - host / content-length: rewritten/recomputed by fetch for the new request.
1118
+ // - cookie / set-cookie / proxy-authorization: ambient client credentials.
1119
+ // - connection / keep-alive / te / trailer / transfer-encoding / upgrade:
1120
+ // hop-by-hop headers (RFC 7230 §6.1) that must not be tunneled end-to-end.
1121
+ const FORWARD_HEADER_DENYLIST = new Set([
1122
+ "host",
1123
+ "content-length",
1124
+ "cookie",
1125
+ "set-cookie",
1126
+ "proxy-authorization",
1127
+ "connection",
1128
+ "keep-alive",
1129
+ "te",
1130
+ "trailer",
1131
+ "transfer-encoding",
1132
+ "upgrade"
1133
+ ]);
1134
+
1135
+ // `forwardPolicy` is built by createHaechiProxy from the runtime: it carries
1136
+ // - gatewayConsumedAuthorization: true when auth.provider !== "none", i.e. the
1137
+ // gateway authenticated the CLIENT with the request's Authorization. That
1138
+ // header is the GATEWAY credential Haechi already consumed; forwarding it
1139
+ // would leak a gateway secret into the model provider, so it is DROPPED.
1140
+ // When false (auth.provider "none"), the client's Authorization is the
1141
+ // UPSTREAM provider key (the OpenAI-compatible pass-through pattern), so it
1142
+ // is FORWARDED.
1143
+ // - extraHeaders: the operator's additive target.forwardHeaders allowlist
1144
+ // (lowercase names) — never able to override the always-drop denylist.
1145
+ function filteredHeaders(headers, forwardPolicy = {}) {
1146
+ const gatewayConsumedAuthorization = Boolean(forwardPolicy.gatewayConsumedAuthorization);
1147
+ const extraHeaders = forwardPolicy.extraHeaders instanceof Set
1148
+ ? forwardPolicy.extraHeaders
1149
+ : new Set(Array.isArray(forwardPolicy.extraHeaders) ? forwardPolicy.extraHeaders : []);
1150
+
947
1151
  const next = new Headers();
948
1152
  for (const [key, value] of Object.entries(headers)) {
949
- if (!value || ["host", "content-length"].includes(key.toLowerCase())) {
1153
+ if (!value) {
950
1154
  continue;
951
1155
  }
952
- if (Array.isArray(value)) {
953
- for (const item of value) {
954
- next.append(key, item);
1156
+ const name = key.toLowerCase();
1157
+
1158
+ // Always-drop wins over everything (credentials + hop-by-hop).
1159
+ if (FORWARD_HEADER_DENYLIST.has(name)) {
1160
+ continue;
1161
+ }
1162
+
1163
+ // Conditional gateway-vs-upstream Authorization separation.
1164
+ if (name === "authorization") {
1165
+ if (gatewayConsumedAuthorization) {
1166
+ // Gateway token Haechi already consumed — must not leak upstream.
1167
+ continue;
955
1168
  }
956
- } else {
957
- next.set(key, value);
1169
+ // auth.provider "none": the client put the UPSTREAM provider key here.
1170
+ appendHeader(next, key, value);
1171
+ continue;
958
1172
  }
1173
+
1174
+ // content-type is rewritten unconditionally below; skip the client's value.
1175
+ if (name === "content-type") {
1176
+ continue;
1177
+ }
1178
+
1179
+ if (FORWARD_HEADER_ALLOWLIST.has(name) || extraHeaders.has(name)) {
1180
+ appendHeader(next, key, value);
1181
+ }
1182
+ // Everything else is default-dropped (fail-closed).
959
1183
  }
960
1184
  next.set("content-type", "application/json");
961
1185
  return next;
962
1186
  }
963
1187
 
1188
+ function appendHeader(target, key, value) {
1189
+ if (Array.isArray(value)) {
1190
+ for (const item of value) {
1191
+ target.append(key, item);
1192
+ }
1193
+ } else {
1194
+ target.set(key, value);
1195
+ }
1196
+ }
1197
+
964
1198
  function readBody(request, { maxBytes }) {
965
1199
  return new Promise((resolve, reject) => {
966
1200
  const chunks = [];
@@ -1036,9 +1270,13 @@ function isJson(contentType = "") {
1036
1270
  }
1037
1271
 
1038
1272
  function transformedJsonHeaders(headers) {
1273
+ // P1-CR-003 — defensively strip the full hop-by-hop/compression set (the
1274
+ // caller already passes the sanitized headers, but the transformed JSON body
1275
+ // is freshly serialized, so any stale length/encoding metadata must not leak).
1039
1276
  const next = { ...headers, "content-type": "application/json" };
1040
- delete next["content-length"];
1041
- delete next["content-encoding"];
1277
+ for (const name of RESPONSE_HOP_BY_HOP_HEADERS) {
1278
+ delete next[name];
1279
+ }
1042
1280
  return next;
1043
1281
  }
1044
1282
 
@@ -1071,10 +1309,13 @@ async function unprotectedResponseDecision({
1071
1309
  metrics?.increment("haechi_response_unprotected_total");
1072
1310
 
1073
1311
  if (allowed) {
1312
+ // P1-CR-003 — `headers` is already the sanitized set (no stale
1313
+ // compression/length metadata). Re-set a correct content-length for this
1314
+ // fully-buffered body.
1074
1315
  return {
1075
1316
  decision,
1076
1317
  status: upstreamResponse.status,
1077
- headers,
1318
+ headers: { ...headers, "content-length": String(rawBody.byteLength) },
1078
1319
  body: rawBody
1079
1320
  };
1080
1321
  }