haechi 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +57 -11
- package/README.md +57 -11
- package/docs/current/code-review-risk-register-2026-06-16.ko.md +377 -0
- package/docs/current/code-review-risk-register-2026-06-16.md +377 -0
- package/docs/current/config-version.ko.md +2 -2
- package/docs/current/config-version.md +2 -2
- package/docs/current/configuration.ko.md +28 -11
- package/docs/current/configuration.md +28 -11
- package/docs/current/operations-runbook.ko.md +36 -2
- package/docs/current/operations-runbook.md +39 -2
- package/docs/current/release-process.ko.md +5 -1
- package/docs/current/release-process.md +5 -1
- package/docs/current/risk-register-release-gate.ko.md +34 -8
- package/docs/current/risk-register-release-gate.md +34 -8
- package/docs/current/shared-responsibility.ko.md +12 -3
- package/docs/current/shared-responsibility.md +12 -3
- package/docs/current/threat-model.ko.md +7 -3
- package/docs/current/threat-model.md +7 -3
- package/examples/local-proxy-demo/README.md +51 -0
- package/examples/local-proxy-demo/demo.mjs +144 -0
- package/examples/local-proxy-demo/demo.tape +19 -0
- package/examples/local-proxy-demo/live-demo.mjs +121 -0
- package/examples/local-proxy-demo/live-demo.tape +25 -0
- package/haechi.config.example.json +2 -1
- package/package.json +3 -1
- package/packages/cli/bin/haechi.mjs +95 -5
- package/packages/cli/runtime.mjs +61 -1
- package/packages/core/index.mjs +15 -0
- package/packages/crypto/index.mjs +42 -20
- package/packages/filter/index.mjs +679 -6
- package/packages/privacy-profiles/index.mjs +72 -3
- package/packages/protocol-adapters/index.mjs +99 -1
- package/packages/proxy/index.mjs +270 -29
- package/packages/ssrf/index.mjs +60 -4
- package/packages/stream-filter/index.mjs +194 -17
|
@@ -12,7 +12,11 @@ const PROFILES = {
|
|
|
12
12
|
email: "redact",
|
|
13
13
|
card: "block",
|
|
14
14
|
api_key: "block",
|
|
15
|
-
secret: "block"
|
|
15
|
+
secret: "block",
|
|
16
|
+
// A Japan My Number leak is as sensitive as a national ID and is a
|
|
17
|
+
// checksummed true-positive — block it in every profile so a non-JP
|
|
18
|
+
// deployment that happens to process JP data is still covered.
|
|
19
|
+
jp_mynumber: "block"
|
|
16
20
|
}
|
|
17
21
|
},
|
|
18
22
|
transfer: {
|
|
@@ -31,7 +35,17 @@ const PROFILES = {
|
|
|
31
35
|
card: "block",
|
|
32
36
|
api_key: "block",
|
|
33
37
|
secret: "block",
|
|
34
|
-
kr_rrn: "block"
|
|
38
|
+
kr_rrn: "block",
|
|
39
|
+
// EU national IDs — France NIR, Spain DNI/NIE, UK National Insurance
|
|
40
|
+
// Number, Italy codice fiscale, Germany tax ID, Netherlands BSN — are
|
|
41
|
+
// GDPR special-category-adjacent identifiers; block them.
|
|
42
|
+
fr_nir: "block",
|
|
43
|
+
es_dni: "block",
|
|
44
|
+
uk_nino: "block",
|
|
45
|
+
it_codice_fiscale: "block",
|
|
46
|
+
de_steuer_id: "block",
|
|
47
|
+
nl_bsn: "block",
|
|
48
|
+
jp_mynumber: "block"
|
|
35
49
|
}
|
|
36
50
|
},
|
|
37
51
|
transfer: {
|
|
@@ -39,6 +53,37 @@ const PROFILES = {
|
|
|
39
53
|
note: "Treat model/tool transfer as processor/subprocessor transfer and document SCC/TIA evidence outside Haechi."
|
|
40
54
|
}
|
|
41
55
|
},
|
|
56
|
+
"asia-pdpa": {
|
|
57
|
+
id: "asia-pdpa",
|
|
58
|
+
region: "ASIA",
|
|
59
|
+
regulations: ["Singapore PDPA", "India DPDP Act"],
|
|
60
|
+
policy: {
|
|
61
|
+
actions: {
|
|
62
|
+
// Asia national IDs — Singapore NRIC/FIN and India Aadhaar — are sensitive
|
|
63
|
+
// identifiers under the Singapore PDPA / India DPDP Act; block them. The
|
|
64
|
+
// other checksummed national IDs are also blocked so a mixed-region payload
|
|
65
|
+
// is covered, matching the cross-profile convention.
|
|
66
|
+
sg_nric: "block",
|
|
67
|
+
in_aadhaar: "block",
|
|
68
|
+
jp_mynumber: "block",
|
|
69
|
+
kr_rrn: "block",
|
|
70
|
+
fr_nir: "block",
|
|
71
|
+
es_dni: "block",
|
|
72
|
+
it_codice_fiscale: "block",
|
|
73
|
+
de_steuer_id: "block",
|
|
74
|
+
nl_bsn: "block",
|
|
75
|
+
phone: "mask",
|
|
76
|
+
email: "redact",
|
|
77
|
+
card: "block",
|
|
78
|
+
api_key: "block",
|
|
79
|
+
secret: "block"
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
transfer: {
|
|
83
|
+
requiresAssessment: true,
|
|
84
|
+
note: "Document the PDPA/DPDP handling basis, purpose limitation, and cross-border transfer notice before production use."
|
|
85
|
+
}
|
|
86
|
+
},
|
|
42
87
|
"us-general": {
|
|
43
88
|
id: "us-general",
|
|
44
89
|
region: "US",
|
|
@@ -49,13 +94,37 @@ const PROFILES = {
|
|
|
49
94
|
phone: "mask",
|
|
50
95
|
card: "block",
|
|
51
96
|
api_key: "block",
|
|
52
|
-
secret: "block"
|
|
97
|
+
secret: "block",
|
|
98
|
+
jp_mynumber: "block"
|
|
53
99
|
}
|
|
54
100
|
},
|
|
55
101
|
transfer: {
|
|
56
102
|
requiresAssessment: false,
|
|
57
103
|
note: "Classify sector rules separately before using protected health, payment, or children's data."
|
|
58
104
|
}
|
|
105
|
+
},
|
|
106
|
+
"jp-appi": {
|
|
107
|
+
id: "jp-appi",
|
|
108
|
+
region: "JP",
|
|
109
|
+
regulations: ["APPI"],
|
|
110
|
+
policy: {
|
|
111
|
+
actions: {
|
|
112
|
+
// My Number (個人番号) is a special-care personal-information identifier
|
|
113
|
+
// under the My Number Act; block it. KR RRN is also blocked (NOTE(review): the EU IDs are not listed in this profile — confirm intent) so a
|
|
114
|
+
// mixed-region payload is covered, matching the cross-profile convention.
|
|
115
|
+
jp_mynumber: "block",
|
|
116
|
+
phone: "mask",
|
|
117
|
+
email: "redact",
|
|
118
|
+
card: "block",
|
|
119
|
+
api_key: "block",
|
|
120
|
+
secret: "block",
|
|
121
|
+
kr_rrn: "block"
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
transfer: {
|
|
125
|
+
requiresAssessment: true,
|
|
126
|
+
note: "Document the My Number Act handling basis, purpose limitation, and cross-border transfer notice before production use."
|
|
127
|
+
}
|
|
59
128
|
}
|
|
60
129
|
};
|
|
61
130
|
|
|
@@ -8,6 +8,28 @@ const SSE_RESPONSES = { format: "sse", deltaPath: null };
|
|
|
8
8
|
const SSE_LLAMA_LEGACY = { format: "sse", deltaPath: ["content"] };
|
|
9
9
|
const NDJSON_OLLAMA_CHAT = { format: "ndjson", deltaPath: ["message", "content"] };
|
|
10
10
|
const NDJSON_OLLAMA_GENERATE = { format: "ndjson", deltaPath: ["response"] };
|
|
11
|
+
// Anthropic Messages API streams event-typed SSE frames; the incremental text
|
|
12
|
+
// channel is `delta.text` inside a `content_block_delta` frame. Other frame
|
|
13
|
+
// types (message_start, ping, etc.) don't carry deltaPath, so they get
|
|
14
|
+
// within-frame protection but no cross-frame buffering. The stream-filter
|
|
15
|
+
// preserves each frame's `event:` line on re-serialize. `flushOnType` lists the
|
|
16
|
+
// frame types that END a delta sequence: before one of them the held cross-frame
|
|
17
|
+
// buffer tail is flushed as a valid `content_block_delta`, so the residual lands
|
|
18
|
+
// IN ORDER (before content_block_stop/message_stop) rather than after the stream
|
|
19
|
+
// terminates. `ping` is intentionally absent — a match split across a keepalive
|
|
20
|
+
// must still be caught by the sliding buffer. Legacy /v1/complete streams a
|
|
21
|
+
// `completion` delta (no block framing, so no flushOnType needed).
|
|
22
|
+
const SSE_ANTHROPIC_MESSAGES = {
|
|
23
|
+
format: "sse",
|
|
24
|
+
deltaPath: ["delta", "text"],
|
|
25
|
+
flushOnType: { path: ["type"], values: ["content_block_stop", "message_delta", "message_stop"] }
|
|
26
|
+
};
|
|
27
|
+
const SSE_ANTHROPIC_COMPLETE = { format: "sse", deltaPath: ["completion"] };
|
|
28
|
+
// Google Gemini streams :streamGenerateContent as DATA-ONLY SSE (no `event:`
|
|
29
|
+
// lines, like OpenAI). Each `data:` frame is a FULL GenerateContentResponse;
|
|
30
|
+
// the incremental text channel is just deeper. Because frames are data-only,
|
|
31
|
+
// the held cross-frame tail can flush at end-of-stream — no flushOnType needed.
|
|
32
|
+
const SSE_GEMINI = { format: "sse", deltaPath: ["candidates", 0, "content", "parts", 0, "text"] };
|
|
11
33
|
|
|
12
34
|
const ADAPTERS = {
|
|
13
35
|
"openai-compatible": {
|
|
@@ -50,6 +72,47 @@ const ADAPTERS = {
|
|
|
50
72
|
route("/api/embed", "embed"),
|
|
51
73
|
route("/api/embeddings", "embeddings")
|
|
52
74
|
]
|
|
75
|
+
},
|
|
76
|
+
"anthropic": {
|
|
77
|
+
id: "anthropic",
|
|
78
|
+
protocol: "anthropic",
|
|
79
|
+
routes: [
|
|
80
|
+
// Anthropic Messages API. PII can sit in the top-level `system` string/blocks
|
|
81
|
+
// or any `messages[].content` string or content-block text/input — the core
|
|
82
|
+
// tree walk (collectStringEntries) covers every string leaf, so no custom
|
|
83
|
+
// extraction is needed. Streams via content_block_delta `delta.text`.
|
|
84
|
+
route("/v1/messages", "messages", { streaming: SSE_ANTHROPIC_MESSAGES }),
|
|
85
|
+
// count_tokens is a utility, but it carries prompt content, so protect it.
|
|
86
|
+
route("/v1/messages/count_tokens", "count-tokens", { protectRequest: true }),
|
|
87
|
+
// Legacy text completions: `prompt` is a top-level string; streams a `completion` delta.
|
|
88
|
+
route("/v1/complete", "complete", { streaming: SSE_ANTHROPIC_COMPLETE })
|
|
89
|
+
]
|
|
90
|
+
},
|
|
91
|
+
"gemini": {
|
|
92
|
+
id: "gemini",
|
|
93
|
+
protocol: "gemini",
|
|
94
|
+
routes: [
|
|
95
|
+
// Google Gemini API. Endpoints are MODEL-IN-PATH with a `:method` suffix:
|
|
96
|
+
// POST /v1beta/models/{model}:generateContent (and /v1, and arbitrary
|
|
97
|
+
// model names like gemini-2.0-flash). The route key is therefore the
|
|
98
|
+
// `:method` SUFFIX, not a fixed path — declared via `methodSuffix`, which
|
|
99
|
+
// matchRoute checks only AFTER exact-path matches (so existing adapters
|
|
100
|
+
// are unaffected). PII can sit in systemInstruction.parts[].text and any
|
|
101
|
+
// contents[].parts[].text; the core tree walk (collectStringEntries)
|
|
102
|
+
// covers every string leaf, so no custom extraction is needed.
|
|
103
|
+
suffixRoute("generateContent", "generate-content"),
|
|
104
|
+
// Streaming variant: data-only SSE, full GenerateContentResponse per frame;
|
|
105
|
+
// delta text lives at candidates[0].content.parts[0].text. The :stream*
|
|
106
|
+
// endpoint ALWAYS streams (the intent is in the path, not a body flag), so
|
|
107
|
+
// mark it streamingDefault — there is no `stream:false` body field for
|
|
108
|
+
// Gemini, so isStreamingRequest always classifies it as streaming.
|
|
109
|
+
suffixRoute("streamGenerateContent", "stream-generate-content", { streamingDefault: true, streaming: SSE_GEMINI }),
|
|
110
|
+
// countTokens carries prompt content (contents/systemInstruction), so protect it.
|
|
111
|
+
suffixRoute("countTokens", "count-tokens", { protectRequest: true }),
|
|
112
|
+
// Embedding endpoints: request carries text to embed; protect it.
|
|
113
|
+
suffixRoute("embedContent", "embed", { protectRequest: true }),
|
|
114
|
+
suffixRoute("batchEmbedContents", "batch-embed", { protectRequest: true })
|
|
115
|
+
]
|
|
53
116
|
}
|
|
54
117
|
};
|
|
55
118
|
|
|
@@ -121,10 +184,45 @@ function route(path, operation, options = {}) {
|
|
|
121
184
|
};
|
|
122
185
|
}
|
|
123
186
|
|
|
187
|
+
// A SUFFIX route matches by a `:method` suffix instead of an exact pathname —
|
|
188
|
+
// for model-in-path APIs (Gemini) where the path embeds an arbitrary model name
|
|
189
|
+
// and a version prefix (e.g. /v1beta/models/gemini-2.0-flash:generateContent).
|
|
190
|
+
// `path` stays null (there is no fixed path); `methodSuffix` carries the bare
|
|
191
|
+
// method name and matchRoute matches when the pathname ends with `:${suffix}`.
|
|
192
|
+
// matchRoute tries exact-path routes FIRST, so this never changes existing
|
|
193
|
+
// exact-match behavior for openai/anthropic/ollama/llama-cpp.
|
|
194
|
+
/**
 * Build a route descriptor that is keyed by a `:method` suffix instead of a
 * fixed pathname.
 *
 * @param {string} methodSuffix - Bare method name (no leading colon).
 * @param {string} operation - Operation name; also used as the route `id`.
 * @param {object} [options] - Optional overrides; a nullish entry falls back
 *   to its default.
 * @returns {object} Descriptor with `path` fixed to `null`.
 */
function suffixRoute(methodSuffix, operation, options = {}) {
  const descriptor = { id: operation, path: null, methodSuffix, operation };

  // Protection is on unless the caller overrides it with a non-nullish value.
  descriptor.protectRequest = options.protectRequest ?? true;
  descriptor.protectResponse = options.protectResponse ?? true;

  // Streaming is off unless the caller supplies a value.
  descriptor.streamingDefault = options.streamingDefault ?? false;
  descriptor.streaming = options.streaming ?? null;

  return descriptor;
}
|
|
206
|
+
|
|
124
207
|
/**
 * Extract the pathname from a request URL.
 *
 * The URL is resolved against a placeholder origin so that origin-relative
 * request targets (e.g. "/v1/messages?x=1") parse; the query string and
 * fragment are not part of the returned value.
 *
 * @param {string} url - Request URL or request target.
 * @returns {string} The parsed pathname.
 */
function pathFromRequestUrl(url) {
  const { pathname } = new URL(url, "http://haechi.local");
  return pathname;
}
|
|
127
210
|
|
|
128
211
|
/**
 * Find the route descriptor for a pathname.
 *
 * An exact `path` match always wins. Only when no route matches exactly is
 * the first route returned whose string `methodSuffix` the pathname ends
 * with, preceded by a literal ":".
 *
 * @param {Array<object>} routes - Route descriptors to search, in priority order.
 * @param {string} pathname - Request pathname.
 * @returns {object|undefined} The matching route, or undefined when none match.
 */
function matchRoute(routes, pathname) {
  let firstSuffixHit;
  let sawSuffixHit = false;

  for (const candidate of routes) {
    // Exact pathname equality short-circuits everything else.
    if (candidate.path === pathname) {
      return candidate;
    }
    // Remember only the first suffix candidate; it is used as the fallback.
    if (
      !sawSuffixHit &&
      typeof candidate.methodSuffix === "string" &&
      pathname.endsWith(`:${candidate.methodSuffix}`)
    ) {
      firstSuffixHit = candidate;
      sawSuffixHit = true;
    }
  }

  return firstSuffixHit;
}
|
package/packages/proxy/index.mjs
CHANGED
|
@@ -4,6 +4,7 @@ import { createHash, randomUUID } from "node:crypto";
|
|
|
4
4
|
import { isUtf8 } from "node:buffer";
|
|
5
5
|
import { readFileSync } from "node:fs";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
|
+
import { once } from "node:events";
|
|
7
8
|
import { inspectResponseStream } from "../stream-filter/index.mjs";
|
|
8
9
|
|
|
9
10
|
export const DEFAULT_PROXY_PORT = 11016;
|
|
@@ -107,6 +108,19 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
107
108
|
const metrics = runtime.metrics ?? noopMetrics();
|
|
108
109
|
const logger = createLogger(config.logging?.format ?? "text");
|
|
109
110
|
|
|
111
|
+
// P0-CR-001 — the upstream header forward policy, derived ONCE from config.
|
|
112
|
+
// gatewayConsumedAuthorization is true whenever the gateway authenticates the
|
|
113
|
+
// CLIENT (auth.provider !== "none"): the request's Authorization is then the
|
|
114
|
+
// gateway credential Haechi consumed and must NOT be forwarded to the model
|
|
115
|
+
// upstream. With auth.provider "none" the client's Authorization is the
|
|
116
|
+
// upstream provider key and IS forwarded. extraHeaders is the operator's
|
|
117
|
+
// additive target.forwardHeaders allowlist (validated lowercase in
|
|
118
|
+
// normalizeConfig); it can only widen, never override the always-drop set.
|
|
119
|
+
const forwardPolicy = {
|
|
120
|
+
gatewayConsumedAuthorization: (config.auth?.provider ?? "none") !== "none",
|
|
121
|
+
extraHeaders: new Set(config.target?.forwardHeaders ?? [])
|
|
122
|
+
};
|
|
123
|
+
|
|
110
124
|
// WS4-B backpressure: a configurable global max-in-flight ceiling. 0 (default)
|
|
111
125
|
// disables it, preserving 1.1 behavior. When > 0 and the live count is at the
|
|
112
126
|
// ceiling, a NEW non-exempt request is rejected 503 + Retry-After BEFORE auth
|
|
@@ -237,7 +251,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
237
251
|
|
|
238
252
|
if (isStreamingRequest(json, routeContext)) {
|
|
239
253
|
if (config.streaming.requestMode === "inspect") {
|
|
240
|
-
await handleInspectedStream({ runtime, request, response, routeContext, json, authContext, metrics });
|
|
254
|
+
await handleInspectedStream({ runtime, request, response, routeContext, json, authContext, metrics, forwardPolicy });
|
|
241
255
|
return;
|
|
242
256
|
}
|
|
243
257
|
|
|
@@ -259,11 +273,24 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
259
273
|
request,
|
|
260
274
|
body,
|
|
261
275
|
timeoutMs: config.limits.upstreamTimeoutMs,
|
|
262
|
-
metrics
|
|
276
|
+
metrics,
|
|
277
|
+
forwardPolicy
|
|
278
|
+
});
|
|
279
|
+
// P1-CR-003 — sanitize response headers (strip the upstream's
|
|
280
|
+
// content-encoding/content-length/transfer/hop-by-hop) on this path
|
|
281
|
+
// too: Node fetch() auto-decompressed the body, so the original
|
|
282
|
+
// compressed headers would now be wrong. P1-CR-004 — TRUE bounded
|
|
283
|
+
// streaming pass-through: pipe the upstream body to the client with a
|
|
284
|
+
// running byte cap instead of buffering the whole response.
|
|
285
|
+
response.writeHead(upstreamResponse.status, sanitizeResponseHeaders(upstreamResponse));
|
|
286
|
+
await pipeUpstreamBodyBounded({
|
|
287
|
+
upstreamResponse,
|
|
288
|
+
response,
|
|
289
|
+
maxBytes: streamingPassThroughMaxBytes(config),
|
|
290
|
+
logger,
|
|
291
|
+
metrics,
|
|
292
|
+
correlationId
|
|
263
293
|
});
|
|
264
|
-
const { body: rawBody } = await readUpstreamBody(upstreamResponse);
|
|
265
|
-
response.writeHead(upstreamResponse.status, Object.fromEntries(upstreamResponse.headers.entries()));
|
|
266
|
-
response.end(rawBody);
|
|
267
294
|
return;
|
|
268
295
|
}
|
|
269
296
|
|
|
@@ -301,7 +328,8 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
301
328
|
request,
|
|
302
329
|
body: JSON.stringify(result.payload),
|
|
303
330
|
timeoutMs: config.limits.upstreamTimeoutMs,
|
|
304
|
-
metrics
|
|
331
|
+
metrics,
|
|
332
|
+
forwardPolicy
|
|
305
333
|
});
|
|
306
334
|
|
|
307
335
|
const forwarded = await maybeProtectResponse({
|
|
@@ -545,7 +573,13 @@ async function authorizeRequest({ runtime, request, routeContext, rateLimiter, m
|
|
|
545
573
|
|
|
546
574
|
if (resolved.rate && resolved.rate.requestsPerMinute) {
|
|
547
575
|
const key = identity?.id ?? "anonymous";
|
|
548
|
-
|
|
576
|
+
// allow() may return a boolean OR a Promise<boolean>: the built-in default is
|
|
577
|
+
// synchronous, but a shared-store (e.g. Redis-backed) limiter is inherently
|
|
578
|
+
// async. We await unconditionally — `await <boolean>` returns the boolean
|
|
579
|
+
// unchanged, so the sync default keeps working, while `!somePromise` (always
|
|
580
|
+
// false, because a Promise is truthy) can no longer let an async limiter
|
|
581
|
+
// silently fail open. See haechi-ratelimit-redis (shared-store satellite).
|
|
582
|
+
if (!(await rateLimiter.allow(key, resolved.rate.requestsPerMinute))) {
|
|
549
583
|
await recordProxyDecision({
|
|
550
584
|
runtime, routeContext, identity, profile: resolved.profile, correlationId,
|
|
551
585
|
decision: "rate_limited",
|
|
@@ -627,7 +661,7 @@ async function recordAuthDenied({ runtime, routeContext, reason, correlationId =
|
|
|
627
661
|
});
|
|
628
662
|
}
|
|
629
663
|
|
|
630
|
-
async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {}, metrics = null }) {
|
|
664
|
+
async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {}, metrics = null, forwardPolicy = {} }) {
|
|
631
665
|
const { haechi, config } = runtime;
|
|
632
666
|
const requestMode = config.policy.mode ?? config.mode;
|
|
633
667
|
|
|
@@ -668,7 +702,8 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
|
|
|
668
702
|
request,
|
|
669
703
|
body: JSON.stringify(requestResult.payload),
|
|
670
704
|
timeoutMs: config.limits.upstreamTimeoutMs,
|
|
671
|
-
metrics
|
|
705
|
+
metrics,
|
|
706
|
+
forwardPolicy
|
|
672
707
|
});
|
|
673
708
|
|
|
674
709
|
const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode;
|
|
@@ -681,7 +716,7 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
|
|
|
681
716
|
maxMatchBytes: config.streaming.maxMatchBytes
|
|
682
717
|
});
|
|
683
718
|
|
|
684
|
-
response.writeHead(upstreamResponse.status,
|
|
719
|
+
response.writeHead(upstreamResponse.status, sanitizeResponseHeaders(upstreamResponse));
|
|
685
720
|
|
|
686
721
|
const { blocked, summary } = await inspectResponseStream({
|
|
687
722
|
source: upstreamResponse.body ?? emptyAsyncIterable(),
|
|
@@ -702,13 +737,97 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
|
|
|
702
737
|
response.end();
|
|
703
738
|
}
|
|
704
739
|
|
|
705
|
-
|
|
740
|
+
// P1-CR-003 — the SINGLE centralized response-header sanitizer used on EVERY
|
|
741
|
+
// response path (pass-through, forwarded/unprotected, protected, streaming).
|
|
742
|
+
// Node fetch() auto-decompresses gzip/br/deflate, so the upstream's original
|
|
743
|
+
// content-encoding/content-length now describe the WIRE bytes Haechi no longer
|
|
744
|
+
// emits — forwarding them makes a downstream client see "content-encoding: gzip"
|
|
745
|
+
// on plain bytes and fail with "incorrect header check". transfer-encoding and
|
|
746
|
+
// the hop-by-hop control headers (RFC 7230 §6.1) likewise describe the upstream
|
|
747
|
+
// hop, not Haechi's connection to the client, so they are stripped too. A
|
|
748
|
+
// correct content-length is re-set ONLY by a caller that emits a fully-buffered
|
|
749
|
+
// body (transformedJsonHeaders / the buffered-body helper below); a streamed or
|
|
750
|
+
// raw-piped body intentionally carries no content-length.
|
|
751
|
+
const RESPONSE_HOP_BY_HOP_HEADERS = [
|
|
752
|
+
"content-encoding",
|
|
753
|
+
"content-length",
|
|
754
|
+
"transfer-encoding",
|
|
755
|
+
"connection",
|
|
756
|
+
"keep-alive",
|
|
757
|
+
"te",
|
|
758
|
+
"trailer",
|
|
759
|
+
"upgrade",
|
|
760
|
+
"proxy-authenticate"
|
|
761
|
+
];
|
|
762
|
+
|
|
763
|
+
/**
 * Convert the upstream response headers into a plain object that is safe to
 * send to the client.
 *
 * Removes every name in RESPONSE_HOP_BY_HOP_HEADERS and, per RFC 7230 §6.1,
 * every header the upstream nominated in its `Connection` header (those are
 * hop-by-hop for the upstream connection as well).
 *
 * When the upstream sent more than one `Set-Cookie`, they are returned as an
 * array so none is lost; a single cookie stays a plain string.
 *
 * @param {{ headers: Headers }} upstreamResponse - Object exposing fetch-style
 *   `headers` (must support `.entries()`).
 * @returns {Object<string, string|string[]>} Sanitized header map.
 */
function sanitizeResponseHeaders(upstreamResponse) {
  const upstreamHeaders = upstreamResponse.headers;
  const headers = Object.fromEntries(upstreamHeaders.entries());

  // Object.fromEntries keeps only the LAST value for a repeated key, which
  // silently drops all but one Set-Cookie. Recover the full list when the
  // runtime's Headers implementation exposes it.
  if (typeof upstreamHeaders.getSetCookie === "function") {
    const cookies = upstreamHeaders.getSetCookie();
    if (cookies.length > 1) {
      headers["set-cookie"] = cookies;
    }
  }

  // Header names listed in `Connection` are connection-specific and must not
  // be relayed. Read the list BEFORE `connection` itself is deleted below.
  const nominated = String(headers.connection ?? "")
    .split(",")
    .map((token) => token.trim().toLowerCase())
    .filter(Boolean);

  for (const name of [...RESPONSE_HOP_BY_HOP_HEADERS, ...nominated]) {
    delete headers[name];
  }
  return headers;
}
|
|
711
770
|
|
|
771
|
+
// P1-CR-004 — the byte cap for the streaming pass-through path. Reuse
|
|
772
|
+
// responseProtection.maxBytes (the existing hard response-size cap) so a single
|
|
773
|
+
// dial governs all raw upstream-body reads; falls back to a 1 MiB default for a
|
|
774
|
+
// hand-built config without responseProtection.
|
|
775
|
+
/**
 * Resolve the byte cap applied to raw upstream-body reads.
 *
 * @param {object} config - Runtime configuration.
 * @returns {number} `config.responseProtection.maxBytes` when it is a number
 *   greater than zero, otherwise 1048576 (1 MiB).
 */
function streamingPassThroughMaxBytes(config) {
  const configured = config.responseProtection?.maxBytes;
  // `!(x > 0)` rather than `x <= 0` so that NaN also falls back to the default.
  if (typeof configured !== "number" || !(configured > 0)) {
    return 1048576;
  }
  return configured;
}
|
|
779
|
+
|
|
780
|
+
// P1-CR-004 — TRUE bounded streaming pass-through. Pipe the upstream body to the
|
|
781
|
+
// client response as it arrives (real streaming) while counting bytes; if the
|
|
782
|
+
// running total exceeds maxBytes, abort: cancel the upstream reader and destroy
|
|
783
|
+
// the client response so a long-lived or malicious stream cannot hold memory or
|
|
784
|
+
// the connection open unbounded. Bytes already written cannot be retracted, so
|
|
785
|
+
// this caps total memory/throughput, not the already-flushed prefix.
|
|
786
|
+
/**
 * Wait until a backpressured response can accept more data, or has gone away.
 *
 * Waiting on "drain" alone never settles when the client disconnects, because
 * a closed response does not emit "drain". Both events are raced and the
 * losing listener is removed through the abort signal.
 *
 * @param {import("node:events").EventEmitter & { destroyed?: boolean }} response
 * @returns {Promise<boolean>} true after "drain"; false if the response was
 *   already destroyed or emitted "close" first.
 */
async function waitForDrainOrClose(response) {
  if (response.destroyed) {
    return false;
  }
  const aborter = new AbortController();
  const { signal } = aborter;
  try {
    return await Promise.race([
      once(response, "drain", { signal }).then(() => true),
      once(response, "close", { signal }).then(() => false)
    ]);
  } finally {
    // Detach whichever listener did not fire; its rejection is already
    // observed by Promise.race, so it cannot surface as unhandled.
    aborter.abort();
  }
}

/**
 * Stream the upstream body to the client while enforcing a running byte cap.
 *
 * - No upstream body: the response is ended immediately.
 * - Running total exceeds `maxBytes`: the upstream reader is cancelled, the
 *   truncation metric and log event are emitted, and the client response is
 *   destroyed. Bytes already written are not retracted.
 * - Client goes away (before a write, or while waiting for drain): the
 *   upstream reader is cancelled and the function returns instead of waiting
 *   forever for a "drain" that cannot arrive.
 * - Any read/write error: the reader is cancelled, the failure is logged, and
 *   the response is destroyed if it has not ended.
 *
 * @param {object} args
 * @param {{ body: ReadableStream|null }} args.upstreamResponse
 * @param {object} args.response - Node writable HTTP response.
 * @param {number} args.maxBytes - Byte cap; a falsy value disables the check.
 * @param {object|null} [args.logger]
 * @param {object|null} [args.metrics]
 * @param {string|null} [args.correlationId]
 * @returns {Promise<void>}
 */
async function pipeUpstreamBodyBounded({ upstreamResponse, response, maxBytes, logger = null, metrics = null, correlationId = null }) {
  if (!upstreamResponse.body) {
    response.end();
    return;
  }

  const reader = upstreamResponse.body.getReader();
  let received = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      received += value.byteLength;
      if (maxBytes && received > maxBytes) {
        // Over the cap: stop reading upstream and tear down the client write so
        // the oversize stream is bounded (fail-closed on size).
        void cancelReader(reader);
        metrics?.increment("haechi_response_stream_truncated_total");
        logger?.error("proxy_stream_pass_through_too_large", {
          correlationId,
          maxBytes
        });
        if (!response.writableEnded) {
          response.destroy();
        }
        return;
      }
      // The client already disconnected: nothing left to write to, so release
      // the upstream connection instead of pulling more data.
      if (response.destroyed) {
        void cancelReader(reader);
        return;
      }
      // Respect downstream backpressure: stop pulling upstream until the client
      // socket has drained — or bail out if it closes while we wait.
      const ok = response.write(Buffer.from(value));
      if (!ok && !(await waitForDrainOrClose(response))) {
        void cancelReader(reader);
        return;
      }
    }
    response.end();
  } catch (error) {
    void cancelReader(reader);
    // Surface the failure; previously it was dropped without a trace.
    logger?.error("proxy_stream_pass_through_failed", {
      correlationId,
      error: error?.message ?? String(error)
    });
    if (!response.writableEnded) {
      response.destroy();
    }
  }
}
|
|
830
|
+
|
|
712
831
|
function nodeResponseSink(response) {
|
|
713
832
|
return {
|
|
714
833
|
write(text) {
|
|
@@ -745,20 +864,42 @@ async function recordStreamDecision({ runtime, routeContext, blocked, summary, m
|
|
|
745
864
|
}
|
|
746
865
|
|
|
747
866
|
async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, authContext = {}, issuedTokens = [], metrics = null }) {
|
|
748
|
-
|
|
867
|
+
// P1-CR-003 — content-encoding is read off the RAW upstream headers (before
|
|
868
|
+
// sanitation) for the compressed-response gate; the headers RETURNED to the
|
|
869
|
+
// client are always the sanitized set (no stale compression/length metadata).
|
|
870
|
+
const rawHeaders = Object.fromEntries(upstreamResponse.headers.entries());
|
|
871
|
+
const headers = sanitizeResponseHeaders(upstreamResponse);
|
|
749
872
|
|
|
750
873
|
if (!runtime.config.responseProtection.enabled || !routeContext.protectResponse) {
|
|
751
|
-
|
|
874
|
+
// P1-CR-004 — apply the same byte cap to this raw upstream-body read so an
|
|
875
|
+
// unprotected/forwarded response cannot be buffered unbounded. Fail closed
|
|
876
|
+
// (502) when the upstream body exceeds the cap.
|
|
877
|
+
const passThroughMax = streamingPassThroughMaxBytes(runtime.config);
|
|
878
|
+
const { body: rawBody, tooLarge } = await readUpstreamBody(upstreamResponse, { maxBytes: passThroughMax });
|
|
879
|
+
if (tooLarge) {
|
|
880
|
+
metrics?.increment("haechi_response_stream_truncated_total");
|
|
881
|
+
return {
|
|
882
|
+
decision: "response_unprotected_blocked",
|
|
883
|
+
status: 502,
|
|
884
|
+
headers: { "content-type": "application/json" },
|
|
885
|
+
body: Buffer.from(`${JSON.stringify({
|
|
886
|
+
error: "haechi_response_too_large",
|
|
887
|
+
reason: "response_body_too_large",
|
|
888
|
+
message: `Response body exceeds responseProtection.maxBytes (${passThroughMax})`
|
|
889
|
+
}, null, 2)}\n`)
|
|
890
|
+
};
|
|
891
|
+
}
|
|
752
892
|
return {
|
|
753
893
|
status: upstreamResponse.status,
|
|
754
|
-
|
|
894
|
+
// Re-set a correct content-length: this is a fully-buffered body.
|
|
895
|
+
headers: { ...headers, "content-length": String(rawBody.byteLength) },
|
|
755
896
|
body: rawBody,
|
|
756
897
|
decision: "forwarded"
|
|
757
898
|
};
|
|
758
899
|
}
|
|
759
900
|
|
|
760
901
|
const responsePolicy = runtime.config.responseProtection;
|
|
761
|
-
const contentEncoding =
|
|
902
|
+
const contentEncoding = rawHeaders["content-encoding"] ?? "";
|
|
762
903
|
const bodyRead = await readUpstreamBody(upstreamResponse, { maxBytes: responsePolicy.maxBytes });
|
|
763
904
|
|
|
764
905
|
if (bodyRead.tooLarge) {
|
|
@@ -910,12 +1051,12 @@ function restoreTokens(value, tokenValues) {
|
|
|
910
1051
|
return value;
|
|
911
1052
|
}
|
|
912
1053
|
|
|
913
|
-
async function forward({ upstream, request, body, timeoutMs = null, metrics = null }) {
|
|
1054
|
+
async function forward({ upstream, request, body, timeoutMs = null, metrics = null, forwardPolicy = {} }) {
|
|
914
1055
|
const target = buildUpstreamUrl({ upstream, requestUrl: request.url });
|
|
915
1056
|
try {
|
|
916
1057
|
return await fetch(target, {
|
|
917
1058
|
method: request.method,
|
|
918
|
-
headers: filteredHeaders(request.headers),
|
|
1059
|
+
headers: filteredHeaders(request.headers, forwardPolicy),
|
|
919
1060
|
body: request.method === "GET" || request.method === "HEAD" ? undefined : body,
|
|
920
1061
|
signal: timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined
|
|
921
1062
|
});
|
|
@@ -943,24 +1084,117 @@ function buildUpstreamUrl({ upstream, requestUrl }) {
|
|
|
943
1084
|
return new URL(`${parsed.pathname}${parsed.search}`, upstream.endsWith("/") ? upstream : `${upstream}/`);
|
|
944
1085
|
}
|
|
945
1086
|
|
|
946
|
-
|
|
1087
|
+
// P0-CR-001 — DEFAULT-DROP upstream header allowlist. The client's request
|
|
1088
|
+
// headers cross from the local gateway trust boundary into the MODEL PROVIDER
|
|
1089
|
+
// boundary, so the policy is: forward ONLY a known-safe set; everything else
|
|
1090
|
+
// (including ambient client credentials — Cookie, Proxy-Authorization, and the
|
|
1091
|
+
// client's gateway Authorization) is dropped. The conditional `authorization`
|
|
1092
|
+
// rule is handled in filteredHeaders against the forward policy. An operator can
|
|
1093
|
+
// additively widen the set with `target.forwardHeaders` for an unusual upstream.
|
|
1094
|
+
//
|
|
1095
|
+
// The forwarded set is exactly the headers the OpenAI-compatible / Anthropic /
|
|
1096
|
+
// Gemini adapters need: the provider key headers (x-api-key, x-goog-api-key,
|
|
1097
|
+
// openai-organization, openai-beta), provider version/feature pins
|
|
1098
|
+
// (anthropic-version, anthropic-beta), and benign request metadata (accept,
|
|
1099
|
+
// content-type — always rewritten to application/json, user-agent,
|
|
1100
|
+
// accept-language). content-type is set unconditionally below so it is NOT in
|
|
1101
|
+
// this set.
|
|
1102
|
+
const FORWARD_HEADER_ALLOWLIST = new Set([
|
|
1103
|
+
"x-api-key",
|
|
1104
|
+
"anthropic-version",
|
|
1105
|
+
"anthropic-beta",
|
|
1106
|
+
"x-goog-api-key",
|
|
1107
|
+
"openai-organization",
|
|
1108
|
+
"openai-beta",
|
|
1109
|
+
"accept",
|
|
1110
|
+
"user-agent",
|
|
1111
|
+
"accept-language"
|
|
1112
|
+
]);
|
|
1113
|
+
|
|
1114
|
+
// ALWAYS-DROP: ambient client credentials + hop-by-hop control headers. These
|
|
1115
|
+
// must NEVER reach the upstream regardless of the allowlist or the operator's
|
|
1116
|
+
// target.forwardHeaders extension (a fail-closed denylist that wins over both).
|
|
1117
|
+
// - host / content-length: rewritten/recomputed by fetch for the new request.
|
|
1118
|
+
// - cookie / set-cookie / proxy-authorization: ambient client credentials.
|
|
1119
|
+
// - connection / keep-alive / te / trailer / transfer-encoding / upgrade:
|
|
1120
|
+
// hop-by-hop headers (RFC 7230 §6.1) that must not be tunneled end-to-end.
|
|
1121
|
+
const FORWARD_HEADER_DENYLIST = new Set([
|
|
1122
|
+
"host",
|
|
1123
|
+
"content-length",
|
|
1124
|
+
"cookie",
|
|
1125
|
+
"set-cookie",
|
|
1126
|
+
"proxy-authorization",
|
|
1127
|
+
"connection",
|
|
1128
|
+
"keep-alive",
|
|
1129
|
+
"te",
|
|
1130
|
+
"trailer",
|
|
1131
|
+
"transfer-encoding",
|
|
1132
|
+
"upgrade"
|
|
1133
|
+
]);
|
|
1134
|
+
|
|
1135
|
+
// `forwardPolicy` is built by createHaechiProxy from the runtime: it carries
|
|
1136
|
+
// - gatewayConsumedAuthorization: true when auth.provider !== "none", i.e. the
|
|
1137
|
+
// gateway authenticated the CLIENT with the request's Authorization. That
|
|
1138
|
+
// header is the GATEWAY credential Haechi already consumed; forwarding it
|
|
1139
|
+
// would leak a gateway secret into the model provider, so it is DROPPED.
|
|
1140
|
+
// When false (auth.provider "none"), the client's Authorization is the
|
|
1141
|
+
// UPSTREAM provider key (the OpenAI-compatible pass-through pattern), so it
|
|
1142
|
+
// is FORWARDED.
|
|
1143
|
+
// - extraHeaders: the operator's additive target.forwardHeaders allowlist
|
|
1144
|
+
// (lowercase names) — never able to override the always-drop denylist.
|
|
1145
|
+
/**
 * Build the header set sent to the upstream from the client's request headers.
 *
 * Decision order for each header with a truthy value (name compared lowercase):
 *   1. In FORWARD_HEADER_DENYLIST          -> dropped.
 *   2. `authorization`                     -> dropped when the gateway consumed
 *                                             it, forwarded otherwise.
 *   3. `content-type`                      -> dropped (replaced below).
 *   4. In FORWARD_HEADER_ALLOWLIST or in
 *      `forwardPolicy.extraHeaders`        -> forwarded.
 *   5. Anything else                       -> dropped.
 * `content-type` is always set to `application/json` on the result.
 *
 * @param {Object<string, string|string[]>} headers - Incoming request headers.
 * @param {{ gatewayConsumedAuthorization?: boolean, extraHeaders?: Set<string>|string[] }} [forwardPolicy]
 * @returns {Headers} Headers for the upstream request.
 */
function filteredHeaders(headers, forwardPolicy = {}) {
  const dropAuthorization = Boolean(forwardPolicy.gatewayConsumedAuthorization);

  // Accept a Set as-is, an array by conversion, and treat anything else as empty.
  let operatorAllowed = forwardPolicy.extraHeaders;
  if (!(operatorAllowed instanceof Set)) {
    operatorAllowed = new Set(Array.isArray(operatorAllowed) ? operatorAllowed : []);
  }

  const shouldForward = (lowerName) => {
    if (FORWARD_HEADER_DENYLIST.has(lowerName)) {
      return false;
    }
    if (lowerName === "authorization") {
      return !dropAuthorization;
    }
    if (lowerName === "content-type") {
      return false;
    }
    return FORWARD_HEADER_ALLOWLIST.has(lowerName) || operatorAllowed.has(lowerName);
  };

  const outgoing = new Headers();
  for (const [key, value] of Object.entries(headers)) {
    if (value && shouldForward(key.toLowerCase())) {
      appendHeader(outgoing, key, value);
    }
  }
  outgoing.set("content-type", "application/json");
  return outgoing;
}
|
|
963
1187
|
|
|
1188
|
+
/**
 * Copy one header onto a Headers-like target.
 *
 * A scalar value replaces any existing entry via `set`; an array value is
 * added element by element via `append`.
 *
 * @param {Headers} target - Destination headers.
 * @param {string} key - Header name.
 * @param {string|string[]} value - Header value or list of values.
 */
function appendHeader(target, key, value) {
  if (!Array.isArray(value)) {
    target.set(key, value);
    return;
  }
  for (const entry of value) {
    target.append(key, entry);
  }
}
|
|
1197
|
+
|
|
964
1198
|
function readBody(request, { maxBytes }) {
|
|
965
1199
|
return new Promise((resolve, reject) => {
|
|
966
1200
|
const chunks = [];
|
|
@@ -1036,9 +1270,13 @@ function isJson(contentType = "") {
|
|
|
1036
1270
|
}
|
|
1037
1271
|
|
|
1038
1272
|
/**
 * Derive the headers for a freshly serialized JSON body.
 *
 * Returns a copy of `headers` with `content-type` forced to
 * `application/json` and every name in RESPONSE_HOP_BY_HOP_HEADERS removed.
 * The input object is not modified.
 *
 * @param {Object<string, string|string[]>} headers - Source header map.
 * @returns {Object<string, string|string[]>} New header map.
 */
function transformedJsonHeaders(headers) {
  const rewritten = { ...headers, "content-type": "application/json" };
  // The body was re-serialized, so upstream length/encoding metadata is stale.
  RESPONSE_HOP_BY_HOP_HEADERS.forEach((stale) => {
    delete rewritten[stale];
  });
  return rewritten;
}
|
|
1044
1282
|
|
|
@@ -1071,10 +1309,13 @@ async function unprotectedResponseDecision({
|
|
|
1071
1309
|
metrics?.increment("haechi_response_unprotected_total");
|
|
1072
1310
|
|
|
1073
1311
|
if (allowed) {
|
|
1312
|
+
// P1-CR-003 — `headers` is already the sanitized set (no stale
|
|
1313
|
+
// compression/length metadata). Re-set a correct content-length for this
|
|
1314
|
+
// fully-buffered body.
|
|
1074
1315
|
return {
|
|
1075
1316
|
decision,
|
|
1076
1317
|
status: upstreamResponse.status,
|
|
1077
|
-
headers,
|
|
1318
|
+
headers: { ...headers, "content-length": String(rawBody.byteLength) },
|
|
1078
1319
|
body: rawBody
|
|
1079
1320
|
};
|
|
1080
1321
|
}
|