haechi 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.ko.md +97 -97
  2. package/README.md +2 -2
  3. package/SECURITY.md +19 -11
  4. package/docs/README.md +2 -0
  5. package/docs/current/api-stability.ko.md +26 -26
  6. package/docs/current/compliance-mapping.ko.md +53 -0
  7. package/docs/current/compliance-mapping.md +53 -0
  8. package/docs/current/config-version.ko.md +30 -0
  9. package/docs/current/config-version.md +51 -0
  10. package/docs/current/configuration.ko.md +242 -102
  11. package/docs/current/configuration.md +149 -9
  12. package/docs/current/operations-runbook.ko.md +121 -0
  13. package/docs/current/operations-runbook.md +204 -0
  14. package/docs/current/release-process.ko.md +19 -20
  15. package/docs/current/release-process.md +1 -2
  16. package/docs/current/reliability-hardening-track.ko.md +77 -0
  17. package/docs/current/reliability-hardening-track.md +77 -0
  18. package/docs/current/risk-register-release-gate.ko.md +26 -27
  19. package/docs/current/risk-register-release-gate.md +27 -20
  20. package/docs/current/security-whitepaper.ko.md +102 -0
  21. package/docs/current/security-whitepaper.md +102 -0
  22. package/docs/current/shared-responsibility.ko.md +33 -24
  23. package/docs/current/shared-responsibility.md +12 -3
  24. package/docs/current/threat-model.ko.md +12 -12
  25. package/docs/current/threat-model.md +3 -3
  26. package/haechi.config.example.json +19 -3
  27. package/package.json +6 -2
  28. package/packages/audit/index.mjs +26 -2
  29. package/packages/cli/bin/haechi.mjs +54 -8
  30. package/packages/cli/runtime.mjs +398 -10
  31. package/packages/core/index.mjs +189 -15
  32. package/packages/filter/index.mjs +299 -9
  33. package/packages/metrics/index.mjs +181 -0
  34. package/packages/proxy/index.mjs +535 -41
@@ -1,28 +1,207 @@
1
1
  import { createServer } from "node:http";
2
+ import { createServer as createHttpsServer } from "node:https";
2
3
  import { createHash, randomUUID } from "node:crypto";
4
+ import { isUtf8 } from "node:buffer";
5
+ import { readFileSync } from "node:fs";
6
+ import { fileURLToPath } from "node:url";
3
7
  import { inspectResponseStream } from "../stream-filter/index.mjs";
4
8
 
5
9
  export const DEFAULT_PROXY_PORT = 11016;
6
10
 
7
- export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "127.0.0.1", allowRemoteBind = false }) {
8
- assertSafeProxyBind({ host, allowRemoteBind });
11
// Version string of this package, resolved exactly once when the module is
// evaluated. The manifest is looked up at "../../package.json" relative to this
// file. Any failure along the way degrades to "unknown" instead of throwing, so
// reading the version can never prevent the proxy module from loading.
export const HAECHI_VERSION = readPackageVersion();

/**
 * Read the `version` field from the package manifest two directories above
 * this module.
 *
 * @returns {string} The manifest's `version` when it is a string; otherwise
 *   "unknown" (missing/unreadable file, invalid JSON, or non-string version).
 */
function readPackageVersion() {
  try {
    const manifestPath = fileURLToPath(new URL("../../package.json", import.meta.url));
    const { version } = JSON.parse(readFileSync(manifestPath, "utf8"));
    if (typeof version === "string") {
      return version;
    }
  } catch {
    // Deliberate best-effort: fall through to the "unknown" sentinel below.
  }
  return "unknown";
}
27
+
28
/**
 * Decide whether a TLS context carries enough material to terminate TLS:
 * either a key AND a cert, or a pfx bundle.
 *
 * Callers use this one predicate both to guard the bind and to select the
 * server type, so a context that is present but incomplete is reported as
 * unusable (fail closed) rather than silently accepted.
 *
 * @param {unknown} ctx Candidate TLS options object.
 * @returns {boolean} true only for a non-array object with (key && cert) or pfx.
 */
export function hasUsableTlsMaterial(ctx) {
  const isOptionsObject = ctx !== null && typeof ctx === "object" && !Array.isArray(ctx);
  if (!isOptionsObject) {
    return false;
  }
  const keyPairPresent = Boolean(ctx.key && ctx.cert);
  return keyPairPresent ? true : Boolean(ctx.pfx);
}
42
+
43
/**
 * Build the proxy's error logger for the given output format.
 *
 * - "json": one JSON object per line on stderr, shaped
 *   `{ level: "error", event, ...fields }`.
 * - anything else ("text" by default): a human-readable line
 *   `haechi <event>: k=v k=v`, skipping fields whose value is null/undefined.
 *
 * The logger writes exactly the fields it is given, so callers are responsible
 * for passing only non-sensitive values (ids, error names, status codes).
 *
 * @param {string} [format="text"] Output format selector.
 * @returns {{ error(event: string, fields?: object): void }}
 */
function createLogger(format = "text") {
  const emitJson = format === "json";

  const renderJson = (event, fields) => JSON.stringify({ level: "error", event, ...fields });

  const renderText = (event, fields) => {
    const pairs = [];
    for (const [name, value] of Object.entries(fields)) {
      if (value !== undefined && value !== null) {
        pairs.push(`${name}=${value}`);
      }
    }
    const suffix = pairs.length ? `: ${pairs.join(" ")}` : "";
    return `haechi ${event}${suffix}`;
  };

  return {
    error(event, fields = {}) {
      const line = emitJson ? renderJson(event, fields) : renderText(event, fields);
      process.stderr.write(`${line}\n`);
    }
  };
}
62
+
63
+ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "127.0.0.1", allowRemoteBind = false, tlsContext, trustForwardedProto }) {
9
64
  const { haechi, config, protocolAdapter } = runtime;
10
- const rateLimiter = createRateLimiter();
11
65
 
12
- const server = createServer(async (request, response) => {
66
+ // WS6 TLS hardening. The tlsContext / trustForwardedProto source of truth is
67
+ // the normalized config (proxy.tls is loaded into a tlsContext at startup;
68
+ // proxy.trustForwardedProto is a boolean), but an explicit argument overrides
69
+ // it (so a hand-built runtime / a test can drive these directly). hasUsableTls
70
+ // is the same predicate the dashboard satellite uses.
71
+ const resolvedTlsContext = tlsContext !== undefined ? tlsContext : (config.proxy?.tls ?? null);
72
+ const resolvedTrustForwardedProto = trustForwardedProto !== undefined
73
+ ? trustForwardedProto
74
+ : Boolean(config.proxy?.trustForwardedProto);
75
+ const usableTls = hasUsableTlsMaterial(resolvedTlsContext);
76
+
77
+ // Bind guard, two layers. (1) the loopback/remote-bind gate (shared with the
78
+ // dashboard). (2) WS6: a remote bind ADDITIONALLY requires a usable tlsContext
79
+ // OR an explicit trustForwardedProto acknowledgement (a trusted reverse proxy
80
+ // terminates TLS in front of Haechi). Otherwise it THROWS at startup — the
81
+ // proxy must NEVER serve bearer tokens + payloads in plaintext on a remote
82
+ // bind. Loopback dev is unaffected (plain http, no TLS).
83
+ assertSafeProxyBind({ host, allowRemoteBind });
84
+ assertSafeProxyTransport({
85
+ host,
86
+ allowRemoteBind,
87
+ hasUsableTls: usableTls,
88
+ trustForwardedProto: resolvedTrustForwardedProto
89
+ });
90
+
91
+ // When the remote bind rests on trustForwardedProto (plain http behind a
92
+ // trusted TLS hop) we REJECT any protected-route request whose
93
+ // X-Forwarded-Proto is not https — a plaintext request that bypassed the hop.
94
+ // This is only meaningful for a non-loopback, plain-http, trust-forwarded
95
+ // listener; a loopback dev server or an https-terminating server never gates.
96
+ const enforceForwardedProto = !isLoopbackHost(host)
97
+ && allowRemoteBind
98
+ && !usableTls
99
+ && resolvedTrustForwardedProto;
100
+ // The runtime owns the rate limiter (an injectable collaborator). Fall back to
101
+ // a local per-process default so a hand-built runtime object without a
102
+ // rateLimiter still works (backward-compatible). The default and the runtime's
103
+ // default share the same allow(key, limit) -> boolean fixed-window contract.
104
+ const rateLimiter = runtime.rateLimiter ?? createRateLimiter();
105
+ // The metrics collector is owned by the runtime (injectable). Fall back to a
106
+ // no-op so a hand-built runtime object without metrics still works.
107
+ const metrics = runtime.metrics ?? noopMetrics();
108
+ const logger = createLogger(config.logging?.format ?? "text");
109
+
110
+ // WS4-B backpressure: a configurable global max-in-flight ceiling. 0 (default)
111
+ // disables it, preserving 1.1 behavior. When > 0 and the live count is at the
112
+ // ceiling, a NEW non-exempt request is rejected 503 + Retry-After BEFORE auth
113
+ // and body-read. The /__haechi/* observability routes are EXEMPT so metrics +
114
+ // liveness can be scraped under saturation.
115
+ const maxInFlight = config.limits.maxInFlight ?? 0;
116
+ const retryAfterSeconds = Math.max(1, Math.ceil((config.limits.shutdownGraceMs ?? 10000) / 1000));
117
+ // Live in-flight request count for the drain-tracking AND the ceiling. A
118
+ // bounded integer — never identity/value bearing.
119
+ let inFlight = 0;
120
+ // Resolves once in-flight drains to zero during a graceful close().
121
+ let drained = null;
122
+ let resolveDrained = null;
123
+
124
+ const requestHandler = async (request, response) => {
125
+ // Per-REQUEST correlation id: generated here, threaded into every protect
126
+ // context (so the audit events of one request share it) AND into the error
127
+ // log. A UUID — never a payload/identity/PII value.
128
+ const correlationId = randomUUID();
129
+ const startedAt = process.hrtime.bigint();
130
+ let routeId = "unknown";
131
+
132
+ // Observability routes are exempt from the in-flight ceiling and are NOT
133
+ // counted toward it: liveness/readiness/metrics must answer under saturation.
134
+ const exemptRoute = isObservabilityRoute(request);
135
+
136
+ // Backpressure: reject at the ceiling BEFORE doing any work. Counted in
137
+ // metrics by a bounded enum decision; the body is never read.
138
+ if (!exemptRoute && maxInFlight > 0 && inFlight >= maxInFlight) {
139
+ metrics.increment("haechi_overloaded_total");
140
+ response.writeHead(503, {
141
+ "content-type": "application/json",
142
+ "retry-after": String(retryAfterSeconds)
143
+ });
144
+ response.end(`${JSON.stringify({ error: "haechi_overloaded", message: "Server at max in-flight capacity; retry later" }, null, 2)}\n`);
145
+ return;
146
+ }
147
+
148
+ // Track in-flight for graceful drain + the ceiling. Decrement in finally so a
149
+ // throw/early-return can never leak the count (which would wedge close()).
150
+ let counted = false;
151
+ if (!exemptRoute) {
152
+ inFlight += 1;
153
+ counted = true;
154
+ }
13
155
  try {
156
+ // WS6 forwarded-proto enforcement. When this is a non-loopback plain-http
157
+ // listener resting on trustForwardedProto (a trusted reverse proxy
158
+ // terminates TLS in front of Haechi), a request whose X-Forwarded-Proto is
159
+ // not "https" arrived over plaintext that BYPASSED the TLS hop — reject it
160
+ // fail-closed BEFORE auth and body-read, so a protected route never serves
161
+ // tokens/payloads over an unverified-plaintext hop. The /__haechi/* liveness
162
+ // routes are EXEMPT (they leak nothing) so a health check / metrics scrape
163
+ // from the loopback sidecar still answers.
164
+ if (enforceForwardedProto && !exemptRoute && !isForwardedHttps(request)) {
165
+ writeJson(response, 403, {
166
+ error: "haechi_forwarded_proto_required",
167
+ message: "This proxy runs behind a trusted TLS-terminating hop (proxy.trustForwardedProto). A request without X-Forwarded-Proto: https bypassed the hop and is refused."
168
+ });
169
+ return;
170
+ }
171
+ // Health + telemetry endpoints are unauthenticated and checked BEFORE auth
172
+ // and body-read. They live under the reserved /__haechi/* prefix.
173
+ if (request.method === "GET" && request.url === "/__haechi/live") {
174
+ // Cheap process liveness. Always 200 while the event loop is serving.
175
+ writeJson(response, 200, { ok: true, version: HAECHI_VERSION });
176
+ return;
177
+ }
178
+ if (request.method === "GET" && request.url === "/__haechi/ready") {
179
+ await handleReady({ runtime, response });
180
+ return;
181
+ }
14
182
  if (request.method === "GET" && request.url === "/__haechi/health") {
15
- // Intentionally unauthenticated; exposes only the mode.
16
- writeJson(response, 200, { ok: true, mode: config.mode });
183
+ // Back-compat: keep the original shape (ok + mode) and add version.
184
+ writeJson(response, 200, { ok: true, mode: config.mode, version: HAECHI_VERSION });
185
+ return;
186
+ }
187
+ if (request.method === "GET" && request.url === "/__haechi/metrics") {
188
+ if (!config.metrics?.enabled) {
189
+ writeJson(response, 404, { error: "haechi_metrics_disabled", message: "Metrics endpoint is disabled (metrics.enabled: false)" });
190
+ return;
191
+ }
192
+ response.writeHead(200, { "content-type": "text/plain; version=0.0.4; charset=utf-8" });
193
+ response.end(metrics.render());
17
194
  return;
18
195
  }
19
196
 
20
197
  assertRelativeProxyTarget(request.url);
21
198
  const routeContext = protocolAdapter.classifyRequest(request);
199
+ routeId = routeContext?.routeId ?? "unknown";
200
+ const mode = config.policy.mode ?? config.mode;
22
201
 
23
202
  // Authenticate, resolve the policy profile, and rate-limit BEFORE reading
24
203
  // the body, so a denied/throttled request cannot stream a large body.
25
- const gate = await authorizeRequest({ runtime, request, routeContext, rateLimiter });
204
+ const gate = await authorizeRequest({ runtime, request, routeContext, rateLimiter, metrics, correlationId });
26
205
  if (gate.denied) {
27
206
  writeJson(response, gate.denied.status, {
28
207
  error: gate.denied.error,
@@ -31,7 +210,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
31
210
  return;
32
211
  }
33
212
  const { identity, profile, policyEngine, modelAllowlist } = gate;
34
- const authContext = { identity, profile, policyEngine };
213
+ const authContext = { identity, profile, policyEngine, correlationId };
35
214
 
36
215
  const body = await readBody(request, {
37
216
  maxBytes: config.limits.maxRequestBytes
@@ -41,12 +220,14 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
41
220
  // Model allowlist runs after body read (the model field is in the body).
42
221
  if (modelAllowlist && typeof json?.model === "string" && !modelAllowlist.includes(json.model)) {
43
222
  await recordProxyDecision({
44
- runtime, routeContext, identity, profile,
223
+ runtime, routeContext, identity, profile, correlationId,
45
224
  decision: "model_not_allowed",
46
225
  reason: `model:${json.model}`,
47
226
  enforced: true,
48
227
  blocked: true
49
228
  });
229
+ countDecision(metrics, { routeContext, mode, decision: "model_not_allowed" });
230
+ metrics.increment("haechi_blocks_total");
50
231
  writeJson(response, 403, {
51
232
  error: "haechi_model_not_allowed",
52
233
  message: `Model not allowed: ${json.model}`
@@ -56,7 +237,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
56
237
 
57
238
  if (isStreamingRequest(json, routeContext)) {
58
239
  if (config.streaming.requestMode === "inspect") {
59
- await handleInspectedStream({ runtime, request, response, routeContext, json, authContext });
240
+ await handleInspectedStream({ runtime, request, response, routeContext, json, authContext, metrics });
60
241
  return;
61
242
  }
62
243
 
@@ -66,16 +247,19 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
66
247
  routeContext,
67
248
  identity,
68
249
  profile,
250
+ correlationId,
69
251
  decision: "streaming_request_pass_through",
70
252
  reason: "streaming_request_pass_through",
71
253
  enforced: false,
72
254
  blocked: false
73
255
  });
256
+ countDecision(metrics, { routeContext, mode, decision: "forwarded" });
74
257
  const upstreamResponse = await forward({
75
258
  upstream: config.target.upstream,
76
259
  request,
77
260
  body,
78
- timeoutMs: config.limits.upstreamTimeoutMs
261
+ timeoutMs: config.limits.upstreamTimeoutMs,
262
+ metrics
79
263
  });
80
264
  const { body: rawBody } = await readUpstreamBody(upstreamResponse);
81
265
  response.writeHead(upstreamResponse.status, Object.fromEntries(upstreamResponse.headers.entries()));
@@ -83,6 +267,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
83
267
  return;
84
268
  }
85
269
 
270
+ countDecision(metrics, { routeContext, mode, decision: "streaming_blocked" });
86
271
  writeJson(response, 501, {
87
272
  error: "haechi_streaming_unsupported",
88
273
  message: "Streaming requests are blocked unless streaming.requestMode is set to pass-through or inspect"
@@ -96,11 +281,13 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
96
281
  ...authContext,
97
282
  operation: `request:${routeContext.operation}`,
98
283
  direction: "request",
99
- mode: config.policy.mode ?? config.mode
284
+ mode
100
285
  })
101
286
  : { payload: json, blocked: false };
102
287
 
103
288
  if (result.blocked) {
289
+ countDecision(metrics, { routeContext, mode, decision: "blocked" });
290
+ metrics.increment("haechi_blocks_total");
104
291
  writeJson(response, 403, {
105
292
  error: "haechi_policy_block",
106
293
  summary: result.summary,
@@ -113,7 +300,8 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
113
300
  upstream: config.target.upstream,
114
301
  request,
115
302
  body: JSON.stringify(result.payload),
116
- timeoutMs: config.limits.upstreamTimeoutMs
303
+ timeoutMs: config.limits.upstreamTimeoutMs,
304
+ metrics
117
305
  });
118
306
 
119
307
  const forwarded = await maybeProtectResponse({
@@ -121,58 +309,234 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
121
309
  routeContext,
122
310
  runtime,
123
311
  authContext,
124
- issuedTokens: result.issuedTokens ?? []
312
+ issuedTokens: result.issuedTokens ?? [],
313
+ metrics
125
314
  });
126
315
 
316
+ countDecision(metrics, {
317
+ routeContext,
318
+ mode,
319
+ decision: forwarded.decision ?? "forwarded"
320
+ });
127
321
  response.writeHead(forwarded.status, forwarded.headers);
128
322
  response.end(forwarded.body);
129
323
  } catch (error) {
130
324
  const expected = typeof error?.statusCode === "number";
131
325
  if (!expected) {
132
- console.error(`haechi proxy internal error: ${error?.stack ?? error?.message ?? error}`);
326
+ // Carry the error NAME/class + correlationId only — NEVER the payload,
327
+ // headers, token, or any PII.
328
+ logger.error("proxy_internal_error", {
329
+ correlationId,
330
+ errorName: error?.name ?? "Error",
331
+ statusCode: error?.statusCode ?? 500
332
+ });
333
+ metrics.increment("haechi_internal_error_total");
133
334
  }
134
335
  writeJson(response, error.statusCode ?? 500, {
135
336
  error: error.errorCode ?? "haechi_proxy_error",
136
337
  message: expected ? error.message : "Internal proxy error"
137
338
  });
339
+ } finally {
340
+ const elapsedSeconds = Number(process.hrtime.bigint() - startedAt) / 1e9;
341
+ // route label is a bounded route id (or "unknown") — never an identity/value.
342
+ metrics.observe("haechi_request_duration_seconds", elapsedSeconds, { route: routeId });
343
+ if (counted) {
344
+ inFlight -= 1;
345
+ // If a graceful close() is awaiting drain and we just hit zero, resolve it.
346
+ if (resolveDrained && inFlight <= 0) {
347
+ resolveDrained();
348
+ }
349
+ }
138
350
  }
139
- });
351
+ };
352
+
353
+ // Server selection: a usable tlsContext → an https listener terminating TLS in
354
+ // this process; otherwise plain http (unchanged for loopback/dev). The bind
355
+ // guard above already guarantees a non-loopback bind without usable TLS carries
356
+ // an explicit trustForwardedProto acknowledgement, so a plain-http server is
357
+ // only ever exposed remotely behind a trusted TLS hop (and gated below).
358
+ const server = usableTls
359
+ ? createHttpsServer(resolvedTlsContext, requestHandler)
360
+ : createServer(requestHandler);
361
+ const servesHttps = usableTls;
362
+
363
+ // WS4-B tuned timeouts. Only override Node's server defaults when a value is
364
+ // configured (null = leave Node's default untouched, so behavior is unchanged
365
+ // unless an operator opts in). requestTimeout caps the whole request; a value
366
+ // of 0 disables the timeout (Node semantics) — validated upstream.
367
+ if (config.limits.requestTimeoutMs !== null && config.limits.requestTimeoutMs !== undefined) {
368
+ server.requestTimeout = config.limits.requestTimeoutMs;
369
+ }
370
+ if (config.limits.headersTimeoutMs !== null && config.limits.headersTimeoutMs !== undefined) {
371
+ server.headersTimeout = config.limits.headersTimeoutMs;
372
+ }
140
373
 
141
374
  return {
142
375
  server,
376
+ // Whether THIS listener terminates TLS (https) — the CLI/log line reflects
377
+ // the right scheme, and a caller can assert the selected transport.
378
+ servesHttps,
143
379
  listen() {
144
380
  return new Promise((resolve) => {
145
381
  server.listen(port, host, () => {
146
382
  const address = server.address();
147
- resolve({ host: address.address, port: address.port });
383
+ resolve({ host: address.address, port: address.port, tls: servesHttps });
148
384
  });
149
385
  });
150
386
  },
387
+ // WS4-B graceful drain. Stop accepting new connections, immediately close
388
+ // idle keep-alive sockets, and wait for in-flight requests to drain. After a
389
+ // configurable grace period (limits.shutdownGraceMs) force-close any lingering
390
+ // socket so a stuck keep-alive cannot hold shutdown open forever. The grace
391
+ // timer is .unref()-ed and cleared on a clean drain so `node --test` never
392
+ // hangs on a leaked timer.
151
393
  close() {
394
+ const graceMs = config.limits.shutdownGraceMs ?? 10000;
152
395
  return new Promise((resolve, reject) => {
153
- server.close((error) => error ? reject(error) : resolve());
396
+ let settled = false;
397
+ let graceTimer = null;
398
+
399
+ const finish = (error) => {
400
+ if (settled) {
401
+ return;
402
+ }
403
+ settled = true;
404
+ if (graceTimer) {
405
+ clearTimeout(graceTimer);
406
+ graceTimer = null;
407
+ }
408
+ resolveDrained = null;
409
+ drained = null;
410
+ if (error) {
411
+ reject(error);
412
+ } else {
413
+ resolve();
414
+ }
415
+ };
416
+
417
+ // Stop accepting new connections; the callback fires once all
418
+ // connections are closed (idle ones we close now, in-flight ones once
419
+ // they drain or the grace timer force-closes them).
420
+ server.close((error) => finish(error));
421
+
422
+ // Close idle keep-alive sockets immediately so they don't keep the
423
+ // server open waiting for a request that will never come.
424
+ if (typeof server.closeIdleConnections === "function") {
425
+ server.closeIdleConnections();
426
+ }
427
+
428
+ // If nothing is in flight, the close callback will fire promptly; still
429
+ // arm a drain resolver in case requests are mid-flight.
430
+ if (inFlight <= 0) {
431
+ // No in-flight work; closeIdleConnections handled keep-alive, so
432
+ // server.close() resolves on its own. Nothing more to wait for.
433
+ return;
434
+ }
435
+
436
+ // Wait for in-flight requests to drain, then force-close stragglers
437
+ // (the force close covers a request whose socket lingers after we stop).
438
+ drained = new Promise((res) => { resolveDrained = res; });
439
+ drained.then(() => {
440
+ if (typeof server.closeAllConnections === "function") {
441
+ server.closeAllConnections();
442
+ }
443
+ });
444
+
445
+ // Grace cap: after graceMs force every remaining connection closed so a
446
+ // lingering keep-alive socket cannot hold shutdown open forever. unref()
447
+ // so this timer alone never keeps the event loop (and `node --test`) alive.
448
+ graceTimer = setTimeout(() => {
449
+ if (typeof server.closeAllConnections === "function") {
450
+ server.closeAllConnections();
451
+ }
452
+ }, graceMs);
453
+ if (typeof graceTimer.unref === "function") {
454
+ graceTimer.unref();
455
+ }
154
456
  });
155
457
  }
156
458
  };
157
459
  }
158
460
 
461
// The exact URLs the request handler answers itself (live / ready / health /
// metrics). Kept as an exact-match set on purpose: the handler compares
// request.url with ===, so anything else under /__haechi/ (an unknown path, or
// a known path with a query string) is NOT served locally and falls through to
// auth + upstream forwarding like any protected request.
const OBSERVABILITY_ROUTES = new Set([
  "/__haechi/live",
  "/__haechi/ready",
  "/__haechi/health",
  "/__haechi/metrics"
]);

/**
 * True only for a GET to one of the reserved observability routes.
 *
 * These routes are exempt from the in-flight ceiling and from forwarded-proto
 * enforcement, and are not counted for drain tracking. The exemption must not
 * be wider than the set of routes actually handled locally: a prefix match
 * would let `GET /__haechi/<anything>` skip both gates and still be forwarded
 * upstream.
 *
 * @param {{ method?: string, url?: string }} request Incoming HTTP request.
 * @returns {boolean}
 */
function isObservabilityRoute(request) {
  return request.method === "GET" && OBSERVABILITY_ROUTES.has(request.url);
}
467
+
468
/**
 * Readiness probe. Fail-closed: when the audit sink exposes a probe and that
 * probe does not positively report success, the response is 503.
 *
 * Probe selection: the first of `auditSink.ready` / `auditSink.healthCheck`
 * that is actually a function. Picking by type (rather than by "first
 * non-nullish") means a non-function `ready` property cannot hide a real
 * `healthCheck()` and make the gateway report ready without checking.
 *
 * A probe passes only when it resolves to `true` or to an object with
 * `ok === true`; a rejection or any other value counts as not writable. When
 * the sink has no probe function at all, writability cannot be disproved and
 * the audit check is treated as passing.
 *
 * The response body is `{ ready, version, checks }`, where `checks` holds only
 * the boolean `auditWritable`.
 *
 * @param {{ runtime: { auditSink?: object }, response: object }} args
 * @returns {Promise<void>}
 */
async function handleReady({ runtime, response }) {
  const sink = runtime.auditSink;
  const probe = [sink?.ready, sink?.healthCheck].find((candidate) => typeof candidate === "function");

  let auditWritable = true;
  if (probe) {
    try {
      // Invoke with the sink as receiver so probes that rely on `this` work.
      const outcome = await probe.call(sink);
      auditWritable = outcome === true || outcome?.ok === true;
    } catch {
      // A throwing probe means the audit log cannot be trusted: not ready.
      auditWritable = false;
    }
  }

  const checks = { auditWritable };
  const ready = auditWritable;
  writeJson(response, ready ? 200 : 503, { ready, version: HAECHI_VERSION, checks });
}
494
+
495
/**
 * Bump the `haechi_requests_total` counter for one request outcome.
 *
 * Labels are `route` (routeContext.routeId, or "unknown"), `mode` (or
 * "unknown") and `decision` (passed through as given). Callers are expected to
 * supply only bounded, non-identifying label values.
 *
 * @param {{ increment(name: string, labels?: object): void } | null | undefined} metrics
 *   Collector; a nullish collector makes this a no-op.
 * @param {{ routeContext?: { routeId?: string } | null, mode?: string, decision: string }} outcome
 * @returns {void}
 */
function countDecision(metrics, { routeContext, mode, decision }) {
  if (metrics === null || metrics === undefined) {
    return;
  }
  const labels = {
    route: routeContext?.routeId ?? "unknown",
    mode: mode ?? "unknown",
    decision
  };
  metrics.increment("haechi_requests_total", labels);
}
505
+
506
/**
 * Stand-in metrics collector used when the runtime object supplies none.
 * Exposes the increment / observe / render methods the proxy calls:
 * `increment` and `observe` do nothing, and `render` yields an empty string.
 *
 * @returns {{ increment(): void, observe(): void, render(): string }}
 */
function noopMetrics() {
  const ignore = () => {};
  return {
    increment: ignore,
    observe: ignore,
    render: () => ""
  };
}
517
+
159
518
  // Authenticate → resolve policy profile → rate-limit. Returns the request's
160
519
  // identity/profile/policyEngine/modelAllowlist, or a denial. Auth is required
161
520
  // exactly when an authProvider is configured (auth.provider !== "none").
162
- async function authorizeRequest({ runtime, request, routeContext, rateLimiter }) {
163
- const { authProvider, policyProfiles } = runtime;
521
+ async function authorizeRequest({ runtime, request, routeContext, rateLimiter, metrics, correlationId }) {
522
+ const { authProvider, policyProfiles, config } = runtime;
523
+ const mode = config.policy.mode ?? config.mode;
164
524
  let identity = null;
165
525
 
166
526
  if (authProvider) {
167
527
  try {
168
528
  identity = await authProvider.authenticate(request);
169
529
  } catch {
170
- await recordAuthDenied({ runtime, routeContext, reason: "provider_error" });
530
+ await recordAuthDenied({ runtime, routeContext, reason: "provider_error", correlationId });
531
+ countDecision(metrics, { routeContext, mode, decision: "auth_denied" });
532
+ metrics.increment("haechi_auth_denied_total");
171
533
  return { denied: { status: 401, error: "haechi_auth_denied", message: "Authentication failed" } };
172
534
  }
173
535
  if (!identity) {
174
536
  const reason = hasBearerHeader(request) ? "invalid_token" : "no_token";
175
- await recordAuthDenied({ runtime, routeContext, reason });
537
+ await recordAuthDenied({ runtime, routeContext, reason, correlationId });
538
+ countDecision(metrics, { routeContext, mode, decision: "auth_denied" });
539
+ metrics.increment("haechi_auth_denied_total");
176
540
  return { denied: { status: 401, error: "haechi_auth_denied", message: "Authentication required" } };
177
541
  }
178
542
  }
@@ -183,12 +547,14 @@ async function authorizeRequest({ runtime, request, routeContext, rateLimiter })
183
547
  const key = identity?.id ?? "anonymous";
184
548
  if (!rateLimiter.allow(key, resolved.rate.requestsPerMinute)) {
185
549
  await recordProxyDecision({
186
- runtime, routeContext, identity, profile: resolved.profile,
550
+ runtime, routeContext, identity, profile: resolved.profile, correlationId,
187
551
  decision: "rate_limited",
188
552
  reason: `rate:${resolved.rate.requestsPerMinute}`,
189
553
  enforced: true,
190
554
  blocked: true
191
555
  });
556
+ countDecision(metrics, { routeContext, mode, decision: "rate_limited" });
557
+ metrics.increment("haechi_rate_limited_total");
192
558
  return { denied: { status: 429, error: "haechi_rate_limited", message: "Rate limit exceeded" } };
193
559
  }
194
560
  }
@@ -207,13 +573,36 @@ function hasBearerHeader(request) {
207
573
  }
208
574
 
209
575
  function createRateLimiter() {
210
- // In-memory fixed-window counter. Per-process: resets on restart, not shared
211
- // across replicas acceptable for a single-process self-hosted preview.
576
+ // Backward-compat fallback ONLY: the canonical default lives in the runtime
577
+ // (createRuntime owns providers.rateLimiter). This path runs when a hand-built
578
+ // runtime object lacks rateLimiter. In-memory fixed-window counter, per-process
579
+ // (resets on restart, not shared across replicas). The window Map is bounded by
580
+ // a lazy, amortized sweep — NO timer — so aged-out one-shot identities do not
581
+ // accumulate unboundedly (mirrors runtime's createRateLimiter).
212
582
  const windows = new Map();
213
583
  const windowMs = 60000;
584
+ const sweepThreshold = 1024;
585
+ const sweepBudget = 256;
586
+
587
+ function sweepExpired(now) {
588
+ let scanned = 0;
589
+ for (const [key, slot] of windows) {
590
+ if (scanned >= sweepBudget) {
591
+ break;
592
+ }
593
+ scanned += 1;
594
+ if (now - slot.windowStart >= windowMs) {
595
+ windows.delete(key);
596
+ }
597
+ }
598
+ }
599
+
214
600
  return {
215
601
  allow(key, limit) {
216
602
  const now = Date.now();
603
+ if (windows.size >= sweepThreshold) {
604
+ sweepExpired(now);
605
+ }
217
606
  const slot = windows.get(key);
218
607
  if (!slot || now - slot.windowStart >= windowMs) {
219
608
  windows.set(key, { windowStart: now, count: 1 });
@@ -228,9 +617,9 @@ function createRateLimiter() {
228
617
  };
229
618
  }
230
619
 
231
- async function recordAuthDenied({ runtime, routeContext, reason }) {
620
+ async function recordAuthDenied({ runtime, routeContext, reason, correlationId = null }) {
232
621
  await recordProxyDecision({
233
- runtime, routeContext, identity: null, profile: null,
622
+ runtime, routeContext, identity: null, profile: null, correlationId,
234
623
  decision: "auth_denied",
235
624
  reason,
236
625
  enforced: true,
@@ -238,8 +627,9 @@ async function recordAuthDenied({ runtime, routeContext, reason }) {
238
627
  });
239
628
  }
240
629
 
241
- async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {} }) {
630
+ async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {}, metrics = null }) {
242
631
  const { haechi, config } = runtime;
632
+ const requestMode = config.policy.mode ?? config.mode;
243
633
 
244
634
  // Inspection needs to know the wire format and delta channel for this route.
245
635
  if (!routeContext.streaming) {
@@ -263,6 +653,8 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
263
653
  : { payload: json, blocked: false };
264
654
 
265
655
  if (requestResult.blocked) {
656
+ countDecision(metrics, { routeContext, mode: requestMode, decision: "blocked" });
657
+ metrics?.increment("haechi_blocks_total");
266
658
  writeJson(response, 403, {
267
659
  error: "haechi_policy_block",
268
660
  summary: requestResult.summary,
@@ -275,7 +667,8 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
275
667
  upstream: config.target.upstream,
276
668
  request,
277
669
  body: JSON.stringify(requestResult.payload),
278
- timeoutMs: config.limits.upstreamTimeoutMs
670
+ timeoutMs: config.limits.upstreamTimeoutMs,
671
+ metrics
279
672
  });
280
673
 
281
674
  const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode;
@@ -299,8 +692,13 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
299
692
 
300
693
  await recordStreamDecision({
301
694
  runtime, routeContext, blocked, summary, mode: streamMode,
302
- identity: authContext.identity ?? null, profile: authContext.profile ?? null
695
+ identity: authContext.identity ?? null, profile: authContext.profile ?? null,
696
+ correlationId: authContext.correlationId ?? null
303
697
  });
698
+ countDecision(metrics, { routeContext, mode: streamMode, decision: blocked ? "stream_blocked" : "stream_inspected" });
699
+ if (blocked) {
700
+ metrics?.increment("haechi_blocks_total");
701
+ }
304
702
  response.end();
305
703
  }
306
704
 
@@ -323,12 +721,13 @@ async function* emptyAsyncIterable() {
323
721
  // No upstream body to inspect.
324
722
  }
325
723
 
326
- async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode, identity = null, profile = null }) {
724
+ async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode, identity = null, profile = null, correlationId = null }) {
327
725
  if (typeof runtime.auditSink?.record !== "function") {
328
726
  return;
329
727
  }
330
728
  await runtime.auditSink.record({
331
729
  id: randomUUID(),
730
+ correlationId,
332
731
  timestamp: new Date().toISOString(),
333
732
  protocol: routeContext?.protocol ?? "proxy",
334
733
  operation: `response-stream:${routeContext?.operation ?? "unknown"}`,
@@ -345,7 +744,7 @@ async function recordStreamDecision({ runtime, routeContext, blocked, summary, m
345
744
  });
346
745
  }
347
746
 
348
- async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, authContext = {}, issuedTokens = [] }) {
747
+ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, authContext = {}, issuedTokens = [], metrics = null }) {
349
748
  const headers = Object.fromEntries(upstreamResponse.headers.entries());
350
749
 
351
750
  if (!runtime.config.responseProtection.enabled || !routeContext.protectResponse) {
@@ -353,7 +752,8 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
353
752
  return {
354
753
  status: upstreamResponse.status,
355
754
  headers,
356
- body: rawBody
755
+ body: rawBody,
756
+ decision: "forwarded"
357
757
  };
358
758
  }
359
759
 
@@ -371,6 +771,8 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
371
771
  responsePolicy,
372
772
  routeContext,
373
773
  runtime,
774
+ correlationId: authContext.correlationId ?? null,
775
+ metrics,
374
776
  hardDeny: true
375
777
  });
376
778
  }
@@ -387,6 +789,8 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
387
789
  responsePolicy,
388
790
  routeContext,
389
791
  runtime,
792
+ correlationId: authContext.correlationId ?? null,
793
+ metrics,
390
794
  hardDeny: true
391
795
  });
392
796
  }
@@ -400,7 +804,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
400
804
  rawBody,
401
805
  responsePolicy,
402
806
  routeContext,
403
- runtime
807
+ runtime,
808
+ correlationId: authContext.correlationId ?? null,
809
+ metrics
404
810
  });
405
811
  }
406
812
 
@@ -413,7 +819,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
413
819
  rawBody,
414
820
  responsePolicy,
415
821
  routeContext,
416
- runtime
822
+ runtime,
823
+ correlationId: authContext.correlationId ?? null,
824
+ metrics
417
825
  });
418
826
  }
419
827
 
@@ -429,7 +837,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
429
837
  rawBody,
430
838
  responsePolicy,
431
839
  routeContext,
432
- runtime
840
+ runtime,
841
+ correlationId: authContext.correlationId ?? null,
842
+ metrics
433
843
  });
434
844
  }
435
845
 
@@ -445,7 +855,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
445
855
  });
446
856
 
447
857
  if (result.blocked) {
858
+ metrics?.increment("haechi_blocks_total");
448
859
  return {
860
+ decision: "response_blocked",
449
861
  status: 502,
450
862
  headers: { "content-type": "application/json" },
451
863
  body: Buffer.from(`${JSON.stringify({
@@ -473,6 +885,7 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
473
885
  }
474
886
 
475
887
  return {
888
+ decision: "forwarded",
476
889
  status: upstreamResponse.status,
477
890
  headers: transformedJsonHeaders(headers),
478
891
  body: Buffer.from(`${JSON.stringify(responsePayload)}\n`)
@@ -497,7 +910,7 @@ function restoreTokens(value, tokenValues) {
497
910
  return value;
498
911
  }
499
912
 
500
- async function forward({ upstream, request, body, timeoutMs = null }) {
913
+ async function forward({ upstream, request, body, timeoutMs = null, metrics = null }) {
501
914
  const target = buildUpstreamUrl({ upstream, requestUrl: request.url });
502
915
  try {
503
916
  return await fetch(target, {
@@ -508,12 +921,14 @@ async function forward({ upstream, request, body, timeoutMs = null }) {
508
921
  });
509
922
  } catch (error) {
510
923
  if (error?.name === "TimeoutError" || error?.name === "AbortError") {
924
+ metrics?.increment("haechi_upstream_timeout_total");
511
925
  throw proxyError({
512
926
  statusCode: 504,
513
927
  errorCode: "haechi_upstream_timeout",
514
928
  message: `Upstream did not respond within limits.upstreamTimeoutMs (${timeoutMs})`
515
929
  });
516
930
  }
931
+ metrics?.increment("haechi_upstream_error_total");
517
932
  throw proxyError({
518
933
  statusCode: 502,
519
934
  errorCode: "haechi_upstream_unreachable",
@@ -569,9 +984,23 @@ function readBody(request, { maxBytes }) {
569
984
  chunks.push(chunk);
570
985
  });
571
986
  request.on("end", () => {
572
- if (!rejected) {
573
- resolve(Buffer.concat(chunks).toString("utf8"));
987
+ if (rejected) {
988
+ return;
989
+ }
990
+ const raw = Buffer.concat(chunks);
991
+ // Fail closed on a non-UTF-8 body: Buffer.toString("utf8") would otherwise
992
+ // replace invalid bytes with U+FFFD BEFORE detection runs, so a secret/PII
993
+ // could be smuggled past the regex rules via invalid encoding. Reject with
994
+ // a clear 4xx instead of lossily decoding.
995
+ if (raw.byteLength > 0 && !isUtf8(raw)) {
996
+ reject(proxyError({
997
+ statusCode: 400,
998
+ errorCode: "haechi_request_body_not_utf8",
999
+ message: "Request body is not valid UTF-8"
1000
+ }));
1001
+ return;
574
1002
  }
1003
+ resolve(raw.toString("utf8"));
575
1004
  });
576
1005
  request.on("error", (error) => {
577
1006
  if (!rejected) {
@@ -622,27 +1051,39 @@ async function unprotectedResponseDecision({
622
1051
  responsePolicy,
623
1052
  routeContext,
624
1053
  runtime,
1054
+ metrics = null,
1055
+ correlationId = null,
625
1056
  hardDeny = false
626
1057
  }) {
627
1058
  const allowed = responsePolicy.failureMode === "allow" && !hardDeny;
1059
+ const decision = allowed ? "response_unprotected_allowed" : "response_unprotected_blocked";
628
1060
  await recordProxyDecision({
629
1061
  runtime,
630
1062
  routeContext,
631
- decision: allowed ? "response_unprotected_allowed" : "response_unprotected_blocked",
1063
+ correlationId,
1064
+ decision,
632
1065
  reason,
633
1066
  enforced: !allowed,
634
1067
  blocked: !allowed
635
1068
  });
1069
+ // A forwarded-without-protection (or blocked-because-unprotectable) response is
1070
+ // an operability signal. The counter is incremented by name only: no label and never a payload value.
1071
+ metrics?.increment("haechi_response_unprotected_total");
636
1072
 
637
1073
  if (allowed) {
638
1074
  return {
1075
+ decision,
639
1076
  status: upstreamResponse.status,
640
1077
  headers,
641
1078
  body: rawBody
642
1079
  };
643
1080
  }
644
1081
 
1082
+ if (!hardDeny) {
1083
+ metrics?.increment("haechi_blocks_total");
1084
+ }
645
1085
  return {
1086
+ decision,
646
1087
  status: 502,
647
1088
  headers: { "content-type": "application/json" },
648
1089
  body: Buffer.from(`${JSON.stringify({
@@ -723,13 +1164,16 @@ async function cancelReader(reader) {
723
1164
  }
724
1165
  }
725
1166
 
726
- async function recordProxyDecision({ runtime, routeContext, decision, reason, enforced, blocked, identity = null, profile = null }) {
1167
+ async function recordProxyDecision({ runtime, routeContext, decision, reason, enforced, blocked, identity = null, profile = null, correlationId = null }) {
727
1168
  if (typeof runtime.auditSink?.record !== "function") {
728
1169
  return;
729
1170
  }
730
1171
 
731
1172
  await runtime.auditSink.record({
732
1173
  id: randomUUID(),
1174
+ // Per-request correlation id so a proxy-decision event shares the id of the
1175
+ // protect events of the same request. A UUID — never a payload/PII value.
1176
+ correlationId,
733
1177
  timestamp: new Date().toISOString(),
734
1178
  protocol: routeContext?.protocol ?? "proxy",
735
1179
  operation: routeContext ? `proxy:${routeContext.protocol}:${routeContext.routeId ?? "unknown"}` : "proxy",
@@ -789,6 +1233,27 @@ function shortHash(value) {
789
1233
  return createHash("sha256").update(value).digest("hex").slice(0, 12);
790
1234
  }
791
1235
 
1236
// X-Forwarded-Proto gate: reports whether the request claims to have reached
// the trusted TLS terminator over https.
//
// The header is looked up under its lowercase key. Only a non-empty string is
// considered; a missing, empty, or non-string value fails closed (false).
// When the value is a comma-joined list (e.g. "https, http" from a chain of
// proxies) only the FIRST entry is examined, after trimming whitespace and
// lowercasing. That entry is the hop closest to the client, the one the
// deployment expects its trusted terminator to have set. Anything other than
// exactly "https" in that position (http, blank, malformed) yields false.
function isForwardedHttps(request) {
  const headerValue = request?.headers?.["x-forwarded-proto"];
  const isUsableString = typeof headerValue === "string" && headerValue.length > 0;
  if (!isUsableString) {
    return false;
  }
  // split() on a string always yields at least one element, so the
  // destructured first hop is always defined here.
  const [clientFacingHop] = headerValue.split(",");
  return clientFacingHop.trim().toLowerCase() === "https";
}
1249
+
1250
+ // Loopback / remote-bind gate. Loopback always binds (plain http for dev). A
1251
+ // non-loopback bind is refused UNLESS allowRemoteBind is set. This is the SHARED
1252
+ // primitive the haechi-dashboard satellite reuses, so its contract is preserved
1253
+ // unchanged: { host, allowRemoteBind } and nothing more. The WS6 TLS fail-closed
1254
+ // rule is layered ON TOP of this in assertSafeProxyTransport (the proxy's own
1255
+ // requirement that a remote bind carry TLS), mirroring how the dashboard layers
1256
+ // its own tlsContext precedence after calling assertSafeProxyBind.
792
1257
  export function assertSafeProxyBind({ host = "127.0.0.1", allowRemoteBind = false } = {}) {
793
1258
  if (allowRemoteBind || isLoopbackHost(host)) {
794
1259
  return;
@@ -797,6 +1262,35 @@ export function assertSafeProxyBind({ host = "127.0.0.1", allowRemoteBind = fals
797
1262
  throw new Error(`Refusing to bind Haechi proxy to non-loopback host ${host}. Use --allow-remote-bind only for explicitly secured environments.`);
798
1263
  }
799
1264
 
1265
// WS6 fail-closed transport check for a REMOTE bind.
//
// Runs in addition to the loopback/remote gate (assertSafeProxyBind) and is
// kept separate from it so that primitive's { host, allowRemoteBind } contract
// stays untouched for other callers.
//
// Behaviour:
//   - Loopback host, or allowRemoteBind not set: returns without checking TLS.
//     (Loopback dev may run plain http; a non-loopback bind without
//     allowRemoteBind is expected to have been refused by assertSafeProxyBind.)
//   - Non-loopback host with allowRemoteBind: the listener must have either
//     usable TLS material (hasUsableTls: Haechi terminates TLS) or an explicit
//     trustForwardedProto acknowledgement (a trusted reverse proxy terminates
//     TLS in front of Haechi). With neither, this THROWS an Error so bearer
//     tokens and payloads are never served in plaintext on a remote bind.
export function assertSafeProxyTransport({
  host = "127.0.0.1",
  allowRemoteBind = false,
  hasUsableTls = false,
  trustForwardedProto = false
} = {}) {
  // The loopback test is evaluated first, exactly as in the combined guard.
  const isPermittedRemoteBind = !isLoopbackHost(host) && allowRemoteBind;
  if (!isPermittedRemoteBind) {
    return;
  }

  const transportIsProtected = hasUsableTls || trustForwardedProto;
  if (transportIsProtected) {
    return;
  }

  const explanation = [
    `Refusing to bind Haechi proxy to non-loopback host ${host} without TLS.`,
    "A remote bind would expose bearer tokens and payloads in plaintext.",
    "Set proxy.tls (a keyFile+certFile or pfxFile so Haechi terminates TLS),",
    "or set proxy.trustForwardedProto: true only when a trusted reverse proxy",
    "terminates TLS in front of Haechi (Haechi will then require X-Forwarded-Proto: https)."
  ].join(" ");
  throw new Error(explanation);
}
1293
+
800
1294
  function isLoopbackHost(host) {
801
1295
  const normalized = String(host).trim().toLowerCase();
802
1296
  return normalized === "localhost"