haechi 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.ko.md +46 -11
  2. package/README.md +46 -11
  3. package/SECURITY.md +7 -1
  4. package/docs/README.md +2 -0
  5. package/docs/current/compliance-mapping.ko.md +53 -0
  6. package/docs/current/compliance-mapping.md +53 -0
  7. package/docs/current/config-version.ko.md +30 -0
  8. package/docs/current/config-version.md +51 -0
  9. package/docs/current/configuration.ko.md +165 -9
  10. package/docs/current/configuration.md +165 -9
  11. package/docs/current/operations-runbook.ko.md +155 -0
  12. package/docs/current/operations-runbook.md +241 -0
  13. package/docs/current/release-process.ko.md +5 -1
  14. package/docs/current/release-process.md +5 -1
  15. package/docs/current/risk-register-release-gate.ko.md +5 -3
  16. package/docs/current/risk-register-release-gate.md +13 -3
  17. package/docs/current/security-whitepaper.ko.md +102 -0
  18. package/docs/current/security-whitepaper.md +102 -0
  19. package/docs/current/shared-responsibility.ko.md +2 -2
  20. package/docs/current/shared-responsibility.md +2 -2
  21. package/docs/current/threat-model.ko.md +4 -2
  22. package/docs/current/threat-model.md +4 -2
  23. package/examples/local-proxy-demo/README.md +51 -0
  24. package/examples/local-proxy-demo/demo.mjs +144 -0
  25. package/examples/local-proxy-demo/demo.tape +19 -0
  26. package/examples/local-proxy-demo/live-demo.mjs +121 -0
  27. package/examples/local-proxy-demo/live-demo.tape +25 -0
  28. package/haechi.config.example.json +20 -3
  29. package/package.json +7 -2
  30. package/packages/audit/index.mjs +26 -2
  31. package/packages/cli/bin/haechi.mjs +57 -10
  32. package/packages/cli/runtime.mjs +402 -10
  33. package/packages/core/index.mjs +143 -8
  34. package/packages/filter/index.mjs +975 -12
  35. package/packages/metrics/index.mjs +181 -0
  36. package/packages/privacy-profiles/index.mjs +72 -3
  37. package/packages/protocol-adapters/index.mjs +99 -1
  38. package/packages/proxy/index.mjs +525 -40
  39. package/packages/stream-filter/index.mjs +69 -7
@@ -1,29 +1,207 @@
1
1
  import { createServer } from "node:http";
2
+ import { createServer as createHttpsServer } from "node:https";
2
3
  import { createHash, randomUUID } from "node:crypto";
3
4
  import { isUtf8 } from "node:buffer";
5
+ import { readFileSync } from "node:fs";
6
+ import { fileURLToPath } from "node:url";
4
7
  import { inspectResponseStream } from "../stream-filter/index.mjs";
5
8
 
6
9
  export const DEFAULT_PROXY_PORT = 11016;
7
10
 
8
- export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "127.0.0.1", allowRemoteBind = false }) {
9
- assertSafeProxyBind({ host, allowRemoteBind });
11
+ // The published package version, read once from the package's own manifest.
12
+ // package.json IS in the published tarball, and packages/proxy/index.mjs sits
13
+ // two levels below the repo root, so this URL resolves in both the dev tree and
14
+ // the packed tarball. Falls back to "unknown" rather than throwing — a version
15
+ // read must never break proxy startup.
16
+ export const HAECHI_VERSION = readPackageVersion();
17
+
18
// Reads the "version" field from the package.json two directories above this
// module. Never throws: any failure (missing file, unreadable file, invalid
// JSON, non-object manifest, non-string version) yields the literal "unknown".
function readPackageVersion() {
  const fallback = "unknown";
  try {
    const manifestPath = fileURLToPath(new URL("../../package.json", import.meta.url));
    // Destructuring a null manifest throws, which lands in the catch below —
    // the same outcome as any other unreadable manifest.
    const { version } = JSON.parse(readFileSync(manifestPath, "utf8"));
    if (typeof version !== "string") {
      return fallback;
    }
    return version;
  } catch {
    return fallback;
  }
}
27
+
28
+ // A tlsContext is usable iff it can actually terminate TLS: (key && cert) or pfx.
29
+ // This is the SINGLE source of truth for both the bind guard and server
30
+ // selection — the SAME shape the haechi-dashboard satellite uses, so the proxy
31
+ // and the dashboard share one TLS-material predicate. A non-null tlsContext that
32
+ // fails this check must fail closed (never green-light a remote bind that then
33
+ // builds a plaintext http server).
34
// Reports whether a tlsContext carries enough material to terminate TLS:
// a (key AND cert) pair, or a pfx bundle.
//
// ctx: the candidate TLS options object. Anything that is not a plain,
//      non-array object is rejected.
// Returns true only when the required entries are present AND non-empty.
//
// Plain truthiness is not enough here: an empty Buffer and an empty array are
// both truthy, so material read from a zero-byte file would otherwise be
// reported as usable. Zero-length material must fail closed.
export function hasUsableTlsMaterial(ctx) {
  if (!ctx || typeof ctx !== "object" || Array.isArray(ctx)) {
    return false;
  }
  const hasKeyCert = hasTlsBytes(ctx.key) && hasTlsBytes(ctx.cert);
  const hasPfx = hasTlsBytes(ctx.pfx);
  return hasKeyCert || hasPfx;
}

// True when a single key/cert/pfx value actually holds content.
// Strings must be non-blank, Buffers/typed arrays must have bytes, and arrays
// must contain at least one entry that itself holds content. Any other value
// (e.g. a { pem, passphrase } object entry) falls back to truthiness.
function hasTlsBytes(value) {
  if (typeof value === "string") {
    return value.trim().length > 0;
  }
  if (ArrayBuffer.isView(value)) {
    return value.byteLength > 0;
  }
  if (Array.isArray(value)) {
    return value.some(hasTlsBytes);
  }
  return Boolean(value);
}
42
+
43
+ // Structured logger honoring config.logging.format. In "json" mode it emits a
44
+ // single JSON line carrying a correlationId and an error NAME/class — NEVER a
45
+ // request/response payload, headers, token, or any PII. In "text" mode it
46
+ // preserves the prior human-readable console output.
47
// Builds the proxy's structured error logger.
//
// format: "json" emits one JSON object per line on stderr; any other value
//         (default "text") emits a human-readable "haechi <event>: k=v ..." line.
// Returns an object with a single error(event, fields) method.
//
// The logger is called from the request handler's catch path, so it is written
// to never throw and never let a field value reshape the log line:
//   - "level" and "event" are reserved and always win over same-named fields;
//   - fields that cannot be serialized (cycle, BigInt) are dropped rather than
//     raising from inside error handling;
//   - in text mode CR/LF inside a value is flattened to a space so one call
//     always produces exactly one line.
function createLogger(format = "text") {
  const json = format === "json";
  return {
    error(event, fields = {}) {
      const safeFields = fields ?? {};
      if (json) {
        const record = { level: "error", event, ...safeFields };
        // Re-assert the reserved keys after the spread (key order is kept).
        record.level = "error";
        record.event = event;
        let line;
        try {
          line = JSON.stringify(record);
        } catch {
          line = JSON.stringify({ level: "error", event, fieldsDropped: true });
        }
        process.stderr.write(`${line}\n`);
        return;
      }
      const parts = Object.entries(safeFields)
        .filter(([, value]) => value !== undefined && value !== null)
        .map(([key, value]) => `${key}=${String(value).replace(/[\r\n]+/g, " ")}`);
      process.stderr.write(`haechi ${event}${parts.length ? `: ${parts.join(" ")}` : ""}\n`);
    }
  };
}
62
+
63
+ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "127.0.0.1", allowRemoteBind = false, tlsContext, trustForwardedProto }) {
10
64
  const { haechi, config, protocolAdapter } = runtime;
11
- const rateLimiter = createRateLimiter();
12
65
 
13
- const server = createServer(async (request, response) => {
66
+ // WS6 TLS hardening. The tlsContext / trustForwardedProto source of truth is
67
+ // the normalized config (proxy.tls is loaded into a tlsContext at startup;
68
+ // proxy.trustForwardedProto is a boolean), but an explicit argument overrides
69
+ // it (so a hand-built runtime / a test can drive these directly). hasUsableTls
70
+ // is the same predicate the dashboard satellite uses.
71
+ const resolvedTlsContext = tlsContext !== undefined ? tlsContext : (config.proxy?.tls ?? null);
72
+ const resolvedTrustForwardedProto = trustForwardedProto !== undefined
73
+ ? trustForwardedProto
74
+ : Boolean(config.proxy?.trustForwardedProto);
75
+ const usableTls = hasUsableTlsMaterial(resolvedTlsContext);
76
+
77
+ // Bind guard, two layers. (1) the loopback/remote-bind gate (shared with the
78
+ // dashboard). (2) WS6: a remote bind ADDITIONALLY requires a usable tlsContext
79
+ // OR an explicit trustForwardedProto acknowledgement (a trusted reverse proxy
80
+ // terminates TLS in front of Haechi). Otherwise it THROWS at startup — the
81
+ // proxy must NEVER serve bearer tokens + payloads in plaintext on a remote
82
+ // bind. Loopback dev is unaffected (plain http, no TLS).
83
+ assertSafeProxyBind({ host, allowRemoteBind });
84
+ assertSafeProxyTransport({
85
+ host,
86
+ allowRemoteBind,
87
+ hasUsableTls: usableTls,
88
+ trustForwardedProto: resolvedTrustForwardedProto
89
+ });
90
+
91
+ // When the remote bind rests on trustForwardedProto (plain http behind a
92
+ // trusted TLS hop) we REJECT any protected-route request whose
93
+ // X-Forwarded-Proto is not https — a plaintext request that bypassed the hop.
94
+ // This is only meaningful for a non-loopback, plain-http, trust-forwarded
95
+ // listener; a loopback dev server or an https-terminating server never gates.
96
+ const enforceForwardedProto = !isLoopbackHost(host)
97
+ && allowRemoteBind
98
+ && !usableTls
99
+ && resolvedTrustForwardedProto;
100
+ // The runtime owns the rate limiter (an injectable collaborator). Fall back to
101
+ // a local per-process default so a hand-built runtime object without a
102
+ // rateLimiter still works (backward-compatible). The default and the runtime's
103
+ // default share the same allow(key, limit) -> boolean fixed-window contract.
104
+ const rateLimiter = runtime.rateLimiter ?? createRateLimiter();
105
+ // The metrics collector is owned by the runtime (injectable). Fall back to a
106
+ // no-op so a hand-built runtime object without metrics still works.
107
+ const metrics = runtime.metrics ?? noopMetrics();
108
+ const logger = createLogger(config.logging?.format ?? "text");
109
+
110
+ // WS4-B backpressure: a configurable global max-in-flight ceiling. 0 (default)
111
+ // disables it, preserving 1.1 behavior. When > 0 and the live count is at the
112
+ // ceiling, a NEW non-exempt request is rejected 503 + Retry-After BEFORE auth
113
+ // and body-read. The /__haechi/* observability routes are EXEMPT so metrics +
114
+ // liveness can be scraped under saturation.
115
+ const maxInFlight = config.limits.maxInFlight ?? 0;
116
+ const retryAfterSeconds = Math.max(1, Math.ceil((config.limits.shutdownGraceMs ?? 10000) / 1000));
117
+ // Live in-flight request count for the drain-tracking AND the ceiling. A
118
+ // bounded integer — never identity/value bearing.
119
+ let inFlight = 0;
120
+ // Resolves once in-flight drains to zero during a graceful close().
121
+ let drained = null;
122
+ let resolveDrained = null;
123
+
124
+ const requestHandler = async (request, response) => {
125
+ // Per-REQUEST correlation id: generated here, threaded into every protect
126
+ // context (so the audit events of one request share it) AND into the error
127
+ // log. A UUID — never a payload/identity/PII value.
128
+ const correlationId = randomUUID();
129
+ const startedAt = process.hrtime.bigint();
130
+ let routeId = "unknown";
131
+
132
+ // Observability routes are exempt from the in-flight ceiling and are NOT
133
+ // counted toward it: liveness/readiness/metrics must answer under saturation.
134
+ const exemptRoute = isObservabilityRoute(request);
135
+
136
+ // Backpressure: reject at the ceiling BEFORE doing any work. Counted in
137
+ // metrics by a bounded enum decision; the body is never read.
138
+ if (!exemptRoute && maxInFlight > 0 && inFlight >= maxInFlight) {
139
+ metrics.increment("haechi_overloaded_total");
140
+ response.writeHead(503, {
141
+ "content-type": "application/json",
142
+ "retry-after": String(retryAfterSeconds)
143
+ });
144
+ response.end(`${JSON.stringify({ error: "haechi_overloaded", message: "Server at max in-flight capacity; retry later" }, null, 2)}\n`);
145
+ return;
146
+ }
147
+
148
+ // Track in-flight for graceful drain + the ceiling. Decrement in finally so a
149
+ // throw/early-return can never leak the count (which would wedge close()).
150
+ let counted = false;
151
+ if (!exemptRoute) {
152
+ inFlight += 1;
153
+ counted = true;
154
+ }
14
155
  try {
156
+ // WS6 forwarded-proto enforcement. When this is a non-loopback plain-http
157
+ // listener resting on trustForwardedProto (a trusted reverse proxy
158
+ // terminates TLS in front of Haechi), a request whose X-Forwarded-Proto is
159
+ // not "https" arrived over plaintext that BYPASSED the TLS hop — reject it
160
+ // fail-closed BEFORE auth and body-read, so a protected route never serves
161
+ // tokens/payloads over an unverified-plaintext hop. The /__haechi/* liveness
162
+ // routes are EXEMPT (they leak nothing) so a health check / metrics scrape
163
+ // from the loopback sidecar still answers.
164
+ if (enforceForwardedProto && !exemptRoute && !isForwardedHttps(request)) {
165
+ writeJson(response, 403, {
166
+ error: "haechi_forwarded_proto_required",
167
+ message: "This proxy runs behind a trusted TLS-terminating hop (proxy.trustForwardedProto). A request without X-Forwarded-Proto: https bypassed the hop and is refused."
168
+ });
169
+ return;
170
+ }
171
+ // Health + telemetry endpoints are unauthenticated and checked BEFORE auth
172
+ // and body-read. They live under the reserved /__haechi/* prefix.
173
+ if (request.method === "GET" && request.url === "/__haechi/live") {
174
+ // Cheap process liveness. Always 200 while the event loop is serving.
175
+ writeJson(response, 200, { ok: true, version: HAECHI_VERSION });
176
+ return;
177
+ }
178
+ if (request.method === "GET" && request.url === "/__haechi/ready") {
179
+ await handleReady({ runtime, response });
180
+ return;
181
+ }
15
182
  if (request.method === "GET" && request.url === "/__haechi/health") {
16
- // Intentionally unauthenticated; exposes only the mode.
17
- writeJson(response, 200, { ok: true, mode: config.mode });
183
+ // Back-compat: keep the original shape (ok + mode) and add version.
184
+ writeJson(response, 200, { ok: true, mode: config.mode, version: HAECHI_VERSION });
185
+ return;
186
+ }
187
+ if (request.method === "GET" && request.url === "/__haechi/metrics") {
188
+ if (!config.metrics?.enabled) {
189
+ writeJson(response, 404, { error: "haechi_metrics_disabled", message: "Metrics endpoint is disabled (metrics.enabled: false)" });
190
+ return;
191
+ }
192
+ response.writeHead(200, { "content-type": "text/plain; version=0.0.4; charset=utf-8" });
193
+ response.end(metrics.render());
18
194
  return;
19
195
  }
20
196
 
21
197
  assertRelativeProxyTarget(request.url);
22
198
  const routeContext = protocolAdapter.classifyRequest(request);
199
+ routeId = routeContext?.routeId ?? "unknown";
200
+ const mode = config.policy.mode ?? config.mode;
23
201
 
24
202
  // Authenticate, resolve the policy profile, and rate-limit BEFORE reading
25
203
  // the body, so a denied/throttled request cannot stream a large body.
26
- const gate = await authorizeRequest({ runtime, request, routeContext, rateLimiter });
204
+ const gate = await authorizeRequest({ runtime, request, routeContext, rateLimiter, metrics, correlationId });
27
205
  if (gate.denied) {
28
206
  writeJson(response, gate.denied.status, {
29
207
  error: gate.denied.error,
@@ -32,7 +210,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
32
210
  return;
33
211
  }
34
212
  const { identity, profile, policyEngine, modelAllowlist } = gate;
35
- const authContext = { identity, profile, policyEngine };
213
+ const authContext = { identity, profile, policyEngine, correlationId };
36
214
 
37
215
  const body = await readBody(request, {
38
216
  maxBytes: config.limits.maxRequestBytes
@@ -42,12 +220,14 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
42
220
  // Model allowlist runs after body read (the model field is in the body).
43
221
  if (modelAllowlist && typeof json?.model === "string" && !modelAllowlist.includes(json.model)) {
44
222
  await recordProxyDecision({
45
- runtime, routeContext, identity, profile,
223
+ runtime, routeContext, identity, profile, correlationId,
46
224
  decision: "model_not_allowed",
47
225
  reason: `model:${json.model}`,
48
226
  enforced: true,
49
227
  blocked: true
50
228
  });
229
+ countDecision(metrics, { routeContext, mode, decision: "model_not_allowed" });
230
+ metrics.increment("haechi_blocks_total");
51
231
  writeJson(response, 403, {
52
232
  error: "haechi_model_not_allowed",
53
233
  message: `Model not allowed: ${json.model}`
@@ -57,7 +237,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
57
237
 
58
238
  if (isStreamingRequest(json, routeContext)) {
59
239
  if (config.streaming.requestMode === "inspect") {
60
- await handleInspectedStream({ runtime, request, response, routeContext, json, authContext });
240
+ await handleInspectedStream({ runtime, request, response, routeContext, json, authContext, metrics });
61
241
  return;
62
242
  }
63
243
 
@@ -67,16 +247,19 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
67
247
  routeContext,
68
248
  identity,
69
249
  profile,
250
+ correlationId,
70
251
  decision: "streaming_request_pass_through",
71
252
  reason: "streaming_request_pass_through",
72
253
  enforced: false,
73
254
  blocked: false
74
255
  });
256
+ countDecision(metrics, { routeContext, mode, decision: "forwarded" });
75
257
  const upstreamResponse = await forward({
76
258
  upstream: config.target.upstream,
77
259
  request,
78
260
  body,
79
- timeoutMs: config.limits.upstreamTimeoutMs
261
+ timeoutMs: config.limits.upstreamTimeoutMs,
262
+ metrics
80
263
  });
81
264
  const { body: rawBody } = await readUpstreamBody(upstreamResponse);
82
265
  response.writeHead(upstreamResponse.status, Object.fromEntries(upstreamResponse.headers.entries()));
@@ -84,6 +267,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
84
267
  return;
85
268
  }
86
269
 
270
+ countDecision(metrics, { routeContext, mode, decision: "streaming_blocked" });
87
271
  writeJson(response, 501, {
88
272
  error: "haechi_streaming_unsupported",
89
273
  message: "Streaming requests are blocked unless streaming.requestMode is set to pass-through or inspect"
@@ -97,11 +281,13 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
97
281
  ...authContext,
98
282
  operation: `request:${routeContext.operation}`,
99
283
  direction: "request",
100
- mode: config.policy.mode ?? config.mode
284
+ mode
101
285
  })
102
286
  : { payload: json, blocked: false };
103
287
 
104
288
  if (result.blocked) {
289
+ countDecision(metrics, { routeContext, mode, decision: "blocked" });
290
+ metrics.increment("haechi_blocks_total");
105
291
  writeJson(response, 403, {
106
292
  error: "haechi_policy_block",
107
293
  summary: result.summary,
@@ -114,7 +300,8 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
114
300
  upstream: config.target.upstream,
115
301
  request,
116
302
  body: JSON.stringify(result.payload),
117
- timeoutMs: config.limits.upstreamTimeoutMs
303
+ timeoutMs: config.limits.upstreamTimeoutMs,
304
+ metrics
118
305
  });
119
306
 
120
307
  const forwarded = await maybeProtectResponse({
@@ -122,58 +309,234 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
122
309
  routeContext,
123
310
  runtime,
124
311
  authContext,
125
- issuedTokens: result.issuedTokens ?? []
312
+ issuedTokens: result.issuedTokens ?? [],
313
+ metrics
126
314
  });
127
315
 
316
+ countDecision(metrics, {
317
+ routeContext,
318
+ mode,
319
+ decision: forwarded.decision ?? "forwarded"
320
+ });
128
321
  response.writeHead(forwarded.status, forwarded.headers);
129
322
  response.end(forwarded.body);
130
323
  } catch (error) {
131
324
  const expected = typeof error?.statusCode === "number";
132
325
  if (!expected) {
133
- console.error(`haechi proxy internal error: ${error?.stack ?? error?.message ?? error}`);
326
+ // Carry the error NAME/class + correlationId only — NEVER the payload,
327
+ // headers, token, or any PII.
328
+ logger.error("proxy_internal_error", {
329
+ correlationId,
330
+ errorName: error?.name ?? "Error",
331
+ statusCode: error?.statusCode ?? 500
332
+ });
333
+ metrics.increment("haechi_internal_error_total");
134
334
  }
135
335
  writeJson(response, error.statusCode ?? 500, {
136
336
  error: error.errorCode ?? "haechi_proxy_error",
137
337
  message: expected ? error.message : "Internal proxy error"
138
338
  });
339
+ } finally {
340
+ const elapsedSeconds = Number(process.hrtime.bigint() - startedAt) / 1e9;
341
+ // route label is a bounded route id (or "unknown") — never an identity/value.
342
+ metrics.observe("haechi_request_duration_seconds", elapsedSeconds, { route: routeId });
343
+ if (counted) {
344
+ inFlight -= 1;
345
+ // If a graceful close() is awaiting drain and we just hit zero, resolve it.
346
+ if (resolveDrained && inFlight <= 0) {
347
+ resolveDrained();
348
+ }
349
+ }
139
350
  }
140
- });
351
+ };
352
+
353
+ // Server selection: a usable tlsContext → an https listener terminating TLS in
354
+ // this process; otherwise plain http (unchanged for loopback/dev). The bind
355
+ // guard above already guarantees a non-loopback bind without usable TLS carries
356
+ // an explicit trustForwardedProto acknowledgement, so a plain-http server is
357
+ // only ever exposed remotely behind a trusted TLS hop (and gated below).
358
+ const server = usableTls
359
+ ? createHttpsServer(resolvedTlsContext, requestHandler)
360
+ : createServer(requestHandler);
361
+ const servesHttps = usableTls;
362
+
363
+ // WS4-B tuned timeouts. Only override Node's server defaults when a value is
364
+ // configured (null = leave Node's default untouched, so behavior is unchanged
365
+ // unless an operator opts in). requestTimeout caps the whole request; a value
366
+ // of 0 disables the timeout (Node semantics) — validated upstream.
367
+ if (config.limits.requestTimeoutMs !== null && config.limits.requestTimeoutMs !== undefined) {
368
+ server.requestTimeout = config.limits.requestTimeoutMs;
369
+ }
370
+ if (config.limits.headersTimeoutMs !== null && config.limits.headersTimeoutMs !== undefined) {
371
+ server.headersTimeout = config.limits.headersTimeoutMs;
372
+ }
141
373
 
142
374
  return {
143
375
  server,
376
+ // Whether THIS listener terminates TLS (https) — the CLI/log line reflects
377
+ // the right scheme, and a caller can assert the selected transport.
378
+ servesHttps,
144
379
  listen() {
145
380
  return new Promise((resolve) => {
146
381
  server.listen(port, host, () => {
147
382
  const address = server.address();
148
- resolve({ host: address.address, port: address.port });
383
+ resolve({ host: address.address, port: address.port, tls: servesHttps });
149
384
  });
150
385
  });
151
386
  },
387
+ // WS4-B graceful drain. Stop accepting new connections, immediately close
388
+ // idle keep-alive sockets, and wait for in-flight requests to drain. After a
389
+ // configurable grace period (limits.shutdownGraceMs) force-close any lingering
390
+ // socket so a stuck keep-alive cannot hold shutdown open forever. The grace
391
+ // timer is .unref()-ed and cleared on a clean drain so `node --test` never
392
+ // hangs on a leaked timer.
152
393
  close() {
394
+ const graceMs = config.limits.shutdownGraceMs ?? 10000;
153
395
  return new Promise((resolve, reject) => {
154
- server.close((error) => error ? reject(error) : resolve());
396
+ let settled = false;
397
+ let graceTimer = null;
398
+
399
+ const finish = (error) => {
400
+ if (settled) {
401
+ return;
402
+ }
403
+ settled = true;
404
+ if (graceTimer) {
405
+ clearTimeout(graceTimer);
406
+ graceTimer = null;
407
+ }
408
+ resolveDrained = null;
409
+ drained = null;
410
+ if (error) {
411
+ reject(error);
412
+ } else {
413
+ resolve();
414
+ }
415
+ };
416
+
417
+ // Stop accepting new connections; the callback fires once all
418
+ // connections are closed (idle ones we close now, in-flight ones once
419
+ // they drain or the grace timer force-closes them).
420
+ server.close((error) => finish(error));
421
+
422
+ // Close idle keep-alive sockets immediately so they don't keep the
423
+ // server open waiting for a request that will never come.
424
+ if (typeof server.closeIdleConnections === "function") {
425
+ server.closeIdleConnections();
426
+ }
427
+
428
+ // If nothing is in flight, the close callback will fire promptly; still
429
+ // arm a drain resolver in case requests are mid-flight.
430
+ if (inFlight <= 0) {
431
+ // No in-flight work; closeIdleConnections handled keep-alive, so
432
+ // server.close() resolves on its own. Nothing more to wait for.
433
+ return;
434
+ }
435
+
436
+ // Wait for in-flight requests to drain, then force-close stragglers
437
+ // (the force close covers a request whose socket lingers after we stop).
438
+ drained = new Promise((res) => { resolveDrained = res; });
439
+ drained.then(() => {
440
+ if (typeof server.closeAllConnections === "function") {
441
+ server.closeAllConnections();
442
+ }
443
+ });
444
+
445
+ // Grace cap: after graceMs force every remaining connection closed so a
446
+ // lingering keep-alive socket cannot hold shutdown open forever. unref()
447
+ // so this timer alone never keeps the event loop (and `node --test`) alive.
448
+ graceTimer = setTimeout(() => {
449
+ if (typeof server.closeAllConnections === "function") {
450
+ server.closeAllConnections();
451
+ }
452
+ }, graceMs);
453
+ if (typeof graceTimer.unref === "function") {
454
+ graceTimer.unref();
455
+ }
155
456
  });
156
457
  }
157
458
  };
158
459
  }
159
460
 
461
+ // True for the reserved /__haechi/* observability routes (live/ready/health/
462
+ // metrics). These are EXEMPT from the in-flight ceiling so liveness + metrics
463
+ // stay scrapable under saturation, and they do not count toward drain tracking.
464
// The exact request URLs served by the built-in observability handlers.
const OBSERVABILITY_ROUTE_URLS = new Set([
  "/__haechi/live",
  "/__haechi/ready",
  "/__haechi/health",
  "/__haechi/metrics"
]);

// True only for a GET to one of the observability URLs listed above.
//
// request: the incoming request; only its method and url are read.
//
// The match is exact rather than a "/__haechi/" prefix test. The handlers
// compare request.url by strict equality, so any other URL under the prefix is
// not an observability route at all — it falls through to normal proxying. It
// must therefore keep counting toward the in-flight ceiling and stay subject to
// the forwarded-proto gate instead of inheriting the exemption.
function isObservabilityRoute(request) {
  return request.method === "GET" && OBSERVABILITY_ROUTE_URLS.has(request.url);
}
467
+
468
+ // Readiness probe (WS4-A). FAIL-CLOSED: a gateway that cannot write its audit
469
+ // log is NOT ready (503). Runs the audit sink's optional ready()/healthCheck()
470
+ // — if the sink lacks one, audit is treated as ready. The checks object carries
471
+ // only booleans/enums; never a path, payload, or PII value.
472
// Readiness probe handler. Answers 200 when the audit sink reports itself
// writable and 503 otherwise, with a body of { ready, version, checks }.
//
// runtime:  reads runtime.auditSink and, when present, calls its ready() or
//           healthCheck() method (ready() preferred) with the sink as `this`.
// response: passed straight through to writeJson.
//
// The probe is chosen as the first of ready/healthCheck that is actually a
// FUNCTION. Selecting with `??` alone would let a non-function `ready`
// property (for example a boolean flag) shadow a real healthCheck(), skipping
// the probe and reporting the sink as ready without ever checking it.
//
// A probe passes only when it resolves to true or to an object with
// ok === true; a throw/rejection counts as not writable. A sink with no probe
// method cannot be disproved and is treated as ready.
async function handleReady({ runtime, response }) {
  const sink = runtime.auditSink;
  const probe = [sink?.ready, sink?.healthCheck].find((candidate) => typeof candidate === "function");

  let auditWritable = true;
  if (probe) {
    try {
      const result = await probe.call(sink);
      auditWritable = result === true || result?.ok === true;
    } catch {
      auditWritable = false;
    }
  }

  const checks = { auditWritable };
  const ready = auditWritable;
  writeJson(response, ready ? 200 : 503, { ready, version: HAECHI_VERSION, checks });
}
494
+
495
+ // Increment the request counter with a bounded enum label set. route is a route
496
+ // id, mode is the policy mode, decision is a fixed decision class — NEVER an
497
+ // identity/token/detected value (no-PII-in-telemetry invariant).
498
// Bumps the "haechi_requests_total" counter with route / mode / decision
// labels. A missing collector is a silent no-op. route and mode fall back to
// "unknown" when absent; decision is passed through as given.
function countDecision(metrics, { routeContext, mode, decision }) {
  if (metrics === null || metrics === undefined) {
    return;
  }
  const labels = {
    route: routeContext?.routeId ?? "unknown",
    mode: mode ?? "unknown",
    decision
  };
  metrics.increment("haechi_requests_total", labels);
}
505
+
506
+ // Backward-compat fallback for a hand-built runtime object without metrics: a
507
+ // no-op collector with the same increment/observe/render contract.
508
// Stand-in metrics collector: increment and observe do nothing, and render
// yields an empty string.
function noopMetrics() {
  const collector = {};
  collector.increment = function increment() {};
  collector.observe = function observe() {};
  collector.render = function render() {
    return "";
  };
  return collector;
}
517
+
160
518
  // Authenticate → resolve policy profile → rate-limit. Returns the request's
161
519
  // identity/profile/policyEngine/modelAllowlist, or a denial. Auth is required
162
520
  // exactly when an authProvider is configured (auth.provider !== "none").
163
- async function authorizeRequest({ runtime, request, routeContext, rateLimiter }) {
164
- const { authProvider, policyProfiles } = runtime;
521
+ async function authorizeRequest({ runtime, request, routeContext, rateLimiter, metrics, correlationId }) {
522
+ const { authProvider, policyProfiles, config } = runtime;
523
+ const mode = config.policy.mode ?? config.mode;
165
524
  let identity = null;
166
525
 
167
526
  if (authProvider) {
168
527
  try {
169
528
  identity = await authProvider.authenticate(request);
170
529
  } catch {
171
- await recordAuthDenied({ runtime, routeContext, reason: "provider_error" });
530
+ await recordAuthDenied({ runtime, routeContext, reason: "provider_error", correlationId });
531
+ countDecision(metrics, { routeContext, mode, decision: "auth_denied" });
532
+ metrics.increment("haechi_auth_denied_total");
172
533
  return { denied: { status: 401, error: "haechi_auth_denied", message: "Authentication failed" } };
173
534
  }
174
535
  if (!identity) {
175
536
  const reason = hasBearerHeader(request) ? "invalid_token" : "no_token";
176
- await recordAuthDenied({ runtime, routeContext, reason });
537
+ await recordAuthDenied({ runtime, routeContext, reason, correlationId });
538
+ countDecision(metrics, { routeContext, mode, decision: "auth_denied" });
539
+ metrics.increment("haechi_auth_denied_total");
177
540
  return { denied: { status: 401, error: "haechi_auth_denied", message: "Authentication required" } };
178
541
  }
179
542
  }
@@ -182,14 +545,22 @@ async function authorizeRequest({ runtime, request, routeContext, rateLimiter })
182
545
 
183
546
  if (resolved.rate && resolved.rate.requestsPerMinute) {
184
547
  const key = identity?.id ?? "anonymous";
185
- if (!rateLimiter.allow(key, resolved.rate.requestsPerMinute)) {
548
+ // allow() may return a boolean OR a Promise<boolean>: the built-in default is
549
+ // synchronous, but a shared-store (e.g. Redis-backed) limiter is inherently
550
+ // async. We await unconditionally — `await <boolean>` returns the boolean
551
+ // unchanged, so the sync default keeps working, while `!somePromise` (always
552
+ // false, because a Promise is truthy) can no longer let an async limiter
553
+ // silently fail open. See haechi-ratelimit-redis (shared-store satellite).
554
+ if (!(await rateLimiter.allow(key, resolved.rate.requestsPerMinute))) {
186
555
  await recordProxyDecision({
187
- runtime, routeContext, identity, profile: resolved.profile,
556
+ runtime, routeContext, identity, profile: resolved.profile, correlationId,
188
557
  decision: "rate_limited",
189
558
  reason: `rate:${resolved.rate.requestsPerMinute}`,
190
559
  enforced: true,
191
560
  blocked: true
192
561
  });
562
+ countDecision(metrics, { routeContext, mode, decision: "rate_limited" });
563
+ metrics.increment("haechi_rate_limited_total");
193
564
  return { denied: { status: 429, error: "haechi_rate_limited", message: "Rate limit exceeded" } };
194
565
  }
195
566
  }
@@ -208,13 +579,36 @@ function hasBearerHeader(request) {
208
579
  }
209
580
 
210
581
  function createRateLimiter() {
211
- // In-memory fixed-window counter. Per-process: resets on restart, not shared
212
- // across replicas acceptable for a single-process self-hosted preview.
582
+ // Backward-compat fallback ONLY: the canonical default lives in the runtime
583
+ // (createRuntime owns providers.rateLimiter). This path runs when a hand-built
584
+ // runtime object lacks rateLimiter. In-memory fixed-window counter, per-process
585
+ // (resets on restart, not shared across replicas). The window Map is bounded by
586
+ // a lazy, amortized sweep — NO timer — so aged-out one-shot identities do not
587
+ // accumulate unboundedly (mirrors runtime's createRateLimiter).
213
588
  const windows = new Map();
214
589
  const windowMs = 60000;
590
+ const sweepThreshold = 1024;
591
+ const sweepBudget = 256;
592
+
593
+ function sweepExpired(now) {
594
+ let scanned = 0;
595
+ for (const [key, slot] of windows) {
596
+ if (scanned >= sweepBudget) {
597
+ break;
598
+ }
599
+ scanned += 1;
600
+ if (now - slot.windowStart >= windowMs) {
601
+ windows.delete(key);
602
+ }
603
+ }
604
+ }
605
+
215
606
  return {
216
607
  allow(key, limit) {
217
608
  const now = Date.now();
609
+ if (windows.size >= sweepThreshold) {
610
+ sweepExpired(now);
611
+ }
218
612
  const slot = windows.get(key);
219
613
  if (!slot || now - slot.windowStart >= windowMs) {
220
614
  windows.set(key, { windowStart: now, count: 1 });
@@ -229,9 +623,9 @@ function createRateLimiter() {
229
623
  };
230
624
  }
231
625
 
232
- async function recordAuthDenied({ runtime, routeContext, reason }) {
626
+ async function recordAuthDenied({ runtime, routeContext, reason, correlationId = null }) {
233
627
  await recordProxyDecision({
234
- runtime, routeContext, identity: null, profile: null,
628
+ runtime, routeContext, identity: null, profile: null, correlationId,
235
629
  decision: "auth_denied",
236
630
  reason,
237
631
  enforced: true,
@@ -239,8 +633,9 @@ async function recordAuthDenied({ runtime, routeContext, reason }) {
239
633
  });
240
634
  }
241
635
 
242
- async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {} }) {
636
+ async function handleInspectedStream({ runtime, request, response, routeContext, json, authContext = {}, metrics = null }) {
243
637
  const { haechi, config } = runtime;
638
+ const requestMode = config.policy.mode ?? config.mode;
244
639
 
245
640
  // Inspection needs to know the wire format and delta channel for this route.
246
641
  if (!routeContext.streaming) {
@@ -264,6 +659,8 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
264
659
  : { payload: json, blocked: false };
265
660
 
266
661
  if (requestResult.blocked) {
662
+ countDecision(metrics, { routeContext, mode: requestMode, decision: "blocked" });
663
+ metrics?.increment("haechi_blocks_total");
267
664
  writeJson(response, 403, {
268
665
  error: "haechi_policy_block",
269
666
  summary: requestResult.summary,
@@ -276,7 +673,8 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
276
673
  upstream: config.target.upstream,
277
674
  request,
278
675
  body: JSON.stringify(requestResult.payload),
279
- timeoutMs: config.limits.upstreamTimeoutMs
676
+ timeoutMs: config.limits.upstreamTimeoutMs,
677
+ metrics
280
678
  });
281
679
 
282
680
  const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode;
@@ -300,8 +698,13 @@ async function handleInspectedStream({ runtime, request, response, routeContext,
300
698
 
301
699
  await recordStreamDecision({
302
700
  runtime, routeContext, blocked, summary, mode: streamMode,
303
- identity: authContext.identity ?? null, profile: authContext.profile ?? null
701
+ identity: authContext.identity ?? null, profile: authContext.profile ?? null,
702
+ correlationId: authContext.correlationId ?? null
304
703
  });
704
+ countDecision(metrics, { routeContext, mode: streamMode, decision: blocked ? "stream_blocked" : "stream_inspected" });
705
+ if (blocked) {
706
+ metrics?.increment("haechi_blocks_total");
707
+ }
305
708
  response.end();
306
709
  }
307
710
 
@@ -324,12 +727,13 @@ async function* emptyAsyncIterable() {
324
727
  // No upstream body to inspect.
325
728
  }
326
729
 
327
- async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode, identity = null, profile = null }) {
730
+ async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode, identity = null, profile = null, correlationId = null }) {
328
731
  if (typeof runtime.auditSink?.record !== "function") {
329
732
  return;
330
733
  }
331
734
  await runtime.auditSink.record({
332
735
  id: randomUUID(),
736
+ correlationId,
333
737
  timestamp: new Date().toISOString(),
334
738
  protocol: routeContext?.protocol ?? "proxy",
335
739
  operation: `response-stream:${routeContext?.operation ?? "unknown"}`,
@@ -346,7 +750,7 @@ async function recordStreamDecision({ runtime, routeContext, blocked, summary, m
346
750
  });
347
751
  }
348
752
 
349
- async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, authContext = {}, issuedTokens = [] }) {
753
+ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, authContext = {}, issuedTokens = [], metrics = null }) {
350
754
  const headers = Object.fromEntries(upstreamResponse.headers.entries());
351
755
 
352
756
  if (!runtime.config.responseProtection.enabled || !routeContext.protectResponse) {
@@ -354,7 +758,8 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
354
758
  return {
355
759
  status: upstreamResponse.status,
356
760
  headers,
357
- body: rawBody
761
+ body: rawBody,
762
+ decision: "forwarded"
358
763
  };
359
764
  }
360
765
 
@@ -372,6 +777,8 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
372
777
  responsePolicy,
373
778
  routeContext,
374
779
  runtime,
780
+ correlationId: authContext.correlationId ?? null,
781
+ metrics,
375
782
  hardDeny: true
376
783
  });
377
784
  }
@@ -388,6 +795,8 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
388
795
  responsePolicy,
389
796
  routeContext,
390
797
  runtime,
798
+ correlationId: authContext.correlationId ?? null,
799
+ metrics,
391
800
  hardDeny: true
392
801
  });
393
802
  }
@@ -401,7 +810,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
401
810
  rawBody,
402
811
  responsePolicy,
403
812
  routeContext,
404
- runtime
813
+ runtime,
814
+ correlationId: authContext.correlationId ?? null,
815
+ metrics
405
816
  });
406
817
  }
407
818
 
@@ -414,7 +825,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
414
825
  rawBody,
415
826
  responsePolicy,
416
827
  routeContext,
417
- runtime
828
+ runtime,
829
+ correlationId: authContext.correlationId ?? null,
830
+ metrics
418
831
  });
419
832
  }
420
833
 
@@ -430,7 +843,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
430
843
  rawBody,
431
844
  responsePolicy,
432
845
  routeContext,
433
- runtime
846
+ runtime,
847
+ correlationId: authContext.correlationId ?? null,
848
+ metrics
434
849
  });
435
850
  }
436
851
 
@@ -446,7 +861,9 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
446
861
  });
447
862
 
448
863
  if (result.blocked) {
864
+ metrics?.increment("haechi_blocks_total");
449
865
  return {
866
+ decision: "response_blocked",
450
867
  status: 502,
451
868
  headers: { "content-type": "application/json" },
452
869
  body: Buffer.from(`${JSON.stringify({
@@ -474,6 +891,7 @@ async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, a
474
891
  }
475
892
 
476
893
  return {
894
+ decision: "forwarded",
477
895
  status: upstreamResponse.status,
478
896
  headers: transformedJsonHeaders(headers),
479
897
  body: Buffer.from(`${JSON.stringify(responsePayload)}\n`)
@@ -498,7 +916,7 @@ function restoreTokens(value, tokenValues) {
498
916
  return value;
499
917
  }
500
918
 
501
- async function forward({ upstream, request, body, timeoutMs = null }) {
919
+ async function forward({ upstream, request, body, timeoutMs = null, metrics = null }) {
502
920
  const target = buildUpstreamUrl({ upstream, requestUrl: request.url });
503
921
  try {
504
922
  return await fetch(target, {
@@ -509,12 +927,14 @@ async function forward({ upstream, request, body, timeoutMs = null }) {
509
927
  });
510
928
  } catch (error) {
511
929
  if (error?.name === "TimeoutError" || error?.name === "AbortError") {
930
+ metrics?.increment("haechi_upstream_timeout_total");
512
931
  throw proxyError({
513
932
  statusCode: 504,
514
933
  errorCode: "haechi_upstream_timeout",
515
934
  message: `Upstream did not respond within limits.upstreamTimeoutMs (${timeoutMs})`
516
935
  });
517
936
  }
937
+ metrics?.increment("haechi_upstream_error_total");
518
938
  throw proxyError({
519
939
  statusCode: 502,
520
940
  errorCode: "haechi_upstream_unreachable",
@@ -637,27 +1057,39 @@ async function unprotectedResponseDecision({
637
1057
  responsePolicy,
638
1058
  routeContext,
639
1059
  runtime,
1060
+ metrics = null,
1061
+ correlationId = null,
640
1062
  hardDeny = false
641
1063
  }) {
642
1064
  const allowed = responsePolicy.failureMode === "allow" && !hardDeny;
1065
+ const decision = allowed ? "response_unprotected_allowed" : "response_unprotected_blocked";
643
1066
  await recordProxyDecision({
644
1067
  runtime,
645
1068
  routeContext,
646
- decision: allowed ? "response_unprotected_allowed" : "response_unprotected_blocked",
1069
+ correlationId,
1070
+ decision,
647
1071
  reason,
648
1072
  enforced: !allowed,
649
1073
  blocked: !allowed
650
1074
  });
1075
+ // A forwarded-without-protection (or blocked-because-unprotectable) response is
1076
+ // an operability signal. The label is the bounded reason enum, never a value.
1077
+ metrics?.increment("haechi_response_unprotected_total");
651
1078
 
652
1079
  if (allowed) {
653
1080
  return {
1081
+ decision,
654
1082
  status: upstreamResponse.status,
655
1083
  headers,
656
1084
  body: rawBody
657
1085
  };
658
1086
  }
659
1087
 
1088
+ if (!hardDeny) {
1089
+ metrics?.increment("haechi_blocks_total");
1090
+ }
660
1091
  return {
1092
+ decision,
661
1093
  status: 502,
662
1094
  headers: { "content-type": "application/json" },
663
1095
  body: Buffer.from(`${JSON.stringify({
@@ -738,13 +1170,16 @@ async function cancelReader(reader) {
738
1170
  }
739
1171
  }
740
1172
 
741
- async function recordProxyDecision({ runtime, routeContext, decision, reason, enforced, blocked, identity = null, profile = null }) {
1173
+ async function recordProxyDecision({ runtime, routeContext, decision, reason, enforced, blocked, identity = null, profile = null, correlationId = null }) {
742
1174
  if (typeof runtime.auditSink?.record !== "function") {
743
1175
  return;
744
1176
  }
745
1177
 
746
1178
  await runtime.auditSink.record({
747
1179
  id: randomUUID(),
1180
+ // Per-request correlation id so a proxy-decision event shares the id of the
1181
+ // protect events of the same request. A UUID — never a payload/PII value.
1182
+ correlationId,
748
1183
  timestamp: new Date().toISOString(),
749
1184
  protocol: routeContext?.protocol ?? "proxy",
750
1185
  operation: routeContext ? `proxy:${routeContext.protocol}:${routeContext.routeId ?? "unknown"}` : "proxy",
@@ -804,6 +1239,27 @@ function shortHash(value) {
804
1239
  return createHash("sha256").update(value).digest("hex").slice(0, 12);
805
1240
  }
806
1241
 
1242
// Reports whether a request's X-Forwarded-Proto header declares https.
//
// The header is looked up under its lowercase name. When the value is a
// comma-separated list (e.g. "https, http"), only the FIRST entry is examined,
// after trimming whitespace and lowercasing. A header that is absent, empty,
// not a string, or whose first entry is anything other than "https" produces
// false, so callers fail closed.
//
// NOTE(review): treating the first entry as the hop written by the trusted TLS
// terminator assumes the fronting proxy sets (rather than appends to) this
// header — confirm against the deployment's proxy configuration.
function isForwardedHttps(request) {
  const headerValue = request?.headers?.["x-forwarded-proto"];
  if (typeof headerValue === "string" && headerValue.length > 0) {
    const [firstEntry] = headerValue.split(",");
    return firstEntry.trim().toLowerCase() === "https";
  }
  return false;
}
1255
+
1256
+ // Loopback / remote-bind gate. Loopback always binds (plain http for dev). A
1257
+ // non-loopback bind is refused UNLESS allowRemoteBind is set. This is the SHARED
1258
+ // primitive the haechi-dashboard satellite reuses, so its contract is preserved
1259
+ // unchanged: { host, allowRemoteBind } and nothing more. The WS6 TLS fail-closed
1260
+ // rule is layered ON TOP of this in assertSafeProxyTransport (the proxy's own
1261
+ // requirement that a remote bind carry TLS), mirroring how the dashboard layers
1262
+ // its own tlsContext precedence after calling assertSafeProxyBind.
807
1263
  export function assertSafeProxyBind({ host = "127.0.0.1", allowRemoteBind = false } = {}) {
808
1264
  if (allowRemoteBind || isLoopbackHost(host)) {
809
1265
  return;
@@ -812,6 +1268,35 @@ export function assertSafeProxyBind({ host = "127.0.0.1", allowRemoteBind = fals
812
1268
  throw new Error(`Refusing to bind Haechi proxy to non-loopback host ${host}. Use --allow-remote-bind only for explicitly secured environments.`);
813
1269
  }
814
1270
 
1271
// Fail-closed transport check for a remote (non-loopback) proxy listener.
//
// Options:
//   host                - bind address; defaults to "127.0.0.1".
//   allowRemoteBind     - whether a non-loopback bind was explicitly permitted.
//   hasUsableTls        - whether TLS material is available so Haechi itself
//                         terminates TLS.
//   trustForwardedProto - explicit acknowledgement that a trusted reverse proxy
//                         terminates TLS in front of Haechi.
//
// Returns undefined without checking anything when the host is loopback or when
// allowRemoteBind is not set (this function does not itself refuse that case).
// For a permitted remote bind it throws an Error unless at least one of
// hasUsableTls / trustForwardedProto is set, so bearer tokens and payloads are
// never served in plaintext on a remote listener.
//
// Kept separate from assertSafeProxyBind so that function's
// { host, allowRemoteBind } contract is left untouched.
export function assertSafeProxyTransport({
  host = "127.0.0.1",
  allowRemoteBind = false,
  hasUsableTls = false,
  trustForwardedProto = false
} = {}) {
  // Only a non-loopback bind that was explicitly allowed needs the TLS check.
  const remoteBindInEffect = !isLoopbackHost(host) && allowRemoteBind;
  if (!remoteBindInEffect) {
    return;
  }

  const transportSecured = hasUsableTls || trustForwardedProto;
  if (transportSecured) {
    return;
  }

  const explanation = [
    `Refusing to bind Haechi proxy to non-loopback host ${host} without TLS.`,
    `A remote bind would expose bearer tokens and payloads in plaintext.`,
    `Set proxy.tls (a keyFile+certFile or pfxFile so Haechi terminates TLS),`,
    `or set proxy.trustForwardedProto: true only when a trusted reverse proxy`,
    `terminates TLS in front of Haechi (Haechi will then require X-Forwarded-Proto: https).`
  ].join(" ");
  throw new Error(explanation);
}
1299
+
815
1300
  function isLoopbackHost(host) {
816
1301
  const normalized = String(host).trim().toLowerCase();
817
1302
  return normalized === "localhost"