haechi 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.ko.md +97 -97
  2. package/README.md +2 -2
  3. package/SECURITY.md +19 -11
  4. package/docs/README.md +2 -0
  5. package/docs/current/api-stability.ko.md +26 -26
  6. package/docs/current/compliance-mapping.ko.md +53 -0
  7. package/docs/current/compliance-mapping.md +53 -0
  8. package/docs/current/config-version.ko.md +30 -0
  9. package/docs/current/config-version.md +51 -0
  10. package/docs/current/configuration.ko.md +242 -102
  11. package/docs/current/configuration.md +149 -9
  12. package/docs/current/operations-runbook.ko.md +121 -0
  13. package/docs/current/operations-runbook.md +204 -0
  14. package/docs/current/release-process.ko.md +19 -20
  15. package/docs/current/release-process.md +1 -2
  16. package/docs/current/reliability-hardening-track.ko.md +77 -0
  17. package/docs/current/reliability-hardening-track.md +77 -0
  18. package/docs/current/risk-register-release-gate.ko.md +26 -27
  19. package/docs/current/risk-register-release-gate.md +27 -20
  20. package/docs/current/security-whitepaper.ko.md +102 -0
  21. package/docs/current/security-whitepaper.md +102 -0
  22. package/docs/current/shared-responsibility.ko.md +33 -24
  23. package/docs/current/shared-responsibility.md +12 -3
  24. package/docs/current/threat-model.ko.md +12 -12
  25. package/docs/current/threat-model.md +3 -3
  26. package/haechi.config.example.json +19 -3
  27. package/package.json +6 -2
  28. package/packages/audit/index.mjs +26 -2
  29. package/packages/cli/bin/haechi.mjs +54 -8
  30. package/packages/cli/runtime.mjs +398 -10
  31. package/packages/core/index.mjs +189 -15
  32. package/packages/filter/index.mjs +299 -9
  33. package/packages/metrics/index.mjs +181 -0
  34. package/packages/proxy/index.mjs +535 -41
@@ -1,12 +1,35 @@
1
1
  import { createHash, randomUUID } from "node:crypto";
2
+ import { HARD_BLOCK_TYPES } from "../filter/index.mjs";
2
3
 
3
4
  const NO_ENFORCE_MODES = new Set(["dry-run", "report-only"]);
4
5
 
5
- export function createHaechi({ filterEngine, policyEngine, cryptoProvider, auditSink, tokenVault = null, mode = "dry-run" }) {
6
+ // Safe built-in ceiling on JSON nesting depth. collectStringEntries walks the
7
+ // tree recursively, so an attacker-shaped deeply-nested payload (within
8
+ // limits.maxRequestBytes) would otherwise overflow the call stack and crash the
9
+ // process uncaught. This default protects direct callers of the exported
10
+ // collectStringEntries; the proxy path threads the configurable
11
+ // limits.maxNestingDepth through createHaechi → protectJson instead.
12
+ export const DEFAULT_MAX_NESTING_DEPTH = 256;
13
+
14
+ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, auditSink, tokenVault = null, mode = "dry-run", limits = {}, precision = {} }) {
6
15
  if (!filterEngine || !policyEngine || !cryptoProvider || !auditSink) {
7
16
  throw new Error("Haechi requires filterEngine, policyEngine, cryptoProvider, and auditSink");
8
17
  }
9
18
 
19
+ // Resolve once at construction; protectJson and the stream protector reuse it.
20
+ const maxNestingDepth = Number.isInteger(limits.maxNestingDepth) && limits.maxNestingDepth > 0
21
+ ? limits.maxNestingDepth
22
+ : DEFAULT_MAX_NESTING_DEPTH;
23
+
24
+ // WS2c precision controls, resolved once. `minConfidence` is the precision dial
25
+ // (drop a detection below the threshold) and `allowlist` is the operator FP
26
+ // exception set. Both are FAIL-OPEN-FOR-PROTECTION: they may only TRIM
27
+ // precision-risky soft-type detections and can NEVER suppress a hard-block type
28
+ // (secret/api_key/kr_rrn/card) — that load-bearing exemption is enforced in
29
+ // applyPrecisionControls, not trusted to config. Default {} = current behavior.
30
+ const minConfidence = Number.isFinite(precision.minConfidence) ? precision.minConfidence : 0;
31
+ const allowlist = compileAllowlist(precision.allowlist);
32
+
10
33
  async function protectJson(payload, rawContext = {}) {
11
34
  // A per-request policy engine (a named profile selected from identity)
12
35
  // overrides the default. It is a control object, NOT data: strip it before
@@ -14,12 +37,21 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
14
37
  const { policyEngine: contextEngine, ...context } = rawContext;
15
38
  const effectiveMode = context.mode ?? mode;
16
39
  const engine = contextEngine ?? policyEngine;
17
- const entries = collectStringEntries(payload);
40
+ // Fail closed on an over-deep payload BEFORE any detection/transform work,
41
+ // mirroring the byte-limit path: the thrown error carries statusCode 413 so
42
+ // the proxy surfaces a clean 4xx rather than a stack-overflow 500.
43
+ const entries = collectStringEntries(payload, [], { maxDepth: maxNestingDepth });
18
44
  // `context` is threaded into detection as-is and is LOAD-BEARING: e.g.
19
45
  // `context.direction` ("request" | "response") gates direction-scoped rules
20
46
  // (injection) and the response-only marker exclusion in the filter engine.
21
47
  // The proxy sets it per direction; do not drop it here.
22
- const detections = await filterEngine.detect({ entries, context });
48
+ const rawDetections = await filterEngine.detect({ entries, context });
49
+ // WS2c precision controls run AFTER detect and BEFORE decide: drop a low-
50
+ // confidence soft-type detection (minConfidence) and suppress an allowlisted
51
+ // soft-type detection — never a hard-block type. `precisionAudit` carries the
52
+ // per-type counts of what was suppressed/dropped so the audit event records
53
+ // it (counts/types only, never the raw value). See applyPrecisionControls.
54
+ const { detections, precisionAudit } = applyPrecisionControls(rawDetections, { minConfidence, allowlist });
23
55
  const decisions = [];
24
56
 
25
57
  for (const detection of detections) {
@@ -46,7 +78,8 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
46
78
  blocked,
47
79
  payload,
48
80
  detections,
49
- decisions
81
+ decisions,
82
+ precisionAudit
50
83
  });
51
84
 
52
85
  await auditSink.record(auditEvent);
@@ -54,7 +87,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
54
87
  return {
55
88
  payload: protectedPayload,
56
89
  blocked,
57
- summary: summarize(detections, decisions),
90
+ summary: summarize(detections, decisions, precisionAudit),
58
91
  auditEvent,
59
92
  issuedTokens: [...issuedTokens]
60
93
  };
@@ -97,7 +130,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
97
130
  // Transform a complete, committed text segment.
98
131
  async function transformSegment(text) {
99
132
  const detections = await filterEngine.detect({
100
- entries: collectStringEntries(text),
133
+ entries: collectStringEntries(text, [], { maxDepth: maxNestingDepth }),
101
134
  context
102
135
  });
103
136
  const decisions = await decideAll(detections);
@@ -119,7 +152,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
119
152
  // delta text (e.g. tool-call arguments). Returns the mutated object.
120
153
  async protectFrameExtras(value) {
121
154
  const detections = await filterEngine.detect({
122
- entries: collectStringEntries(value),
155
+ entries: collectStringEntries(value, [], { maxDepth: maxNestingDepth }),
123
156
  context
124
157
  });
125
158
  if (detections.length === 0) {
@@ -143,7 +176,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
143
176
  async push(text) {
144
177
  pending += text;
145
178
  const detections = await filterEngine.detect({
146
- entries: collectStringEntries(pending),
179
+ entries: collectStringEntries(pending, [], { maxDepth: maxNestingDepth }),
147
180
  context
148
181
  });
149
182
  let commit = Math.max(0, pending.length - maxMatchBytes);
@@ -176,7 +209,14 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
176
209
  return { protectJson, createStreamProtector };
177
210
  }
178
211
 
179
- export function collectStringEntries(value, path = []) {
212
+ export function collectStringEntries(value, path = [], options = {}) {
213
+ // `options.maxDepth` bounds recursion to fail closed on a deeply-nested
214
+ // payload (which would otherwise overflow the call stack → uncaught crash).
215
+ // Additive third arg: existing 2-arg callers get DEFAULT_MAX_NESTING_DEPTH.
216
+ const maxDepth = Number.isInteger(options.maxDepth) && options.maxDepth > 0
217
+ ? options.maxDepth
218
+ : DEFAULT_MAX_NESTING_DEPTH;
219
+
180
220
  if (typeof value === "string") {
181
221
  return [{ path, pathText: safePathToString(path), value, kind: "value" }];
182
222
  }
@@ -187,8 +227,15 @@ export function collectStringEntries(value, path = []) {
187
227
  return [{ path, pathText: safePathToString(path), value: String(value), kind: "number" }];
188
228
  }
189
229
 
230
+ // Descending into an array/object would exceed the configured depth. Throw a
231
+ // fail-closed error carrying statusCode 413 (mirroring the byte-limit path) so
232
+ // the proxy returns a clean 4xx instead of a stack-overflow 500.
233
+ if ((Array.isArray(value) || (value && typeof value === "object")) && path.length >= maxDepth) {
234
+ throw nestingDepthError(maxDepth);
235
+ }
236
+
190
237
  if (Array.isArray(value)) {
191
- return value.flatMap((item, index) => collectStringEntries(item, path.concat(index)));
238
+ return value.flatMap((item, index) => collectStringEntries(item, path.concat(index), { maxDepth }));
192
239
  }
193
240
 
194
241
  if (value && typeof value === "object") {
@@ -196,13 +243,22 @@ export function collectStringEntries(value, path = []) {
196
243
  // otherwise be forwarded upstream in plaintext.
197
244
  return Object.entries(value).flatMap(([key, item]) => [
198
245
  { path: path.concat(key), pathText: safePathToString(path.concat(key)), value: key, kind: "key" },
199
- ...collectStringEntries(item, path.concat(key))
246
+ ...collectStringEntries(item, path.concat(key), { maxDepth })
200
247
  ]);
201
248
  }
202
249
 
203
250
  return [];
204
251
  }
205
252
 
253
/**
 * Build the error raised when a payload nests deeper than the allowed limit.
 *
 * The returned Error carries two extra own properties: `statusCode` (413) and
 * `errorCode` ("haechi_request_too_deeply_nested"). The error is only
 * constructed here; the caller is responsible for throwing it.
 * NOTE(review): these fields are presumably what the proxy's error handler
 * reads to answer with a 4xx — confirm against the proxy package.
 *
 * @param {number} maxDepth - The depth limit that was exceeded (echoed in the message).
 * @returns {Error} Error decorated with `statusCode` and `errorCode`.
 */
function nestingDepthError(maxDepth) {
  return Object.assign(
    new Error(`Request JSON nesting exceeds limits.maxNestingDepth (${maxDepth})`),
    {
      statusCode: 413,
      errorCode: "haechi_request_too_deeply_nested"
    }
  );
}
261
+
206
262
  export function pathToString(path) {
207
263
  return path.reduce((text, part, index) => {
208
264
  if (typeof part === "number") {
@@ -235,7 +291,7 @@ export function shapeOnly(value) {
235
291
  return { type: value === null ? "null" : typeof value };
236
292
  }
237
293
 
238
- export function summarize(detections, decisions) {
294
+ export function summarize(detections, decisions, precisionAudit = null) {
239
295
  const byType = {};
240
296
  const byAction = {};
241
297
 
@@ -247,11 +303,121 @@ export function summarize(detections, decisions) {
247
303
  byAction[decision.action] = (byAction[decision.action] ?? 0) + 1;
248
304
  }
249
305
 
250
- return {
306
+ const summary = {
251
307
  detectionCount: detections.length,
252
308
  byType,
253
309
  byAction
254
310
  };
311
+
312
+ // WS2c: additively record how many detections the precision controls removed
313
+ // before decide — `suppressedCount`/`suppressedByType` for allowlist FP
314
+ // exceptions and `droppedCount`/`droppedByType` for sub-minConfidence drops.
315
+ // Counts and types only; the matched value is NEVER recorded (no-plaintext-in-
316
+ // audit). Omitted entirely when nothing was removed, so 1.1 events are byte-
317
+ // identical and the audit hash-chain canonicalization is unaffected.
318
+ if (precisionAudit && precisionAudit.suppressedCount > 0) {
319
+ summary.suppressedCount = precisionAudit.suppressedCount;
320
+ summary.suppressedByType = precisionAudit.suppressedByType;
321
+ }
322
+ if (precisionAudit && precisionAudit.droppedCount > 0) {
323
+ summary.droppedCount = precisionAudit.droppedCount;
324
+ summary.droppedByType = precisionAudit.droppedByType;
325
+ }
326
+
327
+ return summary;
328
+ }
329
+
330
/**
 * Compile the configured precision allowlist into fast lookup structures.
 *
 * Each entry is either:
 *   - a bare string: an exact matched-value exception, or
 *   - an object `{ value?, path? }`: a value exception, a path exception
 *     (matched against a detection's `pathText`), or both. When both are
 *     present they are stored as a pair so that BOTH must match.
 *
 * Entries that carry neither a string `value` nor a string `path` are ignored.
 * That includes `null`/`undefined` entries: these used to throw a TypeError on
 * `entry.value` while the engine was being constructed; skipping them errs on
 * the side of suppressing less, never more.
 *
 * @param {unknown} allowlist - Raw `precision.allowlist` config value.
 * @returns {{ values: Set<string>, paths: Set<string>, pairs: Array<{ value: string, path: string }> } | null}
 *   Compiled lookup sets, or `null` when the input is not a non-empty array so
 *   callers can skip allowlist work entirely.
 */
function compileAllowlist(allowlist) {
  if (!Array.isArray(allowlist) || allowlist.length === 0) {
    return null;
  }
  const values = new Set();
  const paths = new Set();
  const pairs = [];
  for (const entry of allowlist) {
    if (typeof entry === "string") {
      values.add(entry);
      continue;
    }
    // A nullish entry has no properties to read; ignore it instead of crashing.
    if (entry === null || entry === undefined) {
      continue;
    }
    const hasValue = typeof entry.value === "string";
    const hasPath = typeof entry.path === "string";
    if (hasValue && hasPath) {
      pairs.push({ value: entry.value, path: entry.path });
    } else if (hasValue) {
      values.add(entry.value);
    } else if (hasPath) {
      paths.add(entry.path);
    }
  }
  return { values, paths, pairs };
}
359
+
360
/**
 * Report whether a detection is covered by a compiled allowlist.
 *
 * A detection matches when any of the following holds, checked in order:
 *   1. its string `value` is in `allowlist.values`;
 *   2. its string `pathText` is in `allowlist.paths`;
 *   3. some `allowlist.pairs` entry equals both its `value` and its `pathText`.
 *
 * @param {{ value?: unknown, pathText?: unknown }} detection - Detection to test.
 * @param {{ values: Set<string>, paths: Set<string>, pairs: Array<{ value: string, path: string }> } | null} allowlist
 *   Compiled allowlist; any falsy value means "nothing is allowlisted".
 * @returns {boolean} `true` when the detection is allowlisted.
 */
function isAllowlisted(detection, allowlist) {
  if (!allowlist) {
    return false;
  }
  const matchedValue = detection.value;
  const matchedPath = detection.pathText;
  return (
    (typeof matchedValue === "string" && allowlist.values.has(matchedValue)) ||
    (typeof matchedPath === "string" && allowlist.paths.has(matchedPath)) ||
    allowlist.pairs.some((pair) => matchedValue === pair.value && matchedPath === pair.path)
  );
}
381
+
382
/**
 * Precision controls: filter detections after detection and before the policy
 * decision, and report what was removed.
 *
 * For each detection, in order:
 *   1. If its type is in HARD_BLOCK_TYPES it is always kept — neither the
 *      allowlist nor `minConfidence` can remove it. (Per the module's notes
 *      these are secret/api_key/kr_rrn/card; the set itself is defined in the
 *      filter package.)
 *   2. Otherwise, if `isAllowlisted` matches, it is counted as suppressed.
 *   3. Otherwise, if it has a finite `confidence` below `minConfidence`, it is
 *      counted as dropped. A detection without a finite confidence is kept.
 *
 * Only counts and type names are reported; detection values are never copied
 * into the audit structure.
 *
 * @param {Iterable<{ type: string, confidence?: number }>} detections - Raw detections.
 * @param {{ minConfidence?: number, allowlist?: object | null }} [options]
 *   `allowlist` is the compiled form accepted by `isAllowlisted`.
 * @returns {{ detections: Array<object>, precisionAudit: { suppressedCount: number, suppressedByType: Object<string, number>, droppedCount: number, droppedByType: Object<string, number> } }}
 */
export function applyPrecisionControls(detections, { minConfidence = 0, allowlist = null } = {}) {
  const kept = [];
  const suppressedByType = {};
  const droppedByType = {};
  let suppressedCount = 0;
  let droppedCount = 0;

  // Read only OWN counters. A plain `counts[type] ?? 0` would pick up inherited
  // Object.prototype members for a type named e.g. "constructor" or "toString"
  // and turn the count into a string concatenation.
  const bump = (counts, type) => {
    const current = Object.prototype.hasOwnProperty.call(counts, type) ? counts[type] : 0;
    counts[type] = current + 1;
  };

  for (const detection of detections) {
    const hardBlock = HARD_BLOCK_TYPES.has(detection.type);
    // Allowlist suppression first (an operator-declared exception), but never
    // for a hard-block type.
    if (!hardBlock && isAllowlisted(detection, allowlist)) {
      bump(suppressedByType, detection.type);
      suppressedCount += 1;
      continue;
    }
    // minConfidence drop — only for non-hard-block types. A low-confidence
    // hard-block detection is kept and acted on.
    if (!hardBlock && Number.isFinite(detection.confidence) && detection.confidence < minConfidence) {
      bump(droppedByType, detection.type);
      droppedCount += 1;
      continue;
    }
    kept.push(detection);
  }

  return {
    detections: kept,
    precisionAudit: { suppressedCount, suppressedByType, droppedCount, droppedByType }
  };
}
256
422
 
257
423
  async function transformPayload(payload, detections, decisions, { context, cryptoProvider, tokenVault, enforced, issuedTokens = null }) {
@@ -385,7 +551,7 @@ async function replacementFor(segment, detection, decision, { context, cryptoPro
385
551
  }
386
552
  }
387
553
 
388
- function buildAuditEvent({ context, mode, enforced, blocked, payload, detections, decisions }) {
554
+ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections, decisions, precisionAudit = null }) {
389
555
  return {
390
556
  // Reader-facing audit-event schema version (frozen as part of the 1.0 API
391
557
  // contract — see docs/current/api-stability.md). Additive-only: a new field
@@ -394,6 +560,14 @@ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections
394
560
  // and so is self-consistent for hash-chain verification of new events.
395
561
  schemaVersion: "1",
396
562
  id: randomUUID(),
563
+ // Per-REQUEST correlation id (WS4-A). Additive top-level field: the proxy
564
+ // generates one randomUUID() per request and threads it into the protect
565
+ // context, so the request- and response-direction events of ONE request
566
+ // share it (and it appears in the structured error log for the same request).
567
+ // It is null when no context.correlationId is set, preserving the existing
568
+ // non-proxy protectJson() behavior and keeping the api-contract subset green.
569
+ // It is a UUID — never a payload/identity/PII value.
570
+ correlationId: context.correlationId ?? null,
397
571
  timestamp: new Date().toISOString(),
398
572
  protocol: context.protocol ?? "custom",
399
573
  operation: context.operation ?? "protect",
@@ -424,7 +598,7 @@ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections
424
598
  action: decisions[index]?.action ?? "unknown",
425
599
  enforced
426
600
  })),
427
- summary: summarize(detections, decisions)
601
+ summary: summarize(detections, decisions, precisionAudit)
428
602
  };
429
603
  }
430
604