npm - haechi - Versions diffs - 1.1.1 → 1.2.0 - Mend

haechi 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/README.ko.md +97 -97
package/README.md +2 -2
package/SECURITY.md +19 -11
package/docs/README.md +2 -0
package/docs/current/api-stability.ko.md +26 -26
package/docs/current/compliance-mapping.ko.md +53 -0
package/docs/current/compliance-mapping.md +53 -0
package/docs/current/config-version.ko.md +30 -0
package/docs/current/config-version.md +51 -0
package/docs/current/configuration.ko.md +242 -102
package/docs/current/configuration.md +149 -9
package/docs/current/operations-runbook.ko.md +121 -0
package/docs/current/operations-runbook.md +204 -0
package/docs/current/release-process.ko.md +19 -20
package/docs/current/release-process.md +1 -2
package/docs/current/reliability-hardening-track.ko.md +77 -0
package/docs/current/reliability-hardening-track.md +77 -0
package/docs/current/risk-register-release-gate.ko.md +26 -27
package/docs/current/risk-register-release-gate.md +27 -20
package/docs/current/security-whitepaper.ko.md +102 -0
package/docs/current/security-whitepaper.md +102 -0
package/docs/current/shared-responsibility.ko.md +33 -24
package/docs/current/shared-responsibility.md +12 -3
package/docs/current/threat-model.ko.md +12 -12
package/docs/current/threat-model.md +3 -3
package/haechi.config.example.json +19 -3
package/package.json +6 -2
package/packages/audit/index.mjs +26 -2
package/packages/cli/bin/haechi.mjs +54 -8
package/packages/cli/runtime.mjs +398 -10
package/packages/core/index.mjs +189 -15
package/packages/filter/index.mjs +299 -9
package/packages/metrics/index.mjs +181 -0
package/packages/proxy/index.mjs +535 -41

package/packages/core/index.mjs CHANGED

@@ -1,12 +1,35 @@
 import { createHash, randomUUID } from "node:crypto";
+import { HARD_BLOCK_TYPES } from "../filter/index.mjs";
 const NO_ENFORCE_MODES = new Set(["dry-run", "report-only"]);
-export function createHaechi({ filterEngine, policyEngine, cryptoProvider, auditSink, tokenVault = null, mode = "dry-run" }) {
+// Safe built-in ceiling on JSON nesting depth. collectStringEntries walks the
+// tree recursively, so an attacker-shaped deeply-nested payload (within
+// limits.maxRequestBytes) would otherwise overflow the call stack and crash the
+// process uncaught. This default protects direct callers of the exported
+// collectStringEntries; the proxy path threads the configurable
+// limits.maxNestingDepth through createHaechi → protectJson instead.
+export const DEFAULT_MAX_NESTING_DEPTH = 256;
+export function createHaechi({ filterEngine, policyEngine, cryptoProvider, auditSink, tokenVault = null, mode = "dry-run", limits = {}, precision = {} }) {
   if (!filterEngine || !policyEngine || !cryptoProvider || !auditSink) {
     throw new Error("Haechi requires filterEngine, policyEngine, cryptoProvider, and auditSink");
   }
+  // Resolve once at construction; protectJson and the stream protector reuse it.
+  const maxNestingDepth = Number.isInteger(limits.maxNestingDepth) && limits.maxNestingDepth > 0
+    ? limits.maxNestingDepth
+    : DEFAULT_MAX_NESTING_DEPTH;
+  // WS2c precision controls, resolved once. `minConfidence` is the precision dial
+  // (drop a detection below the threshold) and `allowlist` is the operator FP
+  // exception set. Both are FAIL-OPEN-FOR-PROTECTION: they may only TRIM
+  // precision-risky soft-type detections and can NEVER suppress a hard-block type
+  // (secret/api_key/kr_rrn/card) — that load-bearing exemption is enforced in
+  // applyPrecisionControls, not trusted to config. Default {} = current behavior.
+  const minConfidence = Number.isFinite(precision.minConfidence) ? precision.minConfidence : 0;
+  const allowlist = compileAllowlist(precision.allowlist);
   async function protectJson(payload, rawContext = {}) {
     // A per-request policy engine (a named profile selected from identity)
     // overrides the default. It is a control object, NOT data: strip it before
@@ -14,12 +37,21 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
     const { policyEngine: contextEngine, ...context } = rawContext;
     const effectiveMode = context.mode ?? mode;
     const engine = contextEngine ?? policyEngine;
-    const entries = collectStringEntries(payload);
+    // Fail closed on an over-deep payload BEFORE any detection/transform work,
+    // mirroring the byte-limit path: the thrown error carries statusCode 413 so
+    // the proxy surfaces a clean 4xx rather than a stack-overflow 500.
+    const entries = collectStringEntries(payload, [], { maxDepth: maxNestingDepth });
     // `context` is threaded into detection as-is and is LOAD-BEARING: e.g.
     // `context.direction` ("request" | "response") gates direction-scoped rules
     // (injection) and the response-only marker exclusion in the filter engine.
     // The proxy sets it per direction; do not drop it here.
-    const detections = await filterEngine.detect({ entries, context });
+    const rawDetections = await filterEngine.detect({ entries, context });
+    // WS2c precision controls run AFTER detect and BEFORE decide: drop a low-
+    // confidence soft-type detection (minConfidence) and suppress an allowlisted
+    // soft-type detection — never a hard-block type. `precisionAudit` carries the
+    // per-type counts of what was suppressed/dropped so the audit event records
+    // it (counts/types only, never the raw value). See applyPrecisionControls.
+    const { detections, precisionAudit } = applyPrecisionControls(rawDetections, { minConfidence, allowlist });
     const decisions = [];
     for (const detection of detections) {
@@ -46,7 +78,8 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
       blocked,
       payload,
       detections,
-      decisions
+      decisions,
+      precisionAudit
     });
     await auditSink.record(auditEvent);
@@ -54,7 +87,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
     return {
       payload: protectedPayload,
       blocked,
-      summary: summarize(detections, decisions),
+      summary: summarize(detections, decisions, precisionAudit),
       auditEvent,
       issuedTokens: [...issuedTokens]
     };
@@ -97,7 +130,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
     // Transform a complete, committed text segment.
     async function transformSegment(text) {
       const detections = await filterEngine.detect({
-        entries: collectStringEntries(text),
+        entries: collectStringEntries(text, [], { maxDepth: maxNestingDepth }),
         context
       });
       const decisions = await decideAll(detections);
@@ -119,7 +152,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
       // delta text (e.g. tool-call arguments). Returns the mutated object.
       async protectFrameExtras(value) {
         const detections = await filterEngine.detect({
-          entries: collectStringEntries(value),
+          entries: collectStringEntries(value, [], { maxDepth: maxNestingDepth }),
           context
         });
         if (detections.length === 0) {
@@ -143,7 +176,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
       async push(text) {
         pending += text;
         const detections = await filterEngine.detect({
-          entries: collectStringEntries(pending),
+          entries: collectStringEntries(pending, [], { maxDepth: maxNestingDepth }),
           context
         });
         let commit = Math.max(0, pending.length - maxMatchBytes);
@@ -176,7 +209,14 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
   return { protectJson, createStreamProtector };
 }
-export function collectStringEntries(value, path = []) {
+export function collectStringEntries(value, path = [], options = {}) {
+  // `options.maxDepth` bounds recursion to fail closed on a deeply-nested
+  // payload (which would otherwise overflow the call stack → uncaught crash).
+  // Additive third arg: existing 2-arg callers get DEFAULT_MAX_NESTING_DEPTH.
+  const maxDepth = Number.isInteger(options.maxDepth) && options.maxDepth > 0
+    ? options.maxDepth
+    : DEFAULT_MAX_NESTING_DEPTH;
   if (typeof value === "string") {
     return [{ path, pathText: safePathToString(path), value, kind: "value" }];
   }
@@ -187,8 +227,15 @@ export function collectStringEntries(value, path = []) {
     return [{ path, pathText: safePathToString(path), value: String(value), kind: "number" }];
   }
+  // Descending into an array/object would exceed the configured depth. Throw a
+  // fail-closed error carrying statusCode 413 (mirroring the byte-limit path) so
+  // the proxy returns a clean 4xx instead of a stack-overflow 500.
+  if ((Array.isArray(value) || (value && typeof value === "object")) && path.length >= maxDepth) {
+    throw nestingDepthError(maxDepth);
+  }
   if (Array.isArray(value)) {
-    return value.flatMap((item, index) => collectStringEntries(item, path.concat(index)));
+    return value.flatMap((item, index) => collectStringEntries(item, path.concat(index), { maxDepth }));
   }
   if (value && typeof value === "object") {
@@ -196,13 +243,22 @@ export function collectStringEntries(value, path = []) {
     // otherwise be forwarded upstream in plaintext.
     return Object.entries(value).flatMap(([key, item]) => [
       { path: path.concat(key), pathText: safePathToString(path.concat(key)), value: key, kind: "key" },
-      ...collectStringEntries(item, path.concat(key))
+      ...collectStringEntries(item, path.concat(key), { maxDepth })
     ]);
   }
   return [];
 }
+function nestingDepthError(maxDepth) {
+  const error = new Error(`Request JSON nesting exceeds limits.maxNestingDepth (${maxDepth})`);
+  // statusCode/errorCode let the proxy catch-all surface this as a clean 4xx,
+  // exactly like the request-body-too-large guard in the proxy body reader.
+  error.statusCode = 413;
+  error.errorCode = "haechi_request_too_deeply_nested";
+  return error;
+}
 export function pathToString(path) {
   return path.reduce((text, part, index) => {
     if (typeof part === "number") {
@@ -235,7 +291,7 @@ export function shapeOnly(value) {
   return { type: value === null ? "null" : typeof value };
 }
-export function summarize(detections, decisions) {
+export function summarize(detections, decisions, precisionAudit = null) {
   const byType = {};
   const byAction = {};
@@ -247,11 +303,121 @@ export function summarize(detections, decisions) {
     byAction[decision.action] = (byAction[decision.action] ?? 0) + 1;
   }
-  return {
+  const summary = {
     detectionCount: detections.length,
     byType,
     byAction
   };
+  // WS2c: additively record how many detections the precision controls removed
+  // before decide — `suppressedCount`/`suppressedByType` for allowlist FP
+  // exceptions and `droppedCount`/`droppedByType` for sub-minConfidence drops.
+  // Counts and types only; the matched value is NEVER recorded (no-plaintext-in-
+  // audit). Omitted entirely when nothing was removed, so 1.1 events are byte-
+  // identical and the audit hash-chain canonicalization is unaffected.
+  if (precisionAudit && precisionAudit.suppressedCount > 0) {
+    summary.suppressedCount = precisionAudit.suppressedCount;
+    summary.suppressedByType = precisionAudit.suppressedByType;
+  }
+  if (precisionAudit && precisionAudit.droppedCount > 0) {
+    summary.droppedCount = precisionAudit.droppedCount;
+    summary.droppedByType = precisionAudit.droppedByType;
+  }
+  return summary;
+}
+// Compile the configured allowlist into fast lookup sets. An entry is either a
+// bare string (an exact matched-VALUE exception) or an object { value?, path? }
+// (value exception, JSON-path exception via the PII-safe pathText, or both —
+// when both are present BOTH must match). Returns null when there is nothing to
+// allowlist so the hot path can skip the work entirely.
+function compileAllowlist(allowlist) {
+  if (!Array.isArray(allowlist) || allowlist.length === 0) {
+    return null;
+  }
+  const values = new Set();
+  const paths = new Set();
+  const pairs = [];
+  for (const entry of allowlist) {
+    if (typeof entry === "string") {
+      values.add(entry);
+      continue;
+    }
+    const hasValue = typeof entry.value === "string";
+    const hasPath = typeof entry.path === "string";
+    if (hasValue && hasPath) {
+      pairs.push({ value: entry.value, path: entry.path });
+    } else if (hasValue) {
+      values.add(entry.value);
+    } else if (hasPath) {
+      paths.add(entry.path);
+    }
+  }
+  return { values, paths, pairs };
+}
+// Does this detection's matched value / JSON path match an allowlist entry? The
+// path comparison uses the PII-safe `pathText` (the same hashed path the audit
+// records), so an operator allowlists `key_<hash>.…` — never a raw key name.
+function isAllowlisted(detection, allowlist) {
+  if (!allowlist) {
+    return false;
+  }
+  const { values, paths, pairs } = allowlist;
+  if (typeof detection.value === "string" && values.has(detection.value)) {
+    return true;
+  }
+  if (typeof detection.pathText === "string" && paths.has(detection.pathText)) {
+    return true;
+  }
+  for (const pair of pairs) {
+    if (detection.value === pair.value && detection.pathText === pair.path) {
+      return true;
+    }
+  }
+  return false;
+}
+// WS2c precision controls — run AFTER detect, BEFORE decide. Returns the kept
+// detections plus a precisionAudit of what was removed (counts/types only).
+//
+// HARD-BLOCK INVARIANT (load-bearing, fail-closed): a detection whose type is in
+// HARD_BLOCK_TYPES (secret/api_key/kr_rrn/card) is NEVER removed here — neither a
+// low confidence nor an allowlist entry can suppress it. minConfidence trims only
+// the precision-risky SOFT types; an allowlist entry that would suppress a hard-
+// block type is ignored and the detection still fires. This guard lives in core
+// (not trusted to config) so the invariant holds for every caller.
+export function applyPrecisionControls(detections, { minConfidence = 0, allowlist = null } = {}) {
+  const kept = [];
+  const suppressedByType = {};
+  const droppedByType = {};
+  let suppressedCount = 0;
+  let droppedCount = 0;
+  for (const detection of detections) {
+    const hardBlock = HARD_BLOCK_TYPES.has(detection.type);
+    // Allowlist suppression first (an operator-declared FP exception), but never
+    // for a hard-block type.
+    if (!hardBlock && isAllowlisted(detection, allowlist)) {
+      suppressedByType[detection.type] = (suppressedByType[detection.type] ?? 0) + 1;
+      suppressedCount += 1;
+      continue;
+    }
+    // minConfidence drop — only for soft types. A low-confidence hard-block
+    // detection (e.g. a card at confidence 0.75) is kept and acted on.
+    if (!hardBlock && Number.isFinite(detection.confidence) && detection.confidence < minConfidence) {
+      droppedByType[detection.type] = (droppedByType[detection.type] ?? 0) + 1;
+      droppedCount += 1;
+      continue;
+    }
+    kept.push(detection);
+  }
+  return {
+    detections: kept,
+    precisionAudit: { suppressedCount, suppressedByType, droppedCount, droppedByType }
+  };
 }
 async function transformPayload(payload, detections, decisions, { context, cryptoProvider, tokenVault, enforced, issuedTokens = null }) {
@@ -385,7 +551,7 @@ async function replacementFor(segment, detection, decision, { context, cryptoPro
   }
 }
-function buildAuditEvent({ context, mode, enforced, blocked, payload, detections, decisions }) {
+function buildAuditEvent({ context, mode, enforced, blocked, payload, detections, decisions, precisionAudit = null }) {
   return {
     // Reader-facing audit-event schema version (frozen as part of the 1.0 API
     // contract — see docs/current/api-stability.md). Additive-only: a new field
@@ -394,6 +560,14 @@ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections
     // and so is self-consistent for hash-chain verification of new events.
     schemaVersion: "1",
     id: randomUUID(),
+    // Per-REQUEST correlation id (WS4-A). Additive top-level field: the proxy
+    // generates one randomUUID() per request and threads it into the protect
+    // context, so the request- and response-direction events of ONE request
+    // share it (and it appears in the structured error log for the same request).
+    // It is null when no context.correlationId is set, preserving the existing
+    // non-proxy protectJson() behavior and keeping the api-contract subset green.
+    // It is a UUID — never a payload/identity/PII value.
+    correlationId: context.correlationId ?? null,
     timestamp: new Date().toISOString(),
     protocol: context.protocol ?? "custom",
     operation: context.operation ?? "protect",
@@ -424,7 +598,7 @@ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections
       action: decisions[index]?.action ?? "unknown",
       enforced
     })),
-    summary: summarize(detections, decisions)
+    summary: summarize(detections, decisions, precisionAudit)
   };
 }