haechi 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +97 -97
- package/README.md +2 -2
- package/SECURITY.md +19 -11
- package/docs/README.md +2 -0
- package/docs/current/api-stability.ko.md +26 -26
- package/docs/current/compliance-mapping.ko.md +53 -0
- package/docs/current/compliance-mapping.md +53 -0
- package/docs/current/config-version.ko.md +30 -0
- package/docs/current/config-version.md +51 -0
- package/docs/current/configuration.ko.md +242 -102
- package/docs/current/configuration.md +149 -9
- package/docs/current/operations-runbook.ko.md +121 -0
- package/docs/current/operations-runbook.md +204 -0
- package/docs/current/release-process.ko.md +19 -20
- package/docs/current/release-process.md +1 -2
- package/docs/current/reliability-hardening-track.ko.md +77 -0
- package/docs/current/reliability-hardening-track.md +77 -0
- package/docs/current/risk-register-release-gate.ko.md +26 -27
- package/docs/current/risk-register-release-gate.md +27 -20
- package/docs/current/security-whitepaper.ko.md +102 -0
- package/docs/current/security-whitepaper.md +102 -0
- package/docs/current/shared-responsibility.ko.md +33 -24
- package/docs/current/shared-responsibility.md +12 -3
- package/docs/current/threat-model.ko.md +12 -12
- package/docs/current/threat-model.md +3 -3
- package/haechi.config.example.json +19 -3
- package/package.json +6 -2
- package/packages/audit/index.mjs +26 -2
- package/packages/cli/bin/haechi.mjs +54 -8
- package/packages/cli/runtime.mjs +398 -10
- package/packages/core/index.mjs +189 -15
- package/packages/filter/index.mjs +299 -9
- package/packages/metrics/index.mjs +181 -0
- package/packages/proxy/index.mjs +535 -41
package/packages/core/index.mjs
CHANGED
|
@@ -1,12 +1,35 @@
|
|
|
1
1
|
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
+
import { HARD_BLOCK_TYPES } from "../filter/index.mjs";
|
|
2
3
|
|
|
3
4
|
const NO_ENFORCE_MODES = new Set(["dry-run", "report-only"]);
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
// Safe built-in ceiling on JSON nesting depth. collectStringEntries walks the
|
|
7
|
+
// tree recursively, so an attacker-shaped deeply-nested payload (within
|
|
8
|
+
// limits.maxRequestBytes) would otherwise overflow the call stack and crash the
|
|
9
|
+
// process uncaught. This default protects direct callers of the exported
|
|
10
|
+
// collectStringEntries; the proxy path threads the configurable
|
|
11
|
+
// limits.maxNestingDepth through createHaechi → protectJson instead.
|
|
12
|
+
export const DEFAULT_MAX_NESTING_DEPTH = 256;
|
|
13
|
+
|
|
14
|
+
export function createHaechi({ filterEngine, policyEngine, cryptoProvider, auditSink, tokenVault = null, mode = "dry-run", limits = {}, precision = {} }) {
|
|
6
15
|
if (!filterEngine || !policyEngine || !cryptoProvider || !auditSink) {
|
|
7
16
|
throw new Error("Haechi requires filterEngine, policyEngine, cryptoProvider, and auditSink");
|
|
8
17
|
}
|
|
9
18
|
|
|
19
|
+
// Resolve once at construction; protectJson and the stream protector reuse it.
|
|
20
|
+
const maxNestingDepth = Number.isInteger(limits.maxNestingDepth) && limits.maxNestingDepth > 0
|
|
21
|
+
? limits.maxNestingDepth
|
|
22
|
+
: DEFAULT_MAX_NESTING_DEPTH;
|
|
23
|
+
|
|
24
|
+
// WS2c precision controls, resolved once. `minConfidence` is the precision dial
|
|
25
|
+
// (drop a detection below the threshold) and `allowlist` is the operator FP
|
|
26
|
+
// exception set. Both are FAIL-OPEN-FOR-PROTECTION: they may only TRIM
|
|
27
|
+
// precision-risky soft-type detections and can NEVER suppress a hard-block type
|
|
28
|
+
// (secret/api_key/kr_rrn/card) — that load-bearing exemption is enforced in
|
|
29
|
+
// applyPrecisionControls, not trusted to config. Default {} = current behavior.
|
|
30
|
+
const minConfidence = Number.isFinite(precision.minConfidence) ? precision.minConfidence : 0;
|
|
31
|
+
const allowlist = compileAllowlist(precision.allowlist);
|
|
32
|
+
|
|
10
33
|
async function protectJson(payload, rawContext = {}) {
|
|
11
34
|
// A per-request policy engine (a named profile selected from identity)
|
|
12
35
|
// overrides the default. It is a control object, NOT data: strip it before
|
|
@@ -14,12 +37,21 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
14
37
|
const { policyEngine: contextEngine, ...context } = rawContext;
|
|
15
38
|
const effectiveMode = context.mode ?? mode;
|
|
16
39
|
const engine = contextEngine ?? policyEngine;
|
|
17
|
-
|
|
40
|
+
// Fail closed on an over-deep payload BEFORE any detection/transform work,
|
|
41
|
+
// mirroring the byte-limit path: the thrown error carries statusCode 413 so
|
|
42
|
+
// the proxy surfaces a clean 4xx rather than a stack-overflow 500.
|
|
43
|
+
const entries = collectStringEntries(payload, [], { maxDepth: maxNestingDepth });
|
|
18
44
|
// `context` is threaded into detection as-is and is LOAD-BEARING: e.g.
|
|
19
45
|
// `context.direction` ("request" | "response") gates direction-scoped rules
|
|
20
46
|
// (injection) and the response-only marker exclusion in the filter engine.
|
|
21
47
|
// The proxy sets it per direction; do not drop it here.
|
|
22
|
-
const
|
|
48
|
+
const rawDetections = await filterEngine.detect({ entries, context });
|
|
49
|
+
// WS2c precision controls run AFTER detect and BEFORE decide: drop a low-
|
|
50
|
+
// confidence soft-type detection (minConfidence) and suppress an allowlisted
|
|
51
|
+
// soft-type detection — never a hard-block type. `precisionAudit` carries the
|
|
52
|
+
// per-type counts of what was suppressed/dropped so the audit event records
|
|
53
|
+
// it (counts/types only, never the raw value). See applyPrecisionControls.
|
|
54
|
+
const { detections, precisionAudit } = applyPrecisionControls(rawDetections, { minConfidence, allowlist });
|
|
23
55
|
const decisions = [];
|
|
24
56
|
|
|
25
57
|
for (const detection of detections) {
|
|
@@ -46,7 +78,8 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
46
78
|
blocked,
|
|
47
79
|
payload,
|
|
48
80
|
detections,
|
|
49
|
-
decisions
|
|
81
|
+
decisions,
|
|
82
|
+
precisionAudit
|
|
50
83
|
});
|
|
51
84
|
|
|
52
85
|
await auditSink.record(auditEvent);
|
|
@@ -54,7 +87,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
54
87
|
return {
|
|
55
88
|
payload: protectedPayload,
|
|
56
89
|
blocked,
|
|
57
|
-
summary: summarize(detections, decisions),
|
|
90
|
+
summary: summarize(detections, decisions, precisionAudit),
|
|
58
91
|
auditEvent,
|
|
59
92
|
issuedTokens: [...issuedTokens]
|
|
60
93
|
};
|
|
@@ -97,7 +130,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
97
130
|
// Transform a complete, committed text segment.
|
|
98
131
|
async function transformSegment(text) {
|
|
99
132
|
const detections = await filterEngine.detect({
|
|
100
|
-
entries: collectStringEntries(text),
|
|
133
|
+
entries: collectStringEntries(text, [], { maxDepth: maxNestingDepth }),
|
|
101
134
|
context
|
|
102
135
|
});
|
|
103
136
|
const decisions = await decideAll(detections);
|
|
@@ -119,7 +152,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
119
152
|
// delta text (e.g. tool-call arguments). Returns the mutated object.
|
|
120
153
|
async protectFrameExtras(value) {
|
|
121
154
|
const detections = await filterEngine.detect({
|
|
122
|
-
entries: collectStringEntries(value),
|
|
155
|
+
entries: collectStringEntries(value, [], { maxDepth: maxNestingDepth }),
|
|
123
156
|
context
|
|
124
157
|
});
|
|
125
158
|
if (detections.length === 0) {
|
|
@@ -143,7 +176,7 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
143
176
|
async push(text) {
|
|
144
177
|
pending += text;
|
|
145
178
|
const detections = await filterEngine.detect({
|
|
146
|
-
entries: collectStringEntries(pending),
|
|
179
|
+
entries: collectStringEntries(pending, [], { maxDepth: maxNestingDepth }),
|
|
147
180
|
context
|
|
148
181
|
});
|
|
149
182
|
let commit = Math.max(0, pending.length - maxMatchBytes);
|
|
@@ -176,7 +209,14 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
176
209
|
return { protectJson, createStreamProtector };
|
|
177
210
|
}
|
|
178
211
|
|
|
179
|
-
export function collectStringEntries(value, path = []) {
|
|
212
|
+
export function collectStringEntries(value, path = [], options = {}) {
|
|
213
|
+
// `options.maxDepth` bounds recursion to fail closed on a deeply-nested
|
|
214
|
+
// payload (which would otherwise overflow the call stack → uncaught crash).
|
|
215
|
+
// Additive third arg: existing 2-arg callers get DEFAULT_MAX_NESTING_DEPTH.
|
|
216
|
+
const maxDepth = Number.isInteger(options.maxDepth) && options.maxDepth > 0
|
|
217
|
+
? options.maxDepth
|
|
218
|
+
: DEFAULT_MAX_NESTING_DEPTH;
|
|
219
|
+
|
|
180
220
|
if (typeof value === "string") {
|
|
181
221
|
return [{ path, pathText: safePathToString(path), value, kind: "value" }];
|
|
182
222
|
}
|
|
@@ -187,8 +227,15 @@ export function collectStringEntries(value, path = []) {
|
|
|
187
227
|
return [{ path, pathText: safePathToString(path), value: String(value), kind: "number" }];
|
|
188
228
|
}
|
|
189
229
|
|
|
230
|
+
// Descending into an array/object would exceed the configured depth. Throw a
|
|
231
|
+
// fail-closed error carrying statusCode 413 (mirroring the byte-limit path) so
|
|
232
|
+
// the proxy returns a clean 4xx instead of a stack-overflow 500.
|
|
233
|
+
if ((Array.isArray(value) || (value && typeof value === "object")) && path.length >= maxDepth) {
|
|
234
|
+
throw nestingDepthError(maxDepth);
|
|
235
|
+
}
|
|
236
|
+
|
|
190
237
|
if (Array.isArray(value)) {
|
|
191
|
-
return value.flatMap((item, index) => collectStringEntries(item, path.concat(index)));
|
|
238
|
+
return value.flatMap((item, index) => collectStringEntries(item, path.concat(index), { maxDepth }));
|
|
192
239
|
}
|
|
193
240
|
|
|
194
241
|
if (value && typeof value === "object") {
|
|
@@ -196,13 +243,22 @@ export function collectStringEntries(value, path = []) {
|
|
|
196
243
|
// otherwise be forwarded upstream in plaintext.
|
|
197
244
|
return Object.entries(value).flatMap(([key, item]) => [
|
|
198
245
|
{ path: path.concat(key), pathText: safePathToString(path.concat(key)), value: key, kind: "key" },
|
|
199
|
-
...collectStringEntries(item, path.concat(key))
|
|
246
|
+
...collectStringEntries(item, path.concat(key), { maxDepth })
|
|
200
247
|
]);
|
|
201
248
|
}
|
|
202
249
|
|
|
203
250
|
return [];
|
|
204
251
|
}
|
|
205
252
|
|
|
253
|
+
// Build (but do not throw) the fail-closed error used when a payload nests
// deeper than `maxDepth`. The caller is responsible for throwing it.
function nestingDepthError(maxDepth) {
  const message = `Request JSON nesting exceeds limits.maxNestingDepth (${maxDepth})`;
  // statusCode/errorCode are attached so a catch-all handler (the proxy) can
  // surface this as a clean 4xx, like the request-body-too-large guard.
  return Object.assign(new Error(message), {
    statusCode: 413,
    errorCode: "haechi_request_too_deeply_nested"
  });
}
|
|
261
|
+
|
|
206
262
|
export function pathToString(path) {
|
|
207
263
|
return path.reduce((text, part, index) => {
|
|
208
264
|
if (typeof part === "number") {
|
|
@@ -235,7 +291,7 @@ export function shapeOnly(value) {
|
|
|
235
291
|
return { type: value === null ? "null" : typeof value };
|
|
236
292
|
}
|
|
237
293
|
|
|
238
|
-
export function summarize(detections, decisions) {
|
|
294
|
+
export function summarize(detections, decisions, precisionAudit = null) {
|
|
239
295
|
const byType = {};
|
|
240
296
|
const byAction = {};
|
|
241
297
|
|
|
@@ -247,11 +303,121 @@ export function summarize(detections, decisions) {
|
|
|
247
303
|
byAction[decision.action] = (byAction[decision.action] ?? 0) + 1;
|
|
248
304
|
}
|
|
249
305
|
|
|
250
|
-
|
|
306
|
+
const summary = {
|
|
251
307
|
detectionCount: detections.length,
|
|
252
308
|
byType,
|
|
253
309
|
byAction
|
|
254
310
|
};
|
|
311
|
+
|
|
312
|
+
// WS2c: additively record how many detections the precision controls removed
|
|
313
|
+
// before decide — `suppressedCount`/`suppressedByType` for allowlist FP
|
|
314
|
+
// exceptions and `droppedCount`/`droppedByType` for sub-minConfidence drops.
|
|
315
|
+
// Counts and types only; the matched value is NEVER recorded (no-plaintext-in-
|
|
316
|
+
// audit). Omitted entirely when nothing was removed, so 1.1 events are byte-
|
|
317
|
+
// identical and the audit hash-chain canonicalization is unaffected.
|
|
318
|
+
if (precisionAudit && precisionAudit.suppressedCount > 0) {
|
|
319
|
+
summary.suppressedCount = precisionAudit.suppressedCount;
|
|
320
|
+
summary.suppressedByType = precisionAudit.suppressedByType;
|
|
321
|
+
}
|
|
322
|
+
if (precisionAudit && precisionAudit.droppedCount > 0) {
|
|
323
|
+
summary.droppedCount = precisionAudit.droppedCount;
|
|
324
|
+
summary.droppedByType = precisionAudit.droppedByType;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
return summary;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
// Compile the configured allowlist into fast lookup sets. An entry is either a
// bare string (an exact matched-VALUE exception) or an object { value?, path? }
// (value exception, JSON-path exception via the PII-safe pathText, or both —
// when both are present BOTH must match). Returns null when there is nothing to
// allowlist so the hot path can skip the work entirely.
//
// Returns: null for a non-array or empty input, otherwise
//   { values: Set<string>, paths: Set<string>, pairs: Array<{ value, path }> }.
// Entries that carry neither a string `value` nor a string `path` contribute
// nothing. A null/undefined entry is skipped the same way instead of throwing
// a TypeError on the property read, so one malformed config entry cannot
// crash construction.
function compileAllowlist(allowlist) {
  if (!Array.isArray(allowlist) || allowlist.length === 0) {
    return null;
  }
  const values = new Set();
  const paths = new Set();
  const pairs = [];
  for (const entry of allowlist) {
    if (typeof entry === "string") {
      values.add(entry);
      continue;
    }
    // Guard before touching entry.value / entry.path: reading a property of
    // null or undefined throws.
    if (entry === null || entry === undefined) {
      continue;
    }
    const hasValue = typeof entry.value === "string";
    const hasPath = typeof entry.path === "string";
    if (hasValue && hasPath) {
      // Both present: recorded as a pair so BOTH must match at lookup time.
      pairs.push({ value: entry.value, path: entry.path });
    } else if (hasValue) {
      values.add(entry.value);
    } else if (hasPath) {
      paths.add(entry.path);
    }
  }
  return { values, paths, pairs };
}
|
|
359
|
+
|
|
360
|
+
// Decide whether a detection is covered by the compiled allowlist
// ({ values, paths, pairs }). A detection matches when its string `value` is
// in `values`, or its string `pathText` is in `paths`, or some pair equals
// both its `value` and its `pathText`. The path comparison uses the PII-safe
// `pathText` (the same hashed path the audit records), so an operator
// allowlists `key_<hash>.…` — never a raw key name. A missing allowlist
// (null) never matches.
function isAllowlisted(detection, allowlist) {
  if (!allowlist) {
    return false;
  }
  const { values, paths, pairs } = allowlist;
  const { value, pathText } = detection;
  const matchesValue = typeof value === "string" && values.has(value);
  if (matchesValue || (typeof pathText === "string" && paths.has(pathText))) {
    return true;
  }
  // Pair entries require value AND path to match together.
  return pairs.some((pair) => value === pair.value && pathText === pair.path);
}
|
|
381
|
+
|
|
382
|
+
// WS2c precision controls — run AFTER detect, BEFORE decide. Returns the kept
// detections plus a precisionAudit of what was removed (counts/types only).
//
// HARD-BLOCK INVARIANT (load-bearing, fail-closed): a detection whose type is in
// HARD_BLOCK_TYPES (secret/api_key/kr_rrn/card) is NEVER removed here — neither a
// low confidence nor an allowlist entry can suppress it. minConfidence trims only
// the precision-risky SOFT types; an allowlist entry that would suppress a hard-
// block type is ignored and the detection still fires. This guard lives in core
// (not trusted to config) so the invariant holds for every caller.
//
// Parameters:
//   detections     iterable of detection objects; `type`, `confidence`, `value`
//                  and `pathText` are the fields read here.
//   minConfidence  soft-type detections with a finite confidence strictly below
//                  this are dropped (default 0).
//   allowlist      the compiled { values, paths, pairs } form, null, or the raw
//                  configured array. This function is exported but the compiler
//                  is not, so a raw array is compiled here rather than crashing
//                  on the missing lookup sets.
// Returns:
//   { detections, precisionAudit: { suppressedCount, suppressedByType,
//     droppedCount, droppedByType } }
export function applyPrecisionControls(detections, { minConfidence = 0, allowlist = null } = {}) {
  const compiledAllowlist = Array.isArray(allowlist) ? compileAllowlist(allowlist) : allowlist;
  const kept = [];
  const suppressedByType = {};
  const droppedByType = {};
  let suppressedCount = 0;
  let droppedCount = 0;

  for (const detection of detections) {
    const hardBlock = HARD_BLOCK_TYPES.has(detection.type);
    // Allowlist suppression first (an operator-declared FP exception), but never
    // for a hard-block type.
    if (!hardBlock && isAllowlisted(detection, compiledAllowlist)) {
      suppressedByType[detection.type] = (suppressedByType[detection.type] ?? 0) + 1;
      suppressedCount += 1;
      continue;
    }
    // minConfidence drop — only for soft types. A low-confidence hard-block
    // detection (e.g. a card at confidence 0.75) is kept and acted on. A
    // detection without a finite confidence is never dropped.
    if (!hardBlock && Number.isFinite(detection.confidence) && detection.confidence < minConfidence) {
      droppedByType[detection.type] = (droppedByType[detection.type] ?? 0) + 1;
      droppedCount += 1;
      continue;
    }
    kept.push(detection);
  }

  return {
    detections: kept,
    precisionAudit: { suppressedCount, suppressedByType, droppedCount, droppedByType }
  };
}
|
|
256
422
|
|
|
257
423
|
async function transformPayload(payload, detections, decisions, { context, cryptoProvider, tokenVault, enforced, issuedTokens = null }) {
|
|
@@ -385,7 +551,7 @@ async function replacementFor(segment, detection, decision, { context, cryptoPro
|
|
|
385
551
|
}
|
|
386
552
|
}
|
|
387
553
|
|
|
388
|
-
function buildAuditEvent({ context, mode, enforced, blocked, payload, detections, decisions }) {
|
|
554
|
+
function buildAuditEvent({ context, mode, enforced, blocked, payload, detections, decisions, precisionAudit = null }) {
|
|
389
555
|
return {
|
|
390
556
|
// Reader-facing audit-event schema version (frozen as part of the 1.0 API
|
|
391
557
|
// contract — see docs/current/api-stability.md). Additive-only: a new field
|
|
@@ -394,6 +560,14 @@ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections
|
|
|
394
560
|
// and so is self-consistent for hash-chain verification of new events.
|
|
395
561
|
schemaVersion: "1",
|
|
396
562
|
id: randomUUID(),
|
|
563
|
+
// Per-REQUEST correlation id (WS4-A). Additive top-level field: the proxy
|
|
564
|
+
// generates one randomUUID() per request and threads it into the protect
|
|
565
|
+
// context, so the request- and response-direction events of ONE request
|
|
566
|
+
// share it (and it appears in the structured error log for the same request).
|
|
567
|
+
// It is null when no context.correlationId is set, preserving the existing
|
|
568
|
+
// non-proxy protectJson() behavior and keeping the api-contract subset green.
|
|
569
|
+
// It is a UUID — never a payload/identity/PII value.
|
|
570
|
+
correlationId: context.correlationId ?? null,
|
|
397
571
|
timestamp: new Date().toISOString(),
|
|
398
572
|
protocol: context.protocol ?? "custom",
|
|
399
573
|
operation: context.operation ?? "protect",
|
|
@@ -424,7 +598,7 @@ function buildAuditEvent({ context, mode, enforced, blocked, payload, detections
|
|
|
424
598
|
action: decisions[index]?.action ?? "unknown",
|
|
425
599
|
enforced
|
|
426
600
|
})),
|
|
427
|
-
summary: summarize(detections, decisions)
|
|
601
|
+
summary: summarize(detections, decisions, precisionAudit)
|
|
428
602
|
};
|
|
429
603
|
}
|
|
430
604
|
|