haechi 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.ko.md +57 -11
  2. package/README.md +57 -11
  3. package/docs/current/code-review-risk-register-2026-06-16.ko.md +377 -0
  4. package/docs/current/code-review-risk-register-2026-06-16.md +377 -0
  5. package/docs/current/config-version.ko.md +2 -2
  6. package/docs/current/config-version.md +2 -2
  7. package/docs/current/configuration.ko.md +28 -11
  8. package/docs/current/configuration.md +28 -11
  9. package/docs/current/operations-runbook.ko.md +36 -2
  10. package/docs/current/operations-runbook.md +39 -2
  11. package/docs/current/release-process.ko.md +5 -1
  12. package/docs/current/release-process.md +5 -1
  13. package/docs/current/risk-register-release-gate.ko.md +34 -8
  14. package/docs/current/risk-register-release-gate.md +34 -8
  15. package/docs/current/shared-responsibility.ko.md +12 -3
  16. package/docs/current/shared-responsibility.md +12 -3
  17. package/docs/current/threat-model.ko.md +7 -3
  18. package/docs/current/threat-model.md +7 -3
  19. package/examples/local-proxy-demo/README.md +51 -0
  20. package/examples/local-proxy-demo/demo.mjs +144 -0
  21. package/examples/local-proxy-demo/demo.tape +19 -0
  22. package/examples/local-proxy-demo/live-demo.mjs +121 -0
  23. package/examples/local-proxy-demo/live-demo.tape +25 -0
  24. package/haechi.config.example.json +2 -1
  25. package/package.json +3 -1
  26. package/packages/cli/bin/haechi.mjs +95 -5
  27. package/packages/cli/runtime.mjs +61 -1
  28. package/packages/core/index.mjs +15 -0
  29. package/packages/crypto/index.mjs +42 -20
  30. package/packages/filter/index.mjs +679 -6
  31. package/packages/privacy-profiles/index.mjs +72 -3
  32. package/packages/protocol-adapters/index.mjs +99 -1
  33. package/packages/proxy/index.mjs +270 -29
  34. package/packages/ssrf/index.mjs +60 -4
  35. package/packages/stream-filter/index.mjs +194 -17
@@ -18,6 +18,61 @@ import { lookup as dnsLookup } from "node:dns/promises";
18
18
  const DEFAULT_FETCH_TIMEOUT_MS = 5_000;
19
19
  const DEFAULT_MAX_BYTES = 1024 * 1024; // 1 MiB
20
20
 
21
+ // Parse an IPv6 literal into its 16 octets (or null when it is not a valid IPv6
22
+ // text form). This is the SOUND way to recognise an IPv4-mapped IPv6 address in
23
+ // EVERY textual form: dotted (::ffff:127.0.0.1), HEX (::ffff:7f00:1), bracketed
24
+ // ([::ffff:7f00:1], stripped by the caller), leading-zero (::ffff:7f00:0001),
25
+ // mixed `::` compression, and case-insensitive ffff. We classify the last 32
26
+ // bits as the embedded IPv4 ONLY when bytes 0..9 are zero and bytes 10..11 are
27
+ // 0xffff (the ::ffff:0:0/96 IPv4-mapped prefix), so a genuinely public mapped
28
+ // address (::ffff:8.8.8.8 == ::ffff:808:808) stays allowed and a non-mapped v6
29
+ // (::ffff:0:7f00:1, NAT64 64:ff9b::…) is NOT mistaken for an embedded IPv4.
30
/**
 * Parse an IPv6 literal into its 16 octets.
 *
 * Accepts full and `::`-compressed hex forms, a trailing dotted IPv4 quad
 * (`::ffff:127.0.0.1`), mixed-case hex digits, and leading zeros inside a
 * hextet. Brackets must already have been stripped by the caller.
 *
 * A zone/scope suffix (`%eth0`) is ignored: it does not change the address
 * bits, and leaving it in place would make an otherwise valid literal fail to
 * parse — so an IPv4-mapped address carrying a zone would not be recognised as
 * mapped by callers that rely on this function.
 *
 * @param {string} str - IPv6 text form without surrounding brackets.
 * @returns {number[] | null} 16 byte values (0..255), or null when `str` is
 *   not a string or not a parseable IPv6 text form.
 */
function ipv6ToBytes(str) {
  if (typeof str !== "string") return null;

  // Drop a zone identifier; only the address part determines the octets.
  const zoneAt = str.indexOf("%");
  let s = zoneAt === -1 ? str : str.slice(0, zoneAt);

  // A trailing dotted IPv4 quad (::ffff:127.0.0.1) — peel it off into the final
  // two hextets so the remaining text is pure hex groups.
  let tailV4 = null;
  if (s.includes(".")) {
    const idx = s.lastIndexOf(":");
    if (idx === -1) return null;
    const quad = s.slice(idx + 1).split(".");
    if (quad.length !== 4) return null;
    // Require plain decimal digits. Number() alone would coerce "" to 0 and
    // accept "0x7f" / "1e1" / " 1", none of which is a valid octet.
    if (!quad.every((part) => /^\d{1,3}$/.test(part))) return null;
    const oct = quad.map(Number);
    if (oct.some((n) => n > 255)) return null;
    tailV4 = oct;
    s = `${s.slice(0, idx + 1)}0:0`; // placeholder hextets; overwritten below
  }

  const halves = s.split("::");
  if (halves.length > 2) return null; // at most one "::"

  // Each group must be 1..4 hex digits; anything else is marked NaN.
  const toGroups = (g) =>
    g === ""
      ? []
      : g.split(":").map((h) => (/^[0-9a-fA-F]{1,4}$/.test(h) ? Number.parseInt(h, 16) : NaN));
  const head = toGroups(halves[0]);
  const tail = halves.length === 2 ? toGroups(halves[1]) : null;
  if (head.some((g) => Number.isNaN(g))) return null;
  if (tail !== null && tail.some((g) => Number.isNaN(g))) return null;

  let groups;
  if (tail === null) {
    // No "::" — all eight hextets must be spelled out.
    if (head.length !== 8) return null;
    groups = head;
  } else {
    // "::" stands for however many zero hextets are needed to reach eight.
    const missing = 8 - head.length - tail.length;
    if (missing < 0) return null;
    groups = [...head, ...Array(missing).fill(0), ...tail];
  }
  if (groups.length !== 8) return null;

  const bytes = groups.flatMap((g) => [(g >> 8) & 0xff, g & 0xff]);
  if (tailV4) {
    // Overwrite the placeholder hextets with the real IPv4 octets.
    bytes.splice(12, 4, ...tailV4);
  }
  return bytes;
}
66
+
67
+ // Return the embedded IPv4 dotted quad of an IPv4-mapped IPv6 address, or null.
68
/**
 * Extract the IPv4 address embedded in an IPv4-mapped IPv6 literal.
 *
 * The address counts as mapped only when its first ten octets are zero and
 * octets 10 and 11 are both 0xff (the ::ffff:0:0/96 prefix).
 *
 * @param {string} bare - IPv6 text form without brackets.
 * @returns {string | null} Dotted quad of the last four octets, or null when
 *   the text does not parse or does not carry the mapped prefix.
 */
function mappedIpv4(bare) {
  const octets = ipv6ToBytes(bare);
  if (!octets) return null;

  const zeroPrefix = octets.slice(0, 10).every((byte) => byte === 0);
  const ffffMarker = octets[10] === 0xff && octets[11] === 0xff;
  if (!zeroPrefix || !ffffMarker) return null;

  return octets.slice(12, 16).join(".");
}
75
+
21
76
  // Block literal addresses in private/loopback/link-local ranges + cloud metadata.
22
77
  // Applied to both a literal host in the URL and every DNS-resolved address. This
23
78
  // is the canonical copy; the satellite copies must agree (parity-tested).
@@ -39,10 +94,11 @@ export function isBlockedAddress(host) {
39
94
  if (v === 6) {
40
95
  const h = bare.toLowerCase();
41
96
  if (h === "::1" || h === "::") return true; // loopback / unspecified
42
- if (h.startsWith("::ffff:")) { // IPv4-mapped
43
- const mapped = h.slice("::ffff:".length);
44
- if (isIP(mapped) === 4) return isBlockedAddress(mapped);
45
- }
97
+ // IPv4-mapped IPv6 — normalise to the embedded IPv4 (handles dotted AND hex
98
+ // forms, e.g. ::ffff:127.0.0.1 and ::ffff:7f00:1) and run the v4 check, so a
99
+ // private/loopback/metadata target can't slip past as hex (P1-CR-002).
100
+ const mapped = mappedIpv4(bare);
101
+ if (mapped !== null) return isBlockedAddress(mapped);
46
102
  // Range-check the first hextet: fe80::/10 link-local, fc00::/7 ULA, ff00::/8 multicast.
47
103
  const firstHextet = parseInt(h.split(":")[0] || "", 16);
48
104
  if (Number.isFinite(firstHextet)) {
@@ -4,28 +4,108 @@
4
4
  // through a bounded sliding buffer (cross-frame matches caught up to
5
5
  // streaming.maxMatchBytes), and all other string leaves in a frame get
6
6
  // within-frame protection. The whole stream is audited once at the end.
7
+ //
8
+ // P1-CR-005 — a frame whose data: payload is not JSON is NOT raw-passed. A
9
+ // CONTROL frame (the [DONE] sentinel, comment-only, empty/keepalive) has no
10
+ // inspectable text and passes through; a non-JSON CONTENT frame is inspected as
11
+ // text (single-shot protector.protectText, distinct from the delta buffer) so
12
+ // plain-text PII/secrets cannot bypass protection in inspect mode.
7
13
 
8
14
  const SSE_DONE = "[DONE]";
9
15
 
10
16
  export async function inspectResponseStream({ source, sink, streaming, protector, format }) {
11
17
  const wireFormat = format ?? streaming?.format ?? "ndjson";
12
18
  const deltaPath = streaming?.deltaPath ?? null;
19
+ // Frame types that TERMINATE a delta sequence (declared per-adapter, e.g.
20
+ // Anthropic's content_block_stop/message_delta/message_stop). Before such a
21
+ // frame the held cross-frame buffer tail is flushed as a valid delta frame, so
22
+ // the residual lands in-order BEFORE the terminator — never after message_stop.
23
+ // Keepalives (ping) are deliberately NOT listed, so a match split across a ping
24
+ // is still caught by the sliding buffer.
25
+ const flushOnType = streaming?.flushOnType ?? null;
13
26
  const decoder = new TextDecoder("utf-8");
14
27
  const frames = createFrameSplitter(wireFormat);
15
28
 
16
29
  let blocked = false;
30
+ // A structural template of the last frame that carried delta text, used to
31
+ // re-emit a held buffer tail as a VALID delta frame (preserving its wire
32
+ // wrapper — Anthropic's `event:` line — plus sibling fields like type/index).
33
+ let lastDeltaTemplate = null;
34
+
35
+ async function flushHeldTail() {
36
+ const flushed = await protector.flush();
37
+ if (flushed.blocked) {
38
+ blocked = true;
39
+ return;
40
+ }
41
+ if (!flushed.text || !deltaPath) {
42
+ return;
43
+ }
44
+ if (lastDeltaTemplate) {
45
+ const object = structuredClone(lastDeltaTemplate.object);
46
+ setByPath(object, deltaPath, flushed.text);
47
+ sink.write(serializeFrame(object, wireFormat, lastDeltaTemplate.original));
48
+ } else {
49
+ // No prior delta frame to model — fall back to a minimal synthesized frame.
50
+ sink.write(serializeFrame(buildPathObject(deltaPath, flushed.text), wireFormat, null));
51
+ }
52
+ }
17
53
 
18
54
  async function handleFrame(raw) {
19
55
  const frame = { raw, body: raw.trim() };
20
56
  const parsed = parseFrame(frame, wireFormat);
21
57
  if (!parsed.ok) {
22
- // Non-JSON frame (e.g. `data: [DONE]`, comments, keep-alives): pass
23
- // through verbatim there is nothing to inspect.
24
- sink.write(frame.raw);
58
+ // P1-CR-005 — a parse-failed frame is one of two things:
59
+ // (1) a CONTROL frame with no inspectable text (the SSE [DONE] sentinel,
60
+ // a comment-only frame, an empty/whitespace/keepalive frame) — there
61
+ // is genuinely nothing to protect, so pass it through verbatim; or
62
+ // (2) a CONTENT frame whose data: payload is NOT JSON (plain text,
63
+ // partial/malformed JSON, provider-specific text). That text CAN carry
64
+ // PII/secrets, so it must be INSPECTED AS TEXT, not raw-passed.
65
+ if (parsed.control || parsed.text == null) {
66
+ sink.write(frame.raw);
67
+ return;
68
+ }
69
+ // Inspect the reconstructed data text as a single self-contained payload.
70
+ // protectText is DISTINCT from the delta-channel push/flush buffer, so a
71
+ // non-JSON frame's text never corrupts the JSON delta sliding buffer. A
72
+ // block-action detection fails the stream closed; otherwise re-emit the
73
+ // protected text (preserving the original wire wrapper / event: lines).
74
+ const protectedText = await protector.protectText(parsed.text);
75
+ if (protectedText.blocked) {
76
+ blocked = true;
77
+ return;
78
+ }
79
+ sink.write(serializeTextFrame(protectedText.text, wireFormat, frame));
25
80
  return;
26
81
  }
27
82
 
28
83
  const json = parsed.json;
84
+
85
+ // A bare PRIMITIVE JSON value (string/number/boolean/null) has no object
86
+ // structure for the delta/extras object path — a deltaPath setByPath on a
87
+ // string root would throw an uncaught TypeError on an attacker-influenceable
88
+ // frame. A JSON string can itself carry PII, so inspect the re-serialized
89
+ // value as text (same single-shot path as a non-JSON content frame).
90
+ if (json === null || typeof json !== "object") {
91
+ const protectedPrimitive = await protector.protectText(JSON.stringify(json));
92
+ if (protectedPrimitive.blocked) {
93
+ blocked = true;
94
+ return;
95
+ }
96
+ sink.write(serializeTextFrame(protectedPrimitive.text, wireFormat, frame));
97
+ return;
98
+ }
99
+
100
+ // A delta-terminating frame: flush the held tail (as a valid delta frame)
101
+ // before emitting it, so the residual is correctly ordered.
102
+ if (flushOnType && flushOnType.values.includes(getByPath(json, flushOnType.path))) {
103
+ await flushHeldTail();
104
+ if (blocked) {
105
+ return;
106
+ }
107
+ }
108
+
29
109
  let deltaText = null;
30
110
  if (deltaPath) {
31
111
  const found = getByPath(json, deltaPath);
@@ -50,6 +130,8 @@ export async function inspectResponseStream({ source, sink, streaming, protector
50
130
  return;
51
131
  }
52
132
  setByPath(frameObject, deltaPath, pushed.text);
133
+ // Snapshot this frame's structure + wire wrapper as the flush template.
134
+ lastDeltaTemplate = { object: structuredClone(frameObject), original: frame };
53
135
  }
54
136
 
55
137
  sink.write(serializeFrame(frameObject, wireFormat, frame));
@@ -77,13 +159,8 @@ export async function inspectResponseStream({ source, sink, streaming, protector
77
159
  }
78
160
 
79
161
  if (!blocked) {
80
- // Flush the held tail of the delta buffer as a synthesized final frame.
81
- const flushed = await protector.flush();
82
- if (flushed.blocked) {
83
- blocked = true;
84
- } else if (flushed.text && deltaPath) {
85
- sink.write(serializeFrame(buildPathObject(deltaPath, flushed.text), wireFormat, null));
86
- }
162
+ // Flush any remaining held tail (a stream that ended on a delta frame).
163
+ await flushHeldTail();
87
164
  }
88
165
 
89
166
  // The caller closes the sink AFTER recording the stream decision, so the
@@ -120,40 +197,140 @@ function createFrameSplitter(format) {
120
197
  };
121
198
  }
122
199
 
200
+ // Parse a frame. On success: { ok:true, json }. On failure the caller needs to
201
+ // know WHICH kind of failure it is (P1-CR-005):
202
+ // - { ok:false, control:true } → a CONTROL frame (no inspectable
203
+ // text: [DONE], comment-only, empty/
204
+ // whitespace/keepalive) → pass raw.
205
+ // - { ok:false, control:false, text } → a CONTENT frame whose data: payload
206
+ // is non-JSON → inspect `text` as text.
207
+ // Recognize an SSE `data:` field line LENIENTLY — allowing (non-spec) leading
208
+ // whitespace before the field name — and return its payload (one leading space
209
+ // after the colon stripped per the SSE spec), or null if the line is not a data
210
+ // field. SECURITY (P1-CR-005 follow-up): recognition MUST be identical in the
211
+ // parser (which inspects/redacts) and the serializers (which re-emit). If the
212
+ // serializer used a stricter `line.startsWith("data:")` it would fail to match a
213
+ // ` data: <pii>` line, emit it VERBATIM, and leak the original plaintext while
214
+ // separately appending the redacted copy. Both sides use this one helper.
215
// Matches the start of an SSE `data:` field line, tolerating (non-spec) leading
// spaces/tabs before the field name. Used wherever a line must be classified as
// data vs. non-data.
const SSE_DATA_LINE = /^[ \t]*data:/;

/**
 * Return the payload of an SSE `data:` field line, or null when the line is not
 * a data field.
 *
 * The payload is everything after the colon, with one trailing CR removed (the
 * remainder of a CRLF terminator when the caller split on "\n") and one leading
 * space removed per the SSE field-parsing rule.
 *
 * The capture uses `[\s\S]*` rather than `.*` on purpose: in JavaScript `.`
 * does not match line terminators (CR, U+2028, U+2029) and `$` without the `m`
 * flag matches only at end of input, so `(.*)$` would FAIL on a data line whose
 * payload contains any of those characters. Such a line would then be reported
 * as "not a data field" here while still matching SSE_DATA_LINE — the two
 * recognisers would disagree and the payload would escape inspection.
 *
 * NOTE(review): a bare CR inside the line is kept as payload content, not
 * treated as a line break; the visible callers split frames on "\n" only.
 *
 * @param {string} line - One line of an SSE frame (no trailing "\n").
 * @returns {string | null} The data payload, or null for a non-data line.
 */
function sseDataPayload(line) {
  const match = /^[ \t]*data:([\s\S]*)$/.exec(line);
  if (!match) {
    return null;
  }
  return match[1].replace(/\r$/, "").replace(/^ /, "");
}
220
+
123
221
  function parseFrame(frame, format) {
124
222
  if (!frame) {
125
- return { ok: false };
223
+ return { ok: false, control: true, text: null };
126
224
  }
127
225
  let payload = frame.body;
128
226
  if (format === "sse") {
227
+ // An empty/whitespace/comment-only/keepalive frame has no data: line → a
228
+ // CONTROL frame with nothing to inspect.
229
+ if (payload === "") {
230
+ return { ok: false, control: true, text: null };
231
+ }
129
232
  const dataLines = payload
130
233
  .split("\n")
131
- .filter((line) => line.startsWith("data:"))
132
- .map((line) => line.slice(5).trim());
234
+ .map(sseDataPayload)
235
+ .filter((value) => value !== null);
133
236
  if (dataLines.length === 0) {
134
- return { ok: false };
237
+ // Comment-only (`:` lines) or field-only (event:/id:/retry:) frame.
238
+ return { ok: false, control: true, text: null };
135
239
  }
136
- payload = dataLines.join("");
240
+ // P2-CR-013 — the SSE model joins multiple data: lines with a NEWLINE, not
241
+ // "". Newlines are valid JSON whitespace between tokens / inside a string, so
242
+ // a multi-line JSON event still JSON.parses; a multi-line plain-text event is
243
+ // reconstructed with its newlines before text inspection.
244
+ payload = dataLines.join("\n");
137
245
  if (payload === SSE_DONE) {
138
- return { ok: false };
246
+ // The [DONE] sentinel: a CONTROL frame, never inspected.
247
+ return { ok: false, control: true, text: null };
139
248
  }
249
+ } else if (payload === "") {
250
+ // NDJSON: an empty/whitespace line is a CONTROL/keepalive frame.
251
+ return { ok: false, control: true, text: null };
140
252
  }
141
253
  try {
142
254
  return { ok: true, json: JSON.parse(payload) };
143
255
  } catch {
144
- return { ok: false };
256
+ // Non-JSON CONTENT: surface the reconstructed payload text for inspection.
257
+ return { ok: false, control: false, text: payload };
145
258
  }
146
259
  }
147
260
 
148
261
  function serializeFrame(json, format, original) {
149
262
  const body = JSON.stringify(json);
150
263
  if (format === "sse") {
264
+ // Preserve the original SSE field lines (`event:`, `id:`, `retry:`, `:`
265
+ // comments) and substitute only the data payload. Event-typed streams
266
+ // (Anthropic Messages) dispatch on the `event:` line, so dropping it would
267
+ // make the stream unconsumable. OpenAI-style frames carry only a `data:`
268
+ // line, so the output is byte-identical to `data: ${body}\n\n`.
269
+ if (original && typeof original.raw === "string") {
270
+ const lines = original.raw.replace(/\n+$/, "").split("\n");
271
+ const out = [];
272
+ let dataWritten = false;
273
+ for (const line of lines) {
274
+ if (SSE_DATA_LINE.test(line)) {
275
+ // Collapse any (multi-line) data payload into the single new body. Use
276
+ // the SAME lenient match as the parser so a ` data:` line is replaced,
277
+ // never emitted verbatim (which would leak the original plaintext).
278
+ if (!dataWritten) {
279
+ out.push(`data: ${body}`);
280
+ dataWritten = true;
281
+ }
282
+ } else {
283
+ out.push(line);
284
+ }
285
+ }
286
+ if (!dataWritten) {
287
+ out.push(`data: ${body}`);
288
+ }
289
+ return `${out.join("\n")}\n\n`;
290
+ }
151
291
  return `data: ${body}\n\n`;
152
292
  }
153
293
  // NDJSON: preserve the original trailing newline style when available.
154
294
  return original && original.raw.endsWith("\n") ? `${body}\n` : `${body}\n`;
155
295
  }
156
296
 
297
+ // P1-CR-005 — re-serialize a parse-failed CONTENT frame after its data text has
298
+ // been inspected/transformed. Unlike serializeFrame (which JSON.stringifies an
299
+ // object), this carries through ARBITRARY text. For SSE it preserves the
300
+ // original non-data field lines (event:/id:/retry:/`:` comments) and re-emits the
301
+ // protected text as data: lines — one per text line, per the SSE spec, so a
302
+ // multi-line payload round-trips correctly. For NDJSON the frame body IS the
303
+ // text, so emit the protected text plus a newline.
304
/**
 * Serialize arbitrary (already inspected) text back into one wire frame.
 *
 * NDJSON (any non-"sse" format): the text followed by one newline.
 *
 * SSE: the text is emitted as one `data:` line per "\n"-separated line. When
 * the original frame's raw text is available, its non-data field lines are
 * kept in place and its whole data block is replaced by the new data lines (at
 * the position of the first original data line, or appended when there was
 * none). SSE frames end with a blank line.
 *
 * @param {string} text - Text to carry in the frame.
 * @param {string} format - "sse" or another value for NDJSON.
 * @param {{ raw?: string } | null | undefined} original - Frame being
 *   re-emitted, used only to preserve SSE field lines.
 * @returns {string} The serialized frame including its terminator.
 */
function serializeTextFrame(text, format, original) {
  if (format !== "sse") {
    return `${text}\n`;
  }

  // One data: line per line of text, pre-joined into a single block.
  const dataBlock = text
    .split("\n")
    .map((line) => `data: ${line}`)
    .join("\n");

  const hasWrapper = Boolean(original) && typeof original.raw === "string";
  if (!hasWrapper) {
    return `${dataBlock}\n\n`;
  }

  const kept = [];
  let emitted = false;
  original.raw
    .replace(/\n+$/, "")
    .split("\n")
    .forEach((line) => {
      if (SSE_DATA_LINE.test(line)) {
        // Original data lines are never copied through; the replacement block
        // goes where the first of them stood.
        if (!emitted) {
          kept.push(dataBlock);
          emitted = true;
        }
        return;
      }
      kept.push(line);
    });
  if (!emitted) {
    kept.push(dataBlock);
  }
  return `${kept.join("\n")}\n\n`;
}
333
+
157
334
  export function getByPath(value, path) {
158
335
  let current = value;
159
336
  for (const part of path) {