haechi 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +12 -1
- package/README.md +12 -1
- package/docs/current/code-review-risk-register-2026-06-16.ko.md +377 -0
- package/docs/current/code-review-risk-register-2026-06-16.md +377 -0
- package/docs/current/configuration.ko.md +2 -1
- package/docs/current/configuration.md +2 -1
- package/docs/current/risk-register-release-gate.ko.md +30 -5
- package/docs/current/risk-register-release-gate.md +30 -5
- package/docs/current/shared-responsibility.ko.md +10 -1
- package/docs/current/shared-responsibility.md +10 -1
- package/docs/current/threat-model.ko.md +3 -0
- package/docs/current/threat-model.md +3 -0
- package/package.json +1 -1
- package/packages/cli/bin/haechi.mjs +92 -3
- package/packages/cli/runtime.mjs +49 -0
- package/packages/core/index.mjs +15 -0
- package/packages/crypto/index.mjs +42 -20
- package/packages/proxy/index.mjs +263 -28
- package/packages/ssrf/index.mjs +60 -4
- package/packages/stream-filter/index.mjs +127 -12
|
@@ -4,6 +4,12 @@
|
|
|
4
4
|
// through a bounded sliding buffer (cross-frame matches caught up to
|
|
5
5
|
// streaming.maxMatchBytes), and all other string leaves in a frame get
|
|
6
6
|
// within-frame protection. The whole stream is audited once at the end.
|
|
7
|
+
//
|
|
8
|
+
// P1-CR-005 — a frame whose data: payload is not JSON is NOT raw-passed. A
|
|
9
|
+
// CONTROL frame (the [DONE] sentinel, comment-only, empty/keepalive) has no
|
|
10
|
+
// inspectable text and passes through; a non-JSON CONTENT frame is inspected as
|
|
11
|
+
// text (single-shot protector.protectText, distinct from the delta buffer) so
|
|
12
|
+
// plain-text PII/secrets cannot bypass protection in inspect mode.
|
|
7
13
|
|
|
8
14
|
const SSE_DONE = "[DONE]";
|
|
9
15
|
|
|
@@ -49,14 +55,48 @@ export async function inspectResponseStream({ source, sink, streaming, protector
|
|
|
49
55
|
const frame = { raw, body: raw.trim() };
|
|
50
56
|
const parsed = parseFrame(frame, wireFormat);
|
|
51
57
|
if (!parsed.ok) {
|
|
52
|
-
//
|
|
53
|
-
//
|
|
54
|
-
|
|
58
|
+
// P1-CR-005 — a parse-failed frame is one of two things:
|
|
59
|
+
// (1) a CONTROL frame with no inspectable text (the SSE [DONE] sentinel,
|
|
60
|
+
// a comment-only frame, an empty/whitespace/keepalive frame) — there
|
|
61
|
+
// is genuinely nothing to protect, so pass it through verbatim; or
|
|
62
|
+
// (2) a CONTENT frame whose data: payload is NOT JSON (plain text,
|
|
63
|
+
// partial/malformed JSON, provider-specific text). That text CAN carry
|
|
64
|
+
// PII/secrets, so it must be INSPECTED AS TEXT, not raw-passed.
|
|
65
|
+
if (parsed.control || parsed.text == null) {
|
|
66
|
+
sink.write(frame.raw);
|
|
67
|
+
return;
|
|
68
|
+
}
|
|
69
|
+
// Inspect the reconstructed data text as a single self-contained payload.
|
|
70
|
+
// protectText is DISTINCT from the delta-channel push/flush buffer, so a
|
|
71
|
+
// non-JSON frame's text never corrupts the JSON delta sliding buffer. A
|
|
72
|
+
// block-action detection fails the stream closed; otherwise re-emit the
|
|
73
|
+
// protected text (preserving the original wire wrapper / event: lines).
|
|
74
|
+
const protectedText = await protector.protectText(parsed.text);
|
|
75
|
+
if (protectedText.blocked) {
|
|
76
|
+
blocked = true;
|
|
77
|
+
return;
|
|
78
|
+
}
|
|
79
|
+
sink.write(serializeTextFrame(protectedText.text, wireFormat, frame));
|
|
55
80
|
return;
|
|
56
81
|
}
|
|
57
82
|
|
|
58
83
|
const json = parsed.json;
|
|
59
84
|
|
|
85
|
+
// A bare PRIMITIVE JSON value (string/number/boolean/null) has no object
|
|
86
|
+
// structure for the delta/extras object path — a deltaPath setByPath on a
|
|
87
|
+
// string root would throw an uncaught TypeError on an attacker-influenceable
|
|
88
|
+
// frame. A JSON string can itself carry PII, so inspect the re-serialized
|
|
89
|
+
// value as text (same single-shot path as a non-JSON content frame).
|
|
90
|
+
if (json === null || typeof json !== "object") {
|
|
91
|
+
const protectedPrimitive = await protector.protectText(JSON.stringify(json));
|
|
92
|
+
if (protectedPrimitive.blocked) {
|
|
93
|
+
blocked = true;
|
|
94
|
+
return;
|
|
95
|
+
}
|
|
96
|
+
sink.write(serializeTextFrame(protectedPrimitive.text, wireFormat, frame));
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
|
|
60
100
|
// A delta-terminating frame: flush the held tail (as a valid delta frame)
|
|
61
101
|
// before emitting it, so the residual is correctly ordered.
|
|
62
102
|
if (flushOnType && flushOnType.values.includes(getByPath(json, flushOnType.path))) {
|
|
@@ -157,28 +197,64 @@ function createFrameSplitter(format) {
|
|
|
157
197
|
};
|
|
158
198
|
}
|
|
159
199
|
|
|
200
|
+
// Parse a frame. On success: { ok:true, json }. On failure the caller needs to
|
|
201
|
+
// know WHICH kind of failure it is (P1-CR-005):
|
|
202
|
+
// - { ok:false, control:true } → a CONTROL frame (no inspectable
|
|
203
|
+
// text: [DONE], comment-only, empty/
|
|
204
|
+
// whitespace/keepalive) → pass raw.
|
|
205
|
+
// - { ok:false, control:false, text } → a CONTENT frame whose data: payload
|
|
206
|
+
// is non-JSON → inspect `text` as text.
|
|
207
|
+
// Recognize an SSE `data:` field line LENIENTLY — allowing (non-spec) leading
|
|
208
|
+
// whitespace before the field name — and return its payload (one leading space
|
|
209
|
+
// after the colon stripped per the SSE spec), or null if the line is not a data
|
|
210
|
+
// field. SECURITY (P1-CR-005 follow-up): recognition MUST be identical in the
|
|
211
|
+
// parser (which inspects/redacts) and the serializers (which re-emit). If the
|
|
212
|
+
// serializer used a stricter `line.startsWith("data:")` it would fail to match a
|
|
213
|
+
// ` data: <pii>` line, emit it VERBATIM, and leak the original plaintext while
|
|
214
|
+
// separately appending the redacted copy. Both sides use this one helper.
|
|
215
|
+
// Single source of truth for "is this line an SSE data: field?". Lenient on
// purpose: (non-spec) leading spaces/tabs before the field name are accepted.
// The serializers in this file test lines against this same pattern, so the
// parser must derive its answer from it too — never from a second, subtly
// different regex.
const SSE_DATA_LINE = /^[ \t]*data:/;

/**
 * Return the payload of an SSE `data:` field line, or null if the line is not
 * a data field.
 *
 * Recognition is delegated to SSE_DATA_LINE, so any line the serializers would
 * replace is also a line this function returns text for. The previous
 * `/^[ \t]*data:(.*)$/` form failed on lines containing "\r", "\u2028" or
 * "\u2029" (characters `.` does not match): such a line was reported as "not a
 * data line", so its text was never inspected even though the serializers
 * still recognized the line.
 *
 * @param {string} line - One line of an SSE frame, already split on "\n".
 * @returns {string|null} The field value with one leading space removed (per
 *   the SSE spec) and one trailing "\r" removed (the remainder of a CRLF line
 *   terminator, which is not part of the value); null for non-data lines.
 */
function sseDataPayload(line) {
  const prefix = SSE_DATA_LINE.exec(line);
  if (prefix === null) {
    return null;
  }
  // Slice instead of capturing with `.`, so no character class can silently
  // exclude part of the value.
  let payload = line.slice(prefix[0].length);
  if (payload.startsWith(" ")) {
    payload = payload.slice(1);
  }
  if (payload.endsWith("\r")) {
    payload = payload.slice(0, -1);
  }
  return payload;
}
|
|
220
|
+
|
|
160
221
|
/**
 * Classify and parse one stream frame.
 *
 * Result shapes:
 *   - { ok: true, json }                        the payload parsed as JSON.
 *   - { ok: false, control: true, text: null }  a control frame with nothing to
 *     inspect (missing frame, empty/whitespace frame, comment-only or
 *     field-only SSE frame, SSE data lines that are all empty, the [DONE]
 *     sentinel).
 *   - { ok: false, control: false, text }       a content frame whose payload
 *     is not JSON; `text` is the reconstructed payload to inspect as text.
 *
 * @param {{ raw?: string, body: string } | null | undefined} frame - Frame to
 *   parse; only `body` is read here (the caller visible in this file builds it
 *   as `raw.trim()`).
 * @param {string} format - "sse" selects SSE data-line extraction; any other
 *   value treats `body` itself as the payload (NDJSON).
 * @returns {{ ok: true, json: * } | { ok: false, control: boolean, text: string | null }}
 */
function parseFrame(frame, format) {
  if (!frame) {
    return { ok: false, control: true, text: null };
  }
  let payload = frame.body;
  if (format === "sse") {
    // An empty/whitespace frame has no data: line at all.
    if (payload === "") {
      return { ok: false, control: true, text: null };
    }
    const dataLines = payload
      .split("\n")
      .map(sseDataPayload)
      .filter((value) => value !== null);
    if (dataLines.length === 0) {
      // Comment-only (`:` lines) or field-only (event:/id:/retry:) frame.
      return { ok: false, control: true, text: null };
    }
    // P2-CR-013 — multiple data: lines are joined with a NEWLINE, not "".
    // Newlines are valid JSON whitespace, so a multi-line JSON event still
    // parses, and a multi-line plain-text event keeps its line structure.
    payload = dataLines.join("\n");
    // data: lines that are all empty (e.g. a bare `data:` heartbeat) carry no
    // text. Without this check the empty payload fails JSON.parse below and is
    // reported as a CONTENT frame with text "", which the caller would run
    // through the protector and rewrite instead of passing verbatim.
    if (payload.trim() === "") {
      return { ok: false, control: true, text: null };
    }
    if (payload === SSE_DONE) {
      // The [DONE] sentinel: a control frame, never inspected.
      return { ok: false, control: true, text: null };
    }
  } else if (payload === "") {
    // NDJSON: an empty/whitespace line is a control/keepalive frame.
    return { ok: false, control: true, text: null };
  }
  try {
    return { ok: true, json: JSON.parse(payload) };
  } catch {
    // Not an error: non-JSON content is an expected input. Surface the
    // reconstructed payload so the caller inspects it as text.
    return { ok: false, control: false, text: payload };
  }
}
|
|
184
260
|
|
|
@@ -195,8 +271,10 @@ function serializeFrame(json, format, original) {
|
|
|
195
271
|
const out = [];
|
|
196
272
|
let dataWritten = false;
|
|
197
273
|
for (const line of lines) {
|
|
198
|
-
if (
|
|
199
|
-
// Collapse any (multi-line) data payload into the single new body.
|
|
274
|
+
if (SSE_DATA_LINE.test(line)) {
|
|
275
|
+
// Collapse any (multi-line) data payload into the single new body. Use
|
|
276
|
+
// the SAME lenient match as the parser so a ` data:` line is replaced,
|
|
277
|
+
// never emitted verbatim (which would leak the original plaintext).
|
|
200
278
|
if (!dataWritten) {
|
|
201
279
|
out.push(`data: ${body}`);
|
|
202
280
|
dataWritten = true;
|
|
@@ -216,6 +294,43 @@ function serializeFrame(json, format, original) {
|
|
|
216
294
|
return original && original.raw.endsWith("\n") ? `${body}\n` : `${body}\n`;
|
|
217
295
|
}
|
|
218
296
|
|
|
297
|
+
// P1-CR-005 — re-serialize a parse-failed CONTENT frame after its data text has
|
|
298
|
+
// been inspected/transformed. Unlike serializeFrame (which JSON.stringifies an
|
|
299
|
+
// object), this carries through ARBITRARY text. For SSE it preserves the
|
|
300
|
+
// original non-data field lines (event:/id:/retry:/`:` comments) and re-emits the
|
|
301
|
+
// protected text as data: lines — one per text line, per the SSE spec, so a
|
|
302
|
+
// multi-line payload round-trips correctly. For NDJSON the frame body IS the
|
|
303
|
+
// text, so emit the protected text plus a newline.
|
|
304
|
+
/**
 * Re-serialize a text (non-JSON-object) frame after its data text has been
 * inspected/transformed.
 *
 * NDJSON: the frame body is the text, so the result is the text plus "\n".
 * SSE: the text is emitted as `data:` lines, one per text line. When the
 * original frame is available, its non-data lines (event:/id:/retry:/comments)
 * are kept in place and the whole original data block is replaced, at the
 * position of its first data line, by the new data lines. Data lines are
 * recognized with SSE_DATA_LINE, the same lenient pattern the parser uses.
 *
 * @param {string} text - Protected text to emit.
 * @param {string} format - "sse" for SSE framing; anything else is NDJSON.
 * @param {{ raw: string }} [original] - The frame being replaced; used only
 *   when `original.raw` is a string.
 * @returns {string} The serialized frame, terminated with "\n" (NDJSON) or a
 *   blank line (SSE).
 */
function serializeTextFrame(text, format, original) {
  if (format !== "sse") {
    return `${text}\n`;
  }
  // SSE treats CRLF, lone CR and LF all as line terminators. Splitting on "\n"
  // alone would leave a CR inside a data: line, where the client ends the
  // field early and reads the remainder as a separate field line.
  const dataLines = text.split(/\r\n|\r|\n/).map((line) => `data: ${line}`);
  if (!original || typeof original.raw !== "string") {
    return `${dataLines.join("\n")}\n\n`;
  }

  const out = [];
  let dataWritten = false;
  // Appended with a loop rather than push(...dataLines): the line count is
  // driven by stream content, and spreading a very large array into call
  // arguments can throw a RangeError.
  const writeDataBlock = () => {
    for (const dataLine of dataLines) {
      out.push(dataLine);
    }
    dataWritten = true;
  };

  for (const line of original.raw.replace(/\n+$/, "").split("\n")) {
    if (!SSE_DATA_LINE.test(line)) {
      out.push(line);
      continue;
    }
    // Every original data line is dropped; the protected block is written once,
    // where the first one stood, so original plaintext is never re-emitted.
    if (!dataWritten) {
      writeDataBlock();
    }
  }
  if (!dataWritten) {
    writeDataBlock();
  }
  return `${out.join("\n")}\n\n`;
}
|
|
333
|
+
|
|
219
334
|
export function getByPath(value, path) {
|
|
220
335
|
let current = value;
|
|
221
336
|
for (const part of path) {
|