haechi 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +46 -11
- package/README.md +46 -11
- package/docs/current/config-version.ko.md +2 -2
- package/docs/current/config-version.md +2 -2
- package/docs/current/configuration.ko.md +26 -10
- package/docs/current/configuration.md +26 -10
- package/docs/current/operations-runbook.ko.md +36 -2
- package/docs/current/operations-runbook.md +39 -2
- package/docs/current/release-process.ko.md +5 -1
- package/docs/current/release-process.md +5 -1
- package/docs/current/risk-register-release-gate.ko.md +4 -3
- package/docs/current/risk-register-release-gate.md +4 -3
- package/docs/current/shared-responsibility.ko.md +2 -2
- package/docs/current/shared-responsibility.md +2 -2
- package/docs/current/threat-model.ko.md +4 -3
- package/docs/current/threat-model.md +4 -3
- package/examples/local-proxy-demo/README.md +51 -0
- package/examples/local-proxy-demo/demo.mjs +144 -0
- package/examples/local-proxy-demo/demo.tape +19 -0
- package/examples/local-proxy-demo/live-demo.mjs +121 -0
- package/examples/local-proxy-demo/live-demo.tape +25 -0
- package/haechi.config.example.json +2 -1
- package/package.json +3 -1
- package/packages/cli/bin/haechi.mjs +3 -2
- package/packages/cli/runtime.mjs +12 -1
- package/packages/filter/index.mjs +679 -6
- package/packages/privacy-profiles/index.mjs +72 -3
- package/packages/protocol-adapters/index.mjs +99 -1
- package/packages/proxy/index.mjs +7 -1
- package/packages/stream-filter/index.mjs +69 -7
|
@@ -12,7 +12,11 @@ const PROFILES = {
|
|
|
12
12
|
email: "redact",
|
|
13
13
|
card: "block",
|
|
14
14
|
api_key: "block",
|
|
15
|
-
secret: "block"
|
|
15
|
+
secret: "block",
|
|
16
|
+
// A Japan My Number leak is as sensitive as a national ID and is a
|
|
17
|
+
// checksummed true-positive — block it in every profile so a non-JP
|
|
18
|
+
// deployment that happens to process JP data is still covered.
|
|
19
|
+
jp_mynumber: "block"
|
|
16
20
|
}
|
|
17
21
|
},
|
|
18
22
|
transfer: {
|
|
@@ -31,7 +35,17 @@ const PROFILES = {
|
|
|
31
35
|
card: "block",
|
|
32
36
|
api_key: "block",
|
|
33
37
|
secret: "block",
|
|
34
|
-
kr_rrn: "block"
|
|
38
|
+
kr_rrn: "block",
|
|
39
|
+
// EU national IDs — France NIR, Spain DNI/NIE, UK National Insurance
|
|
40
|
+
// Number, Italy codice fiscale, Germany tax ID, Netherlands BSN — are
|
|
41
|
+
// GDPR special-category-adjacent identifiers; block them.
|
|
42
|
+
fr_nir: "block",
|
|
43
|
+
es_dni: "block",
|
|
44
|
+
uk_nino: "block",
|
|
45
|
+
it_codice_fiscale: "block",
|
|
46
|
+
de_steuer_id: "block",
|
|
47
|
+
nl_bsn: "block",
|
|
48
|
+
jp_mynumber: "block"
|
|
35
49
|
}
|
|
36
50
|
},
|
|
37
51
|
transfer: {
|
|
@@ -39,6 +53,37 @@ const PROFILES = {
|
|
|
39
53
|
note: "Treat model/tool transfer as processor/subprocessor transfer and document SCC/TIA evidence outside Haechi."
|
|
40
54
|
}
|
|
41
55
|
},
|
|
56
|
+
"asia-pdpa": {
|
|
57
|
+
id: "asia-pdpa",
|
|
58
|
+
region: "ASIA",
|
|
59
|
+
regulations: ["Singapore PDPA", "India DPDP Act"],
|
|
60
|
+
policy: {
|
|
61
|
+
actions: {
|
|
62
|
+
// Asia national IDs — Singapore NRIC/FIN and India Aadhaar — are sensitive
|
|
63
|
+
// identifiers under the Singapore PDPA / India DPDP Act; block them. The
|
|
64
|
+
// other checksummed national IDs are also blocked so a mixed-region payload
|
|
65
|
+
// is covered, matching the cross-profile convention.
|
|
66
|
+
sg_nric: "block",
|
|
67
|
+
in_aadhaar: "block",
|
|
68
|
+
jp_mynumber: "block",
|
|
69
|
+
kr_rrn: "block",
|
|
70
|
+
fr_nir: "block",
|
|
71
|
+
es_dni: "block",
|
|
72
|
+
it_codice_fiscale: "block",
|
|
73
|
+
de_steuer_id: "block",
|
|
74
|
+
nl_bsn: "block",
|
|
75
|
+
phone: "mask",
|
|
76
|
+
email: "redact",
|
|
77
|
+
card: "block",
|
|
78
|
+
api_key: "block",
|
|
79
|
+
secret: "block"
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
transfer: {
|
|
83
|
+
requiresAssessment: true,
|
|
84
|
+
note: "Document the PDPA/DPDP handling basis, purpose limitation, and cross-border transfer notice before production use."
|
|
85
|
+
}
|
|
86
|
+
},
|
|
42
87
|
"us-general": {
|
|
43
88
|
id: "us-general",
|
|
44
89
|
region: "US",
|
|
@@ -49,13 +94,37 @@ const PROFILES = {
|
|
|
49
94
|
phone: "mask",
|
|
50
95
|
card: "block",
|
|
51
96
|
api_key: "block",
|
|
52
|
-
secret: "block"
|
|
97
|
+
secret: "block",
|
|
98
|
+
jp_mynumber: "block"
|
|
53
99
|
}
|
|
54
100
|
},
|
|
55
101
|
transfer: {
|
|
56
102
|
requiresAssessment: false,
|
|
57
103
|
note: "Classify sector rules separately before using protected health, payment, or children's data."
|
|
58
104
|
}
|
|
105
|
+
},
|
|
106
|
+
"jp-appi": {
|
|
107
|
+
id: "jp-appi",
|
|
108
|
+
region: "JP",
|
|
109
|
+
regulations: ["APPI"],
|
|
110
|
+
policy: {
|
|
111
|
+
actions: {
|
|
112
|
+
// My Number (個人番号) is a special-care personal-information identifier
|
|
113
|
+
// under the My Number Act; block it. kr_rrn is also blocked (the EU IDs are NOT listed in this profile) so a JP/KR
|
|
114
|
+
// mixed-region payload is covered, matching the cross-profile convention.
|
|
115
|
+
jp_mynumber: "block",
|
|
116
|
+
phone: "mask",
|
|
117
|
+
email: "redact",
|
|
118
|
+
card: "block",
|
|
119
|
+
api_key: "block",
|
|
120
|
+
secret: "block",
|
|
121
|
+
kr_rrn: "block"
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
transfer: {
|
|
125
|
+
requiresAssessment: true,
|
|
126
|
+
note: "Document the My Number Act handling basis, purpose limitation, and cross-border transfer notice before production use."
|
|
127
|
+
}
|
|
59
128
|
}
|
|
60
129
|
};
|
|
61
130
|
|
|
@@ -8,6 +8,28 @@ const SSE_RESPONSES = { format: "sse", deltaPath: null };
|
|
|
8
8
|
const SSE_LLAMA_LEGACY = { format: "sse", deltaPath: ["content"] };
|
|
9
9
|
const NDJSON_OLLAMA_CHAT = { format: "ndjson", deltaPath: ["message", "content"] };
|
|
10
10
|
const NDJSON_OLLAMA_GENERATE = { format: "ndjson", deltaPath: ["response"] };
|
|
11
|
+
// Anthropic Messages API streams event-typed SSE frames; the incremental text
|
|
12
|
+
// channel is `delta.text` inside a `content_block_delta` frame. Other frame
|
|
13
|
+
// types (message_start, ping, etc.) have no value at the delta path, so they get
|
|
14
|
+
// within-frame protection but no cross-frame buffering. The stream-filter
|
|
15
|
+
// preserves each frame's `event:` line on re-serialize. `flushOnType` lists the
|
|
16
|
+
// frame types that END a delta sequence: before one of them the held cross-frame
|
|
17
|
+
// buffer tail is flushed as a valid `content_block_delta`, so the residual lands
|
|
18
|
+
// IN ORDER (before content_block_stop/message_stop) rather than after the stream
|
|
19
|
+
// terminates. `ping` is intentionally absent — a match split across a keepalive
|
|
20
|
+
// must still be caught by the sliding buffer. Legacy /v1/complete streams a
|
|
21
|
+
// `completion` delta (no block framing, so no flushOnType needed).
|
|
22
|
+
const SSE_ANTHROPIC_MESSAGES = {
|
|
23
|
+
format: "sse",
|
|
24
|
+
deltaPath: ["delta", "text"],
|
|
25
|
+
flushOnType: { path: ["type"], values: ["content_block_stop", "message_delta", "message_stop"] }
|
|
26
|
+
};
|
|
27
|
+
const SSE_ANTHROPIC_COMPLETE = { format: "sse", deltaPath: ["completion"] };
|
|
28
|
+
// Google Gemini streams :streamGenerateContent as DATA-ONLY SSE (no `event:`
|
|
29
|
+
// lines, like OpenAI). Each `data:` frame is a FULL GenerateContentResponse;
|
|
30
|
+
// the incremental text channel is just deeper. Because frames are data-only,
|
|
31
|
+
// the held cross-frame tail can flush at end-of-stream — no flushOnType needed.
|
|
32
|
+
const SSE_GEMINI = { format: "sse", deltaPath: ["candidates", 0, "content", "parts", 0, "text"] };
|
|
11
33
|
|
|
12
34
|
const ADAPTERS = {
|
|
13
35
|
"openai-compatible": {
|
|
@@ -50,6 +72,47 @@ const ADAPTERS = {
|
|
|
50
72
|
route("/api/embed", "embed"),
|
|
51
73
|
route("/api/embeddings", "embeddings")
|
|
52
74
|
]
|
|
75
|
+
},
|
|
76
|
+
"anthropic": {
|
|
77
|
+
id: "anthropic",
|
|
78
|
+
protocol: "anthropic",
|
|
79
|
+
routes: [
|
|
80
|
+
// Anthropic Messages API. PII can sit in the top-level `system` string/blocks
|
|
81
|
+
// or any `messages[].content` string or content-block text/input — the core
|
|
82
|
+
// tree walk (collectStringEntries) covers every string leaf, so no custom
|
|
83
|
+
// extraction is needed. Streams via content_block_delta `delta.text`.
|
|
84
|
+
route("/v1/messages", "messages", { streaming: SSE_ANTHROPIC_MESSAGES }),
|
|
85
|
+
// count_tokens is a utility, but it carries prompt content, so protect it.
|
|
86
|
+
route("/v1/messages/count_tokens", "count-tokens", { protectRequest: true }),
|
|
87
|
+
// Legacy text completions: `prompt` is a top-level string; streams a `completion` delta.
|
|
88
|
+
route("/v1/complete", "complete", { streaming: SSE_ANTHROPIC_COMPLETE })
|
|
89
|
+
]
|
|
90
|
+
},
|
|
91
|
+
"gemini": {
|
|
92
|
+
id: "gemini",
|
|
93
|
+
protocol: "gemini",
|
|
94
|
+
routes: [
|
|
95
|
+
// Google Gemini API. Endpoints are MODEL-IN-PATH with a `:method` suffix:
|
|
96
|
+
// POST /v1beta/models/{model}:generateContent (and /v1, and arbitrary
|
|
97
|
+
// model names like gemini-2.0-flash). The route key is therefore the
|
|
98
|
+
// `:method` SUFFIX, not a fixed path — declared via `methodSuffix`, which
|
|
99
|
+
// matchRoute checks only AFTER exact-path matches (so existing adapters
|
|
100
|
+
// are unaffected). PII can sit in systemInstruction.parts[].text and any
|
|
101
|
+
// contents[].parts[].text; the core tree walk (collectStringEntries)
|
|
102
|
+
// covers every string leaf, so no custom extraction is needed.
|
|
103
|
+
suffixRoute("generateContent", "generate-content"),
|
|
104
|
+
// Streaming variant: data-only SSE, full GenerateContentResponse per frame;
|
|
105
|
+
// delta text lives at candidates[0].content.parts[0].text. The :stream*
|
|
106
|
+
// endpoint ALWAYS streams (the intent is in the path, not a body flag), so
|
|
107
|
+
// mark it streamingDefault — there is no `stream:false` body field for
|
|
108
|
+
// Gemini, so isStreamingRequest always classifies it as streaming.
|
|
109
|
+
suffixRoute("streamGenerateContent", "stream-generate-content", { streamingDefault: true, streaming: SSE_GEMINI }),
|
|
110
|
+
// countTokens carries prompt content (contents/systemInstruction), so protect it.
|
|
111
|
+
suffixRoute("countTokens", "count-tokens", { protectRequest: true }),
|
|
112
|
+
// Embedding endpoints: request carries text to embed; protect it.
|
|
113
|
+
suffixRoute("embedContent", "embed", { protectRequest: true }),
|
|
114
|
+
suffixRoute("batchEmbedContents", "batch-embed", { protectRequest: true })
|
|
115
|
+
]
|
|
53
116
|
}
|
|
54
117
|
};
|
|
55
118
|
|
|
@@ -121,10 +184,45 @@ function route(path, operation, options = {}) {
|
|
|
121
184
|
};
|
|
122
185
|
}
|
|
123
186
|
|
|
187
|
+
// A SUFFIX route matches by a `:method` suffix instead of an exact pathname —
|
|
188
|
+
// for model-in-path APIs (Gemini) where the path embeds an arbitrary model name
|
|
189
|
+
// and a version prefix (e.g. /v1beta/models/gemini-2.0-flash:generateContent).
|
|
190
|
+
// `path` stays null (there is no fixed path); `methodSuffix` carries the bare
|
|
191
|
+
// method name and matchRoute matches when the pathname ends with `:${suffix}`.
|
|
192
|
+
// matchRoute tries exact-path routes FIRST, so this never changes existing
|
|
193
|
+
// exact-match behavior for openai/anthropic/ollama/llama-cpp.
|
|
194
|
+
/**
 * Builds a route descriptor that is matched by a `:method` suffix instead of
 * an exact pathname (model-in-path APIs such as
 * /v1beta/models/gemini-2.0-flash:generateContent).
 *
 * @param {string} methodSuffix - Bare method name WITHOUT the leading ":";
 *   matchRoute prepends the ":" delimiter itself when comparing.
 * @param {string} operation - Operation name; also used as the route id.
 * @param {object} [options]
 * @param {boolean} [options.protectRequest=true]
 * @param {boolean} [options.protectResponse=true]
 * @param {boolean} [options.streamingDefault=false]
 * @param {object|null} [options.streaming=null] - Streaming descriptor, stored as given.
 * @returns {object} Route descriptor with `path: null` and `methodSuffix` set.
 * @throws {TypeError} When `methodSuffix` is not a non-empty string.
 */
function suffixRoute(methodSuffix, operation, options = {}) {
  // Fail at declaration time instead of producing a silently broken route:
  // an empty suffix would make matchRoute accept every pathname ending in ":",
  // and a non-string suffix is skipped by matchRoute's typeof guard, so the
  // route could never match at all.
  if (typeof methodSuffix !== "string" || methodSuffix.length === 0) {
    throw new TypeError("suffixRoute: methodSuffix must be a non-empty string");
  }
  return {
    id: operation,
    // There is no fixed path for a suffix route; exact-path matching must skip it.
    path: null,
    methodSuffix,
    operation,
    // `??` (not `||`) so an explicit `false` from the caller is respected.
    protectRequest: options.protectRequest ?? true,
    protectResponse: options.protectResponse ?? true,
    streamingDefault: options.streamingDefault ?? false,
    streaming: options.streaming ?? null
  };
}
|
|
206
|
+
|
|
124
207
|
/**
 * Extracts the pathname (no query string, no fragment) from a request URL.
 *
 * @param {string} url - Request target; may be relative (e.g. "/v1/messages?x=1")
 *   or absolute.
 * @returns {string} The pathname component as parsed by the WHATWG URL parser.
 */
function pathFromRequestUrl(url) {
  // A placeholder origin lets the URL parser accept relative request targets;
  // only the pathname is read, so the host is never used.
  // NOTE(review): a target beginning with "//" is parsed as scheme-relative, so
  // its first segment becomes the host rather than part of the pathname —
  // confirm callers never pass such a value through unnormalized.
  const { pathname } = new URL(url, "http://haechi.local");
  return pathname;
}
|
|
127
210
|
|
|
128
211
|
/**
 * Finds the route descriptor for a request pathname.
 *
 * Lookup is two-stage: an exact `path` match anywhere in the list wins; only
 * when none exists is the `:method`-suffix form tried (model-in-path APIs such
 * as /v1beta/models/gemini-2.0-flash:generateContent).
 *
 * @param {Array<object>} routes - Route descriptors (exact-path and/or suffix routes).
 * @param {string} pathname - Request pathname.
 * @returns {object|undefined} The first matching route, or undefined when none matches.
 */
function matchRoute(routes, pathname) {
  // Stage 1 predicate: byte-for-byte pathname equality. Suffix routes carry
  // `path: null`, so they can never satisfy this.
  const isExactPath = (entry) => entry.path === pathname;

  // Stage 2 predicate: the pathname must end with ":" + methodSuffix. The ":"
  // delimiter keeps a bare trailing substring (e.g. a model literally named
  // "generateContent") from matching.
  const endsWithMethod = (entry) =>
    typeof entry.methodSuffix === "string" &&
    pathname.endsWith(`:${entry.methodSuffix}`);

  return routes.find(isExactPath) ?? routes.find(endsWithMethod);
}
|
package/packages/proxy/index.mjs
CHANGED
|
@@ -545,7 +545,13 @@ async function authorizeRequest({ runtime, request, routeContext, rateLimiter, m
|
|
|
545
545
|
|
|
546
546
|
if (resolved.rate && resolved.rate.requestsPerMinute) {
|
|
547
547
|
const key = identity?.id ?? "anonymous";
|
|
548
|
-
|
|
548
|
+
// allow() may return a boolean OR a Promise<boolean>: the built-in default is
|
|
549
|
+
// synchronous, but a shared-store (e.g. Redis-backed) limiter is inherently
|
|
550
|
+
// async. We await unconditionally — `await <boolean>` returns the boolean
|
|
551
|
+
// unchanged, so the sync default keeps working, while `!somePromise` (always
|
|
552
|
+
// false, because a Promise is truthy) can no longer let an async limiter
|
|
553
|
+
// silently fail open. See haechi-ratelimit-redis (shared-store satellite).
|
|
554
|
+
if (!(await rateLimiter.allow(key, resolved.rate.requestsPerMinute))) {
|
|
549
555
|
await recordProxyDecision({
|
|
550
556
|
runtime, routeContext, identity, profile: resolved.profile, correlationId,
|
|
551
557
|
decision: "rate_limited",
|
|
@@ -10,10 +10,40 @@ const SSE_DONE = "[DONE]";
|
|
|
10
10
|
export async function inspectResponseStream({ source, sink, streaming, protector, format }) {
|
|
11
11
|
const wireFormat = format ?? streaming?.format ?? "ndjson";
|
|
12
12
|
const deltaPath = streaming?.deltaPath ?? null;
|
|
13
|
+
// Frame types that TERMINATE a delta sequence (declared per-adapter, e.g.
|
|
14
|
+
// Anthropic's content_block_stop/message_delta/message_stop). Before such a
|
|
15
|
+
// frame the held cross-frame buffer tail is flushed as a valid delta frame, so
|
|
16
|
+
// the residual lands in-order BEFORE the terminator — never after message_stop.
|
|
17
|
+
// Keepalives (ping) are deliberately NOT listed, so a match split across a ping
|
|
18
|
+
// is still caught by the sliding buffer.
|
|
19
|
+
const flushOnType = streaming?.flushOnType ?? null;
|
|
13
20
|
const decoder = new TextDecoder("utf-8");
|
|
14
21
|
const frames = createFrameSplitter(wireFormat);
|
|
15
22
|
|
|
16
23
|
let blocked = false;
|
|
24
|
+
// A structural template of the last frame that carried delta text, used to
|
|
25
|
+
// re-emit a held buffer tail as a VALID delta frame (preserving its wire
|
|
26
|
+
// wrapper — Anthropic's `event:` line — plus sibling fields like type/index).
|
|
27
|
+
let lastDeltaTemplate = null;
|
|
28
|
+
|
|
29
|
+
// Drains whatever text the protector is still holding and, if any is released,
// writes it to the sink as one more delta frame. Sets the enclosing `blocked`
// flag (and writes nothing) when the flush itself reports a block.
async function flushHeldTail() {
  const tail = await protector.flush();
  if (tail.blocked) {
    blocked = true;
    return;
  }

  // Nothing to emit: either no residual text, or this stream has no delta
  // channel to carry it.
  const hasResidual = Boolean(tail.text) && Boolean(deltaPath);
  if (!hasResidual) {
    return;
  }

  const template = lastDeltaTemplate;
  // Prefer re-using the shape of the last delta frame (sibling fields plus its
  // original wire wrapper); otherwise synthesize a minimal frame that contains
  // only the delta path.
  const payload = template
    ? structuredClone(template.object)
    : buildPathObject(deltaPath, tail.text);
  if (template) {
    setByPath(payload, deltaPath, tail.text);
  }
  sink.write(serializeFrame(payload, wireFormat, template ? template.original : null));
}
|
|
17
47
|
|
|
18
48
|
async function handleFrame(raw) {
|
|
19
49
|
const frame = { raw, body: raw.trim() };
|
|
@@ -26,6 +56,16 @@ export async function inspectResponseStream({ source, sink, streaming, protector
|
|
|
26
56
|
}
|
|
27
57
|
|
|
28
58
|
const json = parsed.json;
|
|
59
|
+
|
|
60
|
+
// A delta-terminating frame: flush the held tail (as a valid delta frame)
|
|
61
|
+
// before emitting it, so the residual is correctly ordered.
|
|
62
|
+
if (flushOnType && flushOnType.values.includes(getByPath(json, flushOnType.path))) {
|
|
63
|
+
await flushHeldTail();
|
|
64
|
+
if (blocked) {
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
29
69
|
let deltaText = null;
|
|
30
70
|
if (deltaPath) {
|
|
31
71
|
const found = getByPath(json, deltaPath);
|
|
@@ -50,6 +90,8 @@ export async function inspectResponseStream({ source, sink, streaming, protector
|
|
|
50
90
|
return;
|
|
51
91
|
}
|
|
52
92
|
setByPath(frameObject, deltaPath, pushed.text);
|
|
93
|
+
// Snapshot this frame's structure + wire wrapper as the flush template.
|
|
94
|
+
lastDeltaTemplate = { object: structuredClone(frameObject), original: frame };
|
|
53
95
|
}
|
|
54
96
|
|
|
55
97
|
sink.write(serializeFrame(frameObject, wireFormat, frame));
|
|
@@ -77,13 +119,8 @@ export async function inspectResponseStream({ source, sink, streaming, protector
|
|
|
77
119
|
}
|
|
78
120
|
|
|
79
121
|
if (!blocked) {
|
|
80
|
-
// Flush
|
|
81
|
-
|
|
82
|
-
if (flushed.blocked) {
|
|
83
|
-
blocked = true;
|
|
84
|
-
} else if (flushed.text && deltaPath) {
|
|
85
|
-
sink.write(serializeFrame(buildPathObject(deltaPath, flushed.text), wireFormat, null));
|
|
86
|
-
}
|
|
122
|
+
// Flush any remaining held tail (a stream that ended on a delta frame).
|
|
123
|
+
await flushHeldTail();
|
|
87
124
|
}
|
|
88
125
|
|
|
89
126
|
// The caller closes the sink AFTER recording the stream decision, so the
|
|
@@ -148,6 +185,31 @@ function parseFrame(frame, format) {
|
|
|
148
185
|
function serializeFrame(json, format, original) {
|
|
149
186
|
const body = JSON.stringify(json);
|
|
150
187
|
if (format === "sse") {
|
|
188
|
+
// Preserve the original SSE field lines (`event:`, `id:`, `retry:`, `:`
|
|
189
|
+
// comments) and substitute only the data payload. Event-typed streams
|
|
190
|
+
// (Anthropic Messages) dispatch on the `event:` line, so dropping it would
|
|
191
|
+
// make the stream unconsumable. OpenAI-style frames carry only a `data:`
|
|
192
|
+
// line, so the output is byte-identical to `data: ${body}\n\n`.
|
|
193
|
+
if (original && typeof original.raw === "string") {
|
|
194
|
+
const lines = original.raw.replace(/\n+$/, "").split("\n");
|
|
195
|
+
const out = [];
|
|
196
|
+
let dataWritten = false;
|
|
197
|
+
for (const line of lines) {
|
|
198
|
+
if (line.startsWith("data:")) {
|
|
199
|
+
// Collapse any (multi-line) data payload into the single new body.
|
|
200
|
+
if (!dataWritten) {
|
|
201
|
+
out.push(`data: ${body}`);
|
|
202
|
+
dataWritten = true;
|
|
203
|
+
}
|
|
204
|
+
} else {
|
|
205
|
+
out.push(line);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
if (!dataWritten) {
|
|
209
|
+
out.push(`data: ${body}`);
|
|
210
|
+
}
|
|
211
|
+
return `${out.join("\n")}\n\n`;
|
|
212
|
+
}
|
|
151
213
|
return `data: ${body}\n\n`;
|
|
152
214
|
}
|
|
153
215
|
// NDJSON: preserve the original trailing newline style when available.
|