haechi 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,11 @@ const PROFILES = {
12
12
  email: "redact",
13
13
  card: "block",
14
14
  api_key: "block",
15
- secret: "block"
15
+ secret: "block",
16
+ // A Japan My Number leak is as sensitive as a national ID and is a
17
+ // checksummed true-positive — block it in every profile so a non-JP
18
+ // deployment that happens to process JP data is still covered.
19
+ jp_mynumber: "block"
16
20
  }
17
21
  },
18
22
  transfer: {
@@ -31,7 +35,17 @@ const PROFILES = {
31
35
  card: "block",
32
36
  api_key: "block",
33
37
  secret: "block",
34
- kr_rrn: "block"
38
+ kr_rrn: "block",
39
+ // EU national IDs — France NIR, Spain DNI/NIE, UK National Insurance
40
+ // Number, Italy codice fiscale, Germany tax ID, Netherlands BSN — are
41
+ // GDPR special-category-adjacent identifiers; block them.
42
+ fr_nir: "block",
43
+ es_dni: "block",
44
+ uk_nino: "block",
45
+ it_codice_fiscale: "block",
46
+ de_steuer_id: "block",
47
+ nl_bsn: "block",
48
+ jp_mynumber: "block"
35
49
  }
36
50
  },
37
51
  transfer: {
@@ -39,6 +53,37 @@ const PROFILES = {
39
53
  note: "Treat model/tool transfer as processor/subprocessor transfer and document SCC/TIA evidence outside Haechi."
40
54
  }
41
55
  },
56
+ "asia-pdpa": {
57
+ id: "asia-pdpa",
58
+ region: "ASIA",
59
+ regulations: ["Singapore PDPA", "India DPDP Act"],
60
+ policy: {
61
+ actions: {
62
+ // Asia national IDs — Singapore NRIC/FIN and India Aadhaar — are sensitive
63
+ // identifiers under the Singapore PDPA / India DPDP Act; block them. The
64
+ // other checksummed national IDs are also blocked so a mixed-region payload
65
+ // is covered, matching the cross-profile convention.
66
+ sg_nric: "block",
67
+ in_aadhaar: "block",
68
+ jp_mynumber: "block",
69
+ kr_rrn: "block",
70
+ fr_nir: "block",
71
+ es_dni: "block",
72
+ it_codice_fiscale: "block",
73
+ de_steuer_id: "block",
74
+ nl_bsn: "block",
75
+ phone: "mask",
76
+ email: "redact",
77
+ card: "block",
78
+ api_key: "block",
79
+ secret: "block"
80
+ }
81
+ },
82
+ transfer: {
83
+ requiresAssessment: true,
84
+ note: "Document the PDPA/DPDP handling basis, purpose limitation, and cross-border transfer notice before production use."
85
+ }
86
+ },
42
87
  "us-general": {
43
88
  id: "us-general",
44
89
  region: "US",
@@ -49,13 +94,37 @@ const PROFILES = {
49
94
  phone: "mask",
50
95
  card: "block",
51
96
  api_key: "block",
52
- secret: "block"
97
+ secret: "block",
98
+ jp_mynumber: "block"
53
99
  }
54
100
  },
55
101
  transfer: {
56
102
  requiresAssessment: false,
57
103
  note: "Classify sector rules separately before using protected health, payment, or children's data."
58
104
  }
105
+ },
106
+ "jp-appi": {
107
+ id: "jp-appi",
108
+ region: "JP",
109
+ regulations: ["APPI"],
110
+ policy: {
111
+ actions: {
112
+ // My Number (個人番号) is a special-care personal-information identifier
113
+ // under the My Number Act; block it. The KR RRN is also blocked so a
114
+ // mixed-region payload is covered, matching the cross-profile convention.
115
+ jp_mynumber: "block",
116
+ phone: "mask",
117
+ email: "redact",
118
+ card: "block",
119
+ api_key: "block",
120
+ secret: "block",
121
+ kr_rrn: "block"
122
+ }
123
+ },
124
+ transfer: {
125
+ requiresAssessment: true,
126
+ note: "Document the My Number Act handling basis, purpose limitation, and cross-border transfer notice before production use."
127
+ }
59
128
  }
60
129
  };
61
130
 
@@ -8,6 +8,28 @@ const SSE_RESPONSES = { format: "sse", deltaPath: null };
8
8
  const SSE_LLAMA_LEGACY = { format: "sse", deltaPath: ["content"] };
9
9
  const NDJSON_OLLAMA_CHAT = { format: "ndjson", deltaPath: ["message", "content"] };
10
10
  const NDJSON_OLLAMA_GENERATE = { format: "ndjson", deltaPath: ["response"] };
11
+ // Anthropic Messages API streams event-typed SSE frames; the incremental text
12
+ // channel is `delta.text` inside a `content_block_delta` frame. Other frame
13
+ // types (message_start, ping, etc.) carry no text at `deltaPath`, so they get
14
+ // within-frame protection but no cross-frame buffering. The stream-filter
15
+ // preserves each frame's `event:` line on re-serialize. `flushOnType` lists the
16
+ // frame types that END a delta sequence: before one of them the held cross-frame
17
+ // buffer tail is flushed as a valid `content_block_delta`, so the residual lands
18
+ // IN ORDER (before content_block_stop/message_stop) rather than after the stream
19
+ // terminates. `ping` is intentionally absent — a match split across a keepalive
20
+ // must still be caught by the sliding buffer. Legacy /v1/complete streams a
21
+ // `completion` delta (no block framing, so no flushOnType needed).
22
+ const SSE_ANTHROPIC_MESSAGES = {
23
+ format: "sse",
24
+ deltaPath: ["delta", "text"],
25
+ flushOnType: { path: ["type"], values: ["content_block_stop", "message_delta", "message_stop"] }
26
+ };
27
+ const SSE_ANTHROPIC_COMPLETE = { format: "sse", deltaPath: ["completion"] };
28
+ // Google Gemini streams :streamGenerateContent as DATA-ONLY SSE (no `event:`
29
+ // lines, like OpenAI). Each `data:` frame is a FULL GenerateContentResponse;
30
+ // the incremental text channel is just deeper. Because frames are data-only,
31
+ // the held cross-frame tail can flush at end-of-stream — no flushOnType needed.
32
+ const SSE_GEMINI = { format: "sse", deltaPath: ["candidates", 0, "content", "parts", 0, "text"] };
11
33
 
12
34
  const ADAPTERS = {
13
35
  "openai-compatible": {
@@ -50,6 +72,47 @@ const ADAPTERS = {
50
72
  route("/api/embed", "embed"),
51
73
  route("/api/embeddings", "embeddings")
52
74
  ]
75
+ },
76
+ "anthropic": {
77
+ id: "anthropic",
78
+ protocol: "anthropic",
79
+ routes: [
80
+ // Anthropic Messages API. PII can sit in the top-level `system` string/blocks
81
+ // or any `messages[].content` string or content-block text/input — the core
82
+ // tree walk (collectStringEntries) covers every string leaf, so no custom
83
+ // extraction is needed. Streams via content_block_delta `delta.text`.
84
+ route("/v1/messages", "messages", { streaming: SSE_ANTHROPIC_MESSAGES }),
85
+ // count_tokens is a utility, but it carries prompt content, so protect it.
86
+ route("/v1/messages/count_tokens", "count-tokens", { protectRequest: true }),
87
+ // Legacy text completions: `prompt` is a top-level string; streams a `completion` delta.
88
+ route("/v1/complete", "complete", { streaming: SSE_ANTHROPIC_COMPLETE })
89
+ ]
90
+ },
91
+ "gemini": {
92
+ id: "gemini",
93
+ protocol: "gemini",
94
+ routes: [
95
+ // Google Gemini API. Endpoints are MODEL-IN-PATH with a `:method` suffix:
96
+ // POST /v1beta/models/{model}:generateContent (and /v1, and arbitrary
97
+ // model names like gemini-2.0-flash). The route key is therefore the
98
+ // `:method` SUFFIX, not a fixed path — declared via `methodSuffix`, which
99
+ // matchRoute checks only AFTER exact-path matches (so existing adapters
100
+ // are unaffected). PII can sit in systemInstruction.parts[].text and any
101
+ // contents[].parts[].text; the core tree walk (collectStringEntries)
102
+ // covers every string leaf, so no custom extraction is needed.
103
+ suffixRoute("generateContent", "generate-content"),
104
+ // Streaming variant: data-only SSE, full GenerateContentResponse per frame;
105
+ // delta text lives at candidates[0].content.parts[0].text. The :stream*
106
+ // endpoint ALWAYS streams (the intent is in the path, not a body flag), so
107
+ // mark it streamingDefault — there is no `stream:false` body field for
108
+ // Gemini, so isStreamingRequest always classifies it as streaming.
109
+ suffixRoute("streamGenerateContent", "stream-generate-content", { streamingDefault: true, streaming: SSE_GEMINI }),
110
+ // countTokens carries prompt content (contents/systemInstruction), so protect it.
111
+ suffixRoute("countTokens", "count-tokens", { protectRequest: true }),
112
+ // Embedding endpoints: request carries text to embed; protect it.
113
+ suffixRoute("embedContent", "embed", { protectRequest: true }),
114
+ suffixRoute("batchEmbedContents", "batch-embed", { protectRequest: true })
115
+ ]
53
116
  }
54
117
  };
55
118
 
@@ -121,10 +184,45 @@ function route(path, operation, options = {}) {
121
184
  };
122
185
  }
123
186
 
187
/**
 * Declare a route that is matched by a `:method` suffix instead of an exact
 * pathname — for model-in-path APIs (Gemini) where the path embeds an arbitrary
 * model name and a version prefix, e.g.
 * `/v1beta/models/gemini-2.0-flash:generateContent`.
 *
 * The returned descriptor has `path: null` (there is no fixed path) and carries
 * the bare method name in `methodSuffix`; matchRoute consults `methodSuffix`
 * only after its exact-path pass, so exact-path routes are unaffected.
 *
 * @param {string} methodSuffix - Method name, e.g. "generateContent". A single
 *   leading ":" is tolerated and stripped.
 * @param {string} operation - Operation name; also used as the route `id`.
 * @param {object} [options]
 * @param {boolean} [options.protectRequest=true]
 * @param {boolean} [options.protectResponse=true]
 * @param {boolean} [options.streamingDefault=false]
 * @param {object|null} [options.streaming=null] - Streaming descriptor, if any.
 * @returns {object} The route descriptor.
 * @throws {TypeError} If `methodSuffix` is not a string or names no method.
 */
function suffixRoute(methodSuffix, operation, options = {}) {
  if (typeof methodSuffix !== "string") {
    throw new TypeError("suffixRoute: methodSuffix must be a string");
  }
  // matchRoute prepends the ":" delimiter itself, so a suffix written WITH its
  // colon would be looked up as "::method" and silently never match. Accept
  // that spelling by normalizing it to the bare method name.
  const bareSuffix = methodSuffix.startsWith(":") ? methodSuffix.slice(1) : methodSuffix;
  // An empty suffix would match every pathname that merely ends in ":".
  if (bareSuffix === "") {
    throw new TypeError("suffixRoute: methodSuffix must not be empty");
  }
  return {
    id: operation,
    path: null,
    methodSuffix: bareSuffix,
    operation,
    protectRequest: options.protectRequest ?? true,
    protectResponse: options.protectResponse ?? true,
    streamingDefault: options.streamingDefault ?? false,
    streaming: options.streaming ?? null
  };
}
206
+
124
207
  function pathFromRequestUrl(url) {
125
208
  return new URL(url, "http://haechi.local").pathname;
126
209
  }
127
210
 
128
211
  function matchRoute(routes, pathname) {
129
- return routes.find((candidate) => candidate.path === pathname);
212
+ // 1) EXACT pathname match (unchanged) the only matcher for openai/anthropic/
213
+ // ollama/llama-cpp, so their classification is byte-for-byte identical.
214
+ const exact = routes.find((candidate) => candidate.path === pathname);
215
+ if (exact) {
216
+ return exact;
217
+ }
218
+ // 2) ADDITIVE: `:method`-SUFFIX match for model-in-path APIs (Gemini). A path
219
+ // like /v1beta/models/gemini-2.0-flash:generateContent matches the route
220
+ // whose methodSuffix the pathname ends with (`...:generateContent`). The
221
+ // `:` guard prevents a bare substring (e.g. a model literally named
222
+ // "generateContent") from matching without the method delimiter.
223
+ return routes.find(
224
+ (candidate) =>
225
+ typeof candidate.methodSuffix === "string" &&
226
+ pathname.endsWith(`:${candidate.methodSuffix}`)
227
+ );
130
228
  }
@@ -545,7 +545,13 @@ async function authorizeRequest({ runtime, request, routeContext, rateLimiter, m
545
545
 
546
546
  if (resolved.rate && resolved.rate.requestsPerMinute) {
547
547
  const key = identity?.id ?? "anonymous";
548
- if (!rateLimiter.allow(key, resolved.rate.requestsPerMinute)) {
548
+ // allow() may return a boolean OR a Promise<boolean>: the built-in default is
549
+ // synchronous, but a shared-store (e.g. Redis-backed) limiter is inherently
550
+ // async. We await unconditionally — `await <boolean>` returns the boolean
551
+ // unchanged, so the sync default keeps working, while `!somePromise` (always
552
+ // false, because a Promise is truthy) can no longer let an async limiter
553
+ // silently fail open. See haechi-ratelimit-redis (shared-store satellite).
554
+ if (!(await rateLimiter.allow(key, resolved.rate.requestsPerMinute))) {
549
555
  await recordProxyDecision({
550
556
  runtime, routeContext, identity, profile: resolved.profile, correlationId,
551
557
  decision: "rate_limited",
@@ -10,10 +10,40 @@ const SSE_DONE = "[DONE]";
10
10
  export async function inspectResponseStream({ source, sink, streaming, protector, format }) {
11
11
  const wireFormat = format ?? streaming?.format ?? "ndjson";
12
12
  const deltaPath = streaming?.deltaPath ?? null;
13
+ // Frame types that TERMINATE a delta sequence (declared per-adapter, e.g.
14
+ // Anthropic's content_block_stop/message_delta/message_stop). Before such a
15
+ // frame the held cross-frame buffer tail is flushed as a valid delta frame, so
16
+ // the residual lands in-order BEFORE the terminator — never after message_stop.
17
+ // Keepalives (ping) are deliberately NOT listed, so a match split across a ping
18
+ // is still caught by the sliding buffer.
19
+ const flushOnType = streaming?.flushOnType ?? null;
13
20
  const decoder = new TextDecoder("utf-8");
14
21
  const frames = createFrameSplitter(wireFormat);
15
22
 
16
23
  let blocked = false;
24
+ // A structural template of the last frame that carried delta text, used to
25
+ // re-emit a held buffer tail as a VALID delta frame (preserving its wire
26
+ // wrapper — Anthropic's `event:` line — plus sibling fields like type/index).
27
+ let lastDeltaTemplate = null;
28
+
29
// Drain the protector's held buffer tail. A blocked flush marks the whole
// stream as blocked and emits nothing. Otherwise any residual text is written
// to the sink as a delta frame: modelled on the last delta frame seen (its
// structure and original wire frame) when there is one, or as a minimal
// synthesized frame built from deltaPath when there is not.
async function flushHeldTail() {
  const tail = await protector.flush();
  if (tail.blocked) {
    blocked = true;
    return;
  }

  const hasResidual = Boolean(tail.text) && Boolean(deltaPath);
  if (!hasResidual) {
    return;
  }

  if (!lastDeltaTemplate) {
    // No prior delta frame to model — fall back to a minimal synthesized frame.
    const minimal = buildPathObject(deltaPath, tail.text);
    sink.write(serializeFrame(minimal, wireFormat, null));
    return;
  }

  // Clone so the stored template is never mutated by the residual text.
  const replay = structuredClone(lastDeltaTemplate.object);
  setByPath(replay, deltaPath, tail.text);
  sink.write(serializeFrame(replay, wireFormat, lastDeltaTemplate.original));
}
17
47
 
18
48
  async function handleFrame(raw) {
19
49
  const frame = { raw, body: raw.trim() };
@@ -26,6 +56,16 @@ export async function inspectResponseStream({ source, sink, streaming, protector
26
56
  }
27
57
 
28
58
  const json = parsed.json;
59
+
60
+ // A delta-terminating frame: flush the held tail (as a valid delta frame)
61
+ // before emitting it, so the residual is correctly ordered.
62
+ if (flushOnType && flushOnType.values.includes(getByPath(json, flushOnType.path))) {
63
+ await flushHeldTail();
64
+ if (blocked) {
65
+ return;
66
+ }
67
+ }
68
+
29
69
  let deltaText = null;
30
70
  if (deltaPath) {
31
71
  const found = getByPath(json, deltaPath);
@@ -50,6 +90,8 @@ export async function inspectResponseStream({ source, sink, streaming, protector
50
90
  return;
51
91
  }
52
92
  setByPath(frameObject, deltaPath, pushed.text);
93
+ // Snapshot this frame's structure + wire wrapper as the flush template.
94
+ lastDeltaTemplate = { object: structuredClone(frameObject), original: frame };
53
95
  }
54
96
 
55
97
  sink.write(serializeFrame(frameObject, wireFormat, frame));
@@ -77,13 +119,8 @@ export async function inspectResponseStream({ source, sink, streaming, protector
77
119
  }
78
120
 
79
121
  if (!blocked) {
80
- // Flush the held tail of the delta buffer as a synthesized final frame.
81
- const flushed = await protector.flush();
82
- if (flushed.blocked) {
83
- blocked = true;
84
- } else if (flushed.text && deltaPath) {
85
- sink.write(serializeFrame(buildPathObject(deltaPath, flushed.text), wireFormat, null));
86
- }
122
+ // Flush any remaining held tail (a stream that ended on a delta frame).
123
+ await flushHeldTail();
87
124
  }
88
125
 
89
126
  // The caller closes the sink AFTER recording the stream decision, so the
@@ -148,6 +185,31 @@ function parseFrame(frame, format) {
148
185
  function serializeFrame(json, format, original) {
149
186
  const body = JSON.stringify(json);
150
187
  if (format === "sse") {
188
+ // Preserve the original SSE field lines (`event:`, `id:`, `retry:`, `:`
189
+ // comments) and substitute only the data payload. Event-typed streams
190
+ // (Anthropic Messages) dispatch on the `event:` line, so dropping it would
191
+ // make the stream unconsumable. OpenAI-style frames carry only a `data:`
192
+ // line, so the output is byte-identical to `data: ${body}\n\n`.
193
+ if (original && typeof original.raw === "string") {
194
+ const lines = original.raw.replace(/\n+$/, "").split("\n");
195
+ const out = [];
196
+ let dataWritten = false;
197
+ for (const line of lines) {
198
+ if (line.startsWith("data:")) {
199
+ // Collapse any (multi-line) data payload into the single new body.
200
+ if (!dataWritten) {
201
+ out.push(`data: ${body}`);
202
+ dataWritten = true;
203
+ }
204
+ } else {
205
+ out.push(line);
206
+ }
207
+ }
208
+ if (!dataWritten) {
209
+ out.push(`data: ${body}`);
210
+ }
211
+ return `${out.join("\n")}\n\n`;
212
+ }
151
213
  return `data: ${body}\n\n`;
152
214
  }
153
215
  // NDJSON: preserve the original trailing newline style when available.