haechi 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +227 -0
- package/README.md +13 -4
- package/docs/README.md +3 -6
- package/docs/current/api-stability.ko.md +2 -1
- package/docs/current/api-stability.md +1 -0
- package/docs/current/configuration.ko.md +210 -0
- package/docs/current/configuration.md +210 -0
- package/docs/current/release-0.5-implementation-scope.ko.md +69 -0
- package/docs/current/release-0.5-implementation-scope.md +69 -0
- package/docs/current/release-process.ko.md +2 -2
- package/docs/current/release-process.md +2 -2
- package/docs/current/risk-register-release-gate.ko.md +2 -2
- package/docs/current/risk-register-release-gate.md +2 -2
- package/docs/current/threat-model.ko.md +6 -4
- package/docs/current/threat-model.md +5 -3
- package/haechi.config.example.json +3 -1
- package/package.json +3 -2
- package/packages/cli/bin/haechi.mjs +163 -22
- package/packages/cli/runtime.mjs +10 -2
- package/packages/core/index.mjs +110 -1
- package/packages/protocol-adapters/index.mjs +33 -14
- package/packages/proxy/index.mjs +108 -1
- package/packages/stream-filter/index.mjs +194 -0
|
@@ -10,6 +10,7 @@ import { DEFAULT_CONFIG_PATH, createRuntime, isValidPort, loadConfig, writeDefau
|
|
|
10
10
|
|
|
11
11
|
const [command, ...argv] = process.argv.slice(2);
|
|
12
12
|
|
|
13
|
+
async function main(command, argv) {
|
|
13
14
|
try {
|
|
14
15
|
switch (command) {
|
|
15
16
|
case "init":
|
|
@@ -54,19 +55,23 @@ try {
|
|
|
54
55
|
case "mcp-wrap":
|
|
55
56
|
await mcpWrapCommand(argv);
|
|
56
57
|
break;
|
|
58
|
+
case "config":
|
|
59
|
+
printConfigGuide();
|
|
60
|
+
break;
|
|
57
61
|
case "help":
|
|
58
62
|
case "--help":
|
|
59
63
|
case "-h":
|
|
60
64
|
case undefined:
|
|
61
|
-
printHelp();
|
|
65
|
+
printHelp(argv[0]);
|
|
62
66
|
break;
|
|
63
67
|
default:
|
|
64
|
-
throw new Error(`Unknown command: ${command}
|
|
68
|
+
throw new Error(`Unknown command: ${command}. Run 'haechi help' for usage.`);
|
|
65
69
|
}
|
|
66
70
|
} catch (error) {
|
|
67
71
|
console.error(`haechi: ${error.message}`);
|
|
68
72
|
process.exitCode = process.exitCode || 1;
|
|
69
73
|
}
|
|
74
|
+
}
|
|
70
75
|
|
|
71
76
|
async function initCommand(argv) {
|
|
72
77
|
const options = parseOptions(argv);
|
|
@@ -222,7 +227,8 @@ async function statusCommand(argv) {
|
|
|
222
227
|
mode: config.responseProtection.mode,
|
|
223
228
|
failureMode: config.responseProtection.failureMode
|
|
224
229
|
},
|
|
225
|
-
streamingRequestMode: config.streaming.requestMode
|
|
230
|
+
streamingRequestMode: config.streaming.requestMode,
|
|
231
|
+
streamingResponseMode: config.streaming.responseMode
|
|
226
232
|
},
|
|
227
233
|
target: {
|
|
228
234
|
type: config.target.type,
|
|
@@ -464,26 +470,161 @@ function parsePort(value) {
|
|
|
464
470
|
return port;
|
|
465
471
|
}
|
|
466
472
|
|
|
467
|
-
|
|
468
|
-
|
|
473
|
+
const COMMAND_HELP = {
|
|
474
|
+
init: {
|
|
475
|
+
usage: "haechi init [--config haechi.config.json] [--force]",
|
|
476
|
+
summary: "Create a local key, sample config, and audit path.",
|
|
477
|
+
detail: "Writes haechi.config.json and .haechi/dev.keys.json (0600). --force rotates the key (prior keys are retired, not deleted) and overwrites the config."
|
|
478
|
+
},
|
|
479
|
+
protect: {
|
|
480
|
+
usage: "haechi protect <input.json> [--config haechi.config.json]",
|
|
481
|
+
summary: "Inspect and protect a JSON payload, printing the result.",
|
|
482
|
+
detail: "Reads input.json, applies the policy, and prints the protected payload, audit id, and warnings. Exit 3 if the payload is blocked."
|
|
483
|
+
},
|
|
484
|
+
report: {
|
|
485
|
+
usage: "haechi report [--audit .haechi/audit.jsonl]",
|
|
486
|
+
summary: "Summarize audit events without raw payloads."
|
|
487
|
+
},
|
|
488
|
+
"audit-verify": {
|
|
489
|
+
usage: "haechi audit-verify [--audit .haechi/audit.jsonl] [--config haechi.config.json]",
|
|
490
|
+
summary: "Verify the audit hash chain; print validity, record count, and head hash.",
|
|
491
|
+
detail: "Exit 4 on a broken chain. The head hash is the value to anchor externally against tail truncation."
|
|
492
|
+
},
|
|
493
|
+
status: {
|
|
494
|
+
usage: "haechi status [--config haechi.config.json]",
|
|
495
|
+
summary: "Show what is and is not protected under the current config.",
|
|
496
|
+
detail: "Prints effective policy mode, response/streaming protection, target, token vault governance, key file permissions, audit chain status, and a consolidated warnings list."
|
|
497
|
+
},
|
|
498
|
+
proxy: {
|
|
499
|
+
usage: `haechi proxy [--config haechi.config.json] [--host 127.0.0.1] [--port ${DEFAULT_PROXY_PORT}] [--allow-remote-bind]`,
|
|
500
|
+
summary: "Run the local HTTP JSON proxy in front of an upstream LLM.",
|
|
501
|
+
detail: "Binds loopback by default; --allow-remote-bind is required (and must be a CLI flag, not config) to bind non-loopback hosts. There is no client auth yet — see 'haechi config'."
|
|
502
|
+
},
|
|
503
|
+
"policy-sign": {
|
|
504
|
+
usage: "haechi policy-sign <policy.json> [--config haechi.config.json] [--out policy.bundle.json]",
|
|
505
|
+
summary: "Sign a policy file into a verifiable bundle."
|
|
506
|
+
},
|
|
507
|
+
"policy-verify": {
|
|
508
|
+
usage: "haechi policy-verify <policy.bundle.json> [--config haechi.config.json]",
|
|
509
|
+
summary: "Verify a signed policy bundle against the configured key."
|
|
510
|
+
},
|
|
511
|
+
"token-reveal": {
|
|
512
|
+
usage: "haechi token-reveal <token> [--config haechi.config.json] [--allow-dev-reveal]",
|
|
513
|
+
summary: "Reveal a tokenized value (governed by tokenVault.revealPolicy; audited).",
|
|
514
|
+
detail: "Fails unless revealPolicy is local-dev or --allow-dev-reveal is passed."
|
|
515
|
+
},
|
|
516
|
+
"token-purge": {
|
|
517
|
+
usage: "haechi token-purge <token> [--config haechi.config.json]\n haechi token-purge --expired [--config haechi.config.json]",
|
|
518
|
+
summary: "Purge a specific token, or all expired tokens with --expired."
|
|
519
|
+
},
|
|
520
|
+
"token-export": {
|
|
521
|
+
usage: "haechi token-export [--config haechi.config.json] [--type email]",
|
|
522
|
+
summary: "Export token metadata (never plaintext), optionally filtered by type."
|
|
523
|
+
},
|
|
524
|
+
"plugin-validate": {
|
|
525
|
+
usage: "haechi plugin-validate <plugin-manifest.json>",
|
|
526
|
+
summary: "Validate a plugin manifest (manifest-only; dynamic runtime is rejected)."
|
|
527
|
+
},
|
|
528
|
+
"mcp-stdio": {
|
|
529
|
+
usage: "haechi mcp-stdio [--config haechi.config.json]",
|
|
530
|
+
summary: "Filter MCP JSON-RPC traffic on stdin/stdout (one direction)."
|
|
531
|
+
},
|
|
532
|
+
"mcp-wrap": {
|
|
533
|
+
usage: "haechi mcp-wrap [--config haechi.config.json] -- <command> [args...]",
|
|
534
|
+
summary: "Wrap an MCP server with bidirectional stdio protection.",
|
|
535
|
+
detail: "Spawns <command>, applies the method allowlist + params protection client→server, and result protection + injection heuristics server→client. Drop-in for MCP client configs."
|
|
536
|
+
},
|
|
537
|
+
config: {
|
|
538
|
+
usage: "haechi config",
|
|
539
|
+
summary: "Print the configuration guide (keys, defaults, common setups)."
|
|
540
|
+
}
|
|
541
|
+
};
|
|
542
|
+
|
|
543
|
+
/**
 * Print CLI help to stdout.
 *
 * With the name of a known command, prints that command's summary, usage and
 * (when present) detail text from COMMAND_HELP. With no topic, or a topic that
 * is not a command, prints the general overview listing every command.
 *
 * @param {string} [topic] - Command name to describe (first argument after `help`).
 * @returns {void}
 */
function printHelp(topic) {
  // Own-property check: COMMAND_HELP is a plain object, so a topic such as
  // "constructor" or "toString" would otherwise resolve to an inherited member
  // and print "undefined" for the summary and usage.
  const isKnownCommand = typeof topic === "string"
    && Object.prototype.hasOwnProperty.call(COMMAND_HELP, topic);
  if (isKnownCommand) {
    const entry = COMMAND_HELP[topic];
    console.log(`haechi ${topic} — ${entry.summary}\n\nUsage:\n ${entry.usage}${entry.detail ? `\n\n${entry.detail}` : ""}`);
    return;
  }

  // Display order for the overview; every name here must be a COMMAND_HELP key.
  const order = [
    "init", "protect", "report", "status", "audit-verify", "proxy",
    "policy-sign", "policy-verify",
    "token-reveal", "token-purge", "token-export",
    "plugin-validate", "mcp-stdio", "mcp-wrap", "config"
  ];
  const lines = order.map((name) => ` ${name.padEnd(16)}${COMMAND_HELP[name].summary}`);
  console.log(`Haechi — self-hosted AI context enforcement (developer preview)

Usage:
haechi <command> [options]
haechi help <command> show usage for one command

Commands:
${lines.join("\n")}

Getting started:
haechi init write config + local key
haechi status see what is protected
haechi config configuration guide

The default policy mode is dry-run (detect + audit only). Set policy.mode to
"enforce" to transform or block. Run 'haechi config' for all settings.
`);
}
|
|
575
|
+
|
|
576
|
+
/**
 * Print the condensed configuration guide to stdout.
 *
 * Lists the main config keys with their accepted values and defaults, plus
 * the guidance for binding the proxy beyond loopback. The proxy port shown is
 * interpolated from DEFAULT_PROXY_PORT.
 *
 * @returns {void}
 */
function printConfigGuide() {
  // streaming.responseMode is listed next to streaming.requestMode: it is a
  // validated config key (dry-run | report-only | enforce, default enforce)
  // and is the mode applied to inspected response streams.
  console.log(`Haechi configuration guide

Config file: haechi.config.json (override with --config <path>); template at
haechi.config.example.json. All values are validated fail-closed — unknown or
malformed settings refuse to start. 'haechi status' prints the EFFECTIVE state.

Enforcement
mode / policy.mode dry-run | report-only | enforce (default dry-run)
dry-run/report-only detect + audit only.
policy.mode overrides mode.

Upstream + proxy
target.type llm-http | openai-compatible | vllm-openai |
ollama | llama-cpp (unknown = fail)
target.upstream the only upstream the proxy forwards to
proxy.host / proxy.port 127.0.0.1 / ${DEFAULT_PROXY_PORT}
non-loopback host needs --allow-remote-bind (CLI flag)

Response + streaming
responseProtection.enabled inspect upstream responses (default false)
responseProtection.failureMode fail-closed | allow (default fail-closed)
streaming.requestMode block | pass-through | inspect (default block)
inspect = stream-filter SSE/NDJSON responses
streaming.responseMode dry-run | report-only | enforce (default enforce)
applies to inspected response streams
streaming.maxMatchBytes cross-frame match window (default 256)
limits.upstreamTimeoutMs upstream timeout in ms (default 120000)

Detection policy
policy.presets korean-pii, secrets-only, llm-redact,
strict-block, mcp-basic, local-inference, local-only
policy.defaultAction allow | redact | mask | tokenize | encrypt | block
policy.actions per-type overrides; merges may strengthen, not weaken
filters.customRules extra regex rules (ReDoS-screened)

Tokenization (model sees token, caller sees plaintext)
tokenVault.revealPolicy disabled | local-dev (manual reveal gate)
tokenVault.deterministic same value -> same token (default false)
tokenVault.detokenizeResponses restore request-issued tokens in the response
(needs responseProtection.enabled)

Privacy + MCP
privacy.profile kr-pipa | eu-gdpr | us-general | null
mcp.allowedMethods client-callable method allowlist

Binding beyond loopback (0.0.0.0):
haechi proxy --host 0.0.0.0 --allow-remote-bind
There is NO client auth yet (planned 0.6). Use only behind network controls:
bind 0.0.0.0 in a container and map -p 127.0.0.1:${DEFAULT_PROXY_PORT}:${DEFAULT_PROXY_PORT}, or front
it with a firewall/VPN/authenticating reverse proxy.

Full reference: docs/current/configuration.md
`);
}
|
|
629
|
+
|
|
630
|
+
await main(command, argv);
|
package/packages/cli/runtime.mjs
CHANGED
|
@@ -34,7 +34,9 @@ export function defaultConfig() {
|
|
|
34
34
|
maxBytes: 1048576
|
|
35
35
|
},
|
|
36
36
|
streaming: {
|
|
37
|
-
requestMode: "block"
|
|
37
|
+
requestMode: "block",
|
|
38
|
+
responseMode: "enforce",
|
|
39
|
+
maxMatchBytes: 256
|
|
38
40
|
},
|
|
39
41
|
limits: {
|
|
40
42
|
maxRequestBytes: 1048576,
|
|
@@ -271,9 +273,15 @@ export function normalizeConfig(config) {
|
|
|
271
273
|
if (typeof merged.responseProtection.maxBytes !== "number" || merged.responseProtection.maxBytes < 1) {
|
|
272
274
|
throw new Error("responseProtection.maxBytes must be a positive number");
|
|
273
275
|
}
|
|
274
|
-
if (!["block", "pass-through"].includes(merged.streaming.requestMode)) {
|
|
276
|
+
if (!["block", "pass-through", "inspect"].includes(merged.streaming.requestMode)) {
|
|
275
277
|
throw new Error(`Invalid streaming.requestMode: ${merged.streaming.requestMode}`);
|
|
276
278
|
}
|
|
279
|
+
if (!["dry-run", "report-only", "enforce"].includes(merged.streaming.responseMode)) {
|
|
280
|
+
throw new Error(`Invalid streaming.responseMode: ${merged.streaming.responseMode}`);
|
|
281
|
+
}
|
|
282
|
+
if (typeof merged.streaming.maxMatchBytes !== "number" || merged.streaming.maxMatchBytes < 1) {
|
|
283
|
+
throw new Error("streaming.maxMatchBytes must be a positive number");
|
|
284
|
+
}
|
|
277
285
|
if (typeof merged.limits.maxRequestBytes !== "number" || merged.limits.maxRequestBytes < 1) {
|
|
278
286
|
throw new Error("limits.maxRequestBytes must be a positive number");
|
|
279
287
|
}
|
package/packages/core/index.mjs
CHANGED
|
@@ -51,7 +51,116 @@ export function createHaechi({ filterEngine, policyEngine, cryptoProvider, audit
|
|
|
51
51
|
};
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
// Stateful protector for an incremental text stream (SSE/NDJSON deltas).
//
// push() appends text to a held-back tail and releases only the leading part
// that can no longer belong to a detection continuing into later chunks;
// flush() drains the tail at end of stream. maxMatchBytes bounds the
// guarantee: a single match longer than it may still split across frames.
// The window is measured in string length (UTF-16 code units), not encoded
// bytes.
//
// context is passed to every filterEngine.detect / policyEngine.decide call.
// context.mode overrides the enclosing `mode`; context.maxMatchBytes overrides
// the default window of 256.
//
// Returns { protectFrameExtras, push, flush, summary }:
//   protectFrameExtras(value) -> { value, blocked }
//   push(text) / flush()      -> { text, blocked }
//   summary()                 -> { detectionCount, byType, byAction }
function createStreamProtector(context = {}) {
  const effectiveMode = context.mode ?? mode;
  const enforced = !NO_ENFORCE_MODES.has(effectiveMode);
  const maxMatchBytes = context.maxMatchBytes ?? 256;
  const byType = {};
  const byAction = {};
  let detectionCount = 0;
  let pending = "";

  // Add one batch of detections and their decisions to the running summary.
  function tally(detections, decisions) {
    detections.forEach((detection, index) => {
      byType[detection.type] = (byType[detection.type] ?? 0) + 1;
      const action = decisions[index]?.action ?? "unknown";
      byAction[action] = (byAction[action] ?? 0) + 1;
      detectionCount += 1;
    });
  }

  // Ask the policy engine for a decision per detection, one at a time and in
  // detection order, so decisions[i] always belongs to detections[i].
  async function decideAll(detections) {
    const decisions = [];
    for (const detection of detections) {
      decisions.push(await policyEngine.decide({ detection, context, mode: effectiveMode }));
    }
    return decisions;
  }

  // True when cutting `text` at `index` would separate a UTF-16 surrogate pair.
  function splitsSurrogatePair(text, index) {
    if (index <= 0 || index >= text.length) {
      return false;
    }
    const before = text.charCodeAt(index - 1);
    const after = text.charCodeAt(index);
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  }

  // Number of leading characters of `pending` that may be released now:
  // everything before the trailing maxMatchBytes window, moved earlier until
  // the cut is stable. Moving the cut back to the start of one detection can
  // put it inside another, overlapping detection, so the scan repeats until
  // nothing moves. The cut only ever decreases, so the loop terminates.
  function safeCommitIndex(detections) {
    let commit = Math.max(0, pending.length - maxMatchBytes);
    let moved = true;
    while (moved && commit > 0) {
      moved = false;
      if (splitsSurrogatePair(pending, commit)) {
        // Keep both halves of the code point together in the held tail.
        commit -= 1;
        moved = true;
      }
      for (const detection of detections) {
        if (detection.start < commit && detection.end > commit) {
          commit = detection.start;
          moved = true;
        }
      }
    }
    return commit;
  }

  // Transform a complete, committed text segment. The segment is detected
  // again on its own, and only these detections are tallied.
  async function transformSegment(text) {
    const detections = await filterEngine.detect({
      entries: collectStringEntries(text),
      context
    });
    const decisions = await decideAll(detections);
    tally(detections, decisions);
    const blocked = enforced && decisions.some((decision) => decision.action === "block");
    if (blocked) {
      return { text: "", blocked: true };
    }
    // Non-enforcing modes detect and tally but pass the text through unchanged.
    if (!enforced || detections.length === 0) {
      return { text, blocked: false };
    }
    const items = detections.map((detection, index) => ({ detection, decision: decisions[index] }));
    const transformed = await transformString(text, items, { context, cryptoProvider, tokenVault, issuedTokens: null });
    return { text: transformed, blocked: false };
  }

  return {
    // Protect string leaves of a parsed frame OTHER than the incremental
    // delta text (e.g. tool-call arguments). Resolves to { value, blocked };
    // value is null when the frame is blocked.
    async protectFrameExtras(value) {
      const detections = await filterEngine.detect({
        entries: collectStringEntries(value),
        context
      });
      if (detections.length === 0) {
        return { value, blocked: false };
      }
      const decisions = await decideAll(detections);
      tally(detections, decisions);
      const blocked = enforced && decisions.some((decision) => decision.action === "block");
      if (blocked) {
        return { value: null, blocked: true };
      }
      if (!enforced) {
        return { value, blocked: false };
      }
      const transformed = await transformPayload(value, detections, decisions, {
        context, cryptoProvider, tokenVault, enforced
      });
      return { value: transformed, blocked: false };
    },
    // Append incremental text; return the portion safe to emit now. Resolves
    // to { text: "", blocked: false } while everything is still held back.
    async push(text) {
      pending += text;
      const detections = await filterEngine.detect({
        entries: collectStringEntries(pending),
        context
      });
      const commit = safeCommitIndex(detections);
      if (commit <= 0) {
        return { text: "", blocked: false };
      }
      const head = pending.slice(0, commit);
      pending = pending.slice(commit);
      return transformSegment(head);
    },
    // Drain the held tail at end of stream (no more cross-frame risk).
    async flush() {
      const tail = pending;
      pending = "";
      if (!tail) {
        return { text: "", blocked: false };
      }
      return transformSegment(tail);
    },
    // Running totals over every segment and frame processed so far. byType
    // and byAction are the live counter objects, not copies.
    summary() {
      return { detectionCount, byType, byAction };
    }
  };
}
|
|
162
|
+
|
|
163
|
+
return { protectJson, createStreamProtector };
|
|
55
164
|
}
|
|
56
165
|
|
|
57
166
|
export function collectStringEntries(value, path = []) {
|
|
@@ -1,11 +1,22 @@
|
|
|
1
|
+
// Streaming descriptors: `format` is the wire framing, `deltaPath` is the
|
|
2
|
+
// primary incremental-text channel (index 0 of choices for OpenAI-style).
|
|
3
|
+
// A null deltaPath means "no known channel" — frames still get within-frame
|
|
4
|
+
// protection but no cross-frame buffering.
|
|
5
|
+
const SSE_CHAT = { format: "sse", deltaPath: ["choices", 0, "delta", "content"] };
|
|
6
|
+
const SSE_COMPLETION = { format: "sse", deltaPath: ["choices", 0, "text"] };
|
|
7
|
+
const SSE_RESPONSES = { format: "sse", deltaPath: null };
|
|
8
|
+
const SSE_LLAMA_LEGACY = { format: "sse", deltaPath: ["content"] };
|
|
9
|
+
const NDJSON_OLLAMA_CHAT = { format: "ndjson", deltaPath: ["message", "content"] };
|
|
10
|
+
const NDJSON_OLLAMA_GENERATE = { format: "ndjson", deltaPath: ["response"] };
|
|
11
|
+
|
|
1
12
|
const ADAPTERS = {
|
|
2
13
|
"openai-compatible": {
|
|
3
14
|
id: "openai-compatible",
|
|
4
15
|
protocol: "llm-http",
|
|
5
16
|
routes: [
|
|
6
|
-
route("/v1/chat/completions", "chat-completions"),
|
|
7
|
-
route("/v1/completions", "completions"),
|
|
8
|
-
route("/v1/responses", "responses"),
|
|
17
|
+
route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
|
|
18
|
+
route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
|
|
19
|
+
route("/v1/responses", "responses", { streaming: SSE_RESPONSES }),
|
|
9
20
|
route("/v1/embeddings", "embeddings")
|
|
10
21
|
]
|
|
11
22
|
},
|
|
@@ -13,9 +24,9 @@ const ADAPTERS = {
|
|
|
13
24
|
id: "vllm-openai",
|
|
14
25
|
protocol: "vllm-openai",
|
|
15
26
|
routes: [
|
|
16
|
-
route("/v1/chat/completions", "chat-completions"),
|
|
17
|
-
route("/v1/completions", "completions"),
|
|
18
|
-
route("/v1/responses", "responses"),
|
|
27
|
+
route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
|
|
28
|
+
route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
|
|
29
|
+
route("/v1/responses", "responses", { streaming: SSE_RESPONSES }),
|
|
19
30
|
route("/v1/embeddings", "embeddings")
|
|
20
31
|
]
|
|
21
32
|
},
|
|
@@ -23,10 +34,10 @@ const ADAPTERS = {
|
|
|
23
34
|
id: "llama-cpp",
|
|
24
35
|
protocol: "llama-cpp",
|
|
25
36
|
routes: [
|
|
26
|
-
route("/v1/chat/completions", "chat-completions"),
|
|
27
|
-
route("/v1/completions", "completions"),
|
|
37
|
+
route("/v1/chat/completions", "chat-completions", { streaming: SSE_CHAT }),
|
|
38
|
+
route("/v1/completions", "completions", { streaming: SSE_COMPLETION }),
|
|
28
39
|
route("/v1/embeddings", "embeddings"),
|
|
29
|
-
route("/completion", "legacy-completion")
|
|
40
|
+
route("/completion", "legacy-completion", { streaming: SSE_LLAMA_LEGACY })
|
|
30
41
|
]
|
|
31
42
|
},
|
|
32
43
|
"ollama": {
|
|
@@ -34,8 +45,8 @@ const ADAPTERS = {
|
|
|
34
45
|
protocol: "ollama",
|
|
35
46
|
routes: [
|
|
36
47
|
// Ollama streams /api/chat and /api/generate unless the request sets stream:false.
|
|
37
|
-
route("/api/chat", "chat", { streamingDefault: true }),
|
|
38
|
-
route("/api/generate", "generate", { streamingDefault: true }),
|
|
48
|
+
route("/api/chat", "chat", { streamingDefault: true, streaming: NDJSON_OLLAMA_CHAT }),
|
|
49
|
+
route("/api/generate", "generate", { streamingDefault: true, streaming: NDJSON_OLLAMA_GENERATE }),
|
|
39
50
|
route("/api/embed", "embed"),
|
|
40
51
|
route("/api/embeddings", "embeddings")
|
|
41
52
|
]
|
|
@@ -47,7 +58,13 @@ const TARGET_TYPE_ALIASES = {
|
|
|
47
58
|
};
|
|
48
59
|
|
|
49
60
|
export function createProtocolAdapter(target = {}) {
|
|
50
|
-
|
|
61
|
+
// A specific target.type (vllm-openai, ollama, llama-cpp) names its own
|
|
62
|
+
// adapter and wins over a generic/default target.adapter — otherwise the
|
|
63
|
+
// default config's adapter ("openai-compatible") would shadow the type after
|
|
64
|
+
// a deep merge and silently route an Ollama target to OpenAI paths.
|
|
65
|
+
const adapterId = ADAPTERS[target.type]
|
|
66
|
+
? target.type
|
|
67
|
+
: (target.adapter ?? adapterFromTargetType(target.type));
|
|
51
68
|
const adapter = ADAPTERS[adapterId];
|
|
52
69
|
if (!adapter) {
|
|
53
70
|
throw new Error(`Unknown protocol adapter: ${adapterId}`);
|
|
@@ -71,7 +88,8 @@ export function createProtocolAdapter(target = {}) {
|
|
|
71
88
|
operation,
|
|
72
89
|
protectRequest: matched?.protectRequest ?? true,
|
|
73
90
|
protectResponse: matched?.protectResponse ?? true,
|
|
74
|
-
streamingByDefault: matched?.streamingDefault ?? false
|
|
91
|
+
streamingByDefault: matched?.streamingDefault ?? false,
|
|
92
|
+
streaming: matched?.streaming ?? null
|
|
75
93
|
};
|
|
76
94
|
}
|
|
77
95
|
};
|
|
@@ -98,7 +116,8 @@ function route(path, operation, options = {}) {
|
|
|
98
116
|
operation,
|
|
99
117
|
protectRequest: options.protectRequest ?? true,
|
|
100
118
|
protectResponse: options.protectResponse ?? true,
|
|
101
|
-
streamingDefault: options.streamingDefault ?? false
|
|
119
|
+
streamingDefault: options.streamingDefault ?? false,
|
|
120
|
+
streaming: options.streaming ?? null
|
|
102
121
|
};
|
|
103
122
|
}
|
|
104
123
|
|
package/packages/proxy/index.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { createServer } from "node:http";
|
|
2
2
|
import { createHash, randomUUID } from "node:crypto";
|
|
3
|
+
import { inspectResponseStream } from "../stream-filter/index.mjs";
|
|
3
4
|
|
|
4
5
|
export const DEFAULT_PROXY_PORT = 1016;
|
|
5
6
|
|
|
@@ -22,6 +23,11 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
22
23
|
const json = parseJsonBody(body);
|
|
23
24
|
|
|
24
25
|
if (isStreamingRequest(json, routeContext)) {
|
|
26
|
+
if (config.streaming.requestMode === "inspect") {
|
|
27
|
+
await handleInspectedStream({ runtime, request, response, routeContext, json });
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
|
|
25
31
|
if (config.streaming.requestMode === "pass-through") {
|
|
26
32
|
await recordProxyDecision({
|
|
27
33
|
runtime,
|
|
@@ -45,7 +51,7 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
45
51
|
|
|
46
52
|
writeJson(response, 501, {
|
|
47
53
|
error: "haechi_streaming_unsupported",
|
|
48
|
-
message: "Streaming requests are blocked unless streaming.requestMode is
|
|
54
|
+
message: "Streaming requests are blocked unless streaming.requestMode is set to pass-through or inspect"
|
|
49
55
|
});
|
|
50
56
|
return;
|
|
51
57
|
}
|
|
@@ -114,6 +120,107 @@ export function createHaechiProxy({ runtime, port = DEFAULT_PROXY_PORT, host = "
|
|
|
114
120
|
};
|
|
115
121
|
}
|
|
116
122
|
|
|
123
|
+
/**
 * Handle a streaming request when streaming.requestMode is "inspect".
 *
 * Protects the JSON request body, forwards it upstream, then pipes the
 * upstream response through inspectResponseStream with a stream protector
 * built for this route, and records one audit decision for the stream.
 *
 * Responds 501 when the route has no streaming descriptor and 403 when the
 * request body is blocked by policy. Once the upstream status and headers have
 * been written, the client response is always ended, including when
 * inspection or audit recording throws; the error still propagates.
 *
 * @param {object} args
 * @param {object} args.runtime - Provides `haechi` and `config`.
 * @param {object} args.request - Incoming request, passed through to forward().
 * @param {object} args.response - Response to write status, headers and body to.
 * @param {object} args.routeContext - Matched route (streaming, protectRequest, operation, routeId).
 * @param {object} args.json - Parsed request body.
 * @returns {Promise<void>}
 */
async function handleInspectedStream({ runtime, request, response, routeContext, json }) {
  const { haechi, config } = runtime;

  // Inspection needs to know the wire format and delta channel for this route.
  if (!routeContext.streaming) {
    writeJson(response, 501, {
      error: "haechi_streaming_uninspectable_route",
      message: `streaming.requestMode is "inspect" but route ${routeContext.routeId} has no known streaming format`
    });
    return;
  }

  // The request body is ordinary JSON even when the response streams, so it is
  // protected like any other request.
  const requestResult = routeContext.protectRequest
    ? await haechi.protectJson(json, {
      ...routeContext,
      operation: `request:${routeContext.operation}`,
      direction: "request",
      mode: config.policy.mode ?? config.mode
    })
    : { payload: json, blocked: false };

  if (requestResult.blocked) {
    writeJson(response, 403, {
      error: "haechi_policy_block",
      summary: requestResult.summary,
      auditId: requestResult.auditEvent.id
    });
    return;
  }

  const upstreamResponse = await forward({
    upstream: config.target.upstream,
    request,
    body: JSON.stringify(requestResult.payload),
    timeoutMs: config.limits.upstreamTimeoutMs
  });

  // The stream mode falls back from the most specific setting to the global one.
  const streamMode = config.streaming.responseMode ?? config.responseProtection.mode ?? config.policy.mode ?? config.mode;
  const protector = haechi.createStreamProtector({
    ...routeContext,
    operation: `response-stream:${routeContext.operation}`,
    direction: "response",
    mode: streamMode,
    maxMatchBytes: config.streaming.maxMatchBytes
  });

  response.writeHead(upstreamResponse.status, streamingResponseHeaders(upstreamResponse));

  // Headers are on the wire from here on, so the response must be ended on
  // every path; otherwise a failure mid-stream leaves the client response open.
  try {
    const { blocked, summary } = await inspectResponseStream({
      source: upstreamResponse.body ?? emptyAsyncIterable(),
      sink: nodeResponseSink(response),
      streaming: routeContext.streaming,
      protector
    });

    await recordStreamDecision({ runtime, routeContext, blocked, summary, mode: streamMode });
  } finally {
    response.end();
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Build the header object to send to the client for an inspected stream.
 *
 * Copies every upstream header except content-length and content-encoding,
 * which are dropped before the inspected body is written.
 *
 * @param {object} upstreamResponse - Upstream response; `headers.entries()` yields [name, value] pairs.
 * @returns {object} Plain object mapping header names to values.
 */
function streamingResponseHeaders(upstreamResponse) {
  const dropped = new Set(["content-length", "content-encoding"]);
  const kept = [];
  for (const [name, value] of upstreamResponse.headers.entries()) {
    if (!dropped.has(name)) {
      kept.push([name, value]);
    }
  }
  return Object.fromEntries(kept);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Wrap a response object as the sink given to the stream inspector.
 *
 * @param {object} response - Object with a `write(text)` method.
 * @returns {{ write: (text: string) => void }} Sink whose write() forwards
 *   each chunk to `response.write` and returns nothing.
 */
function nodeResponseSink(response) {
  const write = (text) => {
    response.write(text);
  };
  return { write };
}
|
|
198
|
+
|
|
199
|
+
/**
 * Async iterable that completes immediately without yielding anything.
 * Stands in for the stream source when the upstream response has no body.
 */
async function* emptyAsyncIterable() {
  return;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Write one audit event describing the outcome of an inspected response stream.
 *
 * Does nothing when the runtime has no audit sink with a `record` function.
 *
 * @param {object} args
 * @param {object} args.runtime - Runtime; `auditSink.record(event)` receives the event.
 * @param {object} [args.routeContext] - Matched route; missing fields fall back
 *   to "proxy" (protocol), "unknown" (operation, routeId) and null (pathHash).
 * @param {boolean} args.blocked - Whether the stream was blocked by policy.
 * @param {*} args.summary - Detection summary, stored on the event as given.
 * @param {string} args.mode - Mode the stream was inspected under.
 * @returns {Promise<void>}
 */
async function recordStreamDecision({ runtime, routeContext, blocked, summary, mode }) {
  const sink = runtime.auditSink;
  if (typeof sink?.record !== "function") {
    return;
  }

  // Every mode other than dry-run and report-only counts as enforcing.
  const enforced = mode !== "dry-run" && mode !== "report-only";
  let decision = "stream_inspected";
  let reason = "stream_inspected";
  if (blocked) {
    decision = "stream_blocked";
    reason = "stream_policy_block";
  }

  const event = {
    id: randomUUID(),
    timestamp: new Date().toISOString(),
    protocol: routeContext?.protocol ?? "proxy",
    operation: `response-stream:${routeContext?.operation ?? "unknown"}`,
    mode,
    identity: null,
    enforced,
    blocked,
    decision,
    reason,
    routeId: routeContext?.routeId ?? "unknown",
    // Only a hash of the request path is stored on the event.
    pathHash: routeContext?.path ? shortHash(routeContext.path) : null,
    summary
  };
  await sink.record(event);
}
|
|
223
|
+
|
|
117
224
|
async function maybeProtectResponse({ upstreamResponse, routeContext, runtime, issuedTokens = [] }) {
|
|
118
225
|
const headers = Object.fromEntries(upstreamResponse.headers.entries());
|
|
119
226
|
|