@fusionkit/model-gateway 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-agent.d.ts +39 -0
- package/dist/acp-agent.js +143 -0
- package/dist/acp-registry.d.ts +36 -0
- package/dist/acp-registry.js +85 -0
- package/dist/adapters/anthropic.d.ts +111 -0
- package/dist/adapters/anthropic.js +446 -0
- package/dist/adapters/chat.d.ts +14 -0
- package/dist/adapters/chat.js +34 -0
- package/dist/adapters/responses.d.ts +94 -0
- package/dist/adapters/responses.js +438 -0
- package/dist/backend.d.ts +52 -0
- package/dist/backend.js +57 -0
- package/dist/config.d.ts +22 -0
- package/dist/config.js +47 -0
- package/dist/front-door-acceptance.d.ts +41 -0
- package/dist/front-door-acceptance.js +219 -0
- package/dist/fusion-backend.d.ts +96 -0
- package/dist/fusion-backend.js +521 -0
- package/dist/fusion-gateway.d.ts +69 -0
- package/dist/fusion-gateway.js +355 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +28 -0
- package/dist/mlx-backend.d.ts +42 -0
- package/dist/mlx-backend.js +71 -0
- package/dist/provenance.d.ts +29 -0
- package/dist/provenance.js +182 -0
- package/dist/server.d.ts +27 -0
- package/dist/server.js +234 -0
- package/dist/test/acp-agent.test.d.ts +1 -0
- package/dist/test/acp-agent.test.js +66 -0
- package/dist/test/acp-registry.test.d.ts +1 -0
- package/dist/test/acp-registry.test.js +70 -0
- package/dist/test/anthropic.test.d.ts +1 -0
- package/dist/test/anthropic.test.js +251 -0
- package/dist/test/chat.test.d.ts +1 -0
- package/dist/test/chat.test.js +270 -0
- package/dist/test/front-door-acceptance.test.d.ts +1 -0
- package/dist/test/front-door-acceptance.test.js +94 -0
- package/dist/test/fusion-backend-trace.test.d.ts +1 -0
- package/dist/test/fusion-backend-trace.test.js +107 -0
- package/dist/test/fusion-backend.test.d.ts +1 -0
- package/dist/test/fusion-backend.test.js +193 -0
- package/dist/test/fusion-gateway.test.d.ts +1 -0
- package/dist/test/fusion-gateway.test.js +107 -0
- package/dist/test/responses.test.d.ts +1 -0
- package/dist/test/responses.test.js +157 -0
- package/package.json +31 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fusion Harness Gateway — the provider-facing front door that lets a coding
|
|
3
|
+
* tool (Codex, Claude Code, Cursor via Cursorkit) be the entrypoint. A prompt
|
|
4
|
+
* sent from the tool hits this gateway, which translates the request into a
|
|
5
|
+
* dialect-agnostic prompt, runs the unified HandoffKit/FusionKit harness
|
|
6
|
+
* ensemble through an injected runner, then translates the synthesized final
|
|
7
|
+
* answer back into the tool's native wire format.
|
|
8
|
+
*
|
|
9
|
+
* The runner is injected (not imported) so this package stays free of a
|
|
10
|
+
* dependency on `@fusionkit/ensemble`, which already depends on this package.
|
|
11
|
+
*/
|
|
12
|
+
import { createHash, timingSafeEqual } from "node:crypto";
import { once } from "node:events";
import { createServer } from "node:http";
import { newTraceId, TRACE_ID_HEADER } from "@fusionkit/protocol";
import { chatToAnthropicMessage, openAiSseToAnthropic } from "./adapters/anthropic.js";
import { chatToResponses, openAiSseToResponses } from "./adapters/responses.js";
|
|
17
|
+
/** Response header that carries the run id reported by the runner. */
export const FUSION_RUN_ID_HEADER = "x-fusion-run-id";
/** Response header that carries the status reported by the runner. */
export const FUSION_STATUS_HEADER = "x-fusion-status";
/** Response header that carries the runner's evidence, serialized as JSON. */
export const FUSION_EVIDENCE_HEADER = "x-fusion-evidence";
/** Response header that carries the runner's report path; only set when the runner returns one. */
export const FUSION_REPORT_HEADER = "x-fusion-report";
/** Model id used when neither the request nor the gateway options name one. */
const DEFAULT_MODEL = "fusion-panel";
|
|
22
|
+
/**
 * Extract the text of one content part.
 *
 * @param part - One entry of a request's content array. It comes from
 *   untrusted JSON, so it may be any JSON value, including `null`.
 * @returns `part.text` when it is a string, otherwise `""`.
 */
function partText(part) {
    // `null`/`undefined` entries would throw on property access; treat them as text-less.
    if (part === null || part === undefined)
        return "";
    return typeof part.text === "string" ? part.text : "";
}
|
|
27
|
+
/**
 * Flatten a message `content` value into plain text.
 *
 * A string is returned untouched. An array is reduced to the concatenation
 * (no separator) of each part's text as reported by `partText`. Any other
 * value yields the empty string.
 */
function contentToText(content) {
    if (Array.isArray(content)) {
        return content.reduce((text, part) => text + partText(part), "");
    }
    return typeof content === "string" ? content : "";
}
|
|
35
|
+
/**
 * Build the dialect-agnostic prompt for an OpenAI Responses request.
 *
 * The prompt is `instructions` (when a non-empty string) followed by the text
 * of each `input` entry, joined by blank lines and trimmed. `input` may be a
 * plain string or an array of items. `function_call` and
 * `function_call_output` items are skipped, as are items that carry no text.
 *
 * @param body - Parsed request body. It is untrusted JSON: a missing body and
 *   `null`/primitive `input` entries are tolerated instead of throwing.
 * @returns The assembled prompt, or `""` when nothing usable was supplied.
 */
export function promptFromResponses(body) {
    const parts = [];
    const instructions = body?.instructions;
    if (typeof instructions === "string" && instructions.length > 0) {
        parts.push(instructions);
    }
    const input = body?.input;
    if (typeof input === "string") {
        parts.push(input);
    }
    else if (Array.isArray(input)) {
        for (const item of input) {
            // A `null` entry used to throw on `item.type`; primitives never carry content.
            if (item === null || typeof item !== "object")
                continue;
            if (item.type === "function_call" || item.type === "function_call_output")
                continue;
            const text = contentToText(item.content);
            if (text.length > 0)
                parts.push(text);
        }
    }
    return parts.join("\n\n").trim();
}
|
|
57
|
+
/**
 * Build the dialect-agnostic prompt for an Anthropic Messages request.
 *
 * The prompt is the `system` prompt followed by the text of every `user`
 * message, joined by blank lines and trimmed. `system` may be a string or an
 * array of blocks; block texts are joined with a single newline. Messages
 * with any other role are ignored.
 *
 * @param body - Parsed request body. It is untrusted JSON: a missing body,
 *   a non-array `messages`, and `null` blocks or messages are tolerated
 *   instead of throwing. A system block whose `text` is not a string
 *   contributes nothing.
 * @returns The assembled prompt, or `""` when nothing usable was supplied.
 */
export function promptFromAnthropic(body) {
    const parts = [];
    const system = body?.system;
    if (typeof system === "string") {
        if (system.length > 0)
            parts.push(system);
    }
    else if (Array.isArray(system)) {
        // Only string `text` fields count; `null` blocks used to throw and
        // non-string values used to be coerced into the prompt.
        const blockText = (block) => block !== null && typeof block === "object" && typeof block.text === "string" ? block.text : "";
        parts.push(system.map(blockText).join("\n"));
    }
    const messages = Array.isArray(body?.messages) ? body.messages : [];
    for (const message of messages) {
        if (message === null || typeof message !== "object" || message.role !== "user")
            continue;
        const text = contentToText(message.content);
        if (text.length > 0)
            parts.push(text);
    }
    return parts.join("\n\n").trim();
}
|
|
74
|
+
/**
 * Build the dialect-agnostic prompt for an OpenAI Chat Completions request.
 *
 * The prompt is the text of every instruction-bearing message (`system`,
 * `developer`, and `user` roles) in request order, joined by blank lines and
 * trimmed. `developer` is the role newer OpenAI clients use in place of
 * `system`, so it is treated the same way. Other roles are ignored.
 *
 * @param body - Parsed request body. It is untrusted JSON: a missing body,
 *   a non-array `messages`, and `null` messages are tolerated instead of
 *   throwing.
 * @returns The assembled prompt, or `""` when nothing usable was supplied.
 */
export function promptFromChat(body) {
    const parts = [];
    const messages = Array.isArray(body?.messages) ? body.messages : [];
    for (const message of messages) {
        if (message === null || typeof message !== "object")
            continue;
        const { role } = message;
        if (role !== "user" && role !== "system" && role !== "developer")
            continue;
        const text = contentToText(message.content);
        if (text.length > 0)
            parts.push(text);
    }
    return parts.join("\n\n").trim();
}
|
|
85
|
+
// ---- response formatting ----
|
|
86
|
+
function syntheticOpenAiResponse(finalOutput) {
|
|
87
|
+
return {
|
|
88
|
+
id: Math.random().toString(36).slice(2, 12),
|
|
89
|
+
choices: [{ message: { content: finalOutput }, finish_reason: "stop" }],
|
|
90
|
+
usage: { prompt_tokens: 0, completion_tokens: 0 }
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
/**
 * Render the final answer as a non-streaming OpenAI Responses payload by
 * passing a synthetic chat completion through `chatToResponses`.
 */
export function formatResponses(finalOutput, model) {
    const completion = syntheticOpenAiResponse(finalOutput);
    return chatToResponses(completion, model);
}
|
|
96
|
+
/**
 * Render the final answer as a non-streaming Anthropic message by passing a
 * synthetic chat completion through `chatToAnthropicMessage`.
 */
export function formatAnthropic(finalOutput, model) {
    const completion = syntheticOpenAiResponse(finalOutput);
    return chatToAnthropicMessage(completion, model);
}
|
|
99
|
+
export function formatChat(finalOutput, model) {
|
|
100
|
+
return {
|
|
101
|
+
id: `chatcmpl_${Math.random().toString(36).slice(2, 12)}`,
|
|
102
|
+
object: "chat.completion",
|
|
103
|
+
created: Math.floor(Date.now() / 1000),
|
|
104
|
+
model,
|
|
105
|
+
choices: [
|
|
106
|
+
{
|
|
107
|
+
index: 0,
|
|
108
|
+
message: { role: "assistant", content: finalOutput },
|
|
109
|
+
finish_reason: "stop"
|
|
110
|
+
}
|
|
111
|
+
],
|
|
112
|
+
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
// ---- streaming ----
|
|
116
|
+
const SSE_ENCODER = new TextEncoder();
|
|
117
|
+
/**
|
|
118
|
+
* Build a synthetic OpenAI Chat Completions SSE stream carrying a single
|
|
119
|
+
* already-complete answer. The existing Responses/Anthropic SSE translators
|
|
120
|
+
* consume this to emit each dialect's native streamed event sequence.
|
|
121
|
+
*/
|
|
122
|
+
function openAiChatSseFromText(finalOutput) {
|
|
123
|
+
const frames = [
|
|
124
|
+
`data: ${JSON.stringify({
|
|
125
|
+
choices: [{ index: 0, delta: { role: "assistant", content: finalOutput }, finish_reason: null }]
|
|
126
|
+
})}\n\n`,
|
|
127
|
+
`data: ${JSON.stringify({
|
|
128
|
+
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
|
129
|
+
usage: { prompt_tokens: 0, completion_tokens: 0 }
|
|
130
|
+
})}\n\n`,
|
|
131
|
+
"data: [DONE]\n\n"
|
|
132
|
+
];
|
|
133
|
+
let index = 0;
|
|
134
|
+
return new ReadableStream({
|
|
135
|
+
pull(controller) {
|
|
136
|
+
if (index < frames.length) {
|
|
137
|
+
controller.enqueue(SSE_ENCODER.encode(frames[index]));
|
|
138
|
+
index += 1;
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
controller.close();
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
async function pipeSse(res, stream) {
|
|
147
|
+
res.statusCode = 200;
|
|
148
|
+
res.setHeader("content-type", "text/event-stream");
|
|
149
|
+
res.setHeader("cache-control", "no-cache");
|
|
150
|
+
const reader = stream.getReader();
|
|
151
|
+
try {
|
|
152
|
+
for (;;) {
|
|
153
|
+
const { done, value } = await reader.read();
|
|
154
|
+
if (done)
|
|
155
|
+
break;
|
|
156
|
+
if (value !== undefined && !res.write(Buffer.from(value)))
|
|
157
|
+
await once(res, "drain");
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
finally {
|
|
161
|
+
res.end();
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
function writeChatSse(res, finalOutput, model) {
|
|
165
|
+
res.statusCode = 200;
|
|
166
|
+
res.setHeader("content-type", "text/event-stream");
|
|
167
|
+
res.setHeader("cache-control", "no-cache");
|
|
168
|
+
const id = `chatcmpl_${Math.random().toString(36).slice(2, 12)}`;
|
|
169
|
+
const created = Math.floor(Date.now() / 1000);
|
|
170
|
+
res.write(`data: ${JSON.stringify({
|
|
171
|
+
id,
|
|
172
|
+
object: "chat.completion.chunk",
|
|
173
|
+
created,
|
|
174
|
+
model,
|
|
175
|
+
choices: [{ index: 0, delta: { role: "assistant", content: finalOutput }, finish_reason: null }]
|
|
176
|
+
})}\n\n`);
|
|
177
|
+
res.write(`data: ${JSON.stringify({
|
|
178
|
+
id,
|
|
179
|
+
object: "chat.completion.chunk",
|
|
180
|
+
created,
|
|
181
|
+
model,
|
|
182
|
+
choices: [{ index: 0, delta: {}, finish_reason: "stop" }]
|
|
183
|
+
})}\n\n`);
|
|
184
|
+
res.write("data: [DONE]\n\n");
|
|
185
|
+
res.end();
|
|
186
|
+
}
|
|
187
|
+
/** Build an OpenAI-style model list that advertises the single given model id. */
function openAiModels(model) {
    const entry = { id: model, object: "model", owned_by: "fusion-gateway" };
    return { object: "list", data: [entry] };
}
|
|
190
|
+
/**
 * Build an Anthropic-style model list that advertises the single given model
 * id (also used as its display name), with `has_more` set to `false`.
 */
function anthropicModels(model) {
    const entry = { type: "model", id: model, display_name: model };
    return { object: "list", data: [entry], has_more: false };
}
|
|
197
|
+
// ---- server ----
|
|
198
|
+
const NO_BODY = Symbol("no-body");
|
|
199
|
+
async function readBody(req) {
|
|
200
|
+
const chunks = [];
|
|
201
|
+
for await (const chunk of req)
|
|
202
|
+
chunks.push(chunk);
|
|
203
|
+
return Buffer.concat(chunks);
|
|
204
|
+
}
|
|
205
|
+
/**
 * Read and parse a request's JSON body.
 *
 * An empty body is treated as `{}`. If the body is not valid JSON, or is
 * valid JSON that is not an object (`null`, a number, a string, an array),
 * a 400 response is written and the `NO_BODY` sentinel is returned, so
 * callers must stop handling the request. Callers read properties such as
 * `body.model` directly, so anything other than an object must be rejected
 * here rather than surfacing later as an internal error.
 *
 * @param req - The incoming request to read.
 * @param res - The response used to report a malformed body.
 * @returns The parsed object, or `NO_BODY` after a 400 has been sent.
 */
async function readJson(req, res) {
    const buffer = await readBody(req);
    if (buffer.length === 0)
        return {};
    let parsed;
    try {
        parsed = JSON.parse(buffer.toString("utf8"));
    }
    catch {
        writeJson(res, 400, { error: { message: "invalid JSON body", type: "bad_request" } });
        return NO_BODY;
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        writeJson(res, 400, { error: { message: "JSON body must be an object", type: "bad_request" } });
        return NO_BODY;
    }
    return parsed;
}
|
|
217
|
+
function writeJson(res, status, value) {
|
|
218
|
+
const payload = Buffer.from(JSON.stringify(value), "utf8");
|
|
219
|
+
res.statusCode = status;
|
|
220
|
+
res.setHeader("content-type", "application/json");
|
|
221
|
+
res.setHeader("content-length", String(payload.byteLength));
|
|
222
|
+
res.end(payload);
|
|
223
|
+
}
|
|
224
|
+
/**
 * Compare two secrets without revealing, through response timing, how much of
 * the presented value matches. Both sides are hashed first so that
 * `timingSafeEqual` always receives buffers of equal length.
 */
function secretsMatch(presented, expected) {
    const digest = (text) => createHash("sha256").update(String(text), "utf8").digest();
    return timingSafeEqual(digest(presented), digest(expected));
}
/**
 * Decide whether a request carries the gateway's shared token.
 *
 * The token is accepted either as `Authorization: Bearer <token>` or as an
 * `x-api-key: <token>` header.
 *
 * @param req - The incoming request; only `req.headers` is read.
 * @param token - The expected shared token.
 * @returns `true` when either header presents the expected token.
 */
function authorized(req, token) {
    const auth = req.headers.authorization;
    if (typeof auth === "string") {
        // Auth scheme names are case-insensitive, so `bearer <token>` must be accepted too.
        const scheme = auth.slice(0, 7).toLowerCase();
        if (scheme === "bearer " && secretsMatch(auth.slice(7), token))
            return true;
    }
    const apiKey = req.headers["x-api-key"];
    return typeof apiKey === "string" && secretsMatch(apiKey, token);
}
|
|
231
|
+
/**
 * Mint a request id of the form `<prefix>_<random>`, where the random part is
 * up to ten base-36 characters taken from `Math.random()`. It is not
 * cryptographically strong.
 */
function requestId(prefix) {
    const suffix = Math.random().toString(36).slice(2, 12);
    return prefix + "_" + suffix;
}
|
|
234
|
+
/**
 * Turn any thrown value into display text: the `message` of an `Error`, or
 * the `String(...)` form of anything else.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
237
|
+
/**
 * Start the Fusion front-door HTTP gateway and resolve once it is listening.
 *
 * Routes (all but `/health` require the token when `authToken` is set):
 * - `/health` — liveness probe.
 * - `GET /v1/models` | `/models` — model list. The Anthropic shape is used
 *   when the request carries an `anthropic-version` header.
 * - `POST /v1/responses` | `/responses` — OpenAI Responses dialect.
 * - `POST /v1/messages/count_tokens` | `/messages/count_tokens` — rough
 *   token estimate (about four characters per token, minimum 1).
 * - `POST /v1/messages` | `/messages` — Anthropic Messages dialect.
 * - `POST /v1/chat/completions` | `/chat/completions` — OpenAI Chat dialect.
 *
 * Each dialect route reduces the request to a prompt, calls the injected
 * `runner`, and renders `result.finalOutput` in the caller's dialect, either
 * as JSON or (when `stream: true`) as SSE. Runner metadata is exposed through
 * the `x-fusion-*` response headers.
 *
 * @param options.runner - Function that executes a front-door run.
 * @param options.host - Interface to bind; defaults to `127.0.0.1`.
 * @param options.port - Port to bind; defaults to `0` (OS-assigned).
 * @param options.defaultModel - Model id used when a request names none.
 * @param options.authToken - When set, requests must present this token.
 * @returns Handle with `url()`, `port()`, and `close()`.
 * @throws Rejects when the server fails to start listening.
 */
export async function startFusionGateway(options) {
    const host = options.host ?? "127.0.0.1";
    const defaultModel = options.defaultModel ?? DEFAULT_MODEL;
    const { runner, authToken } = options;
    /**
     * Serialize a value as JSON that is safe to place in an HTTP header.
     * Characters outside printable ASCII are written as `\uXXXX` escapes, so
     * the value parses to the same data but never trips header validation.
     * Returns `undefined` when the value has no JSON form.
     */
    function headerSafeJson(value) {
        const json = JSON.stringify(value);
        if (json === undefined)
            return undefined;
        return json.replace(/[\u007f-\uffff]/g, (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`);
    }
    /** Run the injected runner for one request and write the reply in the caller's dialect. */
    async function runFrontDoor(res, dialect, prompt, requestedModel, stream, format, traceId) {
        const id = requestId(dialect);
        res.setHeader(TRACE_ID_HEADER, traceId);
        const result = await runner({ dialect, prompt, requestedModel, requestId: id, traceId });
        res.setHeader(FUSION_RUN_ID_HEADER, result.runId);
        res.setHeader(FUSION_STATUS_HEADER, result.status);
        // Setting an undefined or non-ASCII header value throws, which would discard a finished run.
        const evidence = headerSafeJson(result.evidence);
        if (evidence !== undefined)
            res.setHeader(FUSION_EVIDENCE_HEADER, evidence);
        if (result.reportPath !== undefined)
            res.setHeader(FUSION_REPORT_HEADER, result.reportPath);
        const model = requestedModel ?? defaultModel;
        if (!stream) {
            writeJson(res, 200, format(result.finalOutput, model));
            return;
        }
        switch (dialect) {
            case "openai-responses":
                await pipeSse(res, openAiSseToResponses(openAiChatSseFromText(result.finalOutput), model));
                return;
            case "anthropic-messages":
                await pipeSse(res, openAiSseToAnthropic(openAiChatSseFromText(result.finalOutput), model));
                return;
            case "openai-chat":
                writeChatSse(res, result.finalOutput, model);
                return;
            default: {
                const exhaustive = dialect;
                throw new Error(`unhandled dialect ${String(exhaustive)}`);
            }
        }
    }
    /** Reuse the caller's trace id when one was sent; otherwise mint a new one. */
    function traceIdFor(req) {
        const incoming = req.headers[TRACE_ID_HEADER];
        if (typeof incoming === "string" && incoming.length > 0)
            return incoming;
        if (Array.isArray(incoming) && incoming.length > 0 && incoming[0])
            return incoming[0];
        return newTraceId();
    }
    /** Route one request. Rejections are handled by the server callback below. */
    async function handle(req, res) {
        const method = req.method ?? "GET";
        const path = new URL(req.url ?? "/", "http://localhost").pathname;
        if (path === "/health") {
            writeJson(res, 200, { status: "ok", service: "fusion-harness-gateway" });
            return;
        }
        if (authToken !== undefined && !authorized(req, authToken)) {
            writeJson(res, 401, { error: { message: "unauthorized", type: "auth_error" } });
            return;
        }
        if (method === "GET" && (path === "/v1/models" || path === "/models")) {
            if (req.headers["anthropic-version"] !== undefined) {
                writeJson(res, 200, anthropicModels(defaultModel));
                return;
            }
            writeJson(res, 200, openAiModels(defaultModel));
            return;
        }
        if (method === "POST" && (path === "/v1/responses" || path === "/responses")) {
            const body = await readJson(req, res);
            if (body === NO_BODY)
                return;
            await runFrontDoor(res, "openai-responses", promptFromResponses(body), body.model, body.stream === true, formatResponses, traceIdFor(req));
            return;
        }
        // Checked before the messages route. The unprefixed alias mirrors every other route.
        if (method === "POST" && (path === "/v1/messages/count_tokens" || path === "/messages/count_tokens")) {
            const body = await readJson(req, res);
            if (body === NO_BODY)
                return;
            const text = promptFromAnthropic(body);
            writeJson(res, 200, { input_tokens: Math.max(1, Math.ceil(text.length / 4)) });
            return;
        }
        if (method === "POST" && (path === "/v1/messages" || path === "/messages")) {
            const body = await readJson(req, res);
            if (body === NO_BODY)
                return;
            await runFrontDoor(res, "anthropic-messages", promptFromAnthropic(body), body.model, body.stream === true, formatAnthropic, traceIdFor(req));
            return;
        }
        if (method === "POST" && (path === "/v1/chat/completions" || path === "/chat/completions")) {
            const body = await readJson(req, res);
            if (body === NO_BODY)
                return;
            await runFrontDoor(res, "openai-chat", promptFromChat(body), body.model, body.stream === true, formatChat, traceIdFor(req));
            return;
        }
        writeJson(res, 404, { error: { message: `no route for ${method} ${path}`, type: "not_found" } });
    }
    const server = createServer((req, res) => {
        void handle(req, res).catch((error) => {
            if (res.headersSent) {
                // A reply (for example an SSE stream) has already started, so a JSON
                // error can no longer be sent: setting headers now would throw inside
                // this handler and surface as an unhandled rejection. Drop the
                // connection instead so the client sees the failure.
                if (!res.writableEnded)
                    res.destroy();
                return;
            }
            writeJson(res, 502, { error: { message: errorMessage(error), type: "front_door_error" } });
        });
    });
    await new Promise((resolve, reject) => {
        const onError = (error) => reject(error);
        server.once("error", onError);
        server.listen(options.port ?? 0, host, () => {
            server.off("error", onError);
            resolve();
        });
    });
    const address = server.address();
    const port = typeof address === "object" && address !== null ? address.port : options.port ?? 0;
    // IPv6 literals must be bracketed inside a URL (`http://[::1]:8080`).
    const urlHost = host.includes(":") && !host.startsWith("[") ? `[${host}]` : host;
    return {
        url: () => `http://${urlHost}:${port}`,
        port: () => port,
        close: () => new Promise((resolve, reject) => {
            server.close((error) => (error ? reject(error) : resolve()));
        })
    };
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fusionkit/model-gateway — a native local-model gateway.
|
|
3
|
+
*
|
|
4
|
+
* It fronts a single OpenAI Chat Completions backend (the owned
|
|
5
|
+
* `velum-labs/mlx-lm` fork by default — "mlx_lm.server first") and exposes the
|
|
6
|
+
* wire dialects each agent harness needs so a local model can transparently
|
|
7
|
+
* back them with no change to the user's workflow:
|
|
8
|
+
*
|
|
9
|
+
* - OpenAI Chat Completions (`/v1/chat/completions`) — opencode, Cursor IDE
|
|
10
|
+
* plan panel. Implemented (M1).
|
|
11
|
+
* - Anthropic Messages (`/v1/messages`) — Claude Code. Implemented (M2).
|
|
12
|
+
* - OpenAI Responses (`/v1/responses`) — Codex. Implemented (M3).
|
|
13
|
+
*
|
|
14
|
+
* See spec/2026-06-13-local-model-harness-bridge-spec.md.
|
|
15
|
+
*/
|
|
16
|
+
export { startGateway } from "./server.js";
|
|
17
|
+
export type { Gateway, GatewayOptions } from "./server.js";
|
|
18
|
+
export { joinPath, OpenAiBackend } from "./backend.js";
|
|
19
|
+
export type { Backend, BackendRequestOptions, OpenAiBackendOptions } from "./backend.js";
|
|
20
|
+
export { FusionBackend } from "./fusion-backend.js";
|
|
21
|
+
export type { ChatMessageLike, FusionBackendOptions, PanelRunInput, PanelRunner, WireTrajectory } from "./fusion-backend.js";
|
|
22
|
+
export { MlxBackend } from "./mlx-backend.js";
|
|
23
|
+
export type { MlxBackendOptions } from "./mlx-backend.js";
|
|
24
|
+
export { createBackend, DEFAULT_MLX_MODEL, resolveBackendConfig } from "./config.js";
|
|
25
|
+
export type { BackendConfig } from "./config.js";
|
|
26
|
+
export { effectiveModel, isStream, withDefaultModel } from "./adapters/chat.js";
|
|
27
|
+
export { anthropicModelsResponse, anthropicToChat, chatToAnthropicMessage, countTokensEstimate, handleAnthropicMessages, handleCountTokens, mapStopReason, openAiSseToAnthropic } from "./adapters/anthropic.js";
|
|
28
|
+
export type { AnthropicRequest } from "./adapters/anthropic.js";
|
|
29
|
+
export { chatToResponses, handleResponses, openAiSseToResponses, responsesToChat } from "./adapters/responses.js";
|
|
30
|
+
export type { ResponsesRequest } from "./adapters/responses.js";
|
|
31
|
+
export { FUSION_EVIDENCE_HEADER, FUSION_REPORT_HEADER, FUSION_RUN_ID_HEADER, FUSION_STATUS_HEADER, formatAnthropic, formatChat, formatResponses, promptFromAnthropic, promptFromChat, promptFromResponses, startFusionGateway } from "./fusion-gateway.js";
|
|
32
|
+
export type { ChatRequest, FrontDoorDialect, FrontDoorRunner, FrontDoorRunnerInput, FrontDoorRunnerResult, FusionGateway, FusionGatewayOptions } from "./fusion-gateway.js";
|
|
33
|
+
export { ACP_PROTOCOL_VERSION, runAcpAgent } from "./acp-agent.js";
|
|
34
|
+
export type { AcpAgentOptions, AcpRunner, AcpRunnerInput, AcpRunnerResult } from "./acp-agent.js";
|
|
35
|
+
export { runFrontDoorAcceptance } from "./front-door-acceptance.js";
|
|
36
|
+
export type { FrontDoorAcceptanceOptions, FrontDoorAcceptanceReport, FrontDoorOutcome, FrontDoorOutcomeProducer, FrontDoorStatus } from "./front-door-acceptance.js";
|
|
37
|
+
export { ACP_REGISTRY_URL, fetchAcpRegistry, installAcpAdapters } from "./acp-registry.js";
|
|
38
|
+
export type { AcpRegistry, AcpRegistryAgent, AcpRegistryFetcher, InstallAcpAdaptersOptions, InstalledAcpAdapter } from "./acp-registry.js";
|
|
39
|
+
export { buildModelCallRecord, MODEL_CALL_ID_HEADER, modelCallId, responseBodyHash } from "./provenance.js";
|
|
40
|
+
export type { GatewayDialect, ModelCallRecord, ModelGatewayCallContext, ModelGatewayCallResult, ProvenanceSink } from "./provenance.js";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fusionkit/model-gateway — a native local-model gateway.
|
|
3
|
+
*
|
|
4
|
+
* It fronts a single OpenAI Chat Completions backend (the owned
|
|
5
|
+
* `velum-labs/mlx-lm` fork by default — "mlx_lm.server first") and exposes the
|
|
6
|
+
* wire dialects each agent harness needs so a local model can transparently
|
|
7
|
+
* back them with no change to the user's workflow:
|
|
8
|
+
*
|
|
9
|
+
* - OpenAI Chat Completions (`/v1/chat/completions`) — opencode, Cursor IDE
|
|
10
|
+
* plan panel. Implemented (M1).
|
|
11
|
+
* - Anthropic Messages (`/v1/messages`) — Claude Code. Implemented (M2).
|
|
12
|
+
* - OpenAI Responses (`/v1/responses`) — Codex. Implemented (M3).
|
|
13
|
+
*
|
|
14
|
+
* See spec/2026-06-13-local-model-harness-bridge-spec.md.
|
|
15
|
+
*/
|
|
16
|
+
export { startGateway } from "./server.js";
|
|
17
|
+
export { joinPath, OpenAiBackend } from "./backend.js";
|
|
18
|
+
export { FusionBackend } from "./fusion-backend.js";
|
|
19
|
+
export { MlxBackend } from "./mlx-backend.js";
|
|
20
|
+
export { createBackend, DEFAULT_MLX_MODEL, resolveBackendConfig } from "./config.js";
|
|
21
|
+
export { effectiveModel, isStream, withDefaultModel } from "./adapters/chat.js";
|
|
22
|
+
export { anthropicModelsResponse, anthropicToChat, chatToAnthropicMessage, countTokensEstimate, handleAnthropicMessages, handleCountTokens, mapStopReason, openAiSseToAnthropic } from "./adapters/anthropic.js";
|
|
23
|
+
export { chatToResponses, handleResponses, openAiSseToResponses, responsesToChat } from "./adapters/responses.js";
|
|
24
|
+
export { FUSION_EVIDENCE_HEADER, FUSION_REPORT_HEADER, FUSION_RUN_ID_HEADER, FUSION_STATUS_HEADER, formatAnthropic, formatChat, formatResponses, promptFromAnthropic, promptFromChat, promptFromResponses, startFusionGateway } from "./fusion-gateway.js";
|
|
25
|
+
export { ACP_PROTOCOL_VERSION, runAcpAgent } from "./acp-agent.js";
|
|
26
|
+
export { runFrontDoorAcceptance } from "./front-door-acceptance.js";
|
|
27
|
+
export { ACP_REGISTRY_URL, fetchAcpRegistry, installAcpAdapters } from "./acp-registry.js";
|
|
28
|
+
export { buildModelCallRecord, MODEL_CALL_ID_HEADER, modelCallId, responseBodyHash } from "./provenance.js";
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { mlxServer } from "@fusionkit/adapter-ai-sdk";
|
|
2
|
+
import type { ManagedServerEvent } from "@fusionkit/adapter-ai-sdk";
|
|
3
|
+
import type { Backend } from "./backend.js";
|
|
4
|
+
/**
|
|
5
|
+
* The first-class gateway backend: the owned `velum-labs/mlx-lm` fork run as
|
|
6
|
+
* `mlx_lm.server`, provisioned and supervised by `mlxServer`
|
|
7
|
+
* (`@fusionkit/adapter-ai-sdk`). The gateway does not speak the AI SDK model
|
|
8
|
+
* interface to it — it proxies raw HTTP to the server's OpenAI-compatible
|
|
9
|
+
* `/v1` surface — so this wrapper only needs the process lifecycle and the
|
|
10
|
+
* resolved base URL. A long-lived gateway keeps the server up
|
|
11
|
+
* (`idleShutdownMs: 0` by default) rather than scaling to zero between calls.
|
|
12
|
+
*/
|
|
13
|
+
export type MlxBackendOptions = {
|
|
14
|
+
/** Hugging Face repo id the mlx server loads. */
|
|
15
|
+
model: string;
|
|
16
|
+
/**
|
|
17
|
+
* Provision the structured-decoding fork (`response_format`, `guided_json`,
|
|
18
|
+
* …). Defaults to true: structured output is the reason we own the fork.
|
|
19
|
+
*/
|
|
20
|
+
structured?: boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Idle period after which the underlying server scales to zero; defaults to
|
|
23
|
+
* 0 (stay up) since the gateway is a long-lived front door.
|
|
24
|
+
*/
|
|
25
|
+
idleShutdownMs?: number;
|
|
26
|
+
/** Optional listener; when provided, `MlxBackend` passes it to `mlxServer` as its `onEvent` option. */
onEvent?: (event: ManagedServerEvent) => void;
|
|
27
|
+
};
|
|
28
|
+
/**
 * `Backend` implementation that owns an `mlxServer`-managed process and, once
 * started, delegates requests to an `OpenAiBackend` pointed at the server's
 * `/v1` base URL.
 */
export declare class MlxBackend implements Backend {
|
|
29
|
+
#private;
|
|
30
|
+
/** Creates the managed server handle from the options; the server is not started here. */
constructor(options: MlxBackendOptions);
|
|
31
|
+
/** The model id supplied in the constructor options. */
get defaultModel(): string;
|
|
32
|
+
/** The owned MLX footprint (verify/info/destroy). */
|
|
33
|
+
get env(): ReturnType<typeof mlxServer>["env"];
|
|
34
|
+
/** Provision (if needed), spawn, and health-check the mlx server. */
|
|
35
|
+
start(): Promise<void>;
|
|
36
|
+
/** Stops the managed server and clears the cached start state, so a later call starts it again. */
stop(): Promise<void>;
|
|
37
|
+
/** Backend lifecycle hook: a gateway owning this backend tears the server down. */
|
|
38
|
+
close(): Promise<void>;
|
|
39
|
+
/** Ensures the server is started, then forwards to the inner backend's `chat`. */
chat(body: unknown, signal?: AbortSignal): Promise<Response>;
|
|
40
|
+
/** Ensures the server is started, then forwards to the inner backend's `models`. */
models(signal?: AbortSignal): Promise<Response>;
|
|
41
|
+
/** Ensures the server is started, then forwards to the inner backend's `embeddings`. */
embeddings(body: unknown, signal?: AbortSignal): Promise<Response>;
|
|
42
|
+
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { mlxServer } from "@fusionkit/adapter-ai-sdk";
|
|
2
|
+
import { OpenAiBackend } from "./backend.js";
|
|
3
|
+
/**
 * Gateway backend that owns an `mlxServer`-managed process. The server is
 * started on first use, after which every request is delegated to an
 * `OpenAiBackend` pointed at the server's `/v1` routes.
 */
export class MlxBackend {
    #server;
    #model;
    #inner;
    #startPromise;
    constructor(options) {
        this.#model = options.model;
        const settings = {
            model: options.model,
            idleShutdownMs: options.idleShutdownMs ?? 0,
            structured: options.structured ?? true
        };
        // Only pass a listener through when the caller supplied one.
        if (options.onEvent)
            Object.assign(settings, { onEvent: options.onEvent });
        this.#server = mlxServer(settings);
    }
    /** The model id this backend was constructed with. */
    get defaultModel() {
        return this.#model;
    }
    /** The owned MLX footprint (verify/info/destroy). */
    get env() {
        return this.#server.env;
    }
    /**
     * Provision (if needed), spawn, and health-check the mlx server.
     * Concurrent callers share one in-flight attempt; a failed attempt is
     * forgotten so the next call retries.
     */
    start() {
        if (this.#startPromise !== undefined)
            return this.#startPromise;
        const attempt = this.#launch().catch((error) => {
            this.#startPromise = undefined;
            throw error;
        });
        this.#startPromise = attempt;
        return attempt;
    }
    /** Start the managed server and point the inner OpenAI backend at it. */
    async #launch() {
        await this.#server.start();
        const base = this.#server.baseURL();
        if (base === undefined) {
            throw new Error("mlx server did not report a base URL after start");
        }
        // The server's base URL omits the OpenAI prefix; the fork serves the
        // OpenAI routes under /v1 (see mlx_lm.server).
        this.#inner = new OpenAiBackend({
            baseUrl: `${base}/v1`,
            defaultModel: this.#model
        });
    }
    /** Stop the managed server and forget the started state. */
    async stop() {
        await this.#server.stop();
        this.#inner = undefined;
        this.#startPromise = undefined;
    }
    /** Backend lifecycle hook: a gateway owning this backend tears the server down. */
    async close() {
        await this.stop();
    }
    /** Make sure the server is up and return the inner backend to delegate to. */
    async #ready() {
        await this.start();
        const inner = this.#inner;
        if (inner === undefined) {
            throw new Error("mlx backend is not ready");
        }
        return inner;
    }
    async chat(body, signal) {
        const backend = await this.#ready();
        return backend.chat(body, signal);
    }
    async models(signal) {
        const backend = await this.#ready();
        return backend.models(signal);
    }
    async embeddings(body, signal) {
        const backend = await this.#ready();
        return backend.embeddings(body, signal);
    }
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { ModelCallRecordV1 } from "@fusionkit/protocol";
|
|
2
|
+
/** The wire dialect a request arrived on. */
|
|
3
|
+
export type GatewayDialect = "openai-chat" | "anthropic-messages" | "openai-responses";
|
|
4
|
+
/** Header name constant. NOTE(review): presumably carries a model-call id; where it is set is not visible in this declaration file. */
export declare const MODEL_CALL_ID_HEADER = "x-velum-model-call-id";
|
|
5
|
+
/**
 * Request-side description of one model call; the `context` argument of
 * `buildModelCallRecord`.
 * NOTE(review): field semantics below are inferred from names and types only —
 * confirm against provenance.js.
 */
export type ModelGatewayCallContext = {
|
|
6
|
+
callId: string;
|
|
7
|
+
/** The wire dialect of the call (see `GatewayDialect`). */
dialect: GatewayDialect;
|
|
8
|
+
/** Presumably the model named by the client, if any — TODO confirm. */
requestedModel: string | undefined;
|
|
9
|
+
/** Presumably the model actually used, if known — TODO confirm. */
model: string | undefined;
|
|
10
|
+
stream: boolean;
|
|
11
|
+
requestBody: unknown;
|
|
12
|
+
/** NOTE(review): a timestamp string; the exact format is not visible here. */
startedAt: string;
|
|
13
|
+
endpointId?: string;
|
|
14
|
+
};
|
|
15
|
+
/**
 * Response-side description of one model call; the `result` argument of
 * `buildModelCallRecord`. `responseBody` and `error` are optional.
 * NOTE(review): how each field is populated is not visible in this
 * declaration file — confirm against provenance.js.
 */
export type ModelGatewayCallResult = {
|
|
16
|
+
statusCode: number;
|
|
17
|
+
responseBody?: Buffer;
|
|
18
|
+
durationMs: number;
|
|
19
|
+
error?: unknown;
|
|
20
|
+
};
|
|
21
|
+
/** One recorded model call observed at the gateway boundary. */
|
|
22
|
+
export type ModelCallRecord = ModelCallRecordV1;
|
|
23
|
+
/** Sink for gateway observations. All methods are optional. */
|
|
24
|
+
export type ProvenanceSink = {
|
|
25
|
+
onModelCall?(record: ModelCallRecord): void;
|
|
26
|
+
};
|
|
27
|
+
/** Builds the `ModelCallRecord` for one call from its request-side context and response-side result. */
export declare function buildModelCallRecord(context: ModelGatewayCallContext, result: ModelGatewayCallResult): ModelCallRecord;
|
|
28
|
+
/** Returns a model-call id string. NOTE(review): presumably freshly generated per call — confirm in provenance.js. */
export declare function modelCallId(): string;
|
|
29
|
+
/** Returns a string hash of a response body. NOTE(review): the algorithm and encoding are not visible in this declaration file. */
export declare function responseBodyHash(body: Buffer): string;
|