@fusionkit/model-gateway 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/acp-agent.d.ts +39 -0
  2. package/dist/acp-agent.js +143 -0
  3. package/dist/acp-registry.d.ts +36 -0
  4. package/dist/acp-registry.js +85 -0
  5. package/dist/adapters/anthropic.d.ts +111 -0
  6. package/dist/adapters/anthropic.js +446 -0
  7. package/dist/adapters/chat.d.ts +14 -0
  8. package/dist/adapters/chat.js +34 -0
  9. package/dist/adapters/responses.d.ts +94 -0
  10. package/dist/adapters/responses.js +438 -0
  11. package/dist/backend.d.ts +52 -0
  12. package/dist/backend.js +57 -0
  13. package/dist/config.d.ts +22 -0
  14. package/dist/config.js +47 -0
  15. package/dist/front-door-acceptance.d.ts +41 -0
  16. package/dist/front-door-acceptance.js +219 -0
  17. package/dist/fusion-backend.d.ts +96 -0
  18. package/dist/fusion-backend.js +521 -0
  19. package/dist/fusion-gateway.d.ts +69 -0
  20. package/dist/fusion-gateway.js +355 -0
  21. package/dist/index.d.ts +40 -0
  22. package/dist/index.js +28 -0
  23. package/dist/mlx-backend.d.ts +42 -0
  24. package/dist/mlx-backend.js +71 -0
  25. package/dist/provenance.d.ts +29 -0
  26. package/dist/provenance.js +182 -0
  27. package/dist/server.d.ts +27 -0
  28. package/dist/server.js +234 -0
  29. package/dist/test/acp-agent.test.d.ts +1 -0
  30. package/dist/test/acp-agent.test.js +66 -0
  31. package/dist/test/acp-registry.test.d.ts +1 -0
  32. package/dist/test/acp-registry.test.js +70 -0
  33. package/dist/test/anthropic.test.d.ts +1 -0
  34. package/dist/test/anthropic.test.js +251 -0
  35. package/dist/test/chat.test.d.ts +1 -0
  36. package/dist/test/chat.test.js +270 -0
  37. package/dist/test/front-door-acceptance.test.d.ts +1 -0
  38. package/dist/test/front-door-acceptance.test.js +94 -0
  39. package/dist/test/fusion-backend-trace.test.d.ts +1 -0
  40. package/dist/test/fusion-backend-trace.test.js +107 -0
  41. package/dist/test/fusion-backend.test.d.ts +1 -0
  42. package/dist/test/fusion-backend.test.js +193 -0
  43. package/dist/test/fusion-gateway.test.d.ts +1 -0
  44. package/dist/test/fusion-gateway.test.js +107 -0
  45. package/dist/test/responses.test.d.ts +1 -0
  46. package/dist/test/responses.test.js +157 -0
  47. package/package.json +31 -0
@@ -0,0 +1,438 @@
1
+ /**
2
+ * OpenAI Responses adapter. Codex speaks the Responses API exclusively
3
+ * (`wire_api="responses"`; Chat Completions support was removed), so to back it
4
+ * with a local model we translate `/v1/responses` to and from the gateway's
5
+ * OpenAI Chat Completions core. The pure translation functions are exported for
6
+ * testing; the handler returns a `Response` the server pipes (JSON or SSE).
7
+ *
8
+ * This is the highest-fidelity adapter: it maps Responses `input` items
9
+ * (messages, function calls, function-call outputs) into chat messages, and
10
+ * emits the Responses streaming event sequence (`response.created`,
11
+ * `response.output_item.added`, `response.output_text.delta`,
12
+ * `response.function_call_arguments.delta`, `response.completed`, …) from chat
13
+ * completion chunks.
14
+ */
15
+ const ENCODER = new TextEncoder(); // Shared encoder: turns SSE frames and keepalive comments into bytes for the output stream.
16
/**
 * Produce a short pseudo-random base-36 token (at most ten characters) used
 * as the suffix of synthetic response, message and call ids. Not
 * cryptographically secure.
 */
function randomId() {
    const base36 = Math.random().toString(36);
    // Drop the leading "0." and keep up to ten digits of the fraction.
    return base36.substring(2, 12);
}
19
/**
 * Extract the text of a single content part. Only parts whose `type` is
 * `input_text`, `output_text` or `text` and whose `text` is a string
 * contribute; every other part yields the empty string.
 */
function partText(part) {
    if (typeof part.text !== "string") {
        return "";
    }
    const textualTypes = ["input_text", "output_text", "text"];
    return textualTypes.includes(part.type) ? part.text : "";
}
25
/**
 * Flatten message content to plain text: a string is returned unchanged,
 * an array of parts is reduced to the concatenation of each part's text.
 */
function contentToText(content) {
    const isPlainString = typeof content === "string";
    return isPlainString ? content : content.map(partText).join("");
}
30
/**
 * Convert Responses message content into Chat Completions message content.
 *
 * - A string is returned unchanged.
 * - An array is mapped part by part: `input_image` parts with a string
 *   `image_url` become `image_url` parts, textual parts become `text` parts,
 *   and parts that carry no text are dropped.
 * - A result consisting of exactly one text part collapses to a bare string.
 * - Any other value (for example `null`) yields the empty string instead of
 *   throwing, because request bodies are untrusted JSON.
 *
 * @param content Responses `content`: a string or an array of content parts.
 * @returns A string, or an array of chat content parts.
 */
function contentToParts(content) {
    if (typeof content === "string")
        return content;
    // `content: null` or a non-array value would otherwise throw on iteration.
    if (!Array.isArray(content))
        return "";
    const parts = [];
    for (const part of content) {
        // Skip malformed entries rather than failing the whole request.
        if (part === null || typeof part !== "object")
            continue;
        if (part.type === "input_image" && typeof part.image_url === "string") {
            parts.push({ type: "image_url", image_url: { url: part.image_url } });
            continue;
        }
        const text = partText(part);
        if (text.length > 0)
            parts.push({ type: "text", text });
    }
    if (parts.length === 1 && parts[0].type === "text") {
        return String(parts[0].text);
    }
    return parts;
}
49
/**
 * Map a Responses `tool_choice` to its Chat Completions equivalent.
 *
 * - String modes (`"auto"`, `"none"`, `"required"`, …) pass through unchanged.
 * - An object carrying a non-empty string `name` becomes a forced function
 *   choice for that name.
 * - Any other value (an object without a name, or `null`) cannot be expressed
 *   as a function choice, so it falls back to `"auto"` instead of producing a
 *   function choice with an undefined name.
 *
 * @param choice The Responses `tool_choice` value.
 * @returns A Chat Completions `tool_choice` value.
 */
function mapToolChoice(choice) {
    if (typeof choice === "string")
        return choice;
    const hasName = choice !== null &&
        typeof choice === "object" &&
        typeof choice.name === "string" &&
        choice.name.length > 0;
    if (hasName) {
        return { type: "function", function: { name: choice.name } };
    }
    return "auto";
}
54
/**
 * Translate a Responses request to an OpenAI Chat Completions body.
 *
 * - `instructions` becomes a leading `system` message.
 * - `input` may be a string (one `user` message) or an array of items:
 *   `function_call` items are coalesced into one assistant message,
 *   `function_call_output` items become `tool` messages, and message items
 *   (`type: "message"` or a bare `{role, content}`) become chat messages with
 *   the `developer` role mapped to `system`. Items of any other type are
 *   skipped.
 * - `max_output_tokens`, `temperature` and `top_p` are copied when numeric.
 * - Only tools with a non-empty string name are forwarded, and `tool_choice`
 *   is forwarded only when at least one tool is.
 *
 * @param body Parsed Responses request body.
 * @param backendModel Model id to send upstream; falls back to `body.model`,
 *   then to the empty string.
 * @returns The Chat Completions request body.
 */
export function responsesToChat(body, backendModel) {
    const messages = [];
    if (typeof body.instructions === "string" && body.instructions.length > 0) {
        messages.push({ role: "system", content: body.instructions });
    }
    const input = body.input;
    if (typeof input === "string") {
        messages.push({ role: "user", content: input });
    }
    else if (Array.isArray(input)) {
        // Coalesce consecutive function_call items into ONE assistant message.
        // Codex emits parallel tool calls as separate function_call items; the chat
        // API requires an assistant message's tool_calls to be answered by the
        // following tool messages before the next assistant message, so each call
        // must not become its own assistant turn.
        let pendingToolCalls = [];
        const flushToolCalls = () => {
            if (pendingToolCalls.length === 0)
                return;
            messages.push({ role: "assistant", content: null, tool_calls: pendingToolCalls });
            pendingToolCalls = [];
        };
        for (const item of input) {
            // Request bodies are untrusted JSON; ignore entries that are not objects.
            if (item === null || typeof item !== "object")
                continue;
            if (item.type === "function_call") {
                // Chat Completions requires `arguments` to be a JSON string.
                const args = typeof item.arguments === "string"
                    ? item.arguments
                    : JSON.stringify(item.arguments ?? {});
                pendingToolCalls.push({
                    id: item.call_id ?? item.id ?? `call_${randomId()}`,
                    type: "function",
                    function: { name: item.name, arguments: args }
                });
                continue;
            }
            if (item.type === "function_call_output") {
                flushToolCalls();
                let content = "";
                if (typeof item.output === "string")
                    content = item.output;
                else if (item.output !== undefined)
                    content = JSON.stringify(item.output);
                messages.push({ role: "tool", tool_call_id: item.call_id, content });
                continue;
            }
            // Items that are neither calls, outputs nor messages (e.g. reasoning)
            // have no chat equivalent. Skip them WITHOUT flushing so they cannot
            // split a run of parallel tool calls into separate assistant turns.
            if (item.type !== undefined && item.type !== "message")
                continue;
            // message item (explicit type "message" or a bare {role, content})
            flushToolCalls();
            if (item.content === undefined)
                continue;
            const role = item.role === "developer" ? "system" : item.role ?? "user";
            messages.push({ role, content: contentToParts(item.content) });
        }
        flushToolCalls();
    }
    const chat = {
        model: backendModel ?? body.model ?? "",
        messages,
        stream: body.stream === true
    };
    if (typeof body.max_output_tokens === "number")
        chat.max_tokens = body.max_output_tokens;
    if (typeof body.temperature === "number")
        chat.temperature = body.temperature;
    if (typeof body.top_p === "number")
        chat.top_p = body.top_p;
    if (Array.isArray(body.tools) && body.tools.length > 0) {
        // Only forward function tools with a usable name. Codex advertises some
        // tools (e.g. custom/freeform shapes) that translate to an empty function
        // name, which OpenAI Chat Completions rejects outright.
        const named = body.tools.filter((tool) => tool !== null && typeof tool === "object" && typeof tool.name === "string" && tool.name.length > 0);
        if (named.length > 0) {
            chat.tools = named.map((tool) => ({
                type: "function",
                function: {
                    name: tool.name,
                    ...(tool.description !== undefined ? { description: tool.description } : {}),
                    parameters: tool.parameters ?? { type: "object", properties: {} }
                }
            }));
        }
    }
    // Chat Completions backends reject `tool_choice` when no `tools` are sent,
    // so only forward it alongside at least one forwarded tool.
    if (body.tool_choice !== undefined && chat.tools !== undefined)
        chat.tool_choice = mapToolChoice(body.tool_choice);
    if (body.stream === true)
        chat.stream_options = { include_usage: true };
    return chat;
}
136
+ // ---- non-streaming response translation ----
137
/**
 * Build the Responses `output` array from a chat completion message.
 * Non-empty string content becomes one completed assistant `message` item;
 * each entry of `tool_calls` becomes a completed `function_call` item. Ids
 * that the chat message does not supply are synthesized.
 */
function buildOutput(message) {
    const items = [];
    const content = message?.content;
    if (typeof content === "string" && content.length > 0) {
        const textPart = { type: "output_text", text: content, annotations: [] };
        items.push({
            type: "message",
            id: `msg_${randomId()}`,
            status: "completed",
            role: "assistant",
            content: [textPart]
        });
    }
    const calls = message?.tool_calls;
    if (!Array.isArray(calls)) {
        return items;
    }
    for (const call of calls) {
        const fn = call.function;
        items.push({
            type: "function_call",
            id: `fc_${randomId()}`,
            call_id: call.id ?? `call_${randomId()}`,
            name: fn?.name ?? "",
            arguments: fn?.arguments ?? "",
            status: "completed"
        });
    }
    return items;
}
163
/**
 * Translate a non-streaming chat completion into a Responses `response`
 * object. Output items come from the first choice's message; token usage is
 * copied from `usage`, with missing counts treated as zero.
 *
 * @param openai Parsed chat completion body.
 * @param model Model id reported in the response.
 */
export function chatToResponses(openai, model) {
    const firstMessage = openai.choices?.[0]?.message;
    const output = buildOutput(firstMessage);
    const usage = openai.usage;
    const promptTokens = usage?.prompt_tokens ?? 0;
    const completionTokens = usage?.completion_tokens ?? 0;
    const createdAt = Math.floor(Date.now() / 1000);
    return {
        id: `resp_${openai.id ?? randomId()}`,
        object: "response",
        created_at: createdAt,
        status: "completed",
        model,
        output,
        usage: {
            input_tokens: promptTokens,
            output_tokens: completionTokens,
            total_tokens: promptTokens + completionTokens
        }
    };
}
178
+ // ---- streaming translation (OpenAI chat SSE -> Responses SSE) ----
179
/**
 * Encode one server-sent event frame. The event name is repeated as the
 * `type` field of the JSON payload (a `type` key in `data` overrides it).
 */
function sse(type, data) {
    const payload = JSON.stringify({ type, ...data });
    const frame = "event: " + type + "\ndata: " + payload + "\n\n";
    return ENCODER.encode(frame);
}
182
/**
 * Translate an OpenAI Chat Completions SSE byte stream into a Responses API
 * SSE byte stream.
 *
 * Emits `response.created` immediately, then `response.output_item.added`,
 * `response.content_part.added`, `response.output_text.delta` and
 * `response.function_call_arguments.delta` as chunks arrive, and finally the
 * `*.done` events followed by exactly one `response.completed`.
 *
 * Completion is deferred until token usage is known: the stream finalizes
 * once a `finish_reason` AND a usage object have been seen, or on
 * `data: [DONE]`, or when the upstream ends, whichever comes first. The usage
 * chunk requested through `stream_options.include_usage` arrives after the
 * chunk carrying `finish_reason`, so finalizing on `finish_reason` alone would
 * always report zero tokens.
 *
 * @param upstream Byte stream of `data: {...}` chat completion chunks.
 * @param model Model id echoed in every emitted `response` object.
 * @returns A byte stream of encoded Responses events, with `: keepalive`
 *   comments every three seconds until the response is finalized.
 */
export function openAiSseToResponses(upstream, model) {
    const reader = upstream.getReader();
    const decoder = new TextDecoder();
    const responseId = `resp_${randomId()}`;
    const messageItemId = `msg_${randomId()}`;
    // Tool calls keyed by the upstream `tool_calls[].index`.
    const tools = new Map();
    let buffer = "";
    let created = false;
    let keepaliveTimer;
    let textOpen = false;
    let textValue = "";
    let nextOutputIndex = 0;
    let messageOutputIndex = -1;
    let finished = false;
    let cancelled = false;
    let finishSeen = false;
    let usageSeen = false;
    // Count of translated events enqueued; lets `pull` detect progress.
    let emittedEvents = 0;
    let inputTokens = 0;
    let outputTokens = 0;
    const stopKeepalive = () => {
        if (keepaliveTimer !== undefined) {
            clearInterval(keepaliveTimer);
            keepaliveTimer = undefined;
        }
    };
    const emit = (controller, type, data) => {
        controller.enqueue(sse(type, data));
        emittedEvents += 1;
    };
    const baseResponse = (status, output) => ({
        id: responseId,
        object: "response",
        created_at: Math.floor(Date.now() / 1000),
        status,
        model,
        output,
        usage: status === "completed"
            ? { input_tokens: inputTokens, output_tokens: outputTokens, total_tokens: inputTokens + outputTokens }
            : null
    });
    const ensureCreated = (controller) => {
        if (created)
            return;
        created = true;
        emit(controller, "response.created", { response: baseResponse("in_progress", []) });
    };
    const ensureText = (controller) => {
        ensureCreated(controller);
        if (textOpen)
            return;
        textOpen = true;
        messageOutputIndex = nextOutputIndex++;
        emit(controller, "response.output_item.added", {
            output_index: messageOutputIndex,
            item: { type: "message", id: messageItemId, status: "in_progress", role: "assistant", content: [] }
        });
        emit(controller, "response.content_part.added", {
            item_id: messageItemId,
            output_index: messageOutputIndex,
            content_index: 0,
            part: { type: "output_text", text: "", annotations: [] }
        });
    };
    const assembleOutput = () => {
        const output = [];
        if (textOpen) {
            output.push({
                type: "message",
                id: messageItemId,
                status: "completed",
                role: "assistant",
                content: [{ type: "output_text", text: textValue, annotations: [] }]
            });
        }
        for (const tool of tools.values()) {
            output.push({
                type: "function_call",
                id: tool.itemId,
                call_id: tool.callId,
                name: tool.name,
                arguments: tool.args,
                status: "completed"
            });
        }
        return output;
    };
    // Idempotent: emits the closing events and `response.completed` once.
    const finalize = (controller) => {
        if (finished)
            return;
        finished = true;
        stopKeepalive();
        if (textOpen) {
            emit(controller, "response.output_text.done", {
                item_id: messageItemId,
                output_index: messageOutputIndex,
                content_index: 0,
                text: textValue
            });
            emit(controller, "response.content_part.done", {
                item_id: messageItemId,
                output_index: messageOutputIndex,
                content_index: 0,
                part: { type: "output_text", text: textValue, annotations: [] }
            });
            emit(controller, "response.output_item.done", {
                output_index: messageOutputIndex,
                item: {
                    type: "message",
                    id: messageItemId,
                    status: "completed",
                    role: "assistant",
                    content: [{ type: "output_text", text: textValue, annotations: [] }]
                }
            });
        }
        for (const tool of tools.values()) {
            emit(controller, "response.function_call_arguments.done", {
                item_id: tool.itemId,
                output_index: tool.outputIndex,
                arguments: tool.args
            });
            emit(controller, "response.output_item.done", {
                output_index: tool.outputIndex,
                item: {
                    type: "function_call",
                    id: tool.itemId,
                    call_id: tool.callId,
                    name: tool.name,
                    arguments: tool.args,
                    status: "completed"
                }
            });
        }
        emit(controller, "response.completed", { response: baseResponse("completed", assembleOutput()) });
    };
    const handleChunk = (controller, chunk) => {
        // Nothing may follow `response.completed`.
        if (finished || chunk === null || typeof chunk !== "object")
            return;
        // With `include_usage`, OpenAI sends `usage: null` on every chunk except
        // the trailing one, so null must be treated as "no usage yet".
        if (chunk.usage !== undefined && chunk.usage !== null) {
            usageSeen = true;
            inputTokens = chunk.usage.prompt_tokens ?? inputTokens;
            outputTokens = chunk.usage.completion_tokens ?? outputTokens;
        }
        const choice = chunk.choices?.[0];
        if (choice !== undefined && choice !== null) {
            const delta = choice.delta ?? {};
            if (typeof delta.content === "string" && delta.content.length > 0) {
                ensureText(controller);
                textValue += delta.content;
                emit(controller, "response.output_text.delta", {
                    item_id: messageItemId,
                    output_index: messageOutputIndex,
                    content_index: 0,
                    delta: delta.content
                });
            }
            if (Array.isArray(delta.tool_calls)) {
                for (const call of delta.tool_calls) {
                    if (call === null || typeof call !== "object")
                        continue;
                    const openAiIndex = typeof call.index === "number" ? call.index : 0;
                    let tool = tools.get(openAiIndex);
                    if (tool === undefined) {
                        ensureCreated(controller);
                        tool = {
                            outputIndex: nextOutputIndex++,
                            itemId: `fc_${randomId()}`,
                            callId: call.id ?? `call_${randomId()}`,
                            name: call.function?.name ?? "",
                            args: ""
                        };
                        tools.set(openAiIndex, tool);
                        emit(controller, "response.output_item.added", {
                            output_index: tool.outputIndex,
                            item: { type: "function_call", id: tool.itemId, call_id: tool.callId, name: tool.name, arguments: "" }
                        });
                    }
                    if (call.function?.name !== undefined && tool.name.length === 0)
                        tool.name = call.function.name;
                    const args = call.function?.arguments;
                    if (typeof args === "string" && args.length > 0) {
                        tool.args += args;
                        emit(controller, "response.function_call_arguments.delta", {
                            item_id: tool.itemId,
                            output_index: tool.outputIndex,
                            delta: args
                        });
                    }
                }
            }
            if (choice.finish_reason !== null && choice.finish_reason !== undefined) {
                finishSeen = true;
            }
        }
        // Complete as soon as both the finish marker and the usage are known;
        // otherwise `[DONE]` or end-of-stream finalizes.
        if (finishSeen && usageSeen)
            finalize(controller);
    };
    const handleLine = (controller, rawLine) => {
        const line = rawLine.trim();
        if (!line.startsWith("data:"))
            return;
        const payload = line.slice(5).trim();
        if (payload === "[DONE]") {
            finalize(controller);
            return;
        }
        try {
            handleChunk(controller, JSON.parse(payload));
        }
        catch {
            // Best effort: ignore malformed lines rather than failing the stream.
        }
    };
    return new ReadableStream({
        start(controller) {
            // Emit `response.created` immediately and keep the connection alive with
            // SSE comments while the upstream is still producing its first event. Real
            // CLIs (codex) reconnect if they see nothing for a while — which happens
            // during the fusion panel phase before the judge's first token.
            ensureCreated(controller);
            keepaliveTimer = setInterval(() => {
                if (finished)
                    return;
                try {
                    controller.enqueue(ENCODER.encode(": keepalive\n\n"));
                }
                catch {
                    // Controller closed or errored: the timer has nothing left to do.
                    stopKeepalive();
                }
            }, 3000);
        },
        async pull(controller) {
            const emittedBefore = emittedEvents;
            try {
                // A pull that enqueues nothing is not guaranteed to be followed by
                // another pull, so keep reading until this one produced an event or
                // the upstream ended.
                while (emittedEvents === emittedBefore) {
                    const { done, value } = await reader.read();
                    if (done) {
                        if (cancelled)
                            return;
                        // Flush the decoder and process a final unterminated line.
                        buffer += decoder.decode();
                        if (buffer.length > 0) {
                            handleLine(controller, buffer);
                            buffer = "";
                        }
                        finalize(controller);
                        controller.close();
                        return;
                    }
                    buffer += decoder.decode(value, { stream: true });
                    let newline = buffer.indexOf("\n");
                    while (newline >= 0) {
                        const line = buffer.slice(0, newline);
                        buffer = buffer.slice(newline + 1);
                        handleLine(controller, line);
                        newline = buffer.indexOf("\n");
                    }
                }
            }
            catch (error) {
                // An upstream failure errors this stream; never leave the keepalive
                // interval running behind it.
                stopKeepalive();
                throw error;
            }
        },
        cancel(reason) {
            cancelled = true;
            finished = true;
            stopKeepalive();
            return reader.cancel(reason);
        }
    });
}
415
+ // ---- handler ----
416
/** Build a `Response` with the given status whose body is `value` serialized as JSON. */
function jsonResponse(status, value) {
    const headers = { "content-type": "application/json" };
    const body = JSON.stringify(value);
    return new Response(body, { status, headers });
}
419
/**
 * Serve one `/v1/responses` request through the chat backend.
 *
 * The request is translated with `responsesToChat` (always targeting the
 * backend's default model) and sent via `backend.chat`. A non-OK upstream
 * reply is returned as a JSON `api_error` with the upstream status and at most
 * 2000 characters of its body. Streaming requests get the upstream SSE body
 * re-encoded by `openAiSseToResponses`; all others get the JSON produced by
 * `chatToResponses`.
 *
 * @param backend Chat backend exposing `defaultModel` and `chat`.
 * @param body Parsed Responses request body.
 * @param modelCallId Identifier passed to the backend in the request options.
 * @param signal Optional abort signal passed to the backend.
 */
export async function handleResponses(backend, body, modelCallId, signal) {
    const reportedModel = body.model ?? backend.defaultModel ?? "";
    const chatRequest = responsesToChat(body, backend.defaultModel);
    const upstream = await backend.chat(chatRequest, signal, { modelCallId });
    if (!upstream.ok) {
        const detail = await upstream.text();
        const message = detail.slice(0, 2000);
        return jsonResponse(upstream.status, { error: { type: "api_error", message } });
    }
    if (body.stream !== true) {
        const completion = await upstream.json();
        return jsonResponse(200, chatToResponses(completion, reportedModel));
    }
    const source = upstream.body;
    if (source === null) {
        return jsonResponse(502, { error: { type: "api_error", message: "no upstream stream" } });
    }
    const headers = { "content-type": "text/event-stream", "cache-control": "no-cache" };
    return new Response(openAiSseToResponses(source, reportedModel), { status: 200, headers });
}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * The gateway's model backend: an OpenAI-compatible Chat Completions server
3
+ * that the gateway translates every harness dialect down to. In practice this
4
+ * is the owned `velum-labs/mlx-lm` fork (`mlx_lm.server`), but it is equally
5
+ * any OpenAI-compatible local server (Ollama, vLLM, LM Studio) or a process
6
+ * fronted by `mlxServer`/`routedModel`. The backend is intentionally a thin
7
+ * `fetch` wrapper that returns the upstream `Response` unchanged, so the chat
8
+ * surface can stream straight through and the dialect adapters can consume the
9
+ * same core without a second abstraction.
10
+ */
11
+ export type Backend = { // Contract for a gateway model backend; each request method resolves to the upstream `Response`.
12
+ /** Model id sent to the backend when a request omits one; `undefined` when no default is configured. */
13
+ readonly defaultModel: string | undefined;
14
+ /** POST <base>/chat/completions — supports streaming (SSE) upstream. `options` carries per-request metadata. */
15
+ chat(body: unknown, signal?: AbortSignal, options?: BackendRequestOptions): Promise<Response>;
16
+ /** GET <base>/models. */
17
+ models(signal?: AbortSignal): Promise<Response>;
18
+ /** POST <base>/embeddings. */
19
+ embeddings(body: unknown, signal?: AbortSignal): Promise<Response>;
20
+ /** Release any owned resources (e.g. a managed model process). Optional; may be synchronous or asynchronous. */
21
+ close?(): Promise<void> | void;
22
+ };
23
+ export type BackendRequestOptions = { // Optional per-request metadata accepted by `Backend.chat`.
24
+ modelCallId?: string; // When non-empty, `OpenAiBackend` sends it as the `x-velum-model-call-id` request header.
25
+ };
26
+ export type OpenAiBackendOptions = {
27
+ /**
28
+ * Base URL including the OpenAI API prefix, e.g.
29
+ * `http://127.0.0.1:8080/v1`. Route paths (`/chat/completions`, `/models`,
30
+ * `/embeddings`) are appended to this value; one trailing `/` is tolerated.
31
+ */
32
+ baseUrl: string;
33
+ /**
34
+ * Bearer credential sent to the backend in the `authorization` header. Local
35
+ * servers ignore it; the default, `not-needed`, mirrors the placeholder the
36
+ * AI SDK uses for local OpenAI-compatible servers.
37
+ */
38
+ apiKey?: string;
39
+ /** Model id used when a request omits `model`; exposed as `OpenAiBackend.defaultModel`. */
40
+ defaultModel?: string;
41
+ };
42
+ /** Join a base URL with a route path: one trailing `/` is stripped from `baseUrl` and a leading `/` is added to `path` when missing. */
43
+ export declare function joinPath(baseUrl: string, path: string): string;
44
+ /** An OpenAI Chat Completions backend reached over HTTP; each method issues one `fetch` and resolves to the upstream `Response`. */
45
+ export declare class OpenAiBackend implements Backend {
46
+ #private;
47
+ readonly defaultModel: string | undefined; // Copied from `options.defaultModel`.
48
+ constructor(options: OpenAiBackendOptions);
49
+ chat(body: unknown, signal?: AbortSignal, options?: BackendRequestOptions): Promise<Response>; // POST <baseUrl>/chat/completions with `body` as JSON.
50
+ models(signal?: AbortSignal): Promise<Response>; // GET <baseUrl>/models.
51
+ embeddings(body: unknown, signal?: AbortSignal): Promise<Response>; // POST <baseUrl>/embeddings with `body` as JSON.
52
+ }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * The gateway's model backend: an OpenAI-compatible Chat Completions server
3
+ * that the gateway translates every harness dialect down to. In practice this
4
+ * is the owned `velum-labs/mlx-lm` fork (`mlx_lm.server`), but it is equally
5
+ * any OpenAI-compatible local server (Ollama, vLLM, LM Studio) or a process
6
+ * fronted by `mlxServer`/`routedModel`. The backend is intentionally a thin
7
+ * `fetch` wrapper that returns the upstream `Response` unchanged, so the chat
8
+ * surface can stream straight through and the dialect adapters can consume the
9
+ * same core without a second abstraction.
10
+ */
11
/**
 * Join a base URL with a route path so exactly one `/` separates them when
 * the base ends in at most one slash: a single trailing `/` is removed from
 * `baseUrl` and a leading `/` is added to `path` if it lacks one.
 */
export function joinPath(baseUrl, path) {
    const trimmedBase = baseUrl.endsWith("/")
        ? baseUrl.substring(0, baseUrl.length - 1)
        : baseUrl;
    const route = path.startsWith("/") ? path : "/" + path;
    return trimmedBase + route;
}
17
/**
 * An OpenAI Chat Completions backend reached over HTTP. Every method issues a
 * single `fetch` against `<baseUrl><route>` and returns the pending upstream
 * `Response` untouched.
 */
export class OpenAiBackend {
    #baseUrl;
    #apiKey;
    defaultModel;
    constructor(options) {
        this.#baseUrl = options.baseUrl;
        // Local servers ignore the credential, so fall back to a placeholder.
        this.#apiKey = options.apiKey ?? "not-needed";
        this.defaultModel = options.defaultModel;
    }
    /** Common request headers, plus the model-call id header when one is supplied. */
    #headers(options = {}) {
        const headers = {
            "content-type": "application/json",
            authorization: `Bearer ${this.#apiKey}`
        };
        if (options.modelCallId) {
            headers["x-velum-model-call-id"] = options.modelCallId;
        }
        return headers;
    }
    /** Issue the request, attaching the abort signal only when one was given. */
    #dispatch(url, init, signal) {
        return fetch(url, signal ? { ...init, signal } : init);
    }
    chat(body, signal, options = {}) {
        const url = joinPath(this.#baseUrl, "/chat/completions");
        const init = { method: "POST", headers: this.#headers(options), body: JSON.stringify(body) };
        return this.#dispatch(url, init, signal);
    }
    models(signal) {
        const url = joinPath(this.#baseUrl, "/models");
        const init = { method: "GET", headers: this.#headers() };
        return this.#dispatch(url, init, signal);
    }
    embeddings(body, signal) {
        const url = joinPath(this.#baseUrl, "/embeddings");
        const init = { method: "POST", headers: this.#headers(), body: JSON.stringify(body) };
        return this.#dispatch(url, init, signal);
    }
}
@@ -0,0 +1,22 @@
1
+ import type { Backend } from "./backend.js";
2
+ /**
3
+ * Backend selection for the gateway. The default is the owned mlx fork
4
+ * (`mlx_lm.server`) — "mlx_lm.server first". An explicit OpenAI-compatible URL
5
+ * (`FUSIONKIT_LOCAL_MODEL_URL`) overrides it, which covers an already-running mlx
6
+ * server or a different local server (Ollama, vLLM, LM Studio) on hosts where
7
+ * the mlx provisioner cannot run. Legacy `WARRANT_*` names are still honored.
8
+ */
9
+ /** Default mlx model, matching the examples/mlx default; used when no mlx model environment variable is set. */
10
+ export declare const DEFAULT_MLX_MODEL = "prism-ml/Ternary-Bonsai-4B-mlx-2bit";
11
+ export type BackendConfig = { // Discriminated on `kind`; produced by `resolveBackendConfig`, consumed by `createBackend`.
12
+ kind: "mlx"; // The owned mlx fork (`mlx_lm.server`); the default when no model URL is configured.
13
+ model: string; // Model id passed to `MlxBackend`.
14
+ structured: boolean; // Structured decoding; on unless the environment sets it to "0".
15
+ } | {
16
+ kind: "openai"; // An explicit OpenAI-compatible server selected by URL.
17
+ baseUrl: string; // Passed to `OpenAiBackend` as `baseUrl`.
18
+ apiKey?: string; // Passed to `OpenAiBackend` when present.
19
+ defaultModel?: string; // Passed to `OpenAiBackend` when present.
20
+ };
21
+ export declare function resolveBackendConfig(env?: Record<string, string | undefined>): BackendConfig; // Reads `FUSIONKIT_*` (or legacy `WARRANT_*`) variables from `env`, which defaults to `process.env`.
22
+ export declare function createBackend(config: BackendConfig): Backend; // Instantiates `MlxBackend` or `OpenAiBackend` according to `config.kind`; throws on an unknown kind.
package/dist/config.js ADDED
@@ -0,0 +1,47 @@
1
+ import { OpenAiBackend } from "./backend.js";
2
+ import { MlxBackend } from "./mlx-backend.js";
3
+ /**
4
+ * Backend selection for the gateway. The default is the owned mlx fork
5
+ * (`mlx_lm.server`) — "mlx_lm.server first". An explicit OpenAI-compatible URL
6
+ * (`FUSIONKIT_LOCAL_MODEL_URL`) overrides it, which covers an already-running mlx
7
+ * server or a different local server (Ollama, vLLM, LM Studio) on hosts where
8
+ * the mlx provisioner cannot run. Legacy `WARRANT_*` names are still honored.
9
+ */
10
+ /** Default mlx model, matching the examples/mlx default. Used by `resolveBackendConfig` when neither `FUSIONKIT_MLX_MODEL` nor `WARRANT_MLX_MODEL` is set. */
11
+ export const DEFAULT_MLX_MODEL = "prism-ml/Ternary-Bonsai-4B-mlx-2bit";
12
/**
 * Return the first candidate that is a non-empty string, or `undefined`.
 * Environment variables are often exported as empty strings, which must not
 * shadow a later, populated candidate.
 */
function firstNonEmpty(...candidates) {
    return candidates.find((value) => typeof value === "string" && value.length > 0);
}
/**
 * Resolve which backend the gateway should use from environment variables.
 *
 * An OpenAI-compatible backend is selected when `FUSIONKIT_LOCAL_MODEL_URL`
 * (or legacy `WARRANT_LOCAL_MODEL_URL`) is non-empty, with optional
 * `*_LOCAL_MODEL_KEY` and `*_LOCAL_MODEL` values. Otherwise the mlx backend is
 * selected, using `*_MLX_MODEL` (default `DEFAULT_MLX_MODEL`) and structured
 * decoding unless `*_MLX_STRUCTURED` is `"0"`.
 *
 * For every setting the `FUSIONKIT_*` name wins over the `WARRANT_*` name, and
 * an empty value counts as unset so it can neither block the legacy fallback
 * nor yield an empty model id or API key.
 *
 * @param env Environment map; defaults to `process.env`.
 * @returns The resolved backend configuration.
 */
export function resolveBackendConfig(env = process.env) {
    const url = firstNonEmpty(env.FUSIONKIT_LOCAL_MODEL_URL, env.WARRANT_LOCAL_MODEL_URL);
    if (url !== undefined) {
        const apiKey = firstNonEmpty(env.FUSIONKIT_LOCAL_MODEL_KEY, env.WARRANT_LOCAL_MODEL_KEY);
        const defaultModel = firstNonEmpty(env.FUSIONKIT_LOCAL_MODEL, env.WARRANT_LOCAL_MODEL);
        return {
            kind: "openai",
            baseUrl: url,
            ...(apiKey !== undefined ? { apiKey } : {}),
            ...(defaultModel !== undefined ? { defaultModel } : {})
        };
    }
    return {
        kind: "mlx",
        model: firstNonEmpty(env.FUSIONKIT_MLX_MODEL, env.WARRANT_MLX_MODEL) ?? DEFAULT_MLX_MODEL,
        // Structured decoding (the owned fork's reason for being) is on unless
        // explicitly disabled.
        structured: firstNonEmpty(env.FUSIONKIT_MLX_STRUCTURED, env.WARRANT_MLX_STRUCTURED) !== "0"
    };
}
32
/**
 * Instantiate the backend described by `config`: an `MlxBackend` for kind
 * `"mlx"`, an `OpenAiBackend` for kind `"openai"` (optional fields are passed
 * only when defined). Any other kind throws an error that includes the
 * serialized config.
 */
export function createBackend(config) {
    if (config.kind === "mlx") {
        const { model, structured } = config;
        return new MlxBackend({ model, structured });
    }
    if (config.kind === "openai") {
        const options = { baseUrl: config.baseUrl };
        if (config.apiKey !== undefined) {
            options.apiKey = config.apiKey;
        }
        if (config.defaultModel !== undefined) {
            options.defaultModel = config.defaultModel;
        }
        return new OpenAiBackend(options);
    }
    // Unreachable for well-typed callers; guards against malformed configs.
    const unreachable = config;
    throw new Error(`unknown backend config: ${JSON.stringify(unreachable)}`);
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Unified front-door acceptance suite — the definition of "correct and done".
3
+ *
4
+ * Runs the same prompt/sentinel through every configured front door and
5
+ * produces one stable report with explicit `passed` / `failed` /
6
+ * `skipped_with_reason` / `blocked` outcomes. The HTTP front doors (Codex
7
+ * Responses, Claude Messages, OpenAI Chat for Cursorkit) are probed against a
8
+ * running Fusion Harness Gateway. The generic ACP front door is exercised
9
+ * in-process through an injected ACP runner. Cursor ACP and the registry-backed
10
+ * Codex/Claude ACP adapters are supplied as injected outcome producers so the
11
+ * CLI can wire real adapters while tests inject deterministic fakes.
12
+ */
13
+ import type { AcpRunner } from "./acp-agent.js";
14
+ export type FrontDoorStatus = "passed" | "failed" | "skipped_with_reason" | "blocked"; // The explicit outcomes a front door can report in the acceptance report.
15
+ export type FrontDoorOutcome = { // Result recorded for one front door in the acceptance report.
16
+ id: string; // Identifier of the front door this outcome belongs to.
17
+ status: FrontDoorStatus;
18
+ request_path?: string; // NOTE(review): presumably the gateway route probed for HTTP front doors — confirm against the implementation.
19
+ gateway_run_id?: string; // NOTE(review): presumably the gateway run correlated with the probe — confirm.
20
+ reason?: string; // NOTE(review): presumably explains non-passing statuses such as `skipped_with_reason` — confirm.
21
+ evidence: string[]; // Always present; NOTE(review): entry format is not visible from this declaration.
22
+ };
23
+ export type FrontDoorAcceptanceReport = { // The single report produced by one acceptance run.
24
+ sentinel: string; // The sentinel the run was performed with.
25
+ generated_at: string; // NOTE(review): presumably a timestamp string; exact format not visible here — confirm.
26
+ front_doors: FrontDoorOutcome[]; // One outcome per front door exercised.
27
+ };
28
+ export type FrontDoorOutcomeProducer = () => Promise<FrontDoorOutcome>; // Injected callback yielding a ready-made outcome, so the CLI can wire real adapters while tests inject fakes.
29
+ export type FrontDoorAcceptanceOptions = {
30
+ gatewayUrl: string; // URL of the running gateway that the HTTP front doors are probed against.
31
+ sentinel: string; // Prompt/sentinel run through every configured front door.
32
+ /** In-process ACP runner for the generic ACP front door. */
33
+ acpRunner?: AcpRunner;
34
+ /** Cursor ACP outcome via Cursorkit; absent means the dependency is missing. */
35
+ cursorAcp?: FrontDoorOutcomeProducer;
36
+ /** Registry-backed Codex ACP adapter outcome, supplied as an injected producer. */
37
+ codexAcp?: FrontDoorOutcomeProducer;
38
+ /** Registry-backed Claude Agent ACP adapter outcome, supplied as an injected producer. */
39
+ claudeAcp?: FrontDoorOutcomeProducer;
40
+ };
41
+ export declare function runFrontDoorAcceptance(options: FrontDoorAcceptanceOptions): Promise<FrontDoorAcceptanceReport>; // Runs the acceptance suite over the configured front doors and resolves to one report.