@fusionkit/model-gateway 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-agent.d.ts +39 -0
- package/dist/acp-agent.js +143 -0
- package/dist/acp-registry.d.ts +36 -0
- package/dist/acp-registry.js +85 -0
- package/dist/adapters/anthropic.d.ts +111 -0
- package/dist/adapters/anthropic.js +446 -0
- package/dist/adapters/chat.d.ts +14 -0
- package/dist/adapters/chat.js +34 -0
- package/dist/adapters/responses.d.ts +94 -0
- package/dist/adapters/responses.js +438 -0
- package/dist/backend.d.ts +52 -0
- package/dist/backend.js +57 -0
- package/dist/config.d.ts +22 -0
- package/dist/config.js +47 -0
- package/dist/front-door-acceptance.d.ts +41 -0
- package/dist/front-door-acceptance.js +219 -0
- package/dist/fusion-backend.d.ts +96 -0
- package/dist/fusion-backend.js +521 -0
- package/dist/fusion-gateway.d.ts +69 -0
- package/dist/fusion-gateway.js +355 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +28 -0
- package/dist/mlx-backend.d.ts +42 -0
- package/dist/mlx-backend.js +71 -0
- package/dist/provenance.d.ts +29 -0
- package/dist/provenance.js +182 -0
- package/dist/server.d.ts +27 -0
- package/dist/server.js +234 -0
- package/dist/test/acp-agent.test.d.ts +1 -0
- package/dist/test/acp-agent.test.js +66 -0
- package/dist/test/acp-registry.test.d.ts +1 -0
- package/dist/test/acp-registry.test.js +70 -0
- package/dist/test/anthropic.test.d.ts +1 -0
- package/dist/test/anthropic.test.js +251 -0
- package/dist/test/chat.test.d.ts +1 -0
- package/dist/test/chat.test.js +270 -0
- package/dist/test/front-door-acceptance.test.d.ts +1 -0
- package/dist/test/front-door-acceptance.test.js +94 -0
- package/dist/test/fusion-backend-trace.test.d.ts +1 -0
- package/dist/test/fusion-backend-trace.test.js +107 -0
- package/dist/test/fusion-backend.test.d.ts +1 -0
- package/dist/test/fusion-backend.test.js +193 -0
- package/dist/test/fusion-gateway.test.d.ts +1 -0
- package/dist/test/fusion-gateway.test.js +107 -0
- package/dist/test/responses.test.d.ts +1 -0
- package/dist/test/responses.test.js +157 -0
- package/package.json +31 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Responses adapter. Codex speaks the Responses API exclusively
|
|
3
|
+
* (`wire_api="responses"`; Chat Completions support was removed), so to back it
|
|
4
|
+
* with a local model we translate `/v1/responses` to and from the gateway's
|
|
5
|
+
* OpenAI Chat Completions core. The pure translation functions are exported for
|
|
6
|
+
* testing; the handler returns a `Response` the server pipes (JSON or SSE).
|
|
7
|
+
*
|
|
8
|
+
* This is the highest-fidelity adapter: it maps Responses `input` items
|
|
9
|
+
* (messages, function calls, function-call outputs) into chat messages, and
|
|
10
|
+
* emits the Responses streaming event sequence (`response.created`,
|
|
11
|
+
* `response.output_item.added`, `response.output_text.delta`,
|
|
12
|
+
* `response.function_call_arguments.delta`, `response.completed`, …) from chat
|
|
13
|
+
* completion chunks.
|
|
14
|
+
*/
|
|
15
|
+
const ENCODER = new TextEncoder();
|
|
16
|
+
/**
 * Generate a 10-character base-36 identifier suffix.
 *
 * The value comes from `Math.random()`, so it is only suitable for labelling
 * items within a response, not for anything security-sensitive.
 *
 * @returns Exactly 10 characters drawn from `[0-9a-z]`.
 */
function randomId() {
    // `toString(36)` yields fewer fractional digits for "short" fractions
    // (0.5 -> "0.i") and no fraction at all for 0, so pad to a fixed width.
    return Math.random().toString(36).slice(2, 12).padEnd(10, "0");
}
|
|
19
|
+
/**
 * Extract the text carried by a single content part.
 *
 * @param part A content part; only `input_text`, `output_text` and `text`
 *   parts with a string `text` contribute.
 * @returns The part's text, or `""` for any other value (including `null`
 *   and non-object parts, which previously raised a TypeError).
 */
function partText(part) {
    if (part === null || typeof part !== "object") {
        return "";
    }
    const textual = part.type === "input_text" || part.type === "output_text" || part.type === "text";
    return textual && typeof part.text === "string" ? part.text : "";
}
|
|
25
|
+
function contentToText(content) {
|
|
26
|
+
if (typeof content === "string")
|
|
27
|
+
return content;
|
|
28
|
+
return content.map(partText).join("");
|
|
29
|
+
}
|
|
30
|
+
function contentToParts(content) {
|
|
31
|
+
if (typeof content === "string")
|
|
32
|
+
return content;
|
|
33
|
+
const parts = [];
|
|
34
|
+
for (const part of content) {
|
|
35
|
+
if (part.type === "input_image" && typeof part.image_url === "string") {
|
|
36
|
+
parts.push({ type: "image_url", image_url: { url: part.image_url } });
|
|
37
|
+
}
|
|
38
|
+
else {
|
|
39
|
+
const text = partText(part);
|
|
40
|
+
if (text.length > 0)
|
|
41
|
+
parts.push({ type: "text", text });
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
if (parts.length === 1 && parts[0]?.type === "text") {
|
|
45
|
+
return String(parts[0].text);
|
|
46
|
+
}
|
|
47
|
+
return parts;
|
|
48
|
+
}
|
|
49
|
+
/**
 * Map a Responses `tool_choice` to its Chat Completions equivalent.
 *
 * @param choice A mode string (passed through unchanged) or an object that
 *   selects a tool by `name`.
 * @returns The mode string, a `{ type: "function", function: { name } }`
 *   selector, or `undefined` when the choice cannot be expressed as a named
 *   function (e.g. `null`, or a selector without a usable `name`). Callers
 *   should omit `tool_choice` in that case rather than send a nameless
 *   function selector.
 */
function mapToolChoice(choice) {
    if (typeof choice === "string") {
        return choice;
    }
    if (choice !== null && typeof choice === "object" && typeof choice.name === "string" && choice.name.length > 0) {
        return { type: "function", function: { name: choice.name } };
    }
    return undefined;
}
|
|
54
|
+
/** Translate a Responses request to an OpenAI Chat Completions body. */
|
|
55
|
+
export function responsesToChat(body, backendModel) {
|
|
56
|
+
const messages = [];
|
|
57
|
+
if (typeof body.instructions === "string" && body.instructions.length > 0) {
|
|
58
|
+
messages.push({ role: "system", content: body.instructions });
|
|
59
|
+
}
|
|
60
|
+
const input = body.input;
|
|
61
|
+
if (typeof input === "string") {
|
|
62
|
+
messages.push({ role: "user", content: input });
|
|
63
|
+
}
|
|
64
|
+
else if (Array.isArray(input)) {
|
|
65
|
+
// Coalesce consecutive function_call items into ONE assistant message.
|
|
66
|
+
// Codex emits parallel tool calls as separate function_call items; the chat
|
|
67
|
+
// API requires an assistant message's tool_calls to be answered by the
|
|
68
|
+
// following tool messages before the next assistant message, so each call
|
|
69
|
+
// must not become its own assistant turn.
|
|
70
|
+
let pendingToolCalls = [];
|
|
71
|
+
const flushToolCalls = () => {
|
|
72
|
+
if (pendingToolCalls.length === 0)
|
|
73
|
+
return;
|
|
74
|
+
messages.push({ role: "assistant", content: null, tool_calls: pendingToolCalls });
|
|
75
|
+
pendingToolCalls = [];
|
|
76
|
+
};
|
|
77
|
+
for (const item of input) {
|
|
78
|
+
if (item.type === "function_call") {
|
|
79
|
+
const call = item;
|
|
80
|
+
pendingToolCalls.push({
|
|
81
|
+
id: call.call_id ?? call.id ?? `call_${randomId()}`,
|
|
82
|
+
type: "function",
|
|
83
|
+
function: { name: call.name, arguments: call.arguments }
|
|
84
|
+
});
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
flushToolCalls();
|
|
88
|
+
if (item.type === "function_call_output") {
|
|
89
|
+
const out = item;
|
|
90
|
+
const content = typeof out.output === "string" ? out.output : JSON.stringify(out.output);
|
|
91
|
+
messages.push({ role: "tool", tool_call_id: out.call_id, content });
|
|
92
|
+
continue;
|
|
93
|
+
}
|
|
94
|
+
// message item (explicit type "message" or a bare {role, content})
|
|
95
|
+
const message = item;
|
|
96
|
+
if (message.content === undefined)
|
|
97
|
+
continue;
|
|
98
|
+
const role = message.role === "developer" ? "system" : message.role ?? "user";
|
|
99
|
+
messages.push({ role, content: contentToParts(message.content) });
|
|
100
|
+
}
|
|
101
|
+
flushToolCalls();
|
|
102
|
+
}
|
|
103
|
+
const chat = {
|
|
104
|
+
model: backendModel ?? body.model ?? "",
|
|
105
|
+
messages,
|
|
106
|
+
stream: body.stream === true
|
|
107
|
+
};
|
|
108
|
+
if (typeof body.max_output_tokens === "number")
|
|
109
|
+
chat.max_tokens = body.max_output_tokens;
|
|
110
|
+
if (typeof body.temperature === "number")
|
|
111
|
+
chat.temperature = body.temperature;
|
|
112
|
+
if (typeof body.top_p === "number")
|
|
113
|
+
chat.top_p = body.top_p;
|
|
114
|
+
if (Array.isArray(body.tools) && body.tools.length > 0) {
|
|
115
|
+
// Only forward function tools with a usable name. Codex advertises some
|
|
116
|
+
// tools (e.g. custom/freeform shapes) that translate to an empty function
|
|
117
|
+
// name, which OpenAI Chat Completions rejects outright.
|
|
118
|
+
const named = body.tools.filter((tool) => typeof tool.name === "string" && tool.name.length > 0);
|
|
119
|
+
if (named.length > 0) {
|
|
120
|
+
chat.tools = named.map((tool) => ({
|
|
121
|
+
type: "function",
|
|
122
|
+
function: {
|
|
123
|
+
name: tool.name,
|
|
124
|
+
...(tool.description !== undefined ? { description: tool.description } : {}),
|
|
125
|
+
parameters: tool.parameters ?? { type: "object", properties: {} }
|
|
126
|
+
}
|
|
127
|
+
}));
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
if (body.tool_choice !== undefined)
|
|
131
|
+
chat.tool_choice = mapToolChoice(body.tool_choice);
|
|
132
|
+
if (body.stream === true)
|
|
133
|
+
chat.stream_options = { include_usage: true };
|
|
134
|
+
return chat;
|
|
135
|
+
}
|
|
136
|
+
// ---- non-streaming response translation ----
|
|
137
|
+
function buildOutput(message) {
|
|
138
|
+
const output = [];
|
|
139
|
+
const text = typeof message?.content === "string" ? message.content : "";
|
|
140
|
+
if (text.length > 0) {
|
|
141
|
+
output.push({
|
|
142
|
+
type: "message",
|
|
143
|
+
id: `msg_${randomId()}`,
|
|
144
|
+
status: "completed",
|
|
145
|
+
role: "assistant",
|
|
146
|
+
content: [{ type: "output_text", text, annotations: [] }]
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
if (Array.isArray(message?.tool_calls)) {
|
|
150
|
+
for (const call of message.tool_calls) {
|
|
151
|
+
output.push({
|
|
152
|
+
type: "function_call",
|
|
153
|
+
id: `fc_${randomId()}`,
|
|
154
|
+
call_id: call.id ?? `call_${randomId()}`,
|
|
155
|
+
name: call.function?.name ?? "",
|
|
156
|
+
arguments: call.function?.arguments ?? "",
|
|
157
|
+
status: "completed"
|
|
158
|
+
});
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
return output;
|
|
162
|
+
}
|
|
163
|
+
export function chatToResponses(openai, model) {
|
|
164
|
+
const message = openai.choices?.[0]?.message;
|
|
165
|
+
const output = buildOutput(message);
|
|
166
|
+
const inputTokens = openai.usage?.prompt_tokens ?? 0;
|
|
167
|
+
const outputTokens = openai.usage?.completion_tokens ?? 0;
|
|
168
|
+
return {
|
|
169
|
+
id: `resp_${openai.id ?? randomId()}`,
|
|
170
|
+
object: "response",
|
|
171
|
+
created_at: Math.floor(Date.now() / 1000),
|
|
172
|
+
status: "completed",
|
|
173
|
+
model,
|
|
174
|
+
output,
|
|
175
|
+
usage: { input_tokens: inputTokens, output_tokens: outputTokens, total_tokens: inputTokens + outputTokens }
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
// ---- streaming translation (OpenAI chat SSE -> Responses SSE) ----
|
|
179
|
+
/**
 * Encode one server-sent event frame.
 *
 * The event name is written on the `event:` line and repeated as the `type`
 * field of the JSON payload on the `data:` line (fields of `data` are spread
 * after it).
 *
 * @param type Event name.
 * @param data Additional payload fields.
 * @returns The encoded frame bytes, terminated by a blank line.
 */
function sse(type, data) {
    const payload = JSON.stringify({ type, ...data });
    const frame = `event: ${type}\ndata: ${payload}\n\n`;
    return ENCODER.encode(frame);
}
|
|
182
|
+
export function openAiSseToResponses(upstream, model) {
|
|
183
|
+
const reader = upstream.getReader();
|
|
184
|
+
const decoder = new TextDecoder();
|
|
185
|
+
const responseId = `resp_${randomId()}`;
|
|
186
|
+
const messageItemId = `msg_${randomId()}`;
|
|
187
|
+
const tools = new Map();
|
|
188
|
+
let buffer = "";
|
|
189
|
+
let created = false;
|
|
190
|
+
let keepaliveTimer;
|
|
191
|
+
let textOpen = false;
|
|
192
|
+
let textValue = "";
|
|
193
|
+
let nextOutputIndex = 0;
|
|
194
|
+
let messageOutputIndex = -1;
|
|
195
|
+
let finished = false;
|
|
196
|
+
let inputTokens = 0;
|
|
197
|
+
let outputTokens = 0;
|
|
198
|
+
const baseResponse = (status, output) => ({
|
|
199
|
+
id: responseId,
|
|
200
|
+
object: "response",
|
|
201
|
+
created_at: Math.floor(Date.now() / 1000),
|
|
202
|
+
status,
|
|
203
|
+
model,
|
|
204
|
+
output,
|
|
205
|
+
usage: status === "completed"
|
|
206
|
+
? { input_tokens: inputTokens, output_tokens: outputTokens, total_tokens: inputTokens + outputTokens }
|
|
207
|
+
: null
|
|
208
|
+
});
|
|
209
|
+
const ensureCreated = (controller) => {
|
|
210
|
+
if (created)
|
|
211
|
+
return;
|
|
212
|
+
created = true;
|
|
213
|
+
controller.enqueue(sse("response.created", { response: baseResponse("in_progress", []) }));
|
|
214
|
+
};
|
|
215
|
+
const ensureText = (controller) => {
|
|
216
|
+
ensureCreated(controller);
|
|
217
|
+
if (textOpen)
|
|
218
|
+
return;
|
|
219
|
+
textOpen = true;
|
|
220
|
+
messageOutputIndex = nextOutputIndex++;
|
|
221
|
+
controller.enqueue(sse("response.output_item.added", {
|
|
222
|
+
output_index: messageOutputIndex,
|
|
223
|
+
item: { type: "message", id: messageItemId, status: "in_progress", role: "assistant", content: [] }
|
|
224
|
+
}));
|
|
225
|
+
controller.enqueue(sse("response.content_part.added", {
|
|
226
|
+
item_id: messageItemId,
|
|
227
|
+
output_index: messageOutputIndex,
|
|
228
|
+
content_index: 0,
|
|
229
|
+
part: { type: "output_text", text: "", annotations: [] }
|
|
230
|
+
}));
|
|
231
|
+
};
|
|
232
|
+
const assembleOutput = () => {
|
|
233
|
+
const output = [];
|
|
234
|
+
if (textOpen) {
|
|
235
|
+
output.push({
|
|
236
|
+
type: "message",
|
|
237
|
+
id: messageItemId,
|
|
238
|
+
status: "completed",
|
|
239
|
+
role: "assistant",
|
|
240
|
+
content: [{ type: "output_text", text: textValue, annotations: [] }]
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
for (const tool of tools.values()) {
|
|
244
|
+
output.push({
|
|
245
|
+
type: "function_call",
|
|
246
|
+
id: tool.itemId,
|
|
247
|
+
call_id: tool.callId,
|
|
248
|
+
name: tool.name,
|
|
249
|
+
arguments: tool.args,
|
|
250
|
+
status: "completed"
|
|
251
|
+
});
|
|
252
|
+
}
|
|
253
|
+
return output;
|
|
254
|
+
};
|
|
255
|
+
const finalize = (controller) => {
|
|
256
|
+
if (finished)
|
|
257
|
+
return;
|
|
258
|
+
finished = true;
|
|
259
|
+
if (keepaliveTimer !== undefined)
|
|
260
|
+
clearInterval(keepaliveTimer);
|
|
261
|
+
if (textOpen) {
|
|
262
|
+
controller.enqueue(sse("response.output_text.done", {
|
|
263
|
+
item_id: messageItemId,
|
|
264
|
+
output_index: messageOutputIndex,
|
|
265
|
+
content_index: 0,
|
|
266
|
+
text: textValue
|
|
267
|
+
}));
|
|
268
|
+
controller.enqueue(sse("response.content_part.done", {
|
|
269
|
+
item_id: messageItemId,
|
|
270
|
+
output_index: messageOutputIndex,
|
|
271
|
+
content_index: 0,
|
|
272
|
+
part: { type: "output_text", text: textValue, annotations: [] }
|
|
273
|
+
}));
|
|
274
|
+
controller.enqueue(sse("response.output_item.done", {
|
|
275
|
+
output_index: messageOutputIndex,
|
|
276
|
+
item: {
|
|
277
|
+
type: "message",
|
|
278
|
+
id: messageItemId,
|
|
279
|
+
status: "completed",
|
|
280
|
+
role: "assistant",
|
|
281
|
+
content: [{ type: "output_text", text: textValue, annotations: [] }]
|
|
282
|
+
}
|
|
283
|
+
}));
|
|
284
|
+
}
|
|
285
|
+
for (const tool of tools.values()) {
|
|
286
|
+
controller.enqueue(sse("response.function_call_arguments.done", {
|
|
287
|
+
item_id: tool.itemId,
|
|
288
|
+
output_index: tool.outputIndex,
|
|
289
|
+
arguments: tool.args
|
|
290
|
+
}));
|
|
291
|
+
controller.enqueue(sse("response.output_item.done", {
|
|
292
|
+
output_index: tool.outputIndex,
|
|
293
|
+
item: {
|
|
294
|
+
type: "function_call",
|
|
295
|
+
id: tool.itemId,
|
|
296
|
+
call_id: tool.callId,
|
|
297
|
+
name: tool.name,
|
|
298
|
+
arguments: tool.args,
|
|
299
|
+
status: "completed"
|
|
300
|
+
}
|
|
301
|
+
}));
|
|
302
|
+
}
|
|
303
|
+
controller.enqueue(sse("response.completed", { response: baseResponse("completed", assembleOutput()) }));
|
|
304
|
+
};
|
|
305
|
+
const process = (controller, chunk) => {
|
|
306
|
+
if (chunk.usage !== undefined) {
|
|
307
|
+
inputTokens = chunk.usage.prompt_tokens ?? inputTokens;
|
|
308
|
+
outputTokens = chunk.usage.completion_tokens ?? outputTokens;
|
|
309
|
+
}
|
|
310
|
+
const choice = chunk.choices?.[0];
|
|
311
|
+
if (choice === undefined)
|
|
312
|
+
return;
|
|
313
|
+
const delta = choice.delta ?? {};
|
|
314
|
+
if (typeof delta.content === "string" && delta.content.length > 0) {
|
|
315
|
+
ensureText(controller);
|
|
316
|
+
textValue += delta.content;
|
|
317
|
+
controller.enqueue(sse("response.output_text.delta", {
|
|
318
|
+
item_id: messageItemId,
|
|
319
|
+
output_index: messageOutputIndex,
|
|
320
|
+
content_index: 0,
|
|
321
|
+
delta: delta.content
|
|
322
|
+
}));
|
|
323
|
+
}
|
|
324
|
+
if (Array.isArray(delta.tool_calls)) {
|
|
325
|
+
for (const call of delta.tool_calls) {
|
|
326
|
+
const openAiIndex = typeof call.index === "number" ? call.index : 0;
|
|
327
|
+
let tool = tools.get(openAiIndex);
|
|
328
|
+
if (tool === undefined) {
|
|
329
|
+
ensureCreated(controller);
|
|
330
|
+
tool = {
|
|
331
|
+
outputIndex: nextOutputIndex++,
|
|
332
|
+
itemId: `fc_${randomId()}`,
|
|
333
|
+
callId: call.id ?? `call_${randomId()}`,
|
|
334
|
+
name: call.function?.name ?? "",
|
|
335
|
+
args: ""
|
|
336
|
+
};
|
|
337
|
+
tools.set(openAiIndex, tool);
|
|
338
|
+
controller.enqueue(sse("response.output_item.added", {
|
|
339
|
+
output_index: tool.outputIndex,
|
|
340
|
+
item: { type: "function_call", id: tool.itemId, call_id: tool.callId, name: tool.name, arguments: "" }
|
|
341
|
+
}));
|
|
342
|
+
}
|
|
343
|
+
if (call.function?.name !== undefined && tool.name.length === 0)
|
|
344
|
+
tool.name = call.function.name;
|
|
345
|
+
const args = call.function?.arguments;
|
|
346
|
+
if (typeof args === "string" && args.length > 0) {
|
|
347
|
+
tool.args += args;
|
|
348
|
+
controller.enqueue(sse("response.function_call_arguments.delta", {
|
|
349
|
+
item_id: tool.itemId,
|
|
350
|
+
output_index: tool.outputIndex,
|
|
351
|
+
delta: args
|
|
352
|
+
}));
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
if (choice.finish_reason !== null && choice.finish_reason !== undefined) {
|
|
357
|
+
finalize(controller);
|
|
358
|
+
}
|
|
359
|
+
};
|
|
360
|
+
return new ReadableStream({
|
|
361
|
+
start(controller) {
|
|
362
|
+
// Emit `response.created` immediately and keep the connection alive with
|
|
363
|
+
// SSE comments while the upstream is still producing its first event. Real
|
|
364
|
+
// CLIs (codex) reconnect if they see nothing for a while — which happens
|
|
365
|
+
// during the fusion panel phase before the judge's first token.
|
|
366
|
+
ensureCreated(controller);
|
|
367
|
+
keepaliveTimer = setInterval(() => {
|
|
368
|
+
if (finished)
|
|
369
|
+
return;
|
|
370
|
+
try {
|
|
371
|
+
controller.enqueue(ENCODER.encode(": keepalive\n\n"));
|
|
372
|
+
}
|
|
373
|
+
catch {
|
|
374
|
+
// controller closed
|
|
375
|
+
}
|
|
376
|
+
}, 3000);
|
|
377
|
+
},
|
|
378
|
+
async pull(controller) {
|
|
379
|
+
const { done, value } = await reader.read();
|
|
380
|
+
if (done) {
|
|
381
|
+
if (!finished)
|
|
382
|
+
finalize(controller);
|
|
383
|
+
controller.close();
|
|
384
|
+
return;
|
|
385
|
+
}
|
|
386
|
+
buffer += decoder.decode(value, { stream: true });
|
|
387
|
+
let newline = buffer.indexOf("\n");
|
|
388
|
+
while (newline >= 0) {
|
|
389
|
+
const line = buffer.slice(0, newline).trim();
|
|
390
|
+
buffer = buffer.slice(newline + 1);
|
|
391
|
+
newline = buffer.indexOf("\n");
|
|
392
|
+
if (!line.startsWith("data:"))
|
|
393
|
+
continue;
|
|
394
|
+
const payload = line.slice(5).trim();
|
|
395
|
+
if (payload === "[DONE]") {
|
|
396
|
+
if (!finished)
|
|
397
|
+
finalize(controller);
|
|
398
|
+
continue;
|
|
399
|
+
}
|
|
400
|
+
try {
|
|
401
|
+
process(controller, JSON.parse(payload));
|
|
402
|
+
}
|
|
403
|
+
catch {
|
|
404
|
+
// ignore malformed lines
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
},
|
|
408
|
+
cancel(reason) {
|
|
409
|
+
if (keepaliveTimer !== undefined)
|
|
410
|
+
clearInterval(keepaliveTimer);
|
|
411
|
+
return reader.cancel(reason);
|
|
412
|
+
}
|
|
413
|
+
});
|
|
414
|
+
}
|
|
415
|
+
// ---- handler ----
|
|
416
|
+
/**
 * Build a JSON `Response`.
 *
 * @param status HTTP status code.
 * @param value Value serialized with `JSON.stringify` as the body.
 * @returns A `Response` with `content-type: application/json`.
 */
function jsonResponse(status, value) {
    const payload = JSON.stringify(value);
    const init = { status, headers: { "content-type": "application/json" } };
    return new Response(payload, init);
}
|
|
419
|
+
export async function handleResponses(backend, body, modelCallId, signal) {
|
|
420
|
+
const requestedModel = body.model ?? backend.defaultModel ?? "";
|
|
421
|
+
const chat = responsesToChat(body, backend.defaultModel);
|
|
422
|
+
const upstream = await backend.chat(chat, signal, { modelCallId });
|
|
423
|
+
if (!upstream.ok) {
|
|
424
|
+
const detail = await upstream.text();
|
|
425
|
+
return jsonResponse(upstream.status, { error: { type: "api_error", message: detail.slice(0, 2000) } });
|
|
426
|
+
}
|
|
427
|
+
if (body.stream === true) {
|
|
428
|
+
const source = upstream.body;
|
|
429
|
+
if (source === null)
|
|
430
|
+
return jsonResponse(502, { error: { type: "api_error", message: "no upstream stream" } });
|
|
431
|
+
return new Response(openAiSseToResponses(source, requestedModel), {
|
|
432
|
+
status: 200,
|
|
433
|
+
headers: { "content-type": "text/event-stream", "cache-control": "no-cache" }
|
|
434
|
+
});
|
|
435
|
+
}
|
|
436
|
+
const openai = (await upstream.json());
|
|
437
|
+
return jsonResponse(200, chatToResponses(openai, requestedModel));
|
|
438
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The gateway's model backend: an OpenAI-compatible Chat Completions server
|
|
3
|
+
* that the gateway translates every harness dialect down to. In practice this
|
|
4
|
+
* is the owned `velum-labs/mlx-lm` fork (`mlx_lm.server`), but it is equally
|
|
5
|
+
* any OpenAI-compatible local server (Ollama, vLLM, LM Studio) or a process
|
|
6
|
+
* fronted by `mlxServer`/`routedModel`. The backend is intentionally a thin
|
|
7
|
+
* `fetch` wrapper that returns the upstream `Response` unchanged, so the chat
|
|
8
|
+
* surface can stream straight through and the dialect adapters can consume the
|
|
9
|
+
* same core without a second abstraction.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Contract implemented by every gateway model backend. Each request method
 * resolves to a `Response`; `close` is optional.
 */
export type Backend = {
    /** Model id sent to the backend when a request omits one. */
    readonly defaultModel: string | undefined;
    /** POST <base>/chat/completions — supports streaming (SSE) upstream. */
    chat(body: unknown, signal?: AbortSignal, options?: BackendRequestOptions): Promise<Response>;
    /** GET <base>/models. */
    models(signal?: AbortSignal): Promise<Response>;
    /** POST <base>/embeddings. */
    embeddings(body: unknown, signal?: AbortSignal): Promise<Response>;
    /** Release any owned resources (e.g. a managed model process). Optional. */
    close?(): Promise<void> | void;
};
|
|
23
|
+
/** Per-request options accepted by `Backend.chat`. */
export type BackendRequestOptions = {
    /**
     * Identifier for this model call. `OpenAiBackend` forwards it as the
     * `x-velum-model-call-id` request header when it is set.
     */
    modelCallId?: string;
};
|
|
26
|
+
/** Constructor options for `OpenAiBackend`. */
export type OpenAiBackendOptions = {
    /**
     * Base URL including the OpenAI API prefix, e.g.
     * `http://127.0.0.1:8080/v1`. Route paths (`/chat/completions`, `/models`,
     * `/embeddings`) are appended to this value.
     */
    baseUrl: string;
    /**
     * Bearer credential forwarded to the backend. Local servers ignore it; the
     * default mirrors the `not-needed` placeholder the AI SDK uses for local
     * OpenAI-compatible servers.
     */
    apiKey?: string;
    /** Model id used when a request omits `model`. */
    defaultModel?: string;
};
|
|
42
|
+
/**
 * Join a base URL (which may end in `/`) with a route path (which may omit
 * its leading `/`), producing exactly one `/` between them.
 */
export declare function joinPath(baseUrl: string, path: string): string;
|
|
44
|
+
/** An OpenAI Chat Completions backend reached over HTTP. */
export declare class OpenAiBackend implements Backend {
    #private;
    /** Default model id, copied from the constructor options. */
    readonly defaultModel: string | undefined;
    constructor(options: OpenAiBackendOptions);
    /** POST `<baseUrl>/chat/completions`; `options.modelCallId` is sent as a request header when set. */
    chat(body: unknown, signal?: AbortSignal, options?: BackendRequestOptions): Promise<Response>;
    /** GET `<baseUrl>/models`. */
    models(signal?: AbortSignal): Promise<Response>;
    /** POST `<baseUrl>/embeddings`. */
    embeddings(body: unknown, signal?: AbortSignal): Promise<Response>;
}
|
package/dist/backend.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The gateway's model backend: an OpenAI-compatible Chat Completions server
|
|
3
|
+
* that the gateway translates every harness dialect down to. In practice this
|
|
4
|
+
* is the owned `velum-labs/mlx-lm` fork (`mlx_lm.server`), but it is equally
|
|
5
|
+
* any OpenAI-compatible local server (Ollama, vLLM, LM Studio) or a process
|
|
6
|
+
* fronted by `mlxServer`/`routedModel`. The backend is intentionally a thin
|
|
7
|
+
* `fetch` wrapper that returns the upstream `Response` unchanged, so the chat
|
|
8
|
+
* surface can stream straight through and the dialect adapters can consume the
|
|
9
|
+
* same core without a second abstraction.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Join a base URL with a route path using exactly one `/` between them.
 *
 * @param baseUrl Base URL; any number of trailing `/` characters is ignored.
 * @param path Route path, with or without a leading `/`.
 * @returns The joined URL string.
 */
export function joinPath(baseUrl, path) {
    // Drop every trailing slash, not just one, so "…/v1//" cannot produce a
    // double slash in the request URL.
    let end = baseUrl.length;
    while (end > 0 && baseUrl.charAt(end - 1) === "/") {
        end -= 1;
    }
    const base = baseUrl.slice(0, end);
    const suffix = path.startsWith("/") ? path : `/${path}`;
    return `${base}${suffix}`;
}
|
|
17
|
+
/** An OpenAI Chat Completions backend reached over HTTP. */
|
|
18
|
+
export class OpenAiBackend {
|
|
19
|
+
#baseUrl;
|
|
20
|
+
#apiKey;
|
|
21
|
+
defaultModel;
|
|
22
|
+
constructor(options) {
|
|
23
|
+
this.#baseUrl = options.baseUrl;
|
|
24
|
+
this.#apiKey = options.apiKey ?? "not-needed";
|
|
25
|
+
this.defaultModel = options.defaultModel;
|
|
26
|
+
}
|
|
27
|
+
#headers(options = {}) {
|
|
28
|
+
return {
|
|
29
|
+
"content-type": "application/json",
|
|
30
|
+
authorization: `Bearer ${this.#apiKey}`,
|
|
31
|
+
...(options.modelCallId ? { "x-velum-model-call-id": options.modelCallId } : {})
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
chat(body, signal, options = {}) {
|
|
35
|
+
return fetch(joinPath(this.#baseUrl, "/chat/completions"), {
|
|
36
|
+
method: "POST",
|
|
37
|
+
headers: this.#headers(options),
|
|
38
|
+
body: JSON.stringify(body),
|
|
39
|
+
...(signal ? { signal } : {})
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
models(signal) {
|
|
43
|
+
return fetch(joinPath(this.#baseUrl, "/models"), {
|
|
44
|
+
method: "GET",
|
|
45
|
+
headers: this.#headers(),
|
|
46
|
+
...(signal ? { signal } : {})
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
embeddings(body, signal) {
|
|
50
|
+
return fetch(joinPath(this.#baseUrl, "/embeddings"), {
|
|
51
|
+
method: "POST",
|
|
52
|
+
headers: this.#headers(),
|
|
53
|
+
body: JSON.stringify(body),
|
|
54
|
+
...(signal ? { signal } : {})
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
}
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { Backend } from "./backend.js";
|
|
2
|
+
/**
|
|
3
|
+
* Backend selection for the gateway. The default is the owned mlx fork
|
|
4
|
+
* (`mlx_lm.server`) — "mlx_lm.server first". An explicit OpenAI-compatible URL
|
|
5
|
+
* (`FUSIONKIT_LOCAL_MODEL_URL`) overrides it, which covers an already-running mlx
|
|
6
|
+
* server or a different local server (Ollama, vLLM, LM Studio) on hosts where
|
|
7
|
+
* the mlx provisioner cannot run. Legacy `WARRANT_*` names are still honored.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Default mlx model, matching the examples/mlx default. Used by
 * `resolveBackendConfig` when no mlx model variable is set.
 */
export declare const DEFAULT_MLX_MODEL = "prism-ml/Ternary-Bonsai-4B-mlx-2bit";
|
|
11
|
+
/**
 * Resolved backend selection, discriminated by `kind`:
 * - `"mlx"`: the mlx backend with its model id and whether structured
 *   decoding is enabled;
 * - `"openai"`: an OpenAI-compatible server at `baseUrl`, with an optional
 *   credential and default model.
 */
export type BackendConfig = {
    kind: "mlx";
    model: string;
    structured: boolean;
} | {
    kind: "openai";
    baseUrl: string;
    apiKey?: string;
    defaultModel?: string;
};
|
|
21
|
+
/**
 * Resolve the backend configuration from environment variables
 * (`process.env` when `env` is omitted). A configured local model URL selects
 * the `"openai"` kind; otherwise the `"mlx"` kind is returned.
 */
export declare function resolveBackendConfig(env?: Record<string, string | undefined>): BackendConfig;
|
|
22
|
+
/** Instantiate the backend described by `config`; throws on an unknown `kind`. */
export declare function createBackend(config: BackendConfig): Backend;
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { OpenAiBackend } from "./backend.js";
|
|
2
|
+
import { MlxBackend } from "./mlx-backend.js";
|
|
3
|
+
/**
|
|
4
|
+
* Backend selection for the gateway. The default is the owned mlx fork
|
|
5
|
+
* (`mlx_lm.server`) — "mlx_lm.server first". An explicit OpenAI-compatible URL
|
|
6
|
+
* (`FUSIONKIT_LOCAL_MODEL_URL`) overrides it, which covers an already-running mlx
|
|
7
|
+
* server or a different local server (Ollama, vLLM, LM Studio) on hosts where
|
|
8
|
+
* the mlx provisioner cannot run. Legacy `WARRANT_*` names are still honored.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Default mlx model, matching the examples/mlx default. Used by
 * `resolveBackendConfig` when no mlx model variable is set.
 */
export const DEFAULT_MLX_MODEL = "prism-ml/Ternary-Bonsai-4B-mlx-2bit";
|
|
12
|
+
/**
 * Resolve the backend configuration from environment variables.
 *
 * Each setting is read from its `FUSIONKIT_*` name first and falls back to
 * the legacy `WARRANT_*` name. A variable that is set but empty is treated as
 * unset, so it neither blocks the legacy fallback nor replaces a default.
 *
 * - `*_LOCAL_MODEL_URL` set: `{ kind: "openai" }` with optional
 *   `*_LOCAL_MODEL_KEY` and `*_LOCAL_MODEL`.
 * - otherwise: `{ kind: "mlx" }` using `*_MLX_MODEL` (default
 *   `DEFAULT_MLX_MODEL`) and `*_MLX_STRUCTURED`.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The resolved backend configuration.
 */
export function resolveBackendConfig(env = process.env) {
    // First non-empty value among the given variable names.
    const pick = (...names) => {
        for (const name of names) {
            const value = env[name];
            if (typeof value === "string" && value.length > 0) {
                return value;
            }
        }
        return undefined;
    };
    const url = pick("FUSIONKIT_LOCAL_MODEL_URL", "WARRANT_LOCAL_MODEL_URL");
    if (url !== undefined) {
        const apiKey = pick("FUSIONKIT_LOCAL_MODEL_KEY", "WARRANT_LOCAL_MODEL_KEY");
        const defaultModel = pick("FUSIONKIT_LOCAL_MODEL", "WARRANT_LOCAL_MODEL");
        return {
            kind: "openai",
            baseUrl: url,
            ...(apiKey !== undefined ? { apiKey } : {}),
            ...(defaultModel !== undefined ? { defaultModel } : {})
        };
    }
    return {
        kind: "mlx",
        model: pick("FUSIONKIT_MLX_MODEL", "WARRANT_MLX_MODEL") ?? DEFAULT_MLX_MODEL,
        // Structured decoding (the owned fork's reason for being) is on unless
        // explicitly disabled.
        structured: pick("FUSIONKIT_MLX_STRUCTURED", "WARRANT_MLX_STRUCTURED") !== "0"
    };
}
|
|
32
|
+
export function createBackend(config) {
|
|
33
|
+
switch (config.kind) {
|
|
34
|
+
case "mlx":
|
|
35
|
+
return new MlxBackend({ model: config.model, structured: config.structured });
|
|
36
|
+
case "openai":
|
|
37
|
+
return new OpenAiBackend({
|
|
38
|
+
baseUrl: config.baseUrl,
|
|
39
|
+
...(config.apiKey !== undefined ? { apiKey: config.apiKey } : {}),
|
|
40
|
+
...(config.defaultModel !== undefined ? { defaultModel: config.defaultModel } : {})
|
|
41
|
+
});
|
|
42
|
+
default: {
|
|
43
|
+
const unreachable = config;
|
|
44
|
+
throw new Error(`unknown backend config: ${JSON.stringify(unreachable)}`);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified front-door acceptance suite — the definition of "correct and done".
|
|
3
|
+
*
|
|
4
|
+
* Runs the same prompt/sentinel through every configured front door and
|
|
5
|
+
* produces one stable report with explicit `passed` / `failed` /
|
|
6
|
+
* `skipped_with_reason` / `blocked` outcomes. The HTTP front doors (Codex
|
|
7
|
+
* Responses, Claude Messages, OpenAI Chat for Cursorkit) are probed against a
|
|
8
|
+
* running Fusion Harness Gateway. The generic ACP front door is exercised
|
|
9
|
+
* in-process through an injected ACP runner. Cursor ACP and the registry-backed
|
|
10
|
+
* Codex/Claude ACP adapters are supplied as injected outcome producers so the
|
|
11
|
+
* CLI can wire real adapters while tests inject deterministic fakes.
|
|
12
|
+
*/
|
|
13
|
+
import type { AcpRunner } from "./acp-agent.js";
|
|
14
|
+
/** Explicit outcome recorded for one front door in the acceptance report. */
export type FrontDoorStatus = "passed" | "failed" | "skipped_with_reason" | "blocked";
|
|
15
|
+
/**
 * Result of exercising a single front door.
 *
 * NOTE(review): the field notes below are inferred from the field names and
 * the module header only — the implementation is not part of this view, so
 * confirm them against `front-door-acceptance.js`.
 */
export type FrontDoorOutcome = {
    /** Identifier of the front door this outcome belongs to. */
    id: string;
    /** Outcome classification. */
    status: FrontDoorStatus;
    /** Presumably the HTTP path that was probed, when applicable — confirm. */
    request_path?: string;
    /** Presumably the gateway's run id for the probe, when available — confirm. */
    gateway_run_id?: string;
    /** Presumably the explanation for a non-passing status — confirm. */
    reason?: string;
    /** Supporting evidence strings collected for the outcome. */
    evidence: string[];
};
|
|
23
|
+
/** The report produced by one acceptance run: one outcome per front door. */
export type FrontDoorAcceptanceReport = {
    /** The sentinel the run was performed with. */
    sentinel: string;
    /** When the report was generated (string form; exact format not visible here — TODO confirm). */
    generated_at: string;
    /** Outcomes of the individual front doors. */
    front_doors: FrontDoorOutcome[];
};
|
|
28
|
+
/** Injected async producer of a single front-door outcome (a real adapter in the CLI, a deterministic fake in tests). */
export type FrontDoorOutcomeProducer = () => Promise<FrontDoorOutcome>;
|
|
29
|
+
/** Inputs to `runFrontDoorAcceptance`. */
export type FrontDoorAcceptanceOptions = {
    /** URL of the running gateway that the HTTP front doors are probed against. */
    gatewayUrl: string;
    /** Sentinel run through every configured front door. */
    sentinel: string;
    /** In-process ACP runner for the generic ACP front door. */
    acpRunner?: AcpRunner;
    /** Cursor ACP outcome via Cursorkit; absent means the dependency is missing. */
    cursorAcp?: FrontDoorOutcomeProducer;
    /** Registry-backed Codex ACP adapter outcome. */
    codexAcp?: FrontDoorOutcomeProducer;
    /** Registry-backed Claude Agent ACP adapter outcome. */
    claudeAcp?: FrontDoorOutcomeProducer;
};
|
|
41
|
+
/** Run the acceptance suite across the configured front doors and resolve to the combined report. */
export declare function runFrontDoorAcceptance(options: FrontDoorAcceptanceOptions): Promise<FrontDoorAcceptanceReport>;
|