@fusionkit/model-gateway 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-agent.d.ts +39 -0
- package/dist/acp-agent.js +143 -0
- package/dist/acp-registry.d.ts +36 -0
- package/dist/acp-registry.js +85 -0
- package/dist/adapters/anthropic.d.ts +111 -0
- package/dist/adapters/anthropic.js +446 -0
- package/dist/adapters/chat.d.ts +14 -0
- package/dist/adapters/chat.js +34 -0
- package/dist/adapters/responses.d.ts +94 -0
- package/dist/adapters/responses.js +438 -0
- package/dist/backend.d.ts +52 -0
- package/dist/backend.js +57 -0
- package/dist/config.d.ts +22 -0
- package/dist/config.js +47 -0
- package/dist/front-door-acceptance.d.ts +41 -0
- package/dist/front-door-acceptance.js +219 -0
- package/dist/fusion-backend.d.ts +96 -0
- package/dist/fusion-backend.js +521 -0
- package/dist/fusion-gateway.d.ts +69 -0
- package/dist/fusion-gateway.js +355 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +28 -0
- package/dist/mlx-backend.d.ts +42 -0
- package/dist/mlx-backend.js +71 -0
- package/dist/provenance.d.ts +29 -0
- package/dist/provenance.js +182 -0
- package/dist/server.d.ts +27 -0
- package/dist/server.js +234 -0
- package/dist/test/acp-agent.test.d.ts +1 -0
- package/dist/test/acp-agent.test.js +66 -0
- package/dist/test/acp-registry.test.d.ts +1 -0
- package/dist/test/acp-registry.test.js +70 -0
- package/dist/test/anthropic.test.d.ts +1 -0
- package/dist/test/anthropic.test.js +251 -0
- package/dist/test/chat.test.d.ts +1 -0
- package/dist/test/chat.test.js +270 -0
- package/dist/test/front-door-acceptance.test.d.ts +1 -0
- package/dist/test/front-door-acceptance.test.js +94 -0
- package/dist/test/fusion-backend-trace.test.d.ts +1 -0
- package/dist/test/fusion-backend-trace.test.js +107 -0
- package/dist/test/fusion-backend.test.d.ts +1 -0
- package/dist/test/fusion-backend.test.js +193 -0
- package/dist/test/fusion-gateway.test.d.ts +1 -0
- package/dist/test/fusion-gateway.test.js +107 -0
- package/dist/test/responses.test.d.ts +1 -0
- package/dist/test/responses.test.js +157 -0
- package/package.json +31 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified front-door acceptance suite — the definition of "correct and done".
|
|
3
|
+
*
|
|
4
|
+
* Runs the same prompt/sentinel through every configured front door and
|
|
5
|
+
* produces one stable report with explicit `passed` / `failed` /
|
|
6
|
+
* `skipped_with_reason` / `blocked` outcomes. The HTTP front doors (Codex
|
|
7
|
+
* Responses, Claude Messages, OpenAI Chat for Cursorkit) are probed against a
|
|
8
|
+
* running Fusion Harness Gateway. The generic ACP front door is exercised
|
|
9
|
+
* in-process through an injected ACP runner. Cursor ACP and the registry-backed
|
|
10
|
+
* Codex/Claude ACP adapters are supplied as injected outcome producers so the
|
|
11
|
+
* CLI can wire real adapters while tests inject deterministic fakes.
|
|
12
|
+
*/
|
|
13
|
+
import { PassThrough } from "node:stream";
|
|
14
|
+
import { runAcpAgent } from "./acp-agent.js";
|
|
15
|
+
import { FUSION_EVIDENCE_HEADER, FUSION_RUN_ID_HEADER } from "./fusion-gateway.js";
|
|
16
|
+
/**
 * Strips every trailing `/` from a gateway URL so path segments can be
 * appended without producing a double slash.
 */
function normalizeGatewayUrl(value) {
    const SLASH = 47; // char code of "/"
    let end = value.length;
    while (end > 0 && value.charCodeAt(end - 1) === SLASH) {
        end -= 1;
    }
    return value.slice(0, end);
}
|
|
19
|
+
/**
 * Builds the full URL of a `/v1` endpoint on the gateway.
 *
 * The gateway URL is first stripped of trailing slashes; `/v1` is appended
 * only when the URL does not already end with it, then `path` is appended
 * as-is (callers pass it with its leading slash).
 */
function v1Url(gatewayUrl, path) {
    const root = normalizeGatewayUrl(gatewayUrl);
    if (root.endsWith("/v1")) {
        return `${root}${path}`;
    }
    return `${root}/v1${path}`;
}
|
|
24
|
+
/**
 * Decodes the evidence response header into a list of strings.
 *
 * The header is expected to carry a JSON array. A missing header (`null`),
 * unparseable JSON, or a non-array payload all yield an empty list, and
 * non-string array entries are dropped.
 */
function parseEvidenceHeader(value) {
    if (value === null) {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(value);
    }
    catch {
        return [];
    }
    return Array.isArray(decoded)
        ? decoded.filter((entry) => typeof entry === "string")
        : [];
}
|
|
37
|
+
/**
 * Extracts the concatenated text of a Responses-style body.
 *
 * Walks `body.output[*].content[*].text` and joins every string `text` it
 * finds. The body comes straight from `response.json()`, so any JSON value is
 * possible: a `null`/non-object body, a non-array `output`, and `null` output
 * items or content parts are all tolerated and contribute no text instead of
 * throwing.
 */
function textFromResponses(body) {
    const output = body?.output;
    if (!Array.isArray(output))
        return "";
    return output
        // Items without `content` (or null items) contribute no parts.
        .flatMap((item) => item?.content ?? [])
        .map((part) => (typeof part?.text === "string" ? part.text : ""))
        .join("");
}
|
|
46
|
+
/**
 * Extracts the concatenated text of a Messages-style body.
 *
 * Joins the string `text` of every entry in `body.content`. A `null` or
 * non-object body, a non-array `content`, and `null` or text-less entries
 * contribute no text instead of throwing.
 */
function textFromAnthropic(body) {
    const content = body?.content;
    if (!Array.isArray(content))
        return "";
    return content
        .map((part) => (typeof part?.text === "string" ? part.text : ""))
        .join("");
}
|
|
52
|
+
/**
 * Extracts the assistant text of a Chat Completions-style body.
 *
 * Reads `body.choices[0].message.content` and returns it when it is a string.
 * Any missing link in that chain — including a `null` body — yields `""`
 * instead of throwing.
 */
function textFromChat(body) {
    const content = body?.choices?.[0]?.message?.content;
    return typeof content === "string" ? content : "";
}
|
|
57
|
+
/**
 * Probes one HTTP front door and classifies the result as a report outcome.
 *
 * POSTs `input.body` as JSON to `input.url` (merging `input.headers` over the
 * default `content-type`) and returns:
 *  - `blocked` when `fetch` itself throws (the reason is the error message),
 *  - `failed` when the gateway answers with a non-ok status,
 *  - `failed` when the response body cannot be read or decoded,
 *  - `passed` when the text produced by `input.extractText` contains
 *    `input.sentinel`, otherwise `failed`.
 *
 * This function never rejects for a bad response: every problem becomes an
 * outcome so one misbehaving front door cannot abort the whole report.
 *
 * @param input Probe description: `id`, `url`, `requestPath`, optional
 *   `headers`, `body`, `extractText` and `sentinel`.
 * @returns The outcome object for this front door.
 */
async function probeHttpFrontDoor(input) {
    let response;
    try {
        response = await fetch(input.url, {
            method: "POST",
            headers: { "content-type": "application/json", ...(input.headers ?? {}) },
            body: JSON.stringify(input.body)
        });
    }
    catch (error) {
        return {
            id: input.id,
            status: "blocked",
            request_path: input.requestPath,
            reason: error instanceof Error ? error.message : String(error),
            evidence: []
        };
    }
    if (!response.ok) {
        return {
            id: input.id,
            status: "failed",
            request_path: input.requestPath,
            reason: `gateway returned ${response.status}`,
            evidence: []
        };
    }
    const runId = response.headers.get(FUSION_RUN_ID_HEADER) ?? undefined;
    const evidence = parseEvidenceHeader(response.headers.get(FUSION_EVIDENCE_HEADER));
    let text;
    try {
        // `response.json()` rejects on an empty or non-JSON body; the extractor
        // is caller-supplied. Either failure is a failed probe, not a crash.
        text = input.extractText(await response.json());
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        return {
            id: input.id,
            status: "failed",
            request_path: input.requestPath,
            ...(runId !== undefined ? { gateway_run_id: runId } : {}),
            reason: `unreadable response body: ${detail}`,
            evidence
        };
    }
    const matched = text.includes(input.sentinel);
    return {
        id: input.id,
        status: matched ? "passed" : "failed",
        request_path: input.requestPath,
        ...(runId !== undefined ? { gateway_run_id: runId } : {}),
        ...(matched ? {} : { reason: "sentinel not found in final output" }),
        evidence: matched ? ["sentinel", ...evidence] : evidence
    };
}
|
|
97
|
+
/**
 * Exercises the generic ACP front door in-process.
 *
 * Wires `runAcpAgent` to a pair of in-memory streams, sends `initialize`,
 * `session/new` and `session/prompt` as newline-delimited JSON-RPC requests,
 * closes the input, and waits for the agent to finish. The probe passes when
 * the concatenated `params.update.content.text` of all `session/update`
 * messages contains `sentinel`.
 *
 * The run id and evidence are read from `_meta` on the response to the
 * `session/prompt` request, matched by its request id. The `initialize` and
 * `session/new` responses are ignored for that purpose.
 *
 * NOTE(review): the prompt addresses the hard-coded session id "sess_1";
 * presumably that is the id the in-process agent assigns to its first session
 * — confirm against `runAcpAgent`.
 *
 * @param acpRunner Runner handed to `runAcpAgent`.
 * @param sentinel Text that must appear in the streamed updates.
 * @returns The `generic-acp` outcome object.
 */
async function probeGenericAcp(acpRunner, sentinel) {
    // Request id of the `session/prompt` call; its response is looked up by it.
    const promptRequestId = 3;
    const input = new PassThrough();
    const output = new PassThrough();
    // Decode through the stream so multi-byte characters split across chunks
    // are reassembled instead of being corrupted by per-chunk decoding.
    output.setEncoding("utf8");
    let raw = "";
    output.on("data", (chunk) => {
        raw += chunk;
    });
    const done = runAcpAgent({ runner: acpRunner, input, output });
    const write = (message) => {
        input.write(`${JSON.stringify(message)}\n`);
    };
    write({ jsonrpc: "2.0", id: 1, method: "initialize", params: { protocolVersion: 1 } });
    write({ jsonrpc: "2.0", id: 2, method: "session/new", params: { cwd: process.cwd(), mcpServers: [] } });
    write({
        jsonrpc: "2.0",
        id: promptRequestId,
        method: "session/prompt",
        params: { sessionId: "sess_1", prompt: [{ type: "text", text: "front-door acceptance" }] }
    });
    input.end();
    await done;
    const updates = raw
        .split("\n")
        .filter((line) => line.trim().length > 0)
        .map((line) => JSON.parse(line));
    const updateText = updates
        .filter((message) => message.method === "session/update")
        .map((message) => {
        const params = message.params;
        return params?.update?.content?.text ?? "";
    })
        .join("");
    // Match on the request id: the earlier responses also carry object results,
    // so "first object result" would pick the `initialize` response.
    const promptResult = updates.find((message) => message.id === promptRequestId &&
        message.result !== undefined &&
        typeof message.result === "object")?.result;
    const matched = updateText.includes(sentinel);
    const meta = promptResult?._meta;
    // Only spread evidence that really is a list; anything else counts as none.
    const evidence = Array.isArray(meta?.evidence) ? meta.evidence : [];
    return {
        id: "generic-acp",
        status: matched ? "passed" : "failed",
        request_path: "session/prompt",
        ...(meta?.runId !== undefined ? { gateway_run_id: meta.runId } : {}),
        ...(matched ? {} : { reason: "sentinel not found in session/update" }),
        evidence: matched ? ["sentinel", ...evidence] : evidence
    };
}
|
|
141
|
+
/**
 * Runs the acceptance probe against every front door and returns one report.
 *
 * Front doors are probed strictly one after another, in this order:
 * `codex-responses`, `claude-messages`, `openai-chat` (HTTP, against
 * `options.gatewayUrl`), then `generic-acp` (in-process, when
 * `options.acpRunner` is set), then the injected `codexAcp`, `claudeAcp` and
 * `cursorAcp` outcome producers. Any ACP front door that is not configured is
 * reported as `blocked` with a fixed reason instead of being omitted.
 *
 * @param options `gatewayUrl`, `sentinel`, and the optional `acpRunner`,
 *   `codexAcp`, `claudeAcp` and `cursorAcp` hooks.
 * @returns `{ sentinel, generated_at, front_doors }`, where `generated_at` is
 *   the ISO timestamp taken after all probes have finished.
 */
export async function runFrontDoorAcceptance(options) {
    // Shared shape of the three HTTP probes; `headers` is only set when given.
    const probeHttp = (id, path, body, extractText, headers) => probeHttpFrontDoor({
        id,
        url: v1Url(options.gatewayUrl, path),
        requestPath: `/v1${path}`,
        ...(headers !== undefined ? { headers } : {}),
        body,
        extractText,
        sentinel: options.sentinel
    });
    // Outcome for a front door that could not be exercised at all.
    const blocked = (id, reason, requestPath) => ({
        id,
        status: "blocked",
        ...(requestPath !== undefined ? { request_path: requestPath } : {}),
        reason,
        evidence: []
    });
    const report = [];
    report.push(await probeHttp("codex-responses", "/responses", {
        model: "fusion-panel",
        input: [{ role: "user", content: [{ type: "input_text", text: "front-door acceptance" }] }]
    }, textFromResponses));
    report.push(await probeHttp("claude-messages", "/messages", {
        model: "fusion-panel",
        max_tokens: 512,
        messages: [{ role: "user", content: "front-door acceptance" }]
    }, textFromAnthropic, { "anthropic-version": "2023-06-01" }));
    report.push(await probeHttp("openai-chat", "/chat/completions", {
        model: "fusion-panel",
        messages: [{ role: "user", content: "front-door acceptance" }]
    }, textFromChat));
    if (options.acpRunner === undefined) {
        report.push(blocked("generic-acp", "acp_runner_not_configured", "session/prompt"));
    }
    else {
        report.push(await probeGenericAcp(options.acpRunner, options.sentinel));
    }
    // Injected producers: [outcome id, option name, reason when absent].
    const injected = [
        ["codex-acp", "codexAcp", "codex_acp_adapter_not_installed"],
        ["claude-acp", "claudeAcp", "claude_acp_adapter_not_installed"],
        ["cursor-acp", "cursorAcp", "cursorkit_backend_not_running"]
    ];
    for (const [id, optionName, reason] of injected) {
        // Call through `options` so the producer keeps its original receiver.
        report.push(options[optionName] !== undefined
            ? await options[optionName]()
            : blocked(id, reason));
    }
    return {
        sentinel: options.sentinel,
        generated_at: new Date().toISOString(),
        front_doors: report
    };
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The fusion front-door backend.
|
|
3
|
+
*
|
|
4
|
+
* This is the clean abstraction behind "the judge streams a trajectory the
|
|
5
|
+
* user's harness executes". It implements the gateway {@link Backend} contract
|
|
6
|
+
* (an OpenAI Chat Completions surface) so it slots into the existing
|
|
7
|
+
* `startGateway` server and reuses every dialect adapter (chat / responses /
|
|
8
|
+
* anthropic) — including their full tool-call, tool-result, and streaming
|
|
9
|
+
* support — for free.
|
|
10
|
+
*
|
|
11
|
+
* Per front-door turn it:
|
|
12
|
+
* 1. derives a stable session key from the conversation prefix,
|
|
13
|
+
* 2. runs the panel **once** per session (injected `runPanels`, so this
|
|
14
|
+
* package keeps no dependency on `@fusionkit/ensemble`) to produce the
|
|
15
|
+
* candidate trajectories,
|
|
16
|
+
* 3. forwards the live conversation + the harness tools + the candidate
|
|
17
|
+
* trajectories to FusionKit's `trajectory:step`, whose response (an OpenAI
|
|
18
|
+
* chat completion, optionally streamed, that may carry `tool_calls`) is
|
|
19
|
+
* returned verbatim for the server to translate into the caller's dialect.
|
|
20
|
+
*
|
|
21
|
+
* There is no apply/verify/repair here: iteration is the user's harness's job.
|
|
22
|
+
*
|
|
23
|
+
* Failures are surfaced, never swallowed: a panel run that throws or yields no
|
|
24
|
+
* usable candidate, or a `trajectory:step` that errors, produces an explicit
|
|
25
|
+
* error (a non-2xx response when nothing has streamed yet, or a terminal error
|
|
26
|
+
* event with `finish_reason: "error"` once the SSE has started) and the failed
|
|
27
|
+
* session is evicted so the next turn retries instead of replaying the failure.
|
|
28
|
+
*/
|
|
29
|
+
import type { Backend, BackendRequestOptions } from "./backend.js";
|
|
30
|
+
/**
 * One candidate trajectory, in the wire shape that FusionKit's
 * `trajectory:step` endpoint accepts.
 *
 * Only `trajectory_id`, `model_id`, `status` and `final_output` are required;
 * every other field is optional.
 */
export type WireTrajectory = {
    trajectory_id: string;
    model_id: string;
    status: string;
    final_output: string;
    /** Free-form step records; their schema is not constrained by this type. */
    steps?: Record<string, unknown>[];
    candidate_id?: string;
    model?: string;
    harness_kind?: string;
    diff?: string;
    /** Optional verification summary attached to the candidate. */
    verification?: {
        status: string;
        evidence?: string[];
        exit_code?: number;
    };
    /** Free-form extra data; not interpreted by this type. */
    metadata?: Record<string, unknown>;
};
|
|
48
|
+
/**
 * Loosely typed chat message: only `role` is required. `content` and
 * `tool_calls` are left as `unknown`, so consumers must narrow them before
 * use.
 */
export type ChatMessageLike = {
    role: string;
    content?: unknown;
    tool_calls?: unknown;
    tool_call_id?: string;
    name?: string;
};
|
|
55
|
+
/** Everything a {@link PanelRunner} receives for one panel run. */
export type PanelRunInput = {
    /** Task prompt distilled from the conversation prefix (system + first user message). */
    task: string;
    /** Complete incoming OpenAI-style message list of the first turn. */
    messages: ChatMessageLike[];
    /** Trace id minted for this fusion session. */
    traceId: string;
    /** Root span of the session; panel and candidate events are parented under it. */
    sessionSpanId: string;
    /** Stable key for the session (a hash of the conversation prefix). */
    sessionKey: string;
    /** User-turn index, starting at 1, that this panel run belongs to. */
    turn: number;
};
|
|
69
|
+
/**
 * Callback that runs the panel once for a session and resolves with the
 * candidate trajectories it produced.
 */
export type PanelRunner = (input: PanelRunInput) => Promise<Array<WireTrajectory>>;
|
|
71
|
+
/** Constructor options for {@link FusionBackend}. */
export type FusionBackendOptions = {
    /** URL of FusionKit's `POST /v1/fusion/trajectory:step` endpoint. */
    stepUrl: string;
    /** Injected panel runner that produces the candidate trajectories for a new session. */
    runPanels: PanelRunner;
    /** Model id echoed back to clients and sent to the judge step. */
    defaultModel?: string;
    /** Judge model id forwarded to FusionKit; when omitted FusionKit uses its configured judge. */
    judgeModel?: string;
    /** How long a session's candidate trajectories remain cached. */
    sessionTtlMs?: number;
    /** Wall-clock budget for the panel phase; the turn fails once it is exceeded. */
    panelTimeoutMs?: number;
    /** Wall-clock budget for one `trajectory:step` call. */
    stepTimeoutMs?: number;
    /** Trace-id factory, injectable so tests can supply deterministic ids. */
    mintTraceId?: () => string;
};
|
|
89
|
+
/**
 * Gateway {@link Backend} implementation for the fusion front door.
 *
 * Only the public surface is declared here; the implementation and its
 * private state are not visible from this declaration file.
 */
export declare class FusionBackend implements Backend {
    #private;
    /** Default model id, or `undefined` when none is set. */
    readonly defaultModel: string | undefined;
    /** @param options See {@link FusionBackendOptions}. */
    constructor(options: FusionBackendOptions);
    /**
     * Handles one chat request.
     *
     * @param body Request payload; typed `unknown` at this boundary.
     * @param signal Optional abort signal for the request.
     * @param options Optional per-request backend options.
     * @returns The backend response.
     */
    chat(body: unknown, signal?: AbortSignal, options?: BackendRequestOptions): Promise<Response>;
    /** Models endpoint of the backend contract; takes no arguments. */
    models(): Promise<Response>;
    /** Embeddings endpoint of the backend contract; takes no arguments. */
    embeddings(): Promise<Response>;
}
|