npm - @fusionkit/model-gateway - Versions diffs - 0.1.0 - Mend

@fusionkit/model-gateway 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/dist/acp-agent.d.ts +39 -0
package/dist/acp-agent.js +143 -0
package/dist/acp-registry.d.ts +36 -0
package/dist/acp-registry.js +85 -0
package/dist/adapters/anthropic.d.ts +111 -0
package/dist/adapters/anthropic.js +446 -0
package/dist/adapters/chat.d.ts +14 -0
package/dist/adapters/chat.js +34 -0
package/dist/adapters/responses.d.ts +94 -0
package/dist/adapters/responses.js +438 -0
package/dist/backend.d.ts +52 -0
package/dist/backend.js +57 -0
package/dist/config.d.ts +22 -0
package/dist/config.js +47 -0
package/dist/front-door-acceptance.d.ts +41 -0
package/dist/front-door-acceptance.js +219 -0
package/dist/fusion-backend.d.ts +96 -0
package/dist/fusion-backend.js +521 -0
package/dist/fusion-gateway.d.ts +69 -0
package/dist/fusion-gateway.js +355 -0
package/dist/index.d.ts +40 -0
package/dist/index.js +28 -0
package/dist/mlx-backend.d.ts +42 -0
package/dist/mlx-backend.js +71 -0
package/dist/provenance.d.ts +29 -0
package/dist/provenance.js +182 -0
package/dist/server.d.ts +27 -0
package/dist/server.js +234 -0
package/dist/test/acp-agent.test.d.ts +1 -0
package/dist/test/acp-agent.test.js +66 -0
package/dist/test/acp-registry.test.d.ts +1 -0
package/dist/test/acp-registry.test.js +70 -0
package/dist/test/anthropic.test.d.ts +1 -0
package/dist/test/anthropic.test.js +251 -0
package/dist/test/chat.test.d.ts +1 -0
package/dist/test/chat.test.js +270 -0
package/dist/test/front-door-acceptance.test.d.ts +1 -0
package/dist/test/front-door-acceptance.test.js +94 -0
package/dist/test/fusion-backend-trace.test.d.ts +1 -0
package/dist/test/fusion-backend-trace.test.js +107 -0
package/dist/test/fusion-backend.test.d.ts +1 -0
package/dist/test/fusion-backend.test.js +193 -0
package/dist/test/fusion-gateway.test.d.ts +1 -0
package/dist/test/fusion-gateway.test.js +107 -0
package/dist/test/responses.test.d.ts +1 -0
package/dist/test/responses.test.js +157 -0
package/package.json +31 -0

package/dist/adapters/anthropic.js ADDED Viewed

@@ -0,0 +1,446 @@
+/**
+ * Anthropic Messages adapter. Claude Code speaks the Anthropic Messages API to
+ * whatever `ANTHROPIC_BASE_URL` points at, so to back it with a local model we
+ * translate `/v1/messages` (and `/v1/messages/count_tokens`, and the
+ * `/v1/models` discovery probe) to and from the gateway's OpenAI Chat
+ * Completions core. The pure translation functions are exported for testing;
+ * the request handler wires them to a `Backend` and returns a `Response` the
+ * server pipes straight to the client (JSON or SSE).
+ */
+const ENCODER = new TextEncoder(); // shared encoder; sse() uses it to turn each event string into bytes
+// ---- request translation ----
+function randomId() {
+    return Math.random().toString(36).slice(2, 12);
+}
+function systemText(system) {
+    if (system === undefined)
+        return "";
+    if (typeof system === "string")
+        return system;
+    return system.map((block) => block.text).join("\n");
+}
+function blockText(content) {
+    if (content === undefined)
+        return "";
+    if (typeof content === "string")
+        return content;
+    return content
+        .map((block) => (block.type === "text" ? block.text : ""))
+        .join("");
+}
+function mapToolChoice(choice) {
+    switch (choice.type) {
+        case "auto":
+            return "auto";
+        case "any":
+            return "required";
+        case "tool":
+            return { type: "function", function: { name: choice.name ?? "" } };
+        default: {
+            const unreachable = choice.type;
+            return unreachable;
+        }
+    }
+}
+/**
+ * Translate an Anthropic Messages request to an OpenAI Chat Completions body.
+ * The upstream `model` is `backendModel` when it is defined (Claude Code sends
+ * a `claude-*` id the local server would not recognise), otherwise the
+ * request's own `model`, otherwise "".
+ *
+ * Mapping performed:
+ * - a non-empty `system` becomes one leading `system` message;
+ * - a message whose `content` is a string is forwarded with its role as-is;
+ * - array content is split into text, image, `tool_use` and `tool_result`
+ *   blocks; any other block type is ignored;
+ * - assistant turns become one message with the joined text (or `null`) plus
+ *   `tool_calls` built from the `tool_use` blocks;
+ * - user turns emit one `tool` message per `tool_result`, followed by a user
+ *   message holding the remaining text and/or images;
+ * - `max_tokens`, `temperature`, `top_p`, `stop_sequences`, `tools` and
+ *   `tool_choice` are copied across when present, and streaming requests also
+ *   ask for usage via `stream_options`.
+ *
+ * @param body Anthropic Messages request body.
+ * @param backendModel Model id to send upstream; may be undefined.
+ * @returns The Chat Completions request object.
+ */
+export function anthropicToChat(body, backendModel) {
+    const messages = [];
+    const system = systemText(body.system);
+    if (system.length > 0)
+        messages.push({ role: "system", content: system });
+    for (const message of body.messages) {
+        if (typeof message.content === "string") {
+            messages.push({ role: message.role, content: message.content });
+            continue;
+        }
+        const textParts = [];
+        const imageParts = [];
+        const toolCalls = [];
+        const toolResults = [];
+        for (const block of message.content) {
+            switch (block.type) {
+                case "text":
+                    textParts.push(block.text);
+                    break;
+                case "image": {
+                    const source = block.source;
+                    imageParts.push({
+                        type: "image_url",
+                        image_url: { url: `data:${source.media_type};base64,${source.data}` } // inline data: URL built from media type + base64 payload
+                    });
+                    break;
+                }
+                case "tool_use": {
+                    const tool = block;
+                    toolCalls.push({
+                        id: tool.id,
+                        type: "function",
+                        function: { name: tool.name, arguments: JSON.stringify(tool.input ?? {}) } // arguments travel as a JSON string; missing input becomes "{}"
+                    });
+                    break;
+                }
+                case "tool_result": {
+                    const result = block;
+                    toolResults.push({ id: result.tool_use_id, content: blockText(result.content) }); // only the text of the result is kept
+                    break;
+                }
+                default:
+                    break; // unrecognised block types are dropped
+            }
+        }
+        if (message.role === "assistant") {
+            const text = textParts.join("");
+            const assistant = { role: "assistant", content: text.length > 0 ? text : null }; // null content when the turn has no text
+            if (toolCalls.length > 0)
+                assistant.tool_calls = toolCalls;
+            messages.push(assistant);
+            continue;
+        }
+        // Non-assistant turn: tool results become standalone `tool` messages
+        // first; the remaining text/images then become one user message.
+        for (const result of toolResults) {
+            messages.push({ role: "tool", tool_call_id: result.id, content: result.content });
+        }
+        const text = textParts.join("");
+        if (imageParts.length > 0) {
+            const parts = [];
+            if (text.length > 0)
+                parts.push({ type: "text", text });
+            parts.push(...imageParts);
+            messages.push({ role: "user", content: parts });
+        }
+        else if (text.length > 0 || toolResults.length === 0) { // skip the user message only when the turn held nothing but tool results
+            messages.push({ role: "user", content: text });
+        }
+    }
+    const chat = {
+        model: backendModel ?? body.model ?? "",
+        messages,
+        stream: body.stream === true
+    };
+    if (typeof body.max_tokens === "number")
+        chat.max_tokens = body.max_tokens;
+    if (typeof body.temperature === "number")
+        chat.temperature = body.temperature;
+    if (typeof body.top_p === "number")
+        chat.top_p = body.top_p;
+    if (Array.isArray(body.stop_sequences) && body.stop_sequences.length > 0) {
+        chat.stop = body.stop_sequences;
+    }
+    if (Array.isArray(body.tools) && body.tools.length > 0) {
+        chat.tools = body.tools.map((tool) => ({
+            type: "function",
+            function: {
+                name: tool.name,
+                ...(tool.description !== undefined ? { description: tool.description } : {}),
+                parameters: tool.input_schema ?? { type: "object", properties: {} } // empty object schema when the tool declares none
+            }
+        }));
+    }
+    if (body.tool_choice !== undefined)
+        chat.tool_choice = mapToolChoice(body.tool_choice);
+    if (body.stream === true)
+        chat.stream_options = { include_usage: true }; // ask the upstream to report token usage in the stream
+    return chat;
+}
+// ---- response translation ----
+export function mapStopReason(finishReason) {
+    switch (finishReason) {
+        case "length":
+            return "max_tokens";
+        case "tool_calls":
+            return "tool_use";
+        case "stop":
+        case "content_filter":
+        case null:
+        case undefined:
+            return "end_turn";
+        default:
+            return "end_turn";
+    }
+}
+export function chatToAnthropicMessage(openai, model) {
+    const choice = openai.choices?.[0];
+    const message = choice?.message;
+    const content = [];
+    const text = typeof message?.content === "string" ? message.content : "";
+    if (text.length > 0)
+        content.push({ type: "text", text });
+    if (Array.isArray(message?.tool_calls)) {
+        for (const call of message.tool_calls) {
+            let input = {};
+            const args = call.function?.arguments;
+            if (typeof args === "string" && args.length > 0) {
+                try {
+                    input = JSON.parse(args);
+                }
+                catch {
+                    input = {};
+                }
+            }
+            content.push({
+                type: "tool_use",
+                id: call.id ?? `toolu_${randomId()}`,
+                name: call.function?.name ?? "",
+                input
+            });
+        }
+    }
+    if (content.length === 0)
+        content.push({ type: "text", text: "" });
+    return {
+        id: openai.id !== undefined ? `msg_${openai.id}` : `msg_${randomId()}`,
+        type: "message",
+        role: "assistant",
+        model,
+        content,
+        stop_reason: mapStopReason(choice?.finish_reason),
+        stop_sequence: null,
+        usage: {
+            input_tokens: openai.usage?.prompt_tokens ?? 0,
+            output_tokens: openai.usage?.completion_tokens ?? 0
+        }
+    };
+}
+// ---- streaming translation (OpenAI chat SSE -> Anthropic Messages SSE) ----
+function sse(type, data) {
+    return ENCODER.encode(`event: ${type}\ndata: ${JSON.stringify(data)}\n\n`);
+}
+export function openAiSseToAnthropic(upstream, model) {
+    const reader = upstream.getReader();
+    const decoder = new TextDecoder();
+    const tools = new Map();
+    const messageId = `msg_${randomId()}`;
+    const state = {
+        started: false,
+        textOpen: false,
+        textIndex: -1,
+        nextIndex: 0,
+        finished: false,
+        outputTokens: 0,
+        keepaliveTimer: undefined
+    };
+    let buffer = "";
+    const ensureStarted = (controller) => {
+        if (state.started)
+            return;
+        state.started = true;
+        controller.enqueue(sse("message_start", {
+            type: "message_start",
+            message: {
+                id: messageId,
+                type: "message",
+                role: "assistant",
+                model,
+                content: [],
+                stop_reason: null,
+                stop_sequence: null,
+                usage: { input_tokens: 0, output_tokens: 0 }
+            }
+        }));
+    };
+    const ensureText = (controller) => {
+        ensureStarted(controller);
+        if (state.textOpen)
+            return;
+        state.textOpen = true;
+        state.textIndex = state.nextIndex++;
+        controller.enqueue(sse("content_block_start", {
+            type: "content_block_start",
+            index: state.textIndex,
+            content_block: { type: "text", text: "" }
+        }));
+    };
+    const finalize = (controller, stopReason) => {
+        if (state.finished)
+            return;
+        state.finished = true;
+        if (state.keepaliveTimer !== undefined)
+            clearInterval(state.keepaliveTimer);
+        if (state.textOpen) {
+            controller.enqueue(sse("content_block_stop", { type: "content_block_stop", index: state.textIndex }));
+        }
+        for (const index of tools.values()) {
+            controller.enqueue(sse("content_block_stop", { type: "content_block_stop", index }));
+        }
+        controller.enqueue(sse("message_delta", {
+            type: "message_delta",
+            delta: { stop_reason: stopReason, stop_sequence: null },
+            usage: { output_tokens: state.outputTokens }
+        }));
+        controller.enqueue(sse("message_stop", { type: "message_stop" }));
+    };
+    const process = (controller, chunk) => {
+        const choice = chunk.choices?.[0];
+        if (choice === undefined) {
+            if (chunk.usage?.completion_tokens !== undefined)
+                state.outputTokens = chunk.usage.completion_tokens;
+            return;
+        }
+        const delta = choice.delta ?? {};
+        if (typeof delta.content === "string" && delta.content.length > 0) {
+            ensureText(controller);
+            controller.enqueue(sse("content_block_delta", {
+                type: "content_block_delta",
+                index: state.textIndex,
+                delta: { type: "text_delta", text: delta.content }
+            }));
+        }
+        if (Array.isArray(delta.tool_calls)) {
+            for (const call of delta.tool_calls) {
+                const openAiIndex = typeof call.index === "number" ? call.index : 0;
+                let index = tools.get(openAiIndex);
+                if (index === undefined) {
+                    ensureStarted(controller);
+                    index = state.nextIndex++;
+                    tools.set(openAiIndex, index);
+                    controller.enqueue(sse("content_block_start", {
+                        type: "content_block_start",
+                        index,
+                        content_block: {
+                            type: "tool_use",
+                            id: call.id ?? `toolu_${randomId()}`,
+                            name: call.function?.name ?? "",
+                            input: {}
+                        }
+                    }));
+                }
+                const args = call.function?.arguments;
+                if (typeof args === "string" && args.length > 0) {
+                    controller.enqueue(sse("content_block_delta", {
+                        type: "content_block_delta",
+                        index,
+                        delta: { type: "input_json_delta", partial_json: args }
+                    }));
+                }
+            }
+        }
+        if (chunk.usage?.completion_tokens !== undefined)
+            state.outputTokens = chunk.usage.completion_tokens;
+        if (choice.finish_reason !== null && choice.finish_reason !== undefined) {
+            finalize(controller, mapStopReason(choice.finish_reason));
+        }
+    };
+    return new ReadableStream({
+        start(controller) {
+            // Start the message immediately and keep the connection alive with `ping`
+            // events while the upstream is still producing its first token. Claude
+            // Code times out if it sees nothing during the fusion panel phase (the
+            // chat-layer keepalive comments are dropped by this translator).
+            ensureStarted(controller);
+            state.keepaliveTimer = setInterval(() => {
+                if (state.finished)
+                    return;
+                try {
+                    controller.enqueue(sse("ping", { type: "ping" }));
+                }
+                catch {
+                    // controller closed
+                }
+            }, 3000);
+        },
+        async pull(controller) {
+            const { done, value } = await reader.read();
+            if (done) {
+                if (!state.finished)
+                    finalize(controller, "end_turn");
+                controller.close();
+                return;
+            }
+            buffer += decoder.decode(value, { stream: true });
+            let newline = buffer.indexOf("\n");
+            while (newline >= 0) {
+                const line = buffer.slice(0, newline).trim();
+                buffer = buffer.slice(newline + 1);
+                newline = buffer.indexOf("\n");
+                if (!line.startsWith("data:"))
+                    continue;
+                const payload = line.slice(5).trim();
+                if (payload === "[DONE]") {
+                    if (!state.finished)
+                        finalize(controller, "end_turn");
+                    continue;
+                }
+                try {
+                    process(controller, JSON.parse(payload));
+                }
+                catch {
+                    // ignore malformed lines; the upstream stream is authoritative
+                }
+            }
+        },
+        cancel(reason) {
+            if (state.keepaliveTimer !== undefined)
+                clearInterval(state.keepaliveTimer);
+            return reader.cancel(reason);
+        }
+    });
+}
+// ---- token counting + discovery ----
+export function countTokensEstimate(body) {
+    let chars = systemText(body.system).length;
+    for (const message of body.messages)
+        chars += blockText(message.content).length;
+    // A rough chars/4 heuristic; Claude Code uses this only for budgeting.
+    return Math.max(1, Math.ceil(chars / 4));
+}
+// ---- handlers (return a Response the server pipes) ----
+function jsonResponse(status, value) {
+    return new Response(JSON.stringify(value), {
+        status,
+        headers: { "content-type": "application/json" }
+    });
+}
+export async function handleAnthropicMessages(backend, body, modelCallId, signal) {
+    const requestedModel = body.model ?? backend.defaultModel ?? "";
+    const chat = anthropicToChat(body, backend.defaultModel);
+    const upstream = await backend.chat(chat, signal, { modelCallId });
+    if (!upstream.ok) {
+        const detail = await upstream.text();
+        return jsonResponse(upstream.status, {
+            type: "error",
+            error: { type: "api_error", message: detail.slice(0, 2000) }
+        });
+    }
+    if (body.stream === true) {
+        const source = upstream.body;
+        if (source === null)
+            return jsonResponse(502, { type: "error", error: { type: "api_error", message: "no upstream stream" } });
+        return new Response(openAiSseToAnthropic(source, requestedModel), {
+            status: 200,
+            headers: { "content-type": "text/event-stream", "cache-control": "no-cache" }
+        });
+    }
+    const openai = (await upstream.json());
+    return jsonResponse(200, chatToAnthropicMessage(openai, requestedModel));
+}
+export function handleCountTokens(body) {
+    return jsonResponse(200, { input_tokens: countTokensEstimate(body) });
+}
+/**
+ * Anthropic-shaped `/v1/models` discovery response. Claude Code only adds
+ * models whose id begins with `claude` or `anthropic`, so the local model is
+ * surfaced under a `claude`-prefixed id with the real model id as its
+ * display name.
+ */
+export function anthropicModelsResponse(backendModel) {
+    const id = "claude-warrant-local";
+    return new Response(JSON.stringify({
+        data: [
+            {
+                type: "model",
+                id,
+                display_name: backendModel ?? "warrant local model",
+                created_at: new Date(0).toISOString()
+            }
+        ],
+        has_more: false,
+        first_id: id,
+        last_id: id
+    }), { status: 200, headers: { "content-type": "application/json" } });
+}

package/dist/adapters/chat.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * OpenAI Chat Completions surface. This is the gateway's "core" dialect: it is
+ * what the owned mlx fork speaks, what opencode and the Cursor IDE plan panel
+ * consume directly, and what the Anthropic and Responses adapters translate
+ * down to. The handlers here are deliberately thin — the request is forwarded
+ * to the backend and the upstream response (including SSE streams) is piped
+ * straight back — so the only logic is filling in a default model.
+ */
+/**
+ * Fill in `model` from the backend default when the caller omitted it.
+ * `body` is returned unchanged when `defaultModel` is undefined, when `body`
+ * is not a non-array object, or when it already has a `model` value;
+ * otherwise a shallow copy with `model` set to `defaultModel` is returned.
+ */
+export declare function withDefaultModel(body: unknown, defaultModel: string | undefined): unknown;
+/**
+ * Whether a chat/completions request asked for a streamed response: true only
+ * when `body` is a non-array object whose `stream` is exactly `true`.
+ */
+export declare function isStream(body: unknown): boolean;
+/**
+ * The model id a request will run as, after default injection: the body's
+ * `model` when it is a string, otherwise `defaultModel` (possibly undefined).
+ */
+export declare function effectiveModel(body: unknown, defaultModel: string | undefined): string | undefined;

package/dist/adapters/chat.js ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * OpenAI Chat Completions surface. This is the gateway's "core" dialect: it is
+ * what the owned mlx fork speaks, what opencode and the Cursor IDE plan panel
+ * consume directly, and what the Anthropic and Responses adapters translate
+ * down to. The handlers here are deliberately thin — the request is forwarded
+ * to the backend and the upstream response (including SSE streams) is piped
+ * straight back — so the only logic is filling in a default model.
+ */
+function asObject(body) {
+    if (typeof body === "object" && body !== null && !Array.isArray(body)) {
+        return body;
+    }
+    return undefined;
+}
+/** Fill in `model` from the backend default when the caller omitted it. */
+export function withDefaultModel(body, defaultModel) {
+    if (defaultModel === undefined)
+        return body;
+    const obj = asObject(body);
+    if (obj === undefined || obj.model !== undefined)
+        return body;
+    return { ...obj, model: defaultModel };
+}
+/** Whether a chat/completions request asked for a streamed response. */
+export function isStream(body) {
+    return asObject(body)?.stream === true;
+}
+/** The model id a request will run as, after default injection. */
+export function effectiveModel(body, defaultModel) {
+    const model = asObject(body)?.model;
+    if (typeof model === "string")
+        return model;
+    return defaultModel;
+}

package/dist/adapters/responses.d.ts ADDED Viewed

@@ -0,0 +1,94 @@
+/**
+ * OpenAI Responses adapter. Codex speaks the Responses API exclusively
+ * (`wire_api="responses"`; Chat Completions support was removed), so to back it
+ * with a local model we translate `/v1/responses` to and from the gateway's
+ * OpenAI Chat Completions core. The pure translation functions are exported for
+ * testing; the handler returns a `Response` the server pipes (JSON or SSE).
+ *
+ * This is the highest-fidelity adapter: it maps Responses `input` items
+ * (messages, function calls, function-call outputs) into chat messages, and
+ * emits the Responses streaming event sequence (`response.created`,
+ * `response.output_item.added`, `response.output_text.delta`,
+ * `response.function_call_arguments.delta`, `response.completed`, …) from chat
+ * completion chunks.
+ */
+import type { Backend } from "../backend.js";
+type ResponsesContentPart = { // one element of a message item's array-form `content`
+    type: string; // part discriminator; not restricted to a fixed set here
+    text?: string;
+    image_url?: string;
+    [key: string]: unknown; // any further keys are allowed and left untyped
+};
+type ResponsesInputItem = { // message item; `type` may be omitted
+    type?: "message";
+    role: "user" | "assistant" | "system" | "developer";
+    content: string | ResponsesContentPart[]; // plain string or a list of parts
+} | { // function-call item
+    type: "function_call";
+    call_id?: string;
+    id?: string;
+    name: string;
+    arguments: string; // NOTE(review): presumably a JSON-encoded string — confirm against responses.js
+} | { // function-call output item, tied to a call through `call_id`
+    type: "function_call_output";
+    call_id: string;
+    output: unknown;
+} | { // catch-all: any other `type` with arbitrary fields
+    type: string;
+    [key: string]: unknown;
+};
+export type ResponsesRequest = { // body accepted by handleResponses/responsesToChat; every field is optional
+    model?: string;
+    instructions?: string;
+    input?: string | ResponsesInputItem[]; // a bare string or a list of input items
+    tools?: Array<{
+        type?: string;
+        name: string;
+        description?: string;
+        parameters?: unknown; // NOTE(review): presumably a JSON Schema object — confirm
+        strict?: boolean;
+    }>;
+    tool_choice?: "auto" | "none" | "required" | { // keyword, or one specific function by name
+        type: "function";
+        name: string;
+    };
+    max_output_tokens?: number;
+    temperature?: number;
+    top_p?: number;
+    stream?: boolean;
+};
+type OpenAiToolCall = { // tool call as found in a chat `message` or `delta`; every field is optional
+    id?: string;
+    index?: number; // NOTE(review): presumably the tool-call position used to group streamed fragments — confirm
+    function?: {
+        name?: string;
+        arguments?: string;
+    };
+};
+type OpenAiDelta = { // incremental payload carried in `OpenAiChoice.delta`
+    content?: string | null;
+    tool_calls?: OpenAiToolCall[];
+};
+type OpenAiChoice = { // one entry of `OpenAiResponse.choices`
+    delta?: OpenAiDelta; // NOTE(review): presumably set on streamed chunks only — confirm
+    message?: { // NOTE(review): presumably set on non-streamed responses only — confirm
+        content?: string | null;
+        tool_calls?: OpenAiToolCall[];
+    };
+    finish_reason?: string | null;
+};
+type OpenAiUsage = { // token counters; both optional
+    prompt_tokens?: number;
+    completion_tokens?: number;
+};
+type OpenAiResponse = { // chat completion payload consumed by chatToResponses; every field is optional
+    id?: string;
+    choices?: OpenAiChoice[];
+    usage?: OpenAiUsage;
+};
+/**
+ * Translate a Responses request to an OpenAI Chat Completions body.
+ * NOTE(review): `backendModel` presumably selects the upstream model when it
+ * is defined — confirm against responses.js.
+ */
+export declare function responsesToChat(body: ResponsesRequest, backendModel: string | undefined): Record<string, unknown>;
+export declare function chatToResponses(openai: OpenAiResponse, model: string): Record<string, unknown>; // NOTE(review): presumably builds the non-streaming Responses object from a chat completion, echoing `model` — confirm
+export declare function openAiSseToResponses(upstream: ReadableStream<Uint8Array>, model: string): ReadableStream<Uint8Array>; // byte stream in, byte stream out; NOTE(review): presumably chat SSE chunks re-emitted as Responses streaming events — confirm
+export declare function handleResponses(backend: Backend, body: ResponsesRequest, modelCallId?: string, signal?: AbortSignal): Promise<Response>; // resolves to the `Response` the server pipes (JSON or SSE per the file header)
+export {};