npm - @infersec/conduit - Versions diffs - 1.28.1 → 1.28.2 - Mend

@infersec/conduit 1.28.1 → 1.28.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/dist/requestHandlers/createConduitGeneralAPIReferenceHandlers.d.ts +92 -0
package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts +208 -0
package/dist/requestHandlers/createHealthHandler.d.ts +2 -0
package/dist/{start-DtAR3bT0.js → start-CyERvKjg.js} +1494 -1414
package/package.json +1 -1

package/dist/{start-DtAR3bT0.js → start-CyERvKjg.js} RENAMED Viewed

@@ -12,7 +12,7 @@ import require$$3$1 from 'assert';
 import require$$4$1 from 'events';
 import require$$1$1 from 'stream';
 import crypto, { createHash } from 'node:crypto';
-import require$$0$7, { Readable, Transform, getDefaultHighWaterMark, Duplex, Writable, PassThrough } from 'node:stream';
+import require$$0$7, { Readable, Transform, PassThrough, getDefaultHighWaterMark, Duplex, Writable } from 'node:stream';
 import 'argon2';
 import { spawn, ChildProcess, execFile, spawnSync } from 'node:child_process';
 import require$$0$6 from 'node:assert';
@@ -108649,352 +108649,551 @@ class ModelManager extends EventEmitter {
     }
 }
-async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
-    const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
-    const maxReconnectDelayMs = 30000;
-    let reconnectAttempt = 0;
-    while (!signal?.aborted) {
-        const connectionStartedAt = Date.now();
-        try {
-            await connectSSE(streamURL, {
-                headers: {
-                    "x-api-key": configuration.apiKey
-                },
-                onError: (error) => {
-                    logger.error("SSE connection error", {
-                        error
-                    });
-                },
-                onMessage: (message) => {
-                    if (message.event !== "request") {
-                        return;
+function createConduitGeneralAPIReferenceHandlers({ cycleEngine, conduitStateManager, getModelManager, logger, setErrorState, startEngine, stopEngine, stopRequestedByControl }) {
+    return {
+        "/conduit/engine/start": {
+            POST: async () => {
+                const modelManager = getModelManager();
+                if (conduitStateManager.getState().state !== "idle") {
+                    return {
+                        status: 409,
+                        statusText: "Engine can only be started from idle state"
+                    };
+                }
+                if (!modelManager.canStart) {
+                    return {
+                        status: 409,
+                        statusText: `Engine cannot be started from current state: ${modelManager.state}`
+                    };
+                }
+                try {
+                    logger.info("Received remote engine start request");
+                    await startEngine();
+                    return {
+                        body: {
+                            acknowledged: true
+                        },
+                        status: 202
+                    };
+                }
+                catch (error) {
+                    if (stopRequestedByControl() || modelManager.state === "stopped") {
+                        return {
+                            status: 409,
+                            statusText: "Engine start was interrupted"
+                        };
                     }
-                    const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
-                    handleRequest({
-                        apiURL,
-                        configuration,
-                        logger,
-                        modelID,
-                        onRequest,
-                        onRequestEnd,
-                        onRequestStart,
-                        reportMetrics,
-                        request: payload,
-                        signal
-                    }).catch(error => {
-                        logger.error("SSE request handler failed", {
-                            error: asError(error),
-                            requestMethod: payload.requestID
-                        });
-                    });
-                },
-                signal
-            });
-        }
-        catch (error) {
-            if (signal?.aborted) {
-                return;
+                    const parsedError = asError(error);
+                    setErrorState({ error: parsedError.message });
+                    return {
+                        status: 500,
+                        statusText: parsedError.message
+                    };
+                }
             }
-            const isTerminated = isTerminatedError(error);
-            if (!isTerminated) {
-                logger.error("SSE connection failed", {
-                    ...getNetworkErrorAttributes(error),
-                    error: asError(error)
+        },
+        "/conduit/engine/stop": {
+            POST: async () => {
+                const modelManager = getModelManager();
+                const sourceState = conduitStateManager.getState().state;
+                if (sourceState !== "bootingEngine" && sourceState !== "online") {
+                    return {
+                        status: 409,
+                        statusText: "Engine can only be stopped while booting or online"
+                    };
+                }
+                if (!modelManager.canStop) {
+                    return {
+                        status: 409,
+                        statusText: `Engine cannot be stopped from current state: ${modelManager.state}`
+                    };
+                }
+                logger.info("Received remote engine stop request");
+                stopEngine({
+                    reason: "Remote shutdown requested"
+                }).catch(error => {
+                    const parsedError = asError(error);
+                    logger.error("Remote engine stop request failed", {
+                        error: parsedError
+                    });
+                    setErrorState({ error: parsedError.message });
                 });
+                return {
+                    body: {
+                        acknowledged: true
+                    },
+                    status: 202
+                };
             }
-            if (signal?.aborted) {
-                return;
+        },
+        "/conduit/engine/cycle": {
+            POST: async () => {
+                const modelManager = getModelManager();
+                const sourceState = conduitStateManager.getState().state;
+                if (sourceState !== "bootingEngine" &&
+                    sourceState !== "online" &&
+                    sourceState !== "idle") {
+                    return {
+                        status: 409,
+                        statusText: "Engine can only be cycled while booting, online, or idle"
+                    };
+                }
+                if (sourceState !== "idle" && !modelManager.canStop) {
+                    return {
+                        status: 409,
+                        statusText: `Engine cannot be cycled from current state: ${modelManager.state}`
+                    };
+                }
+                try {
+                    logger.info("Received remote engine cycle request");
+                    await cycleEngine();
+                    return {
+                        body: {
+                            acknowledged: true
+                        },
+                        status: 202
+                    };
+                }
+                catch (error) {
+                    const parsedError = asError(error);
+                    setErrorState({ error: parsedError.message });
+                    return {
+                        status: 500,
+                        statusText: parsedError.message
+                    };
+                }
             }
-            if (!isTerminated) {
-                const connectionDurationMs = Date.now() - connectionStartedAt;
-                reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
-                const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, 1000 * 2 ** Math.min(6, reconnectAttempt)));
-                logger.warn("SSE disconnected, retrying");
-                await sleep(reconnectDelayMs);
+        }
+    };
+}
+function createPostCycleEngineHandler(options) {
+    return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/cycle"].POST;
+}
+function createPostStartEngineHandler(options) {
+    return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/start"].POST;
+}
+function createPostStopEngineHandler(options) {
+    return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/stop"].POST;
+}
+/**
+ * Coerce non-string tool_calls function.arguments to JSON strings.
+ * Some LLM backends return arguments as parsed objects instead of
+ * JSON strings, violating the OpenAI spec. This mutates in place
+ * and returns true if any coercion was performed.
+ */
+function coerceToolCallArguments(parsed) {
+    const choices = parsed.choices;
+    if (!Array.isArray(choices))
+        return false;
+    let modified = false;
+    for (const choice of choices) {
+        if (!choice || typeof choice !== "object")
+            continue;
+        const choiceRecord = choice;
+        const msg = choiceRecord.delta ?? choiceRecord.message;
+        if (!msg || typeof msg !== "object")
+            continue;
+        const toolCalls = msg.tool_calls;
+        if (!Array.isArray(toolCalls))
+            continue;
+        for (const tc of toolCalls) {
+            if (!tc || typeof tc !== "object")
+                continue;
+            const fn = tc.function;
+            if (!fn || typeof fn !== "object")
+                continue;
+            const fnRecord = fn;
+            if (fnRecord.arguments !== undefined && typeof fnRecord.arguments !== "string") {
+                fnRecord.arguments = JSON.stringify(fnRecord.arguments);
+                modified = true;
             }
         }
     }
+    return modified;
 }
-async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
-    function reportMetricsSafe(payload) {
-        reportMetrics(payload).catch(error => {
-            logger.warn("Failed to upload LLM prompt metrics", {
-                error: asError(error),
-                requestUrl: request.path
-            });
-        });
-    }
-    const requestStartedAt = Date.now();
-    const requestBytes = calculateRequestBytes(request.body ?? null);
-    try {
-        await onRequestStart?.(request);
-        const response = await onRequest(request);
-        const responseMetrics = await streamResponse({
-            apiURL,
-            configuration,
-            logger,
-            requestID: request.requestID,
-            requestStartedAt,
-            response,
-            signal
-        });
-        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
-        const totalTokens = 0;
-        const tokensPerSecond = calculateTokensPerSecond$1({
-            durationMs: latencyMs,
-            totalTokens
-        });
-        reportMetricsSafe({
-            bytes: requestBytes + responseMetrics.responseBytes,
-            completionTokens: 0,
-            engine: configuration.agentEngineType,
-            endpointId: null,
-            latencyMs,
-            modelId: modelID,
-            promptTokens: 0,
-            requestBytes,
-            requestId: request.requestID,
-            requestMethod: request.method,
-            requestPath: request.path,
-            responseBytes: responseMetrics.responseBytes,
-            successful: responseMetrics.status < 400,
-            timeToFirstTokenMs: responseMetrics.timeToFirstTokenMs,
-            tokensPerSecond,
-            totalTokens
-        });
-    }
-    catch (error) {
-        logger.error("SSE request failed", {
-            error: asError(error),
-            requestMethod: request.requestID
-        });
-        const failureMessage = "Bad gateway\n\nProxying failed";
-        const failureBytes = Buffer.byteLength(failureMessage, "utf8");
-        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
-        const totalTokens = 0;
-        const tokensPerSecond = calculateTokensPerSecond$1({
-            durationMs: latencyMs,
-            totalTokens
-        });
-        const streamHandler = await sendChunkStream({
-            apiURL,
-            configuration,
-            requestID: request.requestID,
-            logger
-        });
-        await streamHandler.sendChunk({
-            data: encodeBinaryChunk(Buffer.from(failureMessage)),
-            sequence: 0,
-            status: 502
-        });
-        await streamHandler.sendChunk({
-            data: null,
-            sequence: 1,
-            status: 502
-        });
-        await streamHandler.end();
-        reportMetricsSafe({
-            bytes: requestBytes + failureBytes,
-            completionTokens: 0,
-            engine: configuration.agentEngineType,
-            endpointId: null,
-            latencyMs,
-            modelId: modelID,
-            promptTokens: 0,
-            requestBytes,
-            requestId: request.requestID,
-            requestMethod: request.method,
-            requestPath: request.path,
-            responseBytes: failureBytes,
-            successful: false,
-            timeToFirstTokenMs: latencyMs,
-            tokensPerSecond,
-            totalTokens
-        });
+function isEngineUsageChunk(value) {
+    if (!value || typeof value !== "object") {
+        return false;
     }
-    finally {
-        await onRequestEnd?.(request);
+    const record = value;
+    if (!record.usage || typeof record.usage !== "object") {
+        return false;
     }
+    return true;
 }
-async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response, signal }) {
-    let sequence = 0;
+function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
+    const startedAt = requestStartedAt ?? Date.now();
+    const passThrough = new PassThrough();
     let responseBytes = 0;
-    let timeToFirstTokenMs = null;
-    const streamHandler = await sendChunkStream({
-        apiURL,
-        configuration,
-        requestID,
-        logger
-    });
-    if (response.body instanceof Readable) {
-        for await (const chunk of response.body) {
-            if (signal?.aborted) {
-                streamHandler.abort();
-                throw new Error("Request cancelled");
+    let firstChunkAt = null;
+    let usage = null;
+    let buffer = "";
+    let completed = false;
+    function modifyChunkWithUsage(chunk) {
+        const text = chunk.toString("utf8");
+        const lines = text.split("\n");
+        const modifiedLines = [];
+        for (const rawLine of lines) {
+            const line = rawLine.trim();
+            if (!line.startsWith("data:")) {
+                modifiedLines.push(rawLine);
+                continue;
             }
-            const buffer = Buffer.isBuffer(chunk)
-                ? chunk
-                : Buffer.from(chunk);
-            if (timeToFirstTokenMs === null) {
-                timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
+            const payload = line.slice(5).trim();
+            if (!payload || payload === "[DONE]") {
+                modifiedLines.push(rawLine);
+                continue;
             }
-            responseBytes += buffer.length;
-            await streamHandler.sendChunk({
-                data: encodeBinaryChunk(buffer),
-                sequence,
-                status: response.status
-            });
-            sequence += 1;
-        }
-        await streamHandler.sendChunk({
-            data: null,
-            sequence,
-            status: response.status
-        });
-        await streamHandler.end();
-        return {
-            responseBytes,
-            status: response.status,
-            timeToFirstTokenMs
-        };
-    }
-    const responsePayload = response.body
-        ? typeof response.body === "string"
-            ? response.body
-            : JSON.stringify(response.body)
-        : "";
-    if (responsePayload.length > 0) {
-        responseBytes = Buffer.byteLength(responsePayload, "utf8");
-        timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
-    }
-    await streamHandler.sendChunk({
-        data: encodeBinaryChunk(Buffer.from(responsePayload)),
-        headers: response.headers,
-        sequence,
-        status: response.status
-    });
-    await streamHandler.sendChunk({
-        data: null,
-        sequence: sequence + 1,
-        status: response.status
-    });
-    await streamHandler.end();
-    logger.info("SSE response queued", {
-        requestMethod: requestID
-    });
-    return {
-        responseBytes,
-        status: response.status,
-        timeToFirstTokenMs
-    };
-}
-function encodeBinaryChunk(chunk) {
-    return chunk.toString("base64");
-}
-async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
-    const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
-    const maxFlushAttempts = 3;
-    let isAborted = false;
-    let isClosed = false;
-    let activeAbortController = null;
-    const chunks = [];
-    const sendChunk = async (payload) => {
-        if (isAborted || isClosed) {
-            return;
-        }
-        const response = ClientToServerAPIResponseSchema.parse({
-            data: payload.data,
-            headers: payload.headers,
-            requestID,
-            status: payload.status
-        });
-        const chunk = JSON.stringify({
-            ...response,
-            sequence: payload.sequence
-        });
-        chunks.push(Buffer.from(chunk + "\n"));
-        if (chunks.length >= 10) {
-            await flushChunks();
-        }
-    };
-    const flushChunks = async () => {
-        if (chunks.length === 0 || isAborted) {
-            return;
-        }
-        const batch = chunks.splice(0, chunks.length);
-        const body = Buffer.concat(batch);
-        for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
             try {
-                activeAbortController = new AbortController();
-                const response = await fetchWithRetry(streamURL, {
-                    body: body.toString(),
-                    headers: {
-                        "content-type": "application/json",
-                        "x-api-key": configuration.apiKey
-                    },
-                    method: "POST",
-                    signal: activeAbortController.signal
-                }, {
-                    maxAttempts: 2,
-                    timeoutMs: 15000
-                });
-                if (!response.ok) {
-                    throw new Error(`Chunk stream flush failed with status ${response.status}`);
+                const parsed = JSON.parse(payload);
+                let modified = false;
+                if (coerceToolCallArguments(parsed)) {
+                    modified = true;
                 }
-                return;
-            }
-            catch (error) {
-                if (isAborted) {
-                    return;
+                if (parsed.usage) {
+                    const usageChunk = parsed.usage;
+                    const effectiveContext = getEffectiveContextLength({
+                        contextLength,
+                        engine,
+                        parallelism
+                    });
+                    if (usageChunk.context_usage === undefined &&
+                        usageChunk.prompt_tokens !== undefined &&
+                        effectiveContext !== null) {
+                        usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
+                        modified = true;
+                    }
                 }
-                if (attempt >= maxFlushAttempts) {
-                    chunks.unshift(...batch);
-                    throw asError(error);
+                if (modified) {
+                    modifiedLines.push("data: " + JSON.stringify(parsed));
+                    continue;
                 }
-                logger.warn("Failed to send chunk batch", {
-                    ...getNetworkErrorAttributes(error),
-                    error: asError(error)
-                });
-                await sleep(100 * attempt);
             }
-            finally {
-                activeAbortController = null;
+            catch (_error) {
+                // Ignore malformed chunks
             }
+            modifiedLines.push(rawLine);
         }
-    };
-    const end = async () => {
-        if (isClosed || isAborted) {
-            return;
+        return Buffer.from(modifiedLines.join("\n"), "utf8");
+    }
+    function parseUsageFromBuffer() {
+        const lines = buffer.split("\n");
+        buffer = lines.pop() ?? "";
+        for (const rawLine of lines) {
+            const line = rawLine.trim();
+            if (!line.startsWith("data:")) {
+                continue;
+            }
+            const payload = line.slice(5).trim();
+            if (!payload || payload === "[DONE]") {
+                continue;
+            }
+            try {
+                const parsed = JSON.parse(payload);
+                if (isEngineUsageChunk(parsed)) {
+                    const completionTokens = parsed.usage?.completion_tokens ?? null;
+                    const promptTokens = parsed.usage?.prompt_tokens ?? null;
+                    const totalTokens = parsed.usage?.total_tokens ?? null;
+                    let contextUsage = parsed.usage?.context_usage ?? null;
+                    const effectiveContextForUsage = getEffectiveContextLength({
+                        contextLength,
+                        engine,
+                        parallelism
+                    });
+                    if (contextUsage === null &&
+                        promptTokens !== null &&
+                        effectiveContextForUsage !== null) {
+                        contextUsage = promptTokens / effectiveContextForUsage;
+                    }
+                    usage = {
+                        completionTokens,
+                        contextUsage,
+                        promptTokens,
+                        totalTokens
+                    };
+                }
+            }
+            catch (_error) {
+                // Ignore malformed chunks
+            }
         }
-        await flushChunks();
-        isClosed = true;
-    };
-    const abort = (error) => {
-        isAborted = true;
-        if (activeAbortController) {
-            activeAbortController.abort();
+    }
+    function finalize(error) {
+        if (completed) {
+            return;
         }
-        chunks.length = 0;
-        if (error) {
-            logger.error("Chunk stream aborted", {
-                error: asError(error)
+        completed = true;
+        if (onComplete) {
+            const completion = onComplete({
+                durationMs: Math.max(0, Date.now() - startedAt),
+                error,
+                requestBodyBytes,
+                responseBytes,
+                timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
+                usage
             });
+            if (completion && typeof completion.catch === "function") {
+                completion.catch(error => {
+                    logger.error("Engine metrics completion failed", {
+                        error: asError(error),
+                        requestUrl: requestPath
+                    });
+                });
+            }
         }
-    };
-    return {
-        sendChunk,
-        end,
-        abort
-    };
-}
-function calculateRequestBytes(body) {
-    if (body === null || body === undefined) {
-        return 0;
     }
-    if (typeof body === "string") {
-        return Buffer.byteLength(body, "utf8");
+    body.on("data", (chunk) => {
+        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+        if (firstChunkAt === null) {
+            firstChunkAt = Date.now();
+        }
+        responseBytes += chunkBuffer.length;
+        buffer += chunkBuffer.toString("utf8");
+        parseUsageFromBuffer();
+        passThrough.write(modifyChunkWithUsage(chunkBuffer));
+    });
+    body.once("error", err => {
+        logEngineMetrics({
+            agentEngineType,
+            error: err,
+            level: "error",
+            logger,
+            requestBodyBytes,
+            requestPath,
+            responseBytes,
+            usage
+        });
+        finalize(err);
+        passThrough.destroy(err);
+    });
+    body.once("end", () => {
+        parseUsageFromBuffer();
+        logEngineMetrics({
+            agentEngineType,
+            level: "info",
+            logger,
+            requestBodyBytes,
+            requestPath,
+            responseBytes,
+            usage
+        });
+        finalize(null);
+        passThrough.end();
+    });
+    body.once("close", () => {
+        if (completed) {
+            if (!passThrough.writableEnded) {
+                passThrough.end();
+            }
+            return;
+        }
+        const closeError = new Error("Engine response stream closed before completion");
+        logEngineMetrics({
+            agentEngineType,
+            error: closeError,
+            level: "error",
+            logger,
+            requestBodyBytes,
+            requestPath,
+            responseBytes,
+            usage
+        });
+        finalize(closeError);
+        if (!passThrough.writableEnded) {
+            passThrough.end();
+        }
+    });
+    return {
+        stream: passThrough
+    };
+}
+function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
+    const maxUsageCaptureBytes = 1024 * 1024;
+    const startedAt = requestStartedAt ?? Date.now();
+    const passThrough = new PassThrough();
+    let responseBytes = 0;
+    let firstChunkAt = null;
+    let usage = null;
+    const usageChunks = [];
+    let usageBytes = 0;
+    let usageCaptureEnabled = true;
+    let completed = false;
+    function finalize(error) {
+        if (completed) {
+            return;
+        }
+        completed = true;
+        if (onComplete) {
+            const completion = onComplete({
+                durationMs: Math.max(0, Date.now() - startedAt),
+                error,
+                requestBodyBytes,
+                responseBytes,
+                timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
+                usage
+            });
+            if (completion && typeof completion.catch === "function") {
+                completion.catch(error => {
+                    logger.error("Engine metrics completion failed", {
+                        error: asError(error),
+                        requestUrl: requestPath
+                    });
+                });
+            }
+        }
     }
-    return Buffer.byteLength(JSON.stringify(body), "utf8");
+    body.on("data", (chunk) => {
+        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+        if (firstChunkAt === null) {
+            firstChunkAt = Date.now();
+        }
+        responseBytes += chunkBuffer.length;
+        if (usageCaptureEnabled) {
+            const nextSize = usageBytes + chunkBuffer.length;
+            if (nextSize <= maxUsageCaptureBytes) {
+                usageChunks.push(chunkBuffer);
+                usageBytes = nextSize;
+            }
+            else {
+                usageCaptureEnabled = false;
+                usageChunks.length = 0;
+            }
+        }
+        passThrough.write(chunkBuffer);
+    });
+    body.once("error", err => {
+        logEngineMetrics({
+            agentEngineType,
+            error: err,
+            level: "error",
+            logger,
+            requestBodyBytes,
+            requestPath,
+            responseBytes,
+            usage
+        });
+        finalize(err);
+        passThrough.destroy(err);
+    });
+    body.once("end", () => {
+        if (usageCaptureEnabled) {
+            try {
+                const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
+                if (parsed.usage) {
+                    const usageChunk = parsed.usage;
+                    const completionTokens = usageChunk.completion_tokens ?? null;
+                    const promptTokens = usageChunk.prompt_tokens ?? null;
+                    const totalTokens = usageChunk.total_tokens ?? null;
+                    let contextUsage = usageChunk.context_usage ?? null;
+                    const effectiveContext = getEffectiveContextLength({
+                        contextLength,
+                        engine,
+                        parallelism
+                    });
+                    if (contextUsage === null &&
+                        promptTokens !== null &&
+                        effectiveContext !== null) {
+                        contextUsage = promptTokens / effectiveContext;
+                    }
+                    usage = {
+                        completionTokens,
+                        contextUsage,
+                        promptTokens,
+                        totalTokens
+                    };
+                }
+            }
+            catch (error) {
+                logger.error("Failed to parse engine response body", {
+                    error: asError(error),
+                    requestUrl: requestPath
+                });
+            }
+        }
+        logEngineMetrics({
+            agentEngineType,
+            level: "info",
+            logger,
+            requestBodyBytes,
+            requestPath,
+            responseBytes,
+            usage
+        });
+        finalize(null);
+        passThrough.end();
+    });
+    body.once("close", () => {
+        if (completed) {
+            if (!passThrough.writableEnded) {
+                passThrough.end();
+            }
+            return;
+        }
+        const closeError = new Error("Engine response stream closed before completion");
+        logEngineMetrics({
+            agentEngineType,
+            error: closeError,
+            level: "error",
+            logger,
+            requestBodyBytes,
+            requestPath,
+            responseBytes,
+            usage
+        });
+        finalize(closeError);
+        if (!passThrough.writableEnded) {
+            passThrough.end();
+        }
+    });
+    return {
+        stream: passThrough
+    };
+}
+function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
+    const metricsMessage = [
+        "LLM engine stream metrics",
+        `path=${requestPath}`,
+        `bytesTo=${requestBodyBytes}`,
+        `bytesFrom=${responseBytes}`,
+        `promptTokens=${usage?.promptTokens ?? "n/a"}`,
+        `completionTokens=${usage?.completionTokens ?? "n/a"}`,
+        `totalTokens=${usage?.totalTokens ?? "n/a"}`,
+        `contextUsage=${usage?.contextUsage ?? "n/a"}`
+    ].join(" ");
+    const attributes = {
+        agentEngineType,
+        requestUrl: requestPath
+    };
+    if (error) {
+        attributes.error = error;
+    }
+    logger[level](metricsMessage, attributes);
+}
+function isPlainObject$1(value) {
+    return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+function serializeRequestBody(body) {
+    if (!isPlainObject$1(body)) {
+        const payload = typeof body === "string" ? body : JSON.stringify(body);
+        return {
+            bytes: Buffer.byteLength(payload, "utf8"),
+            payload
+        };
+    }
+    const requestPayload = { ...body };
+    const streamOptions = requestPayload.stream_options;
+    const normalizedStreamOptions = isPlainObject$1(streamOptions)
+        ? { ...streamOptions }
+        : {};
+    normalizedStreamOptions.include_usage = true;
+    requestPayload.stream_options = normalizedStreamOptions;
+    const payload = JSON.stringify(requestPayload);
+    return {
+        bytes: Buffer.byteLength(payload, "utf8"),
+        payload
+    };
 }
 function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
     if (durationMs <= 0) {
@@ -109006,107 +109205,714 @@ function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
     }
     return Math.round(tokensPerSecond);
 }
-/**
- * Proxy server requests to the local inference HTTP server.
- */
-async function proxyRequest({ configuration, request }) {
-    let finalPath = request.path;
-    if (request.parameters) {
-        Object.entries(request.parameters).forEach(([key, value]) => {
-            finalPath = finalPath.replace(`:${key}`, String(value));
+async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
+    function normalizeTokenCount(value) {
+        if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
+            return value;
+        }
+        return 0;
+    }
+    function reportMetricsSafe(payload) {
+        reportMetrics(payload).catch(error => {
+            logger.warn("Failed to upload LLM prompt metrics", {
+                error: asError(error),
+                requestUrl: path
+            });
         });
     }
-    const url = new URL(finalPath, `http://localhost:${configuration.port}`);
-    if (request.query) {
-        for (const [key, value] of Object.entries(request.query)) {
-            url.searchParams.set(key, value);
+    const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
+    const requestStartedAt = Date.now();
+    const requestBody = JSON.parse(serializedBody);
+    const streamRequested = requestBody.stream === true;
+    const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
+        const completionTokens = normalizeTokenCount(usage?.completionTokens);
+        const promptTokens = normalizeTokenCount(usage?.promptTokens);
+        const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
+        const latencyMs = Math.max(0, durationMs);
+        reportMetricsSafe({
+            bytes: requestBodyBytes + responseBytes,
+            completionTokens,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: path,
+            responseBytes,
+            successful: !error,
+            timeToFirstTokenMs,
+            tokensPerSecond: calculateTokensPerSecond$1({
+                durationMs: latencyMs,
+                totalTokens
+            }),
+            totalTokens
+        });
+    };
+    const response = await modelManager
+        .fetchOpenAI(path, {
+        body: serializedBody,
+        headers: {
+            "Content-Type": "application/json"
+        },
+        method: "POST"
+    })
+        .catch(error => {
+        logEngineMetrics({
+            agentEngineType: configuration.agentEngineType,
+            error: error,
+            level: "error",
+            logger,
+            requestBodyBytes,
+            requestPath: path,
+            responseBytes: 0,
+            usage: null
+        });
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        reportMetricsSafe({
+            bytes: requestBodyBytes,
+            completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: path,
+            responseBytes: 0,
+            successful: false,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: 0,
+            totalTokens: 0
+        });
+        throw error;
+    });
+    const responseStatusText = response.statusText ?? "Upstream request failed";
+    if (!response.ok) {
+        const responseBody = await response.text().catch(() => null);
+        const responseError = new Error(responseBody
+            ? `Upstream error response: ${responseBody}`
+            : "Upstream error response: empty body");
+        logger.error("LLM engine request failed", {
+            error: responseError,
+            requestUrl: path,
+            statusCode: response.status,
+            statusText: responseStatusText,
+            responseBody: responseBody ?? undefined
+        });
+        if (!response.body) {
+            return {
+                status: response.status,
+                statusText: responseStatusText
+            };
         }
     }
-    const fetchOptions = {
-        method: request.method,
-        headers: request.headers
-    };
-    if (request.body) {
-        fetchOptions.body =
-            typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
+    if (!response.body) {
+        logEngineMetrics({
+            agentEngineType: configuration.agentEngineType,
+            level: response.ok ? "info" : "error",
+            logger,
+            requestBodyBytes,
+            requestPath: path,
+            responseBytes: 0,
+            usage: null
+        });
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        reportMetricsSafe({
+            bytes: requestBodyBytes,
+            completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: path,
+            responseBytes: 0,
+            successful: false,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: 0,
+            totalTokens: 0
+        });
+        return {
+            status: response.status,
+            statusText: responseStatusText
+        };
     }
-    const response = await undiciExports.fetch(url, fetchOptions);
+    const monitoredResponse = streamRequested
+        ? monitorEngineResponseStream({
+            agentEngineType: configuration.agentEngineType,
+            body: Readable.fromWeb(response.body),
+            contextLength: modelManager.contextLength,
+            engine: configuration.agentEngineType,
+            logger,
+            onComplete: onMonitoringComplete,
+            parallelism: modelManager.parallelism,
+            requestBodyBytes,
+            requestPath: path,
+            requestStartedAt
+        })
+        : monitorEngineResponseSingle({
+            agentEngineType: configuration.agentEngineType,
+            body: Readable.fromWeb(response.body),
+            contextLength: modelManager.contextLength,
+            engine: configuration.agentEngineType,
+            logger,
+            onComplete: onMonitoringComplete,
+            parallelism: modelManager.parallelism,
+            requestBodyBytes,
+            requestPath: path,
+            requestStartedAt
+        });
     return {
-        body: response.body ? Readable.fromWeb(response.body) : null,
+        body: monitoredResponse.stream,
         headers: Object.fromEntries(response.headers.entries()),
-        requestID: request.requestID,
         status: response.status
     };
 }
-class ConduitStateReportManager {
-    apiClient;
-    conduitStateManager;
-    downloadProgressReportIntervalMs;
-    logger;
-    stateIntervalMs;
-    conduitStateReportInFlight = false;
-    lastConduitStateReportAt = 0;
-    pendingConduitStateReport = null;
-    stateInterval = null;
-    constructor({ apiClient, conduitStateManager, downloadProgressReportIntervalMs, logger, stateIntervalMs }) {
-        this.apiClient = apiClient;
-        this.conduitStateManager = conduitStateManager;
-        this.downloadProgressReportIntervalMs = downloadProgressReportIntervalMs;
-        this.logger = logger;
-        this.stateIntervalMs = stateIntervalMs;
-    }
-    async start() {
-        await this.sendConduitState();
-        this.stateInterval = setInterval(() => {
-            this.sendConduitState().catch(error => {
-                this.logger.error("Conduit state update failed", {
-                    error: asError(error)
+function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
+    return {
+        "/v1/chat/completions": {
+            POST: async ({ body }) => {
+                const modelID = getModelID();
+                const modelManager = getModelManager();
+                return proxyOpenAIStreamingRoute({
+                    body,
+                    configuration,
+                    logger,
+                    modelID,
+                    modelManager,
+                    path: "/v1/chat/completions",
+                    reportMetrics: apiClient.reportPromptMetrics
                 });
-            });
-        }, this.stateIntervalMs);
-    }
-    stop() {
-        if (this.stateInterval) {
-            clearInterval(this.stateInterval);
-            this.stateInterval = null;
-        }
-        if (this.pendingConduitStateReport) {
-            clearTimeout(this.pendingConduitStateReport);
-            this.pendingConduitStateReport = null;
-        }
-    }
-    reportDownloadProgress() {
-        this.scheduleConduitStateReport();
-    }
-    async reportNow() {
-        if (this.pendingConduitStateReport) {
-            clearTimeout(this.pendingConduitStateReport);
-            this.pendingConduitStateReport = null;
-        }
-        await this.triggerConduitStateReport();
-    }
-    reportStateChange() {
-        if (this.pendingConduitStateReport) {
-            clearTimeout(this.pendingConduitStateReport);
-            this.pendingConduitStateReport = null;
-        }
-        this.triggerConduitStateReport().catch(error => {
-            this.logger.error("Conduit state update failed", {
-                error: asError(error)
-            });
-        });
-    }
-    async sendConduitState() {
-        try {
-            await this.apiClient.reportConduitState(this.conduitStateManager.touch());
-            this.lastConduitStateReportAt = Date.now();
+            }
+        },
+        "/v1/completions": {
+            POST: async ({ body }) => {
+                const modelID = getModelID();
+                const modelManager = getModelManager();
+                return proxyOpenAIStreamingRoute({
+                    body,
+                    configuration,
+                    logger,
+                    modelID,
+                    modelManager,
+                    path: "/v1/completions",
+                    reportMetrics: apiClient.reportPromptMetrics
+                });
+            }
+        },
+        "/v1/models": {
+            GET: async () => {
+                const modelID = getModelID();
+                const modelManager = getModelManager();
+                const effectiveContextLength = getEffectiveContextLength({
+                    contextLength: modelManager.contextLength,
+                    engine: configuration.agentEngineType,
+                    parallelism: modelManager.parallelism
+                });
+                return {
+                    body: {
+                        data: [
+                            {
+                                created: startup / 1000,
+                                id: modelID,
+                                limit: {
+                                    context: effectiveContextLength
+                                },
+                                object: "model",
+                                owned_by: "infersec"
+                            }
+                        ],
+                        object: "list"
+                    },
+                    status: 200
+                };
+            }
         }
-        catch (error) {
-            this.logger.error("Conduit state update failed", {
-                ...getNetworkErrorAttributes(error),
-                error: asError(error)
+    };
+}
+function createGetModelsHandler(options) {
+    return createConduitOpenAIAPIReferenceHandlers(options)["/v1/models"].GET;
+}
+function createPostChatCompletionsHandler(options) {
+    return createConduitOpenAIAPIReferenceHandlers(options)["/v1/chat/completions"].POST;
+}
+function createPostCompletionsHandler(options) {
+    return createConduitOpenAIAPIReferenceHandlers(options)["/v1/completions"].POST;
+}
+function createHealthHandler() {
+    return (_req, res) => {
+        res.status(200).send("OK");
+    };
+}
+async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
+    const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
+    const maxReconnectDelayMs = 30000;
+    let reconnectAttempt = 0;
+    while (!signal?.aborted) {
+        const connectionStartedAt = Date.now();
+        try {
+            await connectSSE(streamURL, {
+                headers: {
+                    "x-api-key": configuration.apiKey
+                },
+                onError: (error) => {
+                    logger.error("SSE connection error", {
+                        error
+                    });
+                },
+                onMessage: (message) => {
+                    if (message.event !== "request") {
+                        return;
+                    }
+                    const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
+                    handleRequest({
+                        apiURL,
+                        configuration,
+                        logger,
+                        modelID,
+                        onRequest,
+                        onRequestEnd,
+                        onRequestStart,
+                        reportMetrics,
+                        request: payload,
+                        signal
+                    }).catch(error => {
+                        logger.error("SSE request handler failed", {
+                            error: asError(error),
+                            requestMethod: payload.requestID
+                        });
+                    });
+                },
+                signal
+            });
+        }
+        catch (error) {
+            if (signal?.aborted) {
+                return;
+            }
+            const isTerminated = isTerminatedError(error);
+            if (!isTerminated) {
+                logger.error("SSE connection failed", {
+                    ...getNetworkErrorAttributes(error),
+                    error: asError(error)
+                });
+            }
+            if (signal?.aborted) {
+                return;
+            }
+            if (!isTerminated) {
+                const connectionDurationMs = Date.now() - connectionStartedAt;
+                reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
+                const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, 1000 * 2 ** Math.min(6, reconnectAttempt)));
+                logger.warn("SSE disconnected, retrying");
+                await sleep(reconnectDelayMs);
+            }
+        }
+    }
+}
+async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
+    function reportMetricsSafe(payload) {
+        reportMetrics(payload).catch(error => {
+            logger.warn("Failed to upload LLM prompt metrics", {
+                error: asError(error),
+                requestUrl: request.path
+            });
+        });
+    }
+    const requestStartedAt = Date.now();
+    const requestBytes = calculateRequestBytes(request.body ?? null);
+    try {
+        await onRequestStart?.(request);
+        const response = await onRequest(request);
+        const responseMetrics = await streamResponse({
+            apiURL,
+            configuration,
+            logger,
+            requestID: request.requestID,
+            requestStartedAt,
+            response,
+            signal
+        });
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        const totalTokens = 0;
+        const tokensPerSecond = calculateTokensPerSecond({
+            durationMs: latencyMs,
+            totalTokens
+        });
+        reportMetricsSafe({
+            bytes: requestBytes + responseMetrics.responseBytes,
+            completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes,
+            requestId: request.requestID,
+            requestMethod: request.method,
+            requestPath: request.path,
+            responseBytes: responseMetrics.responseBytes,
+            successful: responseMetrics.status < 400,
+            timeToFirstTokenMs: responseMetrics.timeToFirstTokenMs,
+            tokensPerSecond,
+            totalTokens
+        });
+    }
+    catch (error) {
+        logger.error("SSE request failed", {
+            error: asError(error),
+            requestMethod: request.requestID
+        });
+        const failureMessage = "Bad gateway\n\nProxying failed";
+        const failureBytes = Buffer.byteLength(failureMessage, "utf8");
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        const totalTokens = 0;
+        const tokensPerSecond = calculateTokensPerSecond({
+            durationMs: latencyMs,
+            totalTokens
+        });
+        const streamHandler = await sendChunkStream({
+            apiURL,
+            configuration,
+            requestID: request.requestID,
+            logger
+        });
+        await streamHandler.sendChunk({
+            data: encodeBinaryChunk(Buffer.from(failureMessage)),
+            sequence: 0,
+            status: 502
+        });
+        await streamHandler.sendChunk({
+            data: null,
+            sequence: 1,
+            status: 502
+        });
+        await streamHandler.end();
+        reportMetricsSafe({
+            bytes: requestBytes + failureBytes,
+            completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes,
+            requestId: request.requestID,
+            requestMethod: request.method,
+            requestPath: request.path,
+            responseBytes: failureBytes,
+            successful: false,
+            timeToFirstTokenMs: latencyMs,
+            tokensPerSecond,
+            totalTokens
+        });
+    }
+    finally {
+        await onRequestEnd?.(request);
+    }
+}
+async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response, signal }) {
+    let sequence = 0;
+    let responseBytes = 0;
+    let timeToFirstTokenMs = null;
+    const streamHandler = await sendChunkStream({
+        apiURL,
+        configuration,
+        requestID,
+        logger
+    });
+    if (response.body instanceof Readable) {
+        for await (const chunk of response.body) {
+            if (signal?.aborted) {
+                streamHandler.abort();
+                throw new Error("Request cancelled");
+            }
+            const buffer = Buffer.isBuffer(chunk)
+                ? chunk
+                : Buffer.from(chunk);
+            if (timeToFirstTokenMs === null) {
+                timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
+            }
+            responseBytes += buffer.length;
+            await streamHandler.sendChunk({
+                data: encodeBinaryChunk(buffer),
+                sequence,
+                status: response.status
+            });
+            sequence += 1;
+        }
+        await streamHandler.sendChunk({
+            data: null,
+            sequence,
+            status: response.status
+        });
+        await streamHandler.end();
+        return {
+            responseBytes,
+            status: response.status,
+            timeToFirstTokenMs
+        };
+    }
+    const responsePayload = response.body
+        ? typeof response.body === "string"
+            ? response.body
+            : JSON.stringify(response.body)
+        : "";
+    if (responsePayload.length > 0) {
+        responseBytes = Buffer.byteLength(responsePayload, "utf8");
+        timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
+    }
+    await streamHandler.sendChunk({
+        data: encodeBinaryChunk(Buffer.from(responsePayload)),
+        headers: response.headers,
+        sequence,
+        status: response.status
+    });
+    await streamHandler.sendChunk({
+        data: null,
+        sequence: sequence + 1,
+        status: response.status
+    });
+    await streamHandler.end();
+    logger.info("SSE response queued", {
+        requestMethod: requestID
+    });
+    return {
+        responseBytes,
+        status: response.status,
+        timeToFirstTokenMs
+    };
+}
+function encodeBinaryChunk(chunk) {
+    return chunk.toString("base64");
+}
+async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
+    const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
+    const maxFlushAttempts = 3;
+    let isAborted = false;
+    let isClosed = false;
+    let activeAbortController = null;
+    const chunks = [];
+    const sendChunk = async (payload) => {
+        if (isAborted || isClosed) {
+            return;
+        }
+        const response = ClientToServerAPIResponseSchema.parse({
+            data: payload.data,
+            headers: payload.headers,
+            requestID,
+            status: payload.status
+        });
+        const chunk = JSON.stringify({
+            ...response,
+            sequence: payload.sequence
+        });
+        chunks.push(Buffer.from(chunk + "\n"));
+        if (chunks.length >= 10) {
+            await flushChunks();
+        }
+    };
+    const flushChunks = async () => {
+        if (chunks.length === 0 || isAborted) {
+            return;
+        }
+        const batch = chunks.splice(0, chunks.length);
+        const body = Buffer.concat(batch);
+        for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
+            try {
+                activeAbortController = new AbortController();
+                const response = await fetchWithRetry(streamURL, {
+                    body: body.toString(),
+                    headers: {
+                        "content-type": "application/json",
+                        "x-api-key": configuration.apiKey
+                    },
+                    method: "POST",
+                    signal: activeAbortController.signal
+                }, {
+                    maxAttempts: 2,
+                    timeoutMs: 15000
+                });
+                if (!response.ok) {
+                    throw new Error(`Chunk stream flush failed with status ${response.status}`);
+                }
+                return;
+            }
+            catch (error) {
+                if (isAborted) {
+                    return;
+                }
+                if (attempt >= maxFlushAttempts) {
+                    chunks.unshift(...batch);
+                    throw asError(error);
+                }
+                logger.warn("Failed to send chunk batch", {
+                    ...getNetworkErrorAttributes(error),
+                    error: asError(error)
+                });
+                await sleep(100 * attempt);
+            }
+            finally {
+                activeAbortController = null;
+            }
+        }
+    };
+    const end = async () => {
+        if (isClosed || isAborted) {
+            return;
+        }
+        await flushChunks();
+        isClosed = true;
+    };
+    const abort = (error) => {
+        isAborted = true;
+        if (activeAbortController) {
+            activeAbortController.abort();
+        }
+        chunks.length = 0;
+        if (error) {
+            logger.error("Chunk stream aborted", {
+                error: asError(error)
+            });
+        }
+    };
+    return {
+        sendChunk,
+        end,
+        abort
+    };
+}
+function calculateRequestBytes(body) {
+    if (body === null || body === undefined) {
+        return 0;
+    }
+    if (typeof body === "string") {
+        return Buffer.byteLength(body, "utf8");
+    }
+    return Buffer.byteLength(JSON.stringify(body), "utf8");
+}
+function calculateTokensPerSecond({ durationMs, totalTokens }) {
+    if (durationMs <= 0) {
+        return 0;
+    }
+    const tokensPerSecond = totalTokens / (durationMs / 1000);
+    if (!Number.isFinite(tokensPerSecond) || tokensPerSecond <= 0) {
+        return 0;
+    }
+    return Math.round(tokensPerSecond);
+}
+/**
+ * Proxy server requests to the local inference HTTP server.
+ */
+async function proxyRequest({ configuration, request }) {
+    let finalPath = request.path;
+    if (request.parameters) {
+        Object.entries(request.parameters).forEach(([key, value]) => {
+            finalPath = finalPath.replace(`:${key}`, String(value));
+        });
+    }
+    const url = new URL(finalPath, `http://localhost:${configuration.port}`);
+    if (request.query) {
+        for (const [key, value] of Object.entries(request.query)) {
+            url.searchParams.set(key, value);
+        }
+    }
+    const fetchOptions = {
+        method: request.method,
+        headers: request.headers
+    };
+    if (request.body) {
+        fetchOptions.body =
+            typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
+    }
+    const response = await undiciExports.fetch(url, fetchOptions);
+    return {
+        body: response.body ? Readable.fromWeb(response.body) : null,
+        headers: Object.fromEntries(response.headers.entries()),
+        requestID: request.requestID,
+        status: response.status
+    };
+}
+class ConduitStateReportManager {
+    apiClient;
+    conduitStateManager;
+    downloadProgressReportIntervalMs;
+    logger;
+    stateIntervalMs;
+    conduitStateReportInFlight = false;
+    lastConduitStateReportAt = 0;
+    pendingConduitStateReport = null;
+    stateInterval = null;
+    constructor({ apiClient, conduitStateManager, downloadProgressReportIntervalMs, logger, stateIntervalMs }) {
+        this.apiClient = apiClient;
+        this.conduitStateManager = conduitStateManager;
+        this.downloadProgressReportIntervalMs = downloadProgressReportIntervalMs;
+        this.logger = logger;
+        this.stateIntervalMs = stateIntervalMs;
+    }
+    async start() {
+        await this.sendConduitState();
+        this.stateInterval = setInterval(() => {
+            this.sendConduitState().catch(error => {
+                this.logger.error("Conduit state update failed", {
+                    error: asError(error)
+                });
+            });
+        }, this.stateIntervalMs);
+    }
+    stop() {
+        if (this.stateInterval) {
+            clearInterval(this.stateInterval);
+            this.stateInterval = null;
+        }
+        if (this.pendingConduitStateReport) {
+            clearTimeout(this.pendingConduitStateReport);
+            this.pendingConduitStateReport = null;
+        }
+    }
+    reportDownloadProgress() {
+        this.scheduleConduitStateReport();
+    }
+    async reportNow() {
+        if (this.pendingConduitStateReport) {
+            clearTimeout(this.pendingConduitStateReport);
+            this.pendingConduitStateReport = null;
+        }
+        await this.triggerConduitStateReport();
+    }
+    reportStateChange() {
+        if (this.pendingConduitStateReport) {
+            clearTimeout(this.pendingConduitStateReport);
+            this.pendingConduitStateReport = null;
+        }
+        this.triggerConduitStateReport().catch(error => {
+            this.logger.error("Conduit state update failed", {
+                error: asError(error)
+            });
+        });
+    }
+    async sendConduitState() {
+        try {
+            await this.apiClient.reportConduitState(this.conduitStateManager.touch());
+            this.lastConduitStateReportAt = Date.now();
+        }
+        catch (error) {
+            this.logger.error("Conduit state update failed", {
+                ...getNetworkErrorAttributes(error),
+                error: asError(error)
             });
         }
     }
@@ -109171,7 +109977,7 @@ class ConduitStateManager {
     }
 }
-function isPlainObject$1(value) {
+function isPlainObject(value) {
 	if (typeof value !== 'object' || value === null) {
 		return false;
 	}
@@ -109208,7 +110014,7 @@ const normalizeFileUrl = file => file instanceof URL ? fileURLToPath(file) : fil
 // This also does basic validation on them and on the command file.
 const normalizeParameters = (rawFile, rawArguments = [], rawOptions = {}) => {
 	const filePath = safeNormalizeFileUrl(rawFile, 'First argument');
-	const [commandArguments, options] = isPlainObject$1(rawArguments)
+	const [commandArguments, options] = isPlainObject(rawArguments)
 		? [[], rawArguments]
 		: [rawArguments, rawOptions];
@@ -109226,7 +110032,7 @@ const normalizeParameters = (rawFile, rawArguments = [], rawOptions = {}) => {
 		throw new TypeError(`Arguments cannot contain null bytes ("\\0"): ${nullByteArgument}`);
 	}
-	if (!isPlainObject$1(options)) {
+	if (!isPlainObject(options)) {
 		throw new TypeError(`Last argument must be an options object: ${options}`);
 	}
@@ -109423,7 +110229,7 @@ const parseExpression = expression => {
 		return String(expression);
 	}
-	if (isPlainObject$1(expression) && ('stdout' in expression || 'isMaxBuffer' in expression)) {
+	if (isPlainObject(expression) && ('stdout' in expression || 'isMaxBuffer' in expression)) {
 		return getSubprocessResult(expression);
 	}
@@ -109479,7 +110285,7 @@ const getStdioLength = ({stdio}) => Array.isArray(stdio)
 	? Math.max(stdio.length, STANDARD_STREAMS_ALIASES.length)
 	: STANDARD_STREAMS_ALIASES.length;
-const normalizeFdSpecificValue = (optionValue, optionArray, optionName) => isPlainObject$1(optionValue)
+const normalizeFdSpecificValue = (optionValue, optionArray, optionName) => isPlainObject(optionValue)
 	? normalizeOptionObject(optionValue, optionArray, optionName)
 	: optionArray.fill(optionValue);
@@ -113962,13 +114768,13 @@ const checkBooleanOption = (value, optionName) => {
 const isGenerator = value => isAsyncGenerator(value) || isSyncGenerator(value);
 const isAsyncGenerator = value => Object.prototype.toString.call(value) === '[object AsyncGeneratorFunction]';
 const isSyncGenerator = value => Object.prototype.toString.call(value) === '[object GeneratorFunction]';
-const isTransformOptions = value => isPlainObject$1(value)
+const isTransformOptions = value => isPlainObject(value)
 	&& (value.transform !== undefined || value.final !== undefined);
 const isUrl = value => Object.prototype.toString.call(value) === '[object URL]';
 const isRegularUrl = value => isUrl(value) && value.protocol !== 'file:';
-const isFilePathObject = value => isPlainObject$1(value)
+const isFilePathObject = value => isPlainObject(value)
 	&& Object.keys(value).length > 0
 	&& Object.keys(value).every(key => FILE_PATH_KEYS.has(key))
 	&& isFilePathString(value.file);
@@ -114131,7 +114937,7 @@ const normalizeDuplex = ({
 };
 const normalizeTransformStream = ({stdioItem, stdioItem: {value}, index, newTransforms, direction}) => {
-	const {transform, objectMode} = isPlainObject$1(value) ? value : {transform: value};
+	const {transform, objectMode} = isPlainObject(value) ? value : {transform: value};
 	const {writableObjectMode, readableObjectMode} = getTransformObjectModes(objectMode, index, newTransforms, direction);
 	return ({
 		...stdioItem,
@@ -114146,7 +114952,7 @@ const normalizeGenerator = ({stdioItem, stdioItem: {value}, index, newTransforms
 		binary: binaryOption = false,
 		preserveNewlines = false,
 		objectMode,
-	} = isPlainObject$1(value) ? value : {transform: value};
+	} = isPlainObject(value) ? value : {transform: value};
 	const binary = binaryOption || BINARY_ENCODINGS.has(encoding);
 	const {writableObjectMode, readableObjectMode} = getTransformObjectModes(objectMode, index, newTransforms, direction);
 	return {
@@ -116909,7 +117715,7 @@ const unpipeOnSignalAbort = async (unpipeSignal, {sourceStream, mergedStream, fi
 // Pipe a subprocess' `stdout`/`stderr`/`stdio` into another subprocess' `stdin`
 const pipeToSubprocess = (sourceInfo, ...pipeArguments) => {
-	if (isPlainObject$1(pipeArguments[0])) {
+	if (isPlainObject(pipeArguments[0])) {
 		return pipeToSubprocess.bind(undefined, {
 			...sourceInfo,
 			boundOptions: {...sourceInfo.boundOptions, ...pipeArguments[0]},
@@ -118109,7 +118915,7 @@ const mergeOptions = (boundOptions, options) => {
 };
 const mergeOption = (optionName, boundOptionValue, optionValue) => {
-	if (DEEP_OPTIONS.has(optionName) && isPlainObject$1(boundOptionValue) && isPlainObject$1(optionValue)) {
+	if (DEEP_OPTIONS.has(optionName) && isPlainObject(boundOptionValue) && isPlainObject(optionValue)) {
 		return {...boundOptionValue, ...optionValue};
 	}
@@ -118141,7 +118947,7 @@ const createExeca = (mapArguments, boundOptions, deepOptions, setBoundExeca) =>
 };
 const callBoundExeca = ({mapArguments, deepOptions = {}, boundOptions = {}, setBoundExeca, createNested}, firstArgument, ...nextArguments) => {
-	if (isPlainObject$1(firstArgument)) {
+	if (isPlainObject(firstArgument)) {
 		return createNested(mapArguments, mergeOptions(boundOptions, firstArgument), setBoundExeca);
 	}
@@ -118172,795 +118978,193 @@ const parseArguments = ({mapArguments, firstArgument, nextArguments, deepOptions
 	return {
 		file,
 		commandArguments,
-		options,
-		isSync,
-	};
-};
-// Main logic for `execaCommand()`
-const mapCommandAsync = ({file, commandArguments}) => parseCommand(file, commandArguments);
-// Main logic for `execaCommandSync()`
-const mapCommandSync = ({file, commandArguments}) => ({...parseCommand(file, commandArguments), isSync: true});
-// Convert `execaCommand(command)` into `execa(file, ...commandArguments)`
-const parseCommand = (command, unusedArguments) => {
-	if (unusedArguments.length > 0) {
-		throw new TypeError(`The command and its arguments must be passed as a single string: ${command} ${unusedArguments}.`);
-	}
-	const [file, ...commandArguments] = parseCommandString(command);
-	return {file, commandArguments};
-};
-// Convert `command` string into an array of file or arguments to pass to $`${...fileOrCommandArguments}`
-const parseCommandString = command => {
-	if (typeof command !== 'string') {
-		throw new TypeError(`The command must be a string: ${String(command)}.`);
-	}
-	const trimmedCommand = command.trim();
-	if (trimmedCommand === '') {
-		return [];
-	}
-	const tokens = [];
-	for (const token of trimmedCommand.split(SPACES_REGEXP)) {
-		// Allow spaces to be escaped by a backslash if not meant as a delimiter
-		const previousToken = tokens.at(-1);
-		if (previousToken && previousToken.endsWith('\\')) {
-			// Merge previous token with current one
-			tokens[tokens.length - 1] = `${previousToken.slice(0, -1)} ${token}`;
-		} else {
-			tokens.push(token);
-		}
-	}
-	return tokens;
-};
-const SPACES_REGEXP = / +/g;
-// Sets `$.sync` and `$.s`
-const setScriptSync = (boundExeca, createNested, boundOptions) => {
-	boundExeca.sync = createNested(mapScriptSync, boundOptions);
-	boundExeca.s = boundExeca.sync;
-};
-// Main logic for `$`
-const mapScriptAsync = ({options}) => getScriptOptions(options);
-// Main logic for `$.sync`
-const mapScriptSync = ({options}) => ({...getScriptOptions(options), isSync: true});
-// `$` is like `execa` but with script-friendly options: `{stdin: 'inherit', preferLocal: true}`
-const getScriptOptions = options => ({options: {...getScriptStdinOption(options), ...options}});
-const getScriptStdinOption = ({input, inputFile, stdio}) => input === undefined && inputFile === undefined && stdio === undefined
-	? {stdin: 'inherit'}
-	: {};
-// When using $(...).pipe(...), most script-friendly options should apply to both commands.
-// However, some options (like `stdin: 'inherit'`) would create issues with piping, i.e. cannot be deep.
-const deepScriptOptions = {preferLocal: true};
-const execa = createExeca(() => ({}));
-createExeca(() => ({isSync: true}));
-createExeca(mapCommandAsync);
-createExeca(mapCommandSync);
-createExeca(mapNode);
-createExeca(mapScriptAsync, {}, deepScriptOptions, setScriptSync);
-getIpcExport();
-const MACHINE_ID_PATHS = ["/etc/machine-id", "/var/lib/dbus/machine-id"];
-async function readMachineIdentifier() {
-    for (const path of MACHINE_ID_PATHS) {
-        try {
-            const contents = await readFile(path, "utf8");
-            const trimmed = contents.trim();
-            if (trimmed.length > 0) {
-                return trimmed;
-            }
-        }
-        catch {
-            // Ignore and continue to next candidate
-        }
-    }
-    return os.hostname();
-}
-async function detectLlamaCppVersion() {
-    try {
-        const { stdout } = await execa("llama-server", ["--version"]);
-        const versionLine = stdout.trim();
-        return versionLine.length > 0 ? (versionLine.split("\n")[0] ?? null) : null;
-    }
-    catch {
-        return null;
-    }
-}
-async function detectVLLMVersion() {
-    try {
-        const { stdout } = await execa("python3", [
-            "-c",
-            "import importlib.metadata as md; print(md.version('vllm'))"
-        ]);
-        const version = stdout.trim();
-        return version.length > 0 ? version : null;
-    }
-    catch {
-        return null;
-    }
-}
-function normalizeMegabytes(value) {
-    if (typeof value !== "number" || Number.isNaN(value)) {
-        return null;
-    }
-    return Math.round(value * 1024 * 1024);
-}
-function resolveCpuValue(value) {
-    if (typeof value === "number" && Number.isFinite(value)) {
-        return value;
-    }
-    if (typeof value === "string") {
-        const parsed = Number(value);
-        return Number.isFinite(parsed) ? parsed : null;
-    }
-    return null;
-}
-async function collectMachineMetadata() {
-    const [cpuResult, memResult, osResult, graphicsResult] = await Promise.allSettled([
-        si.cpu(),
-        si.mem(),
-        si.osInfo(),
-        si.graphics()
-    ]);
-    const cpuInfo = cpuResult.status === "fulfilled" ? cpuResult.value : null;
-    const memInfo = memResult.status === "fulfilled" ? memResult.value : null;
-    const osInfo = osResult.status === "fulfilled" ? osResult.value : null;
-    const graphicsInfo = graphicsResult.status === "fulfilled"
-        ? graphicsResult.value
-        : { controllers: [] };
-    const gpus = (graphicsInfo.controllers ?? []).map((controller) => ({
-        bus: controller.bus ?? null,
-        driverVersion: controller.driverVersion ?? null,
-        memoryFreeBytes: normalizeMegabytes(controller.memoryFree ?? null),
-        memoryTotalBytes: normalizeMegabytes(controller.memoryTotal ?? null),
-        model: controller.model ?? controller.name ?? null,
-        temperatureCelsius: controller.temperatureGpu ?? null,
-        vendor: controller.vendor ?? null
-    }));
-    const machineMetadata = {
-        cpu: {
-            baseClockGHz: resolveCpuValue(cpuInfo?.speed ?? null),
-            logicalCores: cpuInfo?.cores ?? null,
-            maxClockGHz: resolveCpuValue(cpuInfo?.speedMax ?? null),
-            model: cpuInfo?.brand ?? null,
-            physicalCores: cpuInfo?.physicalCores ?? null
-        },
-        gpus,
-        hostname: os.hostname(),
-        llamaCppVersion: await detectLlamaCppVersion(),
-        machineID: await readMachineIdentifier(),
-        memory: {
-            availableBytes: memInfo?.available ?? null,
-            totalBytes: memInfo?.total ?? null
-        },
-        os: {
-            arch: osInfo?.arch ?? os.arch(),
-            platform: osInfo?.platform ?? os.platform(),
-            release: osInfo?.release ?? os.release(),
-            type: osInfo?.kernel ?? null,
-            version: osInfo?.build ?? null
-        },
-        vllmVersion: await detectVLLMVersion()
-    };
-    return machineMetadata;
-}
-/**
- * Coerce non-string tool_calls function.arguments to JSON strings.
- * Some LLM backends return arguments as parsed objects instead of
- * JSON strings, violating the OpenAI spec. This mutates in place
- * and returns true if any coercion was performed.
- */
-function coerceToolCallArguments(parsed) {
-    const choices = parsed.choices;
-    if (!Array.isArray(choices))
-        return false;
-    let modified = false;
-    for (const choice of choices) {
-        if (!choice || typeof choice !== "object")
-            continue;
-        const choiceRecord = choice;
-        const msg = choiceRecord.delta ?? choiceRecord.message;
-        if (!msg || typeof msg !== "object")
-            continue;
-        const toolCalls = msg.tool_calls;
-        if (!Array.isArray(toolCalls))
-            continue;
-        for (const tc of toolCalls) {
-            if (!tc || typeof tc !== "object")
-                continue;
-            const fn = tc.function;
-            if (!fn || typeof fn !== "object")
-                continue;
-            const fnRecord = fn;
-            if (fnRecord.arguments !== undefined && typeof fnRecord.arguments !== "string") {
-                fnRecord.arguments = JSON.stringify(fnRecord.arguments);
-                modified = true;
-            }
-        }
-    }
-    return modified;
-}
-function isEngineUsageChunk(value) {
-    if (!value || typeof value !== "object") {
-        return false;
-    }
-    const record = value;
-    if (!record.usage || typeof record.usage !== "object") {
-        return false;
-    }
-    return true;
-}
-function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
-    const startedAt = requestStartedAt ?? Date.now();
-    const passThrough = new PassThrough();
-    let responseBytes = 0;
-    let firstChunkAt = null;
-    let usage = null;
-    let buffer = "";
-    let completed = false;
-    function modifyChunkWithUsage(chunk) {
-        const text = chunk.toString("utf8");
-        const lines = text.split("\n");
-        const modifiedLines = [];
-        for (const rawLine of lines) {
-            const line = rawLine.trim();
-            if (!line.startsWith("data:")) {
-                modifiedLines.push(rawLine);
-                continue;
-            }
-            const payload = line.slice(5).trim();
-            if (!payload || payload === "[DONE]") {
-                modifiedLines.push(rawLine);
-                continue;
-            }
-            try {
-                const parsed = JSON.parse(payload);
-                let modified = false;
-                if (coerceToolCallArguments(parsed)) {
-                    modified = true;
-                }
-                if (parsed.usage) {
-                    const usageChunk = parsed.usage;
-                    const effectiveContext = getEffectiveContextLength({
-                        contextLength,
-                        engine,
-                        parallelism
-                    });
-                    if (usageChunk.context_usage === undefined &&
-                        usageChunk.prompt_tokens !== undefined &&
-                        effectiveContext !== null) {
-                        usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
-                        modified = true;
-                    }
-                }
-                if (modified) {
-                    modifiedLines.push("data: " + JSON.stringify(parsed));
-                    continue;
-                }
-            }
-            catch (_error) {
-                // Ignore malformed chunks
-            }
-            modifiedLines.push(rawLine);
-        }
-        return Buffer.from(modifiedLines.join("\n"), "utf8");
-    }
-    function parseUsageFromBuffer() {
-        const lines = buffer.split("\n");
-        buffer = lines.pop() ?? "";
-        for (const rawLine of lines) {
-            const line = rawLine.trim();
-            if (!line.startsWith("data:")) {
-                continue;
-            }
-            const payload = line.slice(5).trim();
-            if (!payload || payload === "[DONE]") {
-                continue;
-            }
-            try {
-                const parsed = JSON.parse(payload);
-                if (isEngineUsageChunk(parsed)) {
-                    const completionTokens = parsed.usage?.completion_tokens ?? null;
-                    const promptTokens = parsed.usage?.prompt_tokens ?? null;
-                    const totalTokens = parsed.usage?.total_tokens ?? null;
-                    let contextUsage = parsed.usage?.context_usage ?? null;
-                    const effectiveContextForUsage = getEffectiveContextLength({
-                        contextLength,
-                        engine,
-                        parallelism
-                    });
-                    if (contextUsage === null &&
-                        promptTokens !== null &&
-                        effectiveContextForUsage !== null) {
-                        contextUsage = promptTokens / effectiveContextForUsage;
-                    }
-                    usage = {
-                        completionTokens,
-                        contextUsage,
-                        promptTokens,
-                        totalTokens
-                    };
-                }
-            }
-            catch (_error) {
-                // Ignore malformed chunks
-            }
-        }
-    }
-    function finalize(error) {
-        if (completed) {
-            return;
-        }
-        completed = true;
-        if (onComplete) {
-            const completion = onComplete({
-                durationMs: Math.max(0, Date.now() - startedAt),
-                error,
-                requestBodyBytes,
-                responseBytes,
-                timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
-                usage
-            });
-            if (completion && typeof completion.catch === "function") {
-                completion.catch(error => {
-                    logger.error("Engine metrics completion failed", {
-                        error: asError(error),
-                        requestUrl: requestPath
-                    });
-                });
-            }
-        }
-    }
-    body.on("data", (chunk) => {
-        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
-        if (firstChunkAt === null) {
-            firstChunkAt = Date.now();
-        }
-        responseBytes += chunkBuffer.length;
-        buffer += chunkBuffer.toString("utf8");
-        parseUsageFromBuffer();
-        passThrough.write(modifyChunkWithUsage(chunkBuffer));
-    });
-    body.once("error", err => {
-        logEngineMetrics({
-            agentEngineType,
-            error: err,
-            level: "error",
-            logger,
-            requestBodyBytes,
-            requestPath,
-            responseBytes,
-            usage
-        });
-        finalize(err);
-        passThrough.destroy(err);
-    });
-    body.once("end", () => {
-        parseUsageFromBuffer();
-        logEngineMetrics({
-            agentEngineType,
-            level: "info",
-            logger,
-            requestBodyBytes,
-            requestPath,
-            responseBytes,
-            usage
-        });
-        finalize(null);
-        passThrough.end();
-    });
-    body.once("close", () => {
-        if (completed) {
-            if (!passThrough.writableEnded) {
-                passThrough.end();
-            }
-            return;
-        }
-        const closeError = new Error("Engine response stream closed before completion");
-        logEngineMetrics({
-            agentEngineType,
-            error: closeError,
-            level: "error",
-            logger,
-            requestBodyBytes,
-            requestPath,
-            responseBytes,
-            usage
-        });
-        finalize(closeError);
-        if (!passThrough.writableEnded) {
-            passThrough.end();
-        }
-    });
-    return {
-        stream: passThrough
-    };
-}
-function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
-    const maxUsageCaptureBytes = 1024 * 1024;
-    const startedAt = requestStartedAt ?? Date.now();
-    const passThrough = new PassThrough();
-    let responseBytes = 0;
-    let firstChunkAt = null;
-    let usage = null;
-    const usageChunks = [];
-    let usageBytes = 0;
-    let usageCaptureEnabled = true;
-    let completed = false;
-    function finalize(error) {
-        if (completed) {
-            return;
-        }
-        completed = true;
-        if (onComplete) {
-            const completion = onComplete({
-                durationMs: Math.max(0, Date.now() - startedAt),
-                error,
-                requestBodyBytes,
-                responseBytes,
-                timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
-                usage
-            });
-            if (completion && typeof completion.catch === "function") {
-                completion.catch(error => {
-                    logger.error("Engine metrics completion failed", {
-                        error: asError(error),
-                        requestUrl: requestPath
-                    });
-                });
-            }
-        }
-    }
-    body.on("data", (chunk) => {
-        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
-        if (firstChunkAt === null) {
-            firstChunkAt = Date.now();
-        }
-        responseBytes += chunkBuffer.length;
-        if (usageCaptureEnabled) {
-            const nextSize = usageBytes + chunkBuffer.length;
-            if (nextSize <= maxUsageCaptureBytes) {
-                usageChunks.push(chunkBuffer);
-                usageBytes = nextSize;
-            }
-            else {
-                usageCaptureEnabled = false;
-                usageChunks.length = 0;
-            }
-        }
-        passThrough.write(chunkBuffer);
-    });
-    body.once("error", err => {
-        logEngineMetrics({
-            agentEngineType,
-            error: err,
-            level: "error",
-            logger,
-            requestBodyBytes,
-            requestPath,
-            responseBytes,
-            usage
-        });
-        finalize(err);
-        passThrough.destroy(err);
-    });
-    body.once("end", () => {
-        if (usageCaptureEnabled) {
-            try {
-                const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
-                if (parsed.usage) {
-                    const usageChunk = parsed.usage;
-                    const completionTokens = usageChunk.completion_tokens ?? null;
-                    const promptTokens = usageChunk.prompt_tokens ?? null;
-                    const totalTokens = usageChunk.total_tokens ?? null;
-                    let contextUsage = usageChunk.context_usage ?? null;
-                    const effectiveContext = getEffectiveContextLength({
-                        contextLength,
-                        engine,
-                        parallelism
-                    });
-                    if (contextUsage === null &&
-                        promptTokens !== null &&
-                        effectiveContext !== null) {
-                        contextUsage = promptTokens / effectiveContext;
-                    }
-                    usage = {
-                        completionTokens,
-                        contextUsage,
-                        promptTokens,
-                        totalTokens
-                    };
-                }
-            }
-            catch (error) {
-                logger.error("Failed to parse engine response body", {
-                    error: asError(error),
-                    requestUrl: requestPath
-                });
-            }
-        }
-        logEngineMetrics({
-            agentEngineType,
-            level: "info",
-            logger,
-            requestBodyBytes,
-            requestPath,
-            responseBytes,
-            usage
-        });
-        finalize(null);
-        passThrough.end();
-    });
-    body.once("close", () => {
-        if (completed) {
-            if (!passThrough.writableEnded) {
-                passThrough.end();
+		options,
+		isSync,
+	};
+};
+// Main logic for `execaCommand()`
+const mapCommandAsync = ({file, commandArguments}) => parseCommand(file, commandArguments);
+// Main logic for `execaCommandSync()`
+const mapCommandSync = ({file, commandArguments}) => ({...parseCommand(file, commandArguments), isSync: true});
+// Convert `execaCommand(command)` into `execa(file, ...commandArguments)`
+const parseCommand = (command, unusedArguments) => {
+	if (unusedArguments.length > 0) {
+		throw new TypeError(`The command and its arguments must be passed as a single string: ${command} ${unusedArguments}.`);
+	}
+	const [file, ...commandArguments] = parseCommandString(command);
+	return {file, commandArguments};
+};
+// Convert `command` string into an array of file or arguments to pass to $`${...fileOrCommandArguments}`
+const parseCommandString = command => {
+	if (typeof command !== 'string') {
+		throw new TypeError(`The command must be a string: ${String(command)}.`);
+	}
+	const trimmedCommand = command.trim();
+	if (trimmedCommand === '') {
+		return [];
+	}
+	const tokens = [];
+	for (const token of trimmedCommand.split(SPACES_REGEXP)) {
+		// Allow spaces to be escaped by a backslash if not meant as a delimiter
+		const previousToken = tokens.at(-1);
+		if (previousToken && previousToken.endsWith('\\')) {
+			// Merge previous token with current one
+			tokens[tokens.length - 1] = `${previousToken.slice(0, -1)} ${token}`;
+		} else {
+			tokens.push(token);
+		}
+	}
+	return tokens;
+};
+const SPACES_REGEXP = / +/g;
+// Sets `$.sync` and `$.s`
+const setScriptSync = (boundExeca, createNested, boundOptions) => {
+	boundExeca.sync = createNested(mapScriptSync, boundOptions);
+	boundExeca.s = boundExeca.sync;
+};
+// Main logic for `$`
+const mapScriptAsync = ({options}) => getScriptOptions(options);
+// Main logic for `$.sync`
+const mapScriptSync = ({options}) => ({...getScriptOptions(options), isSync: true});
+// `$` is like `execa` but with script-friendly options: `{stdin: 'inherit', preferLocal: true}`
+const getScriptOptions = options => ({options: {...getScriptStdinOption(options), ...options}});
+const getScriptStdinOption = ({input, inputFile, stdio}) => input === undefined && inputFile === undefined && stdio === undefined
+	? {stdin: 'inherit'}
+	: {};
+// When using $(...).pipe(...), most script-friendly options should apply to both commands.
+// However, some options (like `stdin: 'inherit'`) would create issues with piping, i.e. cannot be deep.
+const deepScriptOptions = {preferLocal: true};
+const execa = createExeca(() => ({}));
+createExeca(() => ({isSync: true}));
+createExeca(mapCommandAsync);
+createExeca(mapCommandSync);
+createExeca(mapNode);
+createExeca(mapScriptAsync, {}, deepScriptOptions, setScriptSync);
+getIpcExport();
+const MACHINE_ID_PATHS = ["/etc/machine-id", "/var/lib/dbus/machine-id"];
+async function readMachineIdentifier() {
+    for (const path of MACHINE_ID_PATHS) {
+        try {
+            const contents = await readFile(path, "utf8");
+            const trimmed = contents.trim();
+            if (trimmed.length > 0) {
+                return trimmed;
             }
-            return;
         }
-        const closeError = new Error("Engine response stream closed before completion");
-        logEngineMetrics({
-            agentEngineType,
-            error: closeError,
-            level: "error",
-            logger,
-            requestBodyBytes,
-            requestPath,
-            responseBytes,
-            usage
-        });
-        finalize(closeError);
-        if (!passThrough.writableEnded) {
-            passThrough.end();
+        catch {
+            // Ignore and continue to next candidate
         }
-    });
-    return {
-        stream: passThrough
-    };
-}
-function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
-    const metricsMessage = [
-        "LLM engine stream metrics",
-        `path=${requestPath}`,
-        `bytesTo=${requestBodyBytes}`,
-        `bytesFrom=${responseBytes}`,
-        `promptTokens=${usage?.promptTokens ?? "n/a"}`,
-        `completionTokens=${usage?.completionTokens ?? "n/a"}`,
-        `totalTokens=${usage?.totalTokens ?? "n/a"}`,
-        `contextUsage=${usage?.contextUsage ?? "n/a"}`
-    ].join(" ");
-    const attributes = {
-        agentEngineType,
-        requestUrl: requestPath
-    };
-    if (error) {
-        attributes.error = error;
     }
-    logger[level](metricsMessage, attributes);
-}
-function isPlainObject(value) {
-    return typeof value === "object" && value !== null && !Array.isArray(value);
+    return os.hostname();
 }
-function serializeRequestBody(body) {
-    if (!isPlainObject(body)) {
-        const payload = typeof body === "string" ? body : JSON.stringify(body);
-        return {
-            bytes: Buffer.byteLength(payload, "utf8"),
-            payload
-        };
+async function detectLlamaCppVersion() {
+    try {
+        const { stdout } = await execa("llama-server", ["--version"]);
+        const versionLine = stdout.trim();
+        return versionLine.length > 0 ? (versionLine.split("\n")[0] ?? null) : null;
+    }
+    catch {
+        return null;
     }
-    const requestPayload = { ...body };
-    const streamOptions = requestPayload.stream_options;
-    const normalizedStreamOptions = isPlainObject(streamOptions)
-        ? { ...streamOptions }
-        : {};
-    normalizedStreamOptions.include_usage = true;
-    requestPayload.stream_options = normalizedStreamOptions;
-    const payload = JSON.stringify(requestPayload);
-    return {
-        bytes: Buffer.byteLength(payload, "utf8"),
-        payload
-    };
 }
-function calculateTokensPerSecond({ durationMs, totalTokens }) {
-    if (durationMs <= 0) {
-        return 0;
+async function detectVLLMVersion() {
+    try {
+        const { stdout } = await execa("python3", [
+            "-c",
+            "import importlib.metadata as md; print(md.version('vllm'))"
+        ]);
+        const version = stdout.trim();
+        return version.length > 0 ? version : null;
     }
-    const tokensPerSecond = totalTokens / (durationMs / 1000);
-    if (!Number.isFinite(tokensPerSecond) || tokensPerSecond <= 0) {
-        return 0;
+    catch {
+        return null;
     }
-    return Math.round(tokensPerSecond);
 }
-async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
-    function normalizeTokenCount(value) {
-        if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
-            return value;
-        }
-        return 0;
-    }
-    function reportMetricsSafe(payload) {
-        reportMetrics(payload).catch(error => {
-            logger.warn("Failed to upload LLM prompt metrics", {
-                error: asError(error),
-                requestUrl: path
-            });
-        });
+function normalizeMegabytes(value) {
+    if (typeof value !== "number" || Number.isNaN(value)) {
+        return null;
     }
-    const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
-    const requestStartedAt = Date.now();
-    const requestBody = JSON.parse(serializedBody);
-    const streamRequested = requestBody.stream === true;
-    const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
-        const completionTokens = normalizeTokenCount(usage?.completionTokens);
-        const promptTokens = normalizeTokenCount(usage?.promptTokens);
-        const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
-        const latencyMs = Math.max(0, durationMs);
-        reportMetricsSafe({
-            bytes: requestBodyBytes + responseBytes,
-            completionTokens,
-            engine: configuration.agentEngineType,
-            endpointId: null,
-            latencyMs,
-            modelId: modelID,
-            promptTokens,
-            requestBytes: requestBodyBytes,
-            requestId: null,
-            requestMethod: "POST",
-            requestPath: path,
-            responseBytes,
-            successful: !error,
-            timeToFirstTokenMs,
-            tokensPerSecond: calculateTokensPerSecond({
-                durationMs: latencyMs,
-                totalTokens
-            }),
-            totalTokens
-        });
-    };
-    const response = await modelManager
-        .fetchOpenAI(path, {
-        body: serializedBody,
-        headers: {
-            "Content-Type": "application/json"
-        },
-        method: "POST"
-    })
-        .catch(error => {
-        logEngineMetrics({
-            agentEngineType: configuration.agentEngineType,
-            error: error,
-            level: "error",
-            logger,
-            requestBodyBytes,
-            requestPath: path,
-            responseBytes: 0,
-            usage: null
-        });
-        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
-        reportMetricsSafe({
-            bytes: requestBodyBytes,
-            completionTokens: 0,
-            engine: configuration.agentEngineType,
-            endpointId: null,
-            latencyMs,
-            modelId: modelID,
-            promptTokens: 0,
-            requestBytes: requestBodyBytes,
-            requestId: null,
-            requestMethod: "POST",
-            requestPath: path,
-            responseBytes: 0,
-            successful: false,
-            timeToFirstTokenMs: null,
-            tokensPerSecond: 0,
-            totalTokens: 0
-        });
-        throw error;
-    });
-    const responseStatusText = response.statusText ?? "Upstream request failed";
-    if (!response.ok) {
-        const responseBody = await response.text().catch(() => null);
-        const responseError = new Error(responseBody
-            ? `Upstream error response: ${responseBody}`
-            : "Upstream error response: empty body");
-        logger.error("LLM engine request failed", {
-            error: responseError,
-            requestUrl: path,
-            statusCode: response.status,
-            statusText: responseStatusText,
-            responseBody: responseBody ?? undefined
-        });
-        if (!response.body) {
-            return {
-                status: response.status,
-                statusText: responseStatusText
-            };
-        }
+    return Math.round(value * 1024 * 1024);
+}
+/**
+ * Coerces a CPU clock reading into a finite number.
+ *
+ * @param {unknown} value - Reading to normalise. Finite numbers pass through
+ *   unchanged; strings are trimmed and parsed with Number().
+ * @returns {number | null} The finite numeric value, or null when the input is
+ *   blank, non-numeric, non-finite, or neither a number nor a string.
+ */
+function resolveCpuValue(value) {
+    if (typeof value === "number" && Number.isFinite(value)) {
+        return value;
     }
-    if (!response.body) {
-        logEngineMetrics({
-            agentEngineType: configuration.agentEngineType,
-            level: response.ok ? "info" : "error",
-            logger,
-            requestBodyBytes,
-            requestPath: path,
-            responseBytes: 0,
-            usage: null
-        });
-        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
-        reportMetricsSafe({
-            bytes: requestBodyBytes,
-            completionTokens: 0,
-            engine: configuration.agentEngineType,
-            endpointId: null,
-            latencyMs,
-            modelId: modelID,
-            promptTokens: 0,
-            requestBytes: requestBodyBytes,
-            requestId: null,
-            requestMethod: "POST",
-            requestPath: path,
-            responseBytes: 0,
-            successful: false,
-            timeToFirstTokenMs: null,
-            tokensPerSecond: 0,
-            totalTokens: 0
-        });
-        return {
-            status: response.status,
-            statusText: responseStatusText
-        };
+    if (typeof value === "string") {
+        // Number("") and Number("   ") evaluate to 0, which would turn a
+        // blank reading into a real-looking 0 value; treat it as unknown.
+        const trimmed = value.trim();
+        if (trimmed === "") {
+            return null;
+        }
+        const parsed = Number(trimmed);
+        return Number.isFinite(parsed) ? parsed : null;
     }
-    const monitoredResponse = streamRequested
-        ? monitorEngineResponseStream({
-            agentEngineType: configuration.agentEngineType,
-            body: Readable.fromWeb(response.body),
-            contextLength: modelManager.contextLength,
-            engine: configuration.agentEngineType,
-            logger,
-            onComplete: onMonitoringComplete,
-            parallelism: modelManager.parallelism,
-            requestBodyBytes,
-            requestPath: path,
-            requestStartedAt
-        })
-        : monitorEngineResponseSingle({
-            agentEngineType: configuration.agentEngineType,
-            body: Readable.fromWeb(response.body),
-            contextLength: modelManager.contextLength,
-            engine: configuration.agentEngineType,
-            logger,
-            onComplete: onMonitoringComplete,
-            parallelism: modelManager.parallelism,
-            requestBodyBytes,
-            requestPath: path,
-            requestStartedAt
-        });
-    return {
-        body: monitoredResponse.stream,
-        headers: Object.fromEntries(response.headers.entries()),
-        status: response.status
+    return null;
+}
+/**
+ * Builds a snapshot describing the host: CPU, GPUs, memory, OS, hostname,
+ * machine identifier and the detected llama.cpp / vLLM versions.
+ *
+ * The four `si` probes are awaited with Promise.allSettled, so a failing probe
+ * does not reject the whole call: its fields fall back to null, to an empty
+ * GPU list, or (for arch/platform/release) to the values from the `os` module.
+ * A rejection from the version or machine-identifier lookups is not caught
+ * here and rejects the returned promise.
+ *
+ * @returns {Promise<object>} Metadata object with the keys `cpu`, `gpus`,
+ *   `hostname`, `llamaCppVersion`, `machineID`, `memory`, `os`, `vllmVersion`.
+ */
+async function collectMachineMetadata() {
+    const [cpuResult, memResult, osResult, graphicsResult] = await Promise.allSettled([
+        si.cpu(),
+        si.mem(),
+        si.osInfo(),
+        si.graphics()
+    ]);
+    // These lookups do not consume each other's results, so run them together
+    // instead of awaiting them one after another.
+    const [llamaCppVersion, machineID, vllmVersion] = await Promise.all([
+        detectLlamaCppVersion(),
+        readMachineIdentifier(),
+        detectVLLMVersion()
+    ]);
+    const cpuInfo = cpuResult.status === "fulfilled" ? cpuResult.value : null;
+    const memInfo = memResult.status === "fulfilled" ? memResult.value : null;
+    const osInfo = osResult.status === "fulfilled" ? osResult.value : null;
+    const graphicsInfo = graphicsResult.status === "fulfilled"
+        ? graphicsResult.value
+        : { controllers: [] };
+    // GPU memory figures go through normalizeMegabytes before being exposed
+    // under the *Bytes keys.
+    const gpus = (graphicsInfo.controllers ?? []).map((controller) => ({
+        bus: controller.bus ?? null,
+        driverVersion: controller.driverVersion ?? null,
+        memoryFreeBytes: normalizeMegabytes(controller.memoryFree ?? null),
+        memoryTotalBytes: normalizeMegabytes(controller.memoryTotal ?? null),
+        model: controller.model ?? controller.name ?? null,
+        temperatureCelsius: controller.temperatureGpu ?? null,
+        vendor: controller.vendor ?? null
+    }));
+    const machineMetadata = {
+        cpu: {
+            baseClockGHz: resolveCpuValue(cpuInfo?.speed ?? null),
+            logicalCores: cpuInfo?.cores ?? null,
+            maxClockGHz: resolveCpuValue(cpuInfo?.speedMax ?? null),
+            model: cpuInfo?.brand ?? null,
+            physicalCores: cpuInfo?.physicalCores ?? null
+        },
+        gpus,
+        hostname: os.hostname(),
+        llamaCppVersion,
+        machineID,
+        memory: {
+            availableBytes: memInfo?.available ?? null,
+            totalBytes: memInfo?.total ?? null
+        },
+        os: {
+            arch: osInfo?.arch ?? os.arch(),
+            platform: osInfo?.platform ?? os.platform(),
+            release: osInfo?.release ?? os.release(),
+            type: osInfo?.kernel ?? null,
+            version: osInfo?.build ?? null
+        },
+        vllmVersion
     };
+    return machineMetadata;
 }
 async function createApplication({ abortController, apiClient, configuration, logger }) {
-    // Fetch configuration
     logger.info("Fetching conduit configuration");
     let conduitConfiguration = await apiClient.getConduitConfiguration();
     logger.info("Received configuration", {
@@ -118986,7 +119190,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
     let modelFileName = getConduitModelFileName(conduitConfiguration);
     let modelName = getConduitModelName(conduitConfiguration);
     const startup = Date.now();
-    // Initialise model manager
     let modelManager = new ModelManager({
         contextLength: conduitConfiguration.contextLength ?? null,
         engine: configuration.agentEngineType,
@@ -119027,6 +119230,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
         });
         conduitStateReportManager.reportStateChange();
     };
+    let stopRequestedByControl = false;
     const attachLifecycleListeners = () => {
         modelManager.on("engineError", err => {
             logger.error("LLM engine error", {
@@ -119035,6 +119239,9 @@ async function createApplication({ abortController, apiClient, configuration, lo
             stopRequestedByControl = false;
             setErrorState({ error: err.message });
         });
+        modelManager.on("engineReady", () => {
+            setOnlineState();
+        });
         modelManager.on("engineTerminated", () => {
             if (stopRequestedByControl) {
                 stopRequestedByControl = false;
@@ -119046,9 +119253,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
             });
             conduitStateReportManager.reportStateChange();
         });
-        modelManager.on("engineReady", () => {
-            setOnlineState();
-        });
     };
     attachLifecycleListeners();
     let lastDownloadKey = "";
@@ -119074,7 +119278,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
         });
         conduitStateReportManager.reportDownloadProgress();
     };
-    let stopRequestedByControl = false;
     async function startEngine() {
         logger.info("Engine start requested");
         conduitStateManager.setState({
@@ -119106,31 +119309,49 @@ async function createApplication({ abortController, apiClient, configuration, lo
         });
         await conduitStateReportManager.reportNow();
         logger.info("Stopping engine process");
-        await modelManager.stop();
+        try {
+            await modelManager.stop();
+        }
+        catch (error) {
+            stopRequestedByControl = false;
+            throw error;
+        }
         logger.info("Engine process stopped");
         setIdleState({ reason });
     }
-    modelManager.on("engineError", err => {
-        logger.error("LLM engine error", {
-            error: err
-        });
-        stopRequestedByControl = false;
-        setErrorState({ error: err.message });
-    });
-    modelManager.on("engineTerminated", () => {
-        if (stopRequestedByControl) {
-            stopRequestedByControl = false;
-            setIdleState({ reason: "Remote shutdown requested" });
-            return;
+    async function cycleEngine() {
+        const sourceState = conduitStateManager.getState().state;
+        if (sourceState !== "idle") {
+            await stopEngine({
+                reason: "Remote cycle requested"
+            });
         }
-        conduitStateManager.setState({
-            state: "offline"
+        logger.info("Fetching new configuration");
+        const newConduitConfiguration = await apiClient.getConduitConfiguration();
+        logger.info("Received new configuration", {
+            modelID: newConduitConfiguration.targetModel.id
         });
-        conduitStateReportManager.reportStateChange();
-    });
-    modelManager.on("engineReady", () => {
-        setOnlineState();
-    });
+        logger.info("Updating configuration and model manager");
+        conduitConfiguration = newConduitConfiguration;
+        modelFileName = getConduitModelFileName(conduitConfiguration);
+        modelName = getConduitModelName(conduitConfiguration);
+        modelManager = new ModelManager({
+            contextLength: conduitConfiguration.contextLength ?? null,
+            engine: configuration.agentEngineType,
+            logger,
+            model: conduitConfiguration.targetModel,
+            parallelism: conduitConfiguration.parallelism ?? null,
+            root: configuration.rootDirectory
+        });
+        attachLifecycleListeners();
+        if (sourceState === "idle") {
+            logger.info("Restarting engine from idle");
+        }
+        else {
+            logger.info("Restarting engine");
+        }
+        await startEngine();
+    }
     if (configuration.startMode === "idle") {
         setIdleState({ reason: "Startup mode is idle" });
     }
@@ -119143,165 +119364,47 @@ async function createApplication({ abortController, apiClient, configuration, lo
             setErrorState({ error: parsedError.message });
         });
     }
-    // #region API routes
     const app = express();
     const publicRouter = createRouter();
     app.use(publicRouter);
-    publicRouter.get("/health", (_req, res) => {
-        res.status(200).send("OK");
-    });
+    publicRouter.get("/health", createHealthHandler());
     implementAPIReference({
         api: {
+            "/conduit/engine/cycle": {
+                POST: createPostCycleEngineHandler({
+                    cycleEngine,
+                    conduitStateManager,
+                    getModelManager: () => modelManager,
+                    logger,
+                    setErrorState,
+                    startEngine,
+                    stopEngine,
+                    stopRequestedByControl: () => stopRequestedByControl
+                })
+            },
             "/conduit/engine/start": {
-                POST: async () => {
-                    if (conduitStateManager.getState().state !== "idle") {
-                        return {
-                            status: 409,
-                            statusText: "Engine can only be started from idle state"
-                        };
-                    }
-                    if (!modelManager.canStart) {
-                        return {
-                            status: 409,
-                            statusText: `Engine cannot be started from current state: ${modelManager.state}`
-                        };
-                    }
-                    try {
-                        logger.info("Received remote engine start request");
-                        await startEngine();
-                        return {
-                            body: {
-                                acknowledged: true
-                            },
-                            status: 202
-                        };
-                    }
-                    catch (error) {
-                        if (stopRequestedByControl || modelManager.state === "stopped") {
-                            return {
-                                status: 409,
-                                statusText: "Engine start was interrupted"
-                            };
-                        }
-                        const parsedError = asError(error);
-                        setErrorState({ error: parsedError.message });
-                        return {
-                            status: 500,
-                            statusText: parsedError.message
-                        };
-                    }
-                }
+                POST: createPostStartEngineHandler({
+                    cycleEngine,
+                    conduitStateManager,
+                    getModelManager: () => modelManager,
+                    logger,
+                    setErrorState,
+                    startEngine,
+                    stopEngine,
+                    stopRequestedByControl: () => stopRequestedByControl
+                })
             },
             "/conduit/engine/stop": {
-                POST: async () => {
-                    const sourceState = conduitStateManager.getState().state;
-                    if (sourceState !== "bootingEngine" && sourceState !== "online") {
-                        return {
-                            status: 409,
-                            statusText: "Engine can only be stopped while booting or online"
-                        };
-                    }
-                    if (!modelManager.canStop) {
-                        return {
-                            status: 409,
-                            statusText: `Engine cannot be stopped from current state: ${modelManager.state}`
-                        };
-                    }
-                    try {
-                        logger.info("Received remote engine stop request");
-                        stopEngine({
-                            reason: "Remote shutdown requested"
-                        }).catch(error => {
-                            const parsedError = asError(error);
-                            logger.error("Remote engine stop request failed", {
-                                error: parsedError
-                            });
-                            setErrorState({ error: parsedError.message });
-                        });
-                        return {
-                            body: {
-                                acknowledged: true
-                            },
-                            status: 202
-                        };
-                    }
-                    catch (error) {
-                        const parsedError = asError(error);
-                        setErrorState({ error: parsedError.message });
-                        return {
-                            status: 500,
-                            statusText: parsedError.message
-                        };
-                    }
-                }
-            },
-            "/conduit/engine/cycle": {
-                POST: async ({ body }) => {
-                    const sourceState = conduitStateManager.getState().state;
-                    if (sourceState !== "bootingEngine" &&
-                        sourceState !== "online" &&
-                        sourceState !== "idle") {
-                        return {
-                            status: 409,
-                            statusText: "Engine can only be cycled while booting, online, or idle"
-                        };
-                    }
-                    if (sourceState !== "idle" && !modelManager.canStop) {
-                        return {
-                            status: 409,
-                            statusText: `Engine cannot be cycled from current state: ${modelManager.state}`
-                        };
-                    }
-                    try {
-                        logger.info("Received remote engine cycle request");
-                        const sourceState = conduitStateManager.getState().state;
-                        if (sourceState !== "idle") {
-                            await stopEngine({
-                                reason: "Remote cycle requested"
-                            });
-                        }
-                        logger.info("Fetching new configuration");
-                        const newConduitConfiguration = await apiClient.getConduitConfiguration();
-                        logger.info("Received new configuration", {
-                            modelID: newConduitConfiguration.targetModel.id
-                        });
-                        logger.info("Updating configuration and model manager");
-                        conduitConfiguration = newConduitConfiguration;
-                        modelFileName = getConduitModelFileName(conduitConfiguration);
-                        modelName = getConduitModelName(conduitConfiguration);
-                        modelManager = new ModelManager({
-                            contextLength: conduitConfiguration.contextLength ?? null,
-                            engine: configuration.agentEngineType,
-                            logger,
-                            model: conduitConfiguration.targetModel,
-                            parallelism: conduitConfiguration.parallelism ?? null,
-                            root: configuration.rootDirectory
-                        });
-                        attachLifecycleListeners();
-                        if (sourceState === "idle") {
-                            logger.info("Restarting engine from idle");
-                            await startEngine();
-                        }
-                        else {
-                            logger.info("Restarting engine");
-                            await startEngine();
-                        }
-                        return {
-                            body: {
-                                acknowledged: true
-                            },
-                            status: 202
-                        };
-                    }
-                    catch (error) {
-                        const parsedError = asError(error);
-                        setErrorState({ error: parsedError.message });
-                        return {
-                            status: 500,
-                            statusText: parsedError.message
-                        };
-                    }
-                }
+                POST: createPostStopEngineHandler({
+                    cycleEngine,
+                    conduitStateManager,
+                    getModelManager: () => modelManager,
+                    logger,
+                    setErrorState,
+                    startEngine,
+                    stopEngine,
+                    stopRequestedByControl: () => stopRequestedByControl
+                })
             }
         },
         logger,
@@ -119311,56 +119414,34 @@ async function createApplication({ abortController, apiClient, configuration, lo
     implementAPIReference({
         api: {
             "/v1/chat/completions": {
-                POST: async ({ body }) => {
-                    return proxyOpenAIStreamingRoute({
-                        body,
-                        configuration,
-                        logger,
-                        modelID: conduitConfiguration.targetModel.id,
-                        modelManager,
-                        path: "/v1/chat/completions",
-                        reportMetrics: apiClient.reportPromptMetrics
-                    });
-                }
+                POST: createPostChatCompletionsHandler({
+                    apiClient,
+                    configuration,
+                    getModelID: () => conduitConfiguration.targetModel.id,
+                    getModelManager: () => modelManager,
+                    logger,
+                    startup
+                })
             },
             "/v1/completions": {
-                POST: async ({ body }) => {
-                    return proxyOpenAIStreamingRoute({
-                        body,
-                        configuration,
-                        logger,
-                        modelID: conduitConfiguration.targetModel.id,
-                        modelManager,
-                        path: "/v1/completions",
-                        reportMetrics: apiClient.reportPromptMetrics
-                    });
-                }
+                POST: createPostCompletionsHandler({
+                    apiClient,
+                    configuration,
+                    getModelID: () => conduitConfiguration.targetModel.id,
+                    getModelManager: () => modelManager,
+                    logger,
+                    startup
+                })
             },
             "/v1/models": {
-                GET: async () => {
-                    const effectiveContextLength = getEffectiveContextLength({
-                        contextLength: modelManager.contextLength,
-                        engine: configuration.agentEngineType,
-                        parallelism: modelManager.parallelism
-                    });
-                    return {
-                        body: {
-                            object: "list",
-                            data: [
-                                {
-                                    id: conduitConfiguration.targetModel.id,
-                                    object: "model",
-                                    created: startup / 1000,
-                                    owned_by: "infersec",
-                                    limit: {
-                                        context: effectiveContextLength
-                                    }
-                                }
-                            ]
-                        },
-                        status: 200
-                    };
-                }
+                GET: createGetModelsHandler({
+                    apiClient,
+                    configuration,
+                    getModelID: () => conduitConfiguration.targetModel.id,
+                    getModelManager: () => modelManager,
+                    logger,
+                    startup
+                })
             }
         },
         logger,
@@ -119440,7 +119521,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
         app,
         shutdown
     };
-    // #endregion
 }
 function getConduitModelFileName(configuration) {
     const { source } = configuration.targetModel;