npm - @infersec/conduit - Versions diffs - 1.67.0 → 1.68.0 - Mend

@infersec/conduit 1.67.0 → 1.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/ConduitConnection.d.ts +1 -2
package/dist/cli.js +166 -143
package/dist/configuration.d.ts +1 -3
package/dist/modelManagement/ModelManager.d.ts +6 -6
package/dist/requestHandlers/createConduitAnthropicAPIReferenceHandlers.d.ts +4 -2
package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts +6 -2
package/dist/sse/handler.d.ts +3 -2
package/dist/utils/anthropic.d.ts +3 -2
package/dist/utils/engineMetrics.d.ts +5 -6
package/dist/utils/openai.d.ts +3 -2
package/package.json +1 -1

package/dist/ConduitConnection.d.ts (changed)

@@ -1,9 +1,8 @@
-import type { LLMEngine, ULID } from "@infersec/definitions";
+import type { ULID } from "@infersec/definitions";
 import type { Logger } from "@infersec/logger";
 export interface ConduitConnectionOptions {
     apiKey: string;
     apiURL: string;
-    engine: LLMEngine;
     enginePort: number;
     logger: Logger;
     port: number;

package/dist/cli.js (changed)

@@ -4344,18 +4344,15 @@ function ulid$3(seedTime, prng) {
     return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
 }
-/**
- * Calculates the effective context length per slot, accounting for
- * parallelism when using llama.cpp. For llama.cpp, the total context
- * window is divided across parallel slots; for other engines, the
- * full context length is used.
- */
-function getEffectiveContextLength({ contextLength, engine, parallelism }) {
+function getEffectiveContextLength({ contextLength, engineConfig, engineType }) {
     if (contextLength === null || contextLength <= 0) {
         return null;
     }
-    if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
-        return contextLength / parallelism;
+    if (engineType === "llama.cpp" && engineConfig) {
+        const parallelism = engineConfig?.parallelism;
+        if (typeof parallelism === "number" && parallelism > 0) {
+            return contextLength / parallelism;
+        }
     }
     return contextLength;
 }
@@ -19893,6 +19890,28 @@ object$1({
 });
 const LLMEngineSchema = _enum$1(["llama.cpp", "vllm"]);
+const LlamacppEngineConfigSchema = object$1({
+    batchSize: number$1().int().positive().nullable().optional(),
+    cacheTypeK: string$3().nullable().optional(),
+    cacheTypeV: string$3().nullable().optional(),
+    extraArgs: array$2(string$3()).optional(),
+    flashAttn: boolean$1().optional(),
+    gpuLayers: number$1().int().min(0).optional(),
+    mainGpu: number$1().int().min(0).nullable().optional(),
+    parallelism: number$1().int().positive().optional(),
+    tensorSplit: string$3().nullable().optional(),
+    ubatchSize: number$1().int().positive().nullable().optional()
+});
+const VLLMEngineConfigSchema = object$1({
+    device: string$3().optional(),
+    dtype: string$3().optional(),
+    extraArgs: array$2(string$3()).optional(),
+    tensorParallelSize: number$1().int().positive().optional()
+});
+const EngineConfigSchema = discriminatedUnion("type", [
+    object$1({ config: LlamacppEngineConfigSchema, type: literal("llama.cpp") }),
+    object$1({ config: VLLMEngineConfigSchema, type: literal("vllm") })
+]);
 const LLMModelFormatSchema = _enum$1([
     // VLLM
     "safetensors",
@@ -20045,8 +20064,8 @@ const ConduitState = z.preprocess(value => {
 const InferenceAgentConfigurationSchema = object$1({
     contextLength: number$1().int().positive().nullable(),
+    engineConfig: EngineConfigSchema.nullable(),
     inferenceSourceID: ULIDSchema,
-    parallelism: number$1().int().positive().nullable(),
     targetModel: LLMModelSchema
 });
@@ -20775,7 +20794,8 @@ object$1({
     provider: _enum$1(["storage", "huggingface"]),
     providerSlug: string$3(),
     sources: array$2(object$1({
-        engine: LLMEngineSchema,
+        engine: LLMEngineSchema.nullable(),
+        engineId: ULIDSchema.nullable(),
         id: ULIDSchema,
         lastState: ConduitState.nullable(),
         lastStateTimestamp: string$3().nullable(),
@@ -20796,17 +20816,17 @@ object$1({
 });
 object$1({
     contextLength: number$1().int().positive().max(1048576).optional(),
-    engine: LLMEngineSchema,
+    engineId: ULIDSchema,
     modelID: ULIDSchema,
     name: ResourceNameSchema,
-    parallelism: number$1().int().positive().optional(),
     quantizationLabel: string$3().min(1).max(128).optional()
 });
 object$1({
     results: array$2(object$1({
         contextLength: number$1().int().positive().nullable(),
         created: string$3(),
-        engine: LLMEngineSchema,
+        engine: LLMEngineSchema.nullable(),
+        engineId: ULIDSchema.nullable(),
         id: ULIDSchema,
         lastState: ConduitState.nullable(),
         lastStateTimestamp: string$3().nullable(),
@@ -20819,7 +20839,10 @@ object$1({
 object$1({
     contextLength: number$1().int().positive().nullable(),
     created: string$3(),
-    engine: LLMEngineSchema,
+    engine: LLMEngineSchema.nullable(),
+    engineConfig: unknown$1().nullable(),
+    engineId: ULIDSchema.nullable(),
+    engineName: string$3().nullable(),
     id: ULIDSchema,
     lastState: ConduitState.nullable(),
     lastStateTimestamp: string$3().nullable(),
@@ -20832,15 +20855,13 @@ object$1({
     }),
     modelQuantizationLabel: string$3().nullable(),
     name: string$3(),
-    parallelism: number$1().int().positive().nullable(),
     updated: string$3()
 });
 object$1({
     contextLength: number$1().int().positive().nullable().optional(),
-    engine: LLMEngineSchema.optional(),
+    engineId: ULIDSchema.nullable().optional(),
     modelID: ULIDSchema.optional(),
     name: ResourceNameSchema.optional(),
-    parallelism: number$1().int().positive().nullable().optional(),
     quantizationLabel: string$3().min(1).max(128).nullable().optional()
 });
 object$1({
@@ -20870,7 +20891,8 @@ object$1({
     name: string$3(),
     routingMethod: nativeEnum(RoutingMethod),
     sources: array$2(object$1({
-        engine: LLMEngineSchema,
+        engine: LLMEngineSchema.nullable(),
+        engineId: ULIDSchema.nullable(),
         id: ULIDSchema,
         modelName: string$3(),
         name: string$3()
@@ -116803,8 +116825,10 @@ async function startVLLM({ enginePort, targetDirectory }) {
     if (this.model.format === "gguf") {
         modelPath = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
     }
-    const device = process.env.VLLM_DEVICE;
-    const dtype = process.env.VLLM_DTYPE;
+    const engineConfig = this.engineConfig;
+    const device = typeof engineConfig?.device === "string" ? engineConfig.device : process.env.VLLM_DEVICE;
+    const dtype = typeof engineConfig?.dtype === "string" ? engineConfig.dtype : process.env.VLLM_DTYPE;
+    const tensorParallelSize = typeof engineConfig?.tensorParallelSize === "number" ? engineConfig.tensorParallelSize : 1;
     const args = [
         ...VLLM_START_ARGS,
         "--port",
@@ -116816,7 +116840,7 @@ async function startVLLM({ enginePort, targetDirectory }) {
         "--max-model-len",
         String(contextLength),
         "--tensor-parallel-size",
-        "1"
+        String(tensorParallelSize)
     ];
     if (device) {
         args.push("--device", device);
@@ -116824,6 +116848,10 @@ async function startVLLM({ enginePort, targetDirectory }) {
     if (dtype) {
         args.push("--dtype", dtype);
     }
+    const extraArgs = engineConfig?.extraArgs;
+    if (Array.isArray(extraArgs) && extraArgs.every((v) => typeof v === "string")) {
+        args.push(...extraArgs);
+    }
     const processManager = new ProcessManager({
         command: VLLM_EXECUTABLE,
         args
@@ -122775,7 +122803,8 @@ const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
 async function startLlamacpp({ enginePort, targetDirectory }) {
     const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
     const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
-    const parallelism = this.parallelism;
+    const engineConfig = this.engineConfig;
+    const parallelism = typeof engineConfig?.parallelism === "number" ? engineConfig.parallelism : null;
     const args = [
         ...LLAMACPP_START_ARGS,
         "--port",
@@ -122785,13 +122814,47 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
         "--ctx-size",
         String(contextLength)
     ];
-    const gpuLayers = Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
+    const gpuLayers = typeof engineConfig?.gpuLayers === "number"
+        ? engineConfig.gpuLayers
+        : Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
     if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
         args.push("--n-gpu-layers", String(gpuLayers));
     }
     if (typeof parallelism === "number") {
         args.push("--parallel", String(Math.max(1, parallelism)));
     }
+    const flashAttn = engineConfig?.flashAttn;
+    if (flashAttn === true || flashAttn === undefined) {
+        args.push("--flash-attn", "on");
+    }
+    const cacheTypeK = typeof engineConfig?.cacheTypeK === "string" ? engineConfig.cacheTypeK : null;
+    if (cacheTypeK) {
+        args.push("--cache-type-k", cacheTypeK);
+    }
+    const cacheTypeV = typeof engineConfig?.cacheTypeV === "string" ? engineConfig.cacheTypeV : null;
+    if (cacheTypeV) {
+        args.push("--cache-type-v", cacheTypeV);
+    }
+    const batchSize = typeof engineConfig?.batchSize === "number" ? engineConfig.batchSize : null;
+    if (batchSize !== null) {
+        args.push("--batch-size", String(batchSize));
+    }
+    const ubatchSize = typeof engineConfig?.ubatchSize === "number" ? engineConfig.ubatchSize : null;
+    if (ubatchSize !== null) {
+        args.push("--ubatch-size", String(ubatchSize));
+    }
+    const tensorSplit = typeof engineConfig?.tensorSplit === "string" ? engineConfig.tensorSplit : null;
+    if (tensorSplit) {
+        args.push("--tensor-split", tensorSplit);
+    }
+    const mainGpu = typeof engineConfig?.mainGpu === "number" ? engineConfig.mainGpu : null;
+    if (mainGpu !== null) {
+        args.push("--main-gpu", String(mainGpu));
+    }
+    const extraArgs = engineConfig?.extraArgs;
+    if (Array.isArray(extraArgs) && extraArgs.every((v) => typeof v === "string")) {
+        args.push(...extraArgs);
+    }
     const processManager = new ProcessManager({
         command: LLAMACPP_EXECUTABLE,
         args
@@ -122824,11 +122887,8 @@ function createModelStorageKey(model) {
     return `${model.source.type}${SEPARATOR}${sanitizeSegment(identifier)}`;
 }
-// 2 hours
 const ENGINE_FETCH_TIMEOUT_MS$1 = 7200000;
-// 20 minutes
 const DOWNLOAD_LOCK_TIMEOUT_MS = 20 * 60 * 1000;
-// 5 seconds
 const DOWNLOAD_LOCK_POLL_INTERVAL_MS = 5000;
 const ENGINE_AGENT = new undiciExports.Agent({
     bodyTimeout: ENGINE_FETCH_TIMEOUT_MS$1,
@@ -122836,9 +122896,9 @@ const ENGINE_AGENT = new undiciExports.Agent({
 });
 class ModelManager extends EventEmitter {
     engine;
+    engineConfig;
     enginePort;
     model;
-    parallelism;
     uniqueName;
     contextLength;
     logger;
@@ -122847,27 +122907,13 @@ class ModelManager extends EventEmitter {
     lifecycleState = "stopped";
     stopRequested = false;
     modelsDirectory;
-    constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }) {
+    constructor({ contextLength, engineConfig, enginePort, engineType, logger, model, root }) {
         super();
-        // const models = getModels();
-        // const targetModel = models.find(model => model.id === modelID);
-        // if (!targetModel) {
-        //     throw new ConfigurationInvalidError({
-        //         message: `No model found for ID: ${modelID}`
-        //     });
-        // }
-        // const source = targetModel.sources.find(source => source.engine === engine);
-        // if (!source) {
-        //     throw new ConfigurationInvalidError({
-        //         message: `Model does not support current engine: ${modelID} has no support for engine: ${engine}`
-        //     });
-        // }
-        this.engine = engine;
+        this.engine = engineType;
+        this.engineConfig = engineConfig ?? null;
         this.enginePort = enginePort;
         this.model = model;
         this.contextLength = typeof contextLength === "number" ? contextLength : null;
-        this.parallelism = typeof parallelism === "number" ? parallelism : null;
-        // this.providerSlugentifier = source.identifier;
         this.logger = logger;
         this.uniqueName = createModelStorageKey(this.model);
         this.modelsDirectory = join(root, "models");
@@ -122908,12 +122954,6 @@ class ModelManager extends EventEmitter {
                     clearTimeout(timeout);
                 }
             }
-            // case "ollama":
-            //     console.log("FETCH", path, opts);
-            //     return fetch(
-            //         joinURL("http://localhost:11434", path),
-            //         opts
-            //     );
             default: {
                 const engineType = this.engine;
                 throw new ConfigurationInvalidError({
@@ -122948,15 +122988,6 @@ class ModelManager extends EventEmitter {
                     await this.releaseDownloadLock();
                 }
                 break;
-            // case "ollama":
-            //     this.logger.info("Loading model", {
-            //         modelID: this.model.id
-            //     });
-            //     await loadCurrentOllamaModel.call(this);
-            //     this.logger.info("Loaded model", {
-            //         modelID: this.model.id
-            //     });
-            //     return;
             default: {
                 const engineType = this.engine;
                 throw new ConfigurationInvalidError({
@@ -123392,12 +123423,6 @@ function createPostStopEngineHandler(options) {
     return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/stop"].POST;
 }
-/**
- * Coerce non-string tool_calls function.arguments to JSON strings.
- * Some LLM backends return arguments as parsed objects instead of
- * JSON strings, violating the OpenAI spec. This mutates in place
- * and returns true if any coercion was performed.
- */
 function coerceToolCallArguments(parsed) {
     const choices = parsed.choices;
     if (!Array.isArray(choices))
@@ -123438,7 +123463,7 @@ function isEngineUsageChunk(value) {
     }
     return true;
 }
-function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
+function monitorEngineResponseStream({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
     const startedAt = requestStartedAt ?? Date.now();
     const passThrough = new PassThrough();
     passThrough.on("error", (error) => {
@@ -123477,8 +123502,8 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
                     const usageChunk = parsed.usage;
                     const effectiveContext = getEffectiveContextLength({
                         contextLength,
-                        engine,
-                        parallelism
+                        engineConfig,
+                        engineType
                     });
                     if (usageChunk.context_usage === undefined &&
                         usageChunk.prompt_tokens !== undefined &&
@@ -123520,8 +123545,8 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
                     let contextUsage = parsed.usage?.context_usage ?? null;
                     const effectiveContextForUsage = getEffectiveContextLength({
                         contextLength,
-                        engine,
-                        parallelism
+                        engineConfig,
+                        engineType
                     });
                     if (contextUsage === null &&
                         promptTokens !== null &&
@@ -123590,7 +123615,9 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
         passThrough.destroy(err);
     });
     body.once("end", () => {
-        parseUsageFromBuffer();
+        if (buffer.length > 0) {
+            parseUsageFromBuffer();
+        }
         logEngineMetrics({
             agentEngineType,
             level: "info",
@@ -123633,7 +123660,7 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
         stream: passThrough
     };
 }
-function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
+function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
     const maxUsageCaptureBytes = 1024 * 1024;
     const startedAt = requestStartedAt ?? Date.now();
     const passThrough = new PassThrough();
@@ -123719,8 +123746,8 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
                     let contextUsage = usageChunk.context_usage ?? null;
                     const effectiveContext = getEffectiveContextLength({
                         contextLength,
-                        engine,
-                        parallelism
+                        engineConfig,
+                        engineType
                     });
                     if (contextUsage === null &&
                         promptTokens !== null &&
@@ -123839,7 +123866,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
     }
     return Math.round(tokensPerSecond);
 }
-async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
+async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
     function normalizeTokenCount(value) {
         if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
             return value;
@@ -123854,6 +123881,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
             });
         });
     }
+    const engineType = conduitConfiguration.engineConfig?.type ?? null;
+    const engineConfig = conduitConfiguration.engineConfig?.config ?? null;
     const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody$1(body);
     const requestStartedAt = Date.now();
     const requestBody = JSON.parse(serializedBody);
@@ -123866,7 +123895,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
         reportMetricsSafe({
             bytes: requestBodyBytes + responseBytes,
             completionTokens,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId: endpointId ?? null,
             latencyMs,
             modelId: modelID,
@@ -123895,9 +123924,10 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
         signal
     })
         .catch(error => {
+        const err = asError(error);
         logEngineMetrics({
-            agentEngineType: configuration.agentEngineType,
-            error: error,
+            agentEngineType: engineType ?? "unknown",
+            error: err,
             level: "error",
             logger,
             requestBodyBytes,
@@ -123909,7 +123939,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
         reportMetricsSafe({
             bytes: requestBodyBytes,
             completionTokens: 0,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId: endpointId ?? null,
             latencyMs,
             modelId: modelID,
@@ -123924,7 +123954,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
             tokensPerSecond: 0,
             totalTokens: 0
         });
-        throw error;
+        throw err;
     });
     const responseStatusText = response.statusText ?? "Upstream request failed";
     if (!response.ok) {
@@ -123946,7 +123976,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
     }
     if (!response.body) {
         logEngineMetrics({
-            agentEngineType: configuration.agentEngineType,
+            agentEngineType: engineType ?? "unknown",
             level: response.ok ? "info" : "error",
             logger,
             requestBodyBytes,
@@ -123958,7 +123988,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
         reportMetricsSafe({
             bytes: requestBodyBytes,
             completionTokens: 0,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId: endpointId ?? null,
             latencyMs,
             modelId: modelID,
@@ -123980,25 +124010,25 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
     }
     const monitoredResponse = streamRequested
         ? monitorEngineResponseStream({
-            agentEngineType: configuration.agentEngineType,
+            agentEngineType: engineType ?? "unknown",
             body: Readable.fromWeb(response.body),
             contextLength: modelManager.contextLength,
-            engine: configuration.agentEngineType,
+            engineConfig,
+            engineType: engineType ?? "unknown",
             logger,
             onComplete: onMonitoringComplete,
-            parallelism: modelManager.parallelism,
             requestBodyBytes,
             requestPath: path,
             requestStartedAt
         })
         : monitorEngineResponseSingle({
-            agentEngineType: configuration.agentEngineType,
+            agentEngineType: engineType ?? "unknown",
             body: Readable.fromWeb(response.body),
             contextLength: modelManager.contextLength,
-            engine: configuration.agentEngineType,
+            engineConfig,
+            engineType: engineType ?? "unknown",
             logger,
             onComplete: onMonitoringComplete,
-            parallelism: modelManager.parallelism,
             requestBodyBytes,
             requestPath: path,
             requestStartedAt
@@ -124015,7 +124045,7 @@ function extractEndpointId$1(req) {
     const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
     return raw && isValid(raw) ? raw : null;
 }
-function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
+function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger, startup }) {
     return {
         "/v1/chat/completions": {
             POST: async ({ body, req, res }) => {
@@ -124033,7 +124063,7 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
                 });
                 const result = await proxyOpenAIStreamingRoute({
                     body,
-                    configuration,
+                    conduitConfiguration: conduitConfiguration(),
                     endpointId: extractEndpointId$1(req),
                     logger,
                     modelID,
@@ -124059,7 +124089,7 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
                 });
                 return proxyOpenAIStreamingRoute({
                     body,
-                    configuration,
+                    conduitConfiguration: conduitConfiguration(),
                     endpointId: extractEndpointId$1(req),
                     logger,
                     modelID,
@@ -124073,10 +124103,11 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
         "/v1/models": {
             GET: async () => {
                 const modelManager = getModelManager();
+                const currentConfig = conduitConfiguration();
                 const effectiveContextLength = getEffectiveContextLength({
                     contextLength: modelManager.contextLength,
-                    engine: configuration.agentEngineType,
-                    parallelism: modelManager.parallelism
+                    engineConfig: currentConfig.engineConfig?.config ?? null,
+                    engineType: currentConfig.engineConfig?.type ?? null
                 });
                 return {
                     body: {
@@ -124179,7 +124210,7 @@ function extractAnthropicNonStreamUsage(body) {
         return null;
     }
 }
-async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
+async function proxyAnthropicStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
     function reportMetricsSafe(payload) {
         reportMetrics(payload).catch(error => {
             logger.warn("Failed to upload LLM prompt metrics", {
@@ -124188,6 +124219,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
             });
         });
     }
+    const engineType = conduitConfiguration.engineConfig?.type ?? null;
     const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
     const requestStartedAt = Date.now();
     const requestBody = JSON.parse(serializedBody);
@@ -124200,7 +124232,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
         reportMetricsSafe({
             bytes: requestBodyBytes + responseBytes,
             completionTokens,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId: endpointId ?? null,
             latencyMs,
             modelId: modelID,
@@ -124230,7 +124262,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
     })
         .catch(error => {
         logEngineMetrics({
-            agentEngineType: configuration.agentEngineType,
+            agentEngineType: engineType ?? "unknown",
             error: asError(error),
             level: "error",
             logger,
@@ -124243,7 +124275,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
         reportMetricsSafe({
             bytes: requestBodyBytes,
             completionTokens: 0,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId: endpointId ?? null,
             latencyMs,
             modelId: modelID,
@@ -124276,7 +124308,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
     }
     if (!response.body) {
         logEngineMetrics({
-            agentEngineType: configuration.agentEngineType,
+            agentEngineType: engineType ?? "unknown",
             level: response.ok ? "info" : "error",
             logger,
             requestBodyBytes,
@@ -124288,7 +124320,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
         reportMetricsSafe({
             bytes: requestBodyBytes,
             completionTokens: 0,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId: endpointId ?? null,
             latencyMs,
             modelId: modelID,
@@ -124349,7 +124381,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
         rawBody.once("error", err => {
             const normalizedError = asError(err);
             logEngineMetrics({
-                agentEngineType: configuration.agentEngineType,
+                agentEngineType: engineType ?? "unknown",
                 error: normalizedError,
                 level: "error",
                 logger,
@@ -124363,7 +124395,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
         });
         rawBody.once("end", () => {
             logEngineMetrics({
-                agentEngineType: configuration.agentEngineType,
+                agentEngineType: engineType ?? "unknown",
                 level: upstreamError ? "error" : "info",
                 logger,
                 requestBodyBytes,
@@ -124382,7 +124414,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
             }
             const closeError = new Error("Engine response stream closed before completion");
             logEngineMetrics({
-                agentEngineType: configuration.agentEngineType,
+                agentEngineType: engineType ?? "unknown",
                 error: closeError,
                 level: "error",
                 logger,
@@ -124407,7 +124439,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
         rawBody.once("error", err => {
             const normalizedError = asError(err);
             logEngineMetrics({
-                agentEngineType: configuration.agentEngineType,
+                agentEngineType: engineType ?? "unknown",
                 error: normalizedError,
                 level: "error",
                 logger,
@@ -124427,7 +124459,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
                 usage.outputTokens = extractedUsage.outputTokens;
             }
             logEngineMetrics({
-                agentEngineType: configuration.agentEngineType,
+                agentEngineType: engineType ?? "unknown",
                 level: upstreamError ? "error" : "info",
                 logger,
                 requestBodyBytes,
@@ -124446,7 +124478,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
             }
             const closeError = new Error("Engine response stream closed before completion");
             logEngineMetrics({
-                agentEngineType: configuration.agentEngineType,
+                agentEngineType: engineType ?? "unknown",
                 error: closeError,
                 level: "error",
                 logger,
@@ -124472,7 +124504,7 @@ function extractEndpointId(req) {
     const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
     return raw && isValid(raw) ? raw : null;
 }
-function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
+function createConduitAnthropicAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger }) {
     return {
         "/v1/messages": {
             POST: async ({ body, req, res }) => {
@@ -124484,7 +124516,7 @@ function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration,
                 });
                 return proxyAnthropicStreamingRoute({
                     body,
-                    configuration,
+                    conduitConfiguration: conduitConfiguration(),
                     endpointId: extractEndpointId(req),
                     logger,
                     modelID,
@@ -124506,7 +124538,7 @@ function createHealthHandler() {
     };
 }
-async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
+async function handleSSERequests({ apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
     const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
     const maxReconnectDelayMs = 30000;
     let reconnectAttempt = 0;
@@ -124548,6 +124580,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
                     handleRequest({
                         activeRequests,
                         apiURL,
+                        conduitConfiguration,
                         configuration,
                         logger,
                         modelID,
@@ -124591,7 +124624,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
         }
     }
 }
-async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
+async function handleRequest({ activeRequests, apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
     function reportMetricsSafe(payload) {
         reportMetrics(payload).catch(error => {
             logger.warn("Failed to upload LLM prompt metrics", {
@@ -124600,6 +124633,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
             });
         });
     }
+    const engineType = conduitConfiguration().engineConfig?.type ?? null;
     const endpointId = request.parameters?.endpointID ?? null;
     const requestStartedAt = Date.now();
     const requestBytes = calculateRequestBytes(request.body ?? null);
@@ -124624,7 +124658,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
         reportMetricsSafe({
             bytes: requestBytes + responseMetrics.responseBytes,
             completionTokens: 0,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId,
             latencyMs,
             modelId: modelID,
@@ -124678,7 +124712,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
         reportMetricsSafe({
             bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
             completionTokens: 0,
-            engine: configuration.agentEngineType,
+            engine: engineType,
             endpointId,
             latencyMs,
             modelId: modelID,
@@ -134618,15 +134652,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
     let modelFileName = getConduitModelFileName(conduitConfiguration);
     let modelName = getConduitModelName(conduitConfiguration);
     const startup = Date.now();
-    let modelManager = new ModelManager({
-        contextLength: conduitConfiguration.contextLength ?? null,
-        engine: configuration.agentEngineType,
-        enginePort: configuration.enginePort,
-        logger,
-        model: conduitConfiguration.targetModel,
-        parallelism: conduitConfiguration.parallelism ?? null,
-        root: configuration.rootDirectory
-    });
+    let modelManager = createModelManagerFromConfig(conduitConfiguration, configuration, logger);
     const conduitStateReportManager = new ConduitStateReportManager({
         apiClient,
         collectMachineMetadata: collectMachineMetadata,
@@ -134765,15 +134791,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
         conduitConfiguration = newConduitConfiguration;
         modelFileName = getConduitModelFileName(conduitConfiguration);
         modelName = getConduitModelName(conduitConfiguration);
-        modelManager = new ModelManager({
-            contextLength: conduitConfiguration.contextLength ?? null,
-            engine: configuration.agentEngineType,
-            enginePort: configuration.enginePort,
-            logger,
-            model: conduitConfiguration.targetModel,
-            parallelism: conduitConfiguration.parallelism ?? null,
-            root: configuration.rootDirectory
-        });
+        modelManager = createModelManagerFromConfig(conduitConfiguration, configuration, logger);
         attachLifecycleListeners();
         if (sourceState === "idle") {
             logger.info("Restarting engine from idle");
@@ -134847,6 +134865,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
             "/v1/chat/completions": {
                 POST: createPostChatCompletionsHandler({
                     apiClient,
+                    conduitConfiguration: () => conduitConfiguration,
                     configuration,
                     getModelID: () => conduitConfiguration.targetModel.id,
                     getModelManager: () => modelManager,
@@ -134857,6 +134876,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
             "/v1/completions": {
                 POST: createPostCompletionsHandler({
                     apiClient,
+                    conduitConfiguration: () => conduitConfiguration,
                     configuration,
                     getModelID: () => conduitConfiguration.targetModel.id,
                     getModelManager: () => modelManager,
@@ -134867,6 +134887,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
             "/v1/models": {
                 GET: createGetModelsHandler({
                     apiClient,
+                    conduitConfiguration: () => conduitConfiguration,
                     configuration,
                     getModelID: () => conduitConfiguration.targetModel.id,
                     getModelManager: () => modelManager,
@@ -134884,6 +134905,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
             "/v1/messages": {
                 POST: createPostMessagesHandler({
                     apiClient,
+                    conduitConfiguration: () => conduitConfiguration,
                     configuration,
                     getModelID: () => conduitConfiguration.targetModel.id,
                     getModelManager: () => modelManager,
@@ -134897,6 +134919,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
     });
     handleSSERequests({
         apiURL: configuration.apiURL,
+        conduitConfiguration: () => conduitConfiguration,
         configuration,
         logger,
         modelID: conduitConfiguration.targetModel.id,
@@ -134973,6 +134996,18 @@ async function createApplication({ abortController, apiClient, configuration, lo
         shutdown
     };
 }
+function createModelManagerFromConfig(conduitConfiguration, configuration, logger) {
+    const engineConfig = conduitConfiguration.engineConfig;
+    return new ModelManager({
+        contextLength: conduitConfiguration.contextLength ?? null,
+        engineConfig: engineConfig?.config ?? null,
+        enginePort: configuration.enginePort,
+        engineType: engineConfig?.type ?? "llama.cpp",
+        logger,
+        model: conduitConfiguration.targetModel,
+        root: configuration.rootDirectory
+    });
+}
 function getConduitModelFileName(configuration) {
     const { source } = configuration.targetModel;
     return source.type === "huggingface" ? source.slug : source.irid;
@@ -134983,8 +135018,6 @@ function getConduitModelName(configuration) {
 const StartModeSchema = _enum(["auto", "idle"]);
 function getConfiguration({ overrides } = {}) {
-    const agentEngineTypeValue = overrides?.agentEngineType ?? readEnvString("ENGINE");
-    const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
     const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
     const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
     const enginePort = overrides?.enginePort ??
@@ -135005,7 +135038,6 @@ function getConfiguration({ overrides } = {}) {
     const startModeValue = overrides?.startMode ?? readEnvStringOptional("START_MODE", "auto");
     const startMode = StartModeSchema.parse(startModeValue);
     return {
-        agentEngineType,
         apiKey,
         apiURL,
         enginePort,
@@ -135031,7 +135063,6 @@ class ConduitConnection {
         this.enginePort = options.enginePort;
         this.configuration = getConfiguration({
             overrides: {
-                agentEngineType: options.engine,
                 apiKey: options.apiKey,
                 apiURL: options.apiURL,
                 enginePort: options.enginePort,
@@ -135091,13 +135122,10 @@ async function startInferenceAgent({ configurationOverrides }) {
     const logger = createLogger({
         name: "infersec-conduit"
     });
-    logger.info("Application starting", {
-        agentEngineType: configuration.agentEngineType
-    });
+    logger.info("Application starting");
     const connection = new ConduitConnection({
         apiKey: configuration.apiKey,
         apiURL: configuration.apiURL,
-        engine: configuration.agentEngineType,
         enginePort: configuration.enginePort,
         logger,
         port: configuration.port,
@@ -135136,7 +135164,6 @@ function registerInferenceCommands({ program }) {
         .command("start")
         .description("Start the inference agent")
         .option("--api-url <url>", "API base URL (or API_URL env)")
-        .option("--engine <type>", "Engine type (or ENGINE env)")
         .option("--engine-port <number>", "Engine port (or ENGINE_PORT env)")
         .option("--key <value>", "API key (or API_KEY env)")
         .option("--port <number>", "Port to listen on (or PORT env)")
@@ -135148,9 +135175,6 @@ function registerInferenceCommands({ program }) {
         if (options["api-url"]) {
             configurationOverrides.apiURL = options["api-url"];
         }
-        if (options.engine) {
-            configurationOverrides.agentEngineType = options.engine;
-        }
         if (options["engine-port"]) {
             const enginePort = Number.parseInt(options["engine-port"], 10);
             if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
@@ -161889,7 +161913,6 @@ async function runSingleBenchmark(options) {
             const conn = new ConduitConnection({
                 apiKey,
                 apiURL: apiUrl,
-                engine: entry.engine,
                 enginePort,
                 logger,
                 port,

package/dist/configuration.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { LLMEngine, ULID } from "@infersec/definitions";
+import { ULID } from "@infersec/definitions";
 import { z } from "zod";
 declare const StartModeSchema: z.ZodEnum<{
     idle: "idle";
@@ -6,7 +6,6 @@ declare const StartModeSchema: z.ZodEnum<{
 }>;
 export type StartMode = z.infer<typeof StartModeSchema>;
 export interface Configuration {
-    agentEngineType: LLMEngine;
     apiKey: string;
     apiURL: string;
     enginePort: number;
@@ -16,7 +15,6 @@ export interface Configuration {
     startMode: StartMode;
 }
 export interface ConfigurationOverrides {
-    agentEngineType?: string;
     apiKey?: string;
     apiURL?: string;
     enginePort?: number;

package/dist/modelManagement/ModelManager.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { LLMEngine, LLMModel } from "@infersec/definitions";
+import { LLMModel } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import EventEmitter from "eventemitter3";
 import { Response } from "undici";
@@ -11,10 +11,10 @@ interface ModelManagerEvents {
 }
 type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
 export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
-    readonly engine: LLMEngine;
+    readonly engine: string;
+    readonly engineConfig: Record<string, unknown> | null;
     readonly enginePort: number;
     readonly model: LLMModel;
-    readonly parallelism: number | null;
     private uniqueName;
     readonly contextLength: number | null;
     protected readonly logger: Logger;
@@ -23,13 +23,13 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
     private lifecycleState;
     private stopRequested;
     protected readonly modelsDirectory: string;
-    constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }: {
+    constructor({ contextLength, engineConfig, enginePort, engineType, logger, model, root }: {
         contextLength?: number | null;
-        engine: LLMEngine;
+        engineConfig?: Record<string, unknown> | null;
         enginePort: number;
+        engineType: string;
         logger: Logger;
         model: LLMModel;
-        parallelism?: number | null;
         root: string;
     });
     fetchOpenAI(path: string, opts?: RequestInit): Promise<Response>;

package/dist/requestHandlers/createConduitAnthropicAPIReferenceHandlers.d.ts CHANGED Viewed

@@ -1,12 +1,13 @@
-import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
+import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE, type InferenceAgentConfiguration } from "@infersec/definitions";
 import { implementAPIReference, type APIRequest } from "@infersec/fetch";
 import { Logger } from "@infersec/logger";
 import { APIClient } from "../apiClient/index.js";
 import { Configuration } from "../configuration.js";
 import { ModelManager } from "../modelManagement/ModelManager.js";
 type ConduitAnthropicAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE>>[0]["api"];
-export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }: {
+export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger }: {
     apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     getModelID: () => string;
     getModelManager: () => ModelManager;
@@ -14,6 +15,7 @@ export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient,
 }): ConduitAnthropicAPIReferenceHandlers;
 export declare function createPostMessagesHandler(options: {
     apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     getModelID: () => string;
     getModelManager: () => ModelManager;

package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts CHANGED Viewed

@@ -1,12 +1,13 @@
-import { API_CLIENT_CONDUIT_OPENAI_REFERENCE } from "@infersec/definitions";
+import { API_CLIENT_CONDUIT_OPENAI_REFERENCE, type InferenceAgentConfiguration } from "@infersec/definitions";
 import { implementAPIReference, type APIRequest } from "@infersec/fetch";
 import { Logger } from "@infersec/logger";
 import { APIClient } from "../apiClient/index.js";
 import { Configuration } from "../configuration.js";
 import { ModelManager } from "../modelManagement/ModelManager.js";
 type ConduitOpenAIAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_OPENAI_REFERENCE>>[0]["api"];
-export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }: {
+export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger, startup }: {
     apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     getModelID: () => string;
     getModelManager: () => ModelManager;
@@ -15,6 +16,7 @@ export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, con
 }): ConduitOpenAIAPIReferenceHandlers;
 export declare function createGetModelsHandler(options: {
     apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     getModelID: () => string;
     getModelManager: () => ModelManager;
@@ -60,6 +62,7 @@ export declare function createGetModelsHandler(options: {
 }>;
 export declare function createPostChatCompletionsHandler(options: {
     apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     getModelID: () => string;
     getModelManager: () => ModelManager;
@@ -166,6 +169,7 @@ export declare function createPostChatCompletionsHandler(options: {
 }>;
 export declare function createPostCompletionsHandler(options: {
     apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     getModelID: () => string;
     getModelManager: () => ModelManager;

package/dist/sse/handler.d.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import { InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
+import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import { Configuration } from "../configuration.js";
-export declare function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
+export declare function handleSSERequests({ apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
     apiURL: string;
+    conduitConfiguration: () => InferenceAgentConfiguration;
     configuration: Configuration;
     logger: Logger;
     modelID: ULID;

package/dist/utils/anthropic.d.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 import { Readable } from "node:stream";
-import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
+import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import { Configuration } from "../configuration.js";
 import { ModelManager } from "../modelManagement/ModelManager.js";
-export declare function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
+export declare function proxyAnthropicStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
     body: unknown;
+    conduitConfiguration: InferenceAgentConfiguration;
     configuration: Configuration;
     endpointId?: ULID | null;
     logger: Logger;

package/dist/utils/engineMetrics.d.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import { Readable } from "node:stream";
-import { LLMEngine } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 export interface EngineUsageMetrics {
     completionTokens: number | null;
@@ -8,7 +7,7 @@ export interface EngineUsageMetrics {
     totalTokens: number | null;
 }
 interface EngineMetricsLoggerOptions {
-    agentEngineType: LLMEngine;
+    agentEngineType: string;
     logger: Logger;
     requestBodyBytes: number;
     requestPath: string;
@@ -24,9 +23,9 @@ interface EngineMetricsCompletion {
 interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
     body: Readable;
     contextLength: number | null;
-    engine: LLMEngine;
+    engineConfig: Record<string, unknown> | null;
+    engineType: string;
     onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
-    parallelism: number | null;
     requestStartedAt?: number;
 }
 interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
@@ -38,7 +37,7 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
 interface MonitorEngineResponseResult {
     stream: Readable;
 }
-export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
-export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
+export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
+export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
 export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
 export {};

package/dist/utils/openai.d.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 import { Readable } from "node:stream";
-import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
+import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import { Configuration } from "../configuration.js";
 import { ModelManager } from "../modelManagement/ModelManager.js";
-export declare function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
+export declare function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
     body: unknown;
+    conduitConfiguration: InferenceAgentConfiguration;
     configuration: Configuration;
     endpointId?: ULID | null;
     logger: Logger;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.67.0",
+  "version": "1.68.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },