npm - @infersec/conduit - Versions diffs - 1.52.0 → 1.53.0 - Mend

@infersec/conduit 1.52.0 → 1.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/cli.js +121 -47
package/dist/modelManagement/ModelManager.d.ts +2 -1
package/dist/modelManagement/quantization.d.ts +4 -0
package/dist/sse/requestProxy.d.ts +3 -4
package/dist/utils/docker.d.ts +1 -0
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -52,7 +52,7 @@ import 'fs/promises';
 import 'stream/promises';
 import { fileURLToPath } from 'node:url';
 import { StringDecoder } from 'node:string_decoder';
-import os, { constants as constants$5 } from 'node:os';
+import os, { constants as constants$5, hostname as hostname$2 } from 'node:os';
 import tty from 'node:tty';
 import require$$0$j from 'child_process';
 import { setTimeout as setTimeout$1, scheduler, setImmediate as setImmediate$1 } from 'node:timers/promises';
@@ -18497,6 +18497,8 @@ const RawPasswordSchema = string$1()
     .refine(raw => /[\p{P}\p{S}]/u.test(raw), {
     message: "Invalid password: Missing character variant: Symbols"
 });
+const RESOURCE_NAME_MAX_LENGTH = 256;
+string$1().trim().min(1).max(RESOURCE_NAME_MAX_LENGTH);
 const ULIDSchema = string$1().refine(isValid, { message: "Invalid ULID" });
 // IRID Format:
 //
@@ -57224,7 +57226,7 @@ var undiciExports = requireUndici();
 function createFetchWithHeaders({ fetchFn = undiciExports.fetch, headers }) {
     function fetchWithHeaders(url, options) {
-        const mergedHeaders = new Headers(options.headers);
+        const mergedHeaders = new undiciExports.Headers(options.headers);
         for (const [key, value] of Object.entries(headers)) {
             mergedHeaders.set(key, value);
         }
@@ -57252,7 +57254,7 @@ async function fetchByReference(options) {
     for (const [key, value] of Object.entries(query || {})) {
         targetURL.searchParams.set(key, String(value));
     }
-    const headers = new Headers();
+    const headers = new undiciExports.Headers();
     const fetchOptions = {
         headers,
         method: options.method
@@ -103234,25 +103236,30 @@ const VLLM_EXECUTABLE = "python3";
 const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
 async function startVLLM({ enginePort, targetDirectory }) {
     const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
+    const device = process.env.VLLM_DEVICE;
+    const dtype = process.env.VLLM_DTYPE;
+    const args = [
+        ...VLLM_START_ARGS,
+        "--port",
+        String(enginePort),
+        "--model",
+        targetDirectory,
+        "--served-model-name",
+        this.model.id,
+        "--max-model-len",
+        String(contextLength),
+        "--tensor-parallel-size",
+        "1"
+    ];
+    if (device) {
+        args.push("--device", device);
+    }
+    if (dtype) {
+        args.push("--dtype", dtype);
+    }
     const processManager = new ProcessManager({
         command: VLLM_EXECUTABLE,
-        args: [
-            ...VLLM_START_ARGS,
-            "--port",
-            String(enginePort),
-            "--model",
-            targetDirectory,
-            "--served-model-name",
-            this.model.id,
-            "--device",
-            "cpu", // Force CPU mode
-            "--dtype",
-            "float16", // Use float16 to save memory on CPU
-            "--max-model-len",
-            String(contextLength),
-            "--tensor-parallel-size",
-            "1"
-        ]
+        args
     });
     await processManager.start();
     return processManager;
@@ -104324,15 +104331,12 @@ const ModelDownloadProgressSchema = object({
     completedFiles: array(string$1().min(1))
 });
-const DOWNLOAD_PROGRESS_TIMEOUT = 60000;
-const DOWNLOAD_RETRY_ATTEMPTS_FULL = 3;
-const DOWNLOAD_RETRY_ATTEMPTS_RANGE = 10;
 function matchesQuantizationVariant({ filePath, variant }) {
     if (!variant) {
         return false;
     }
     const escapedVariant = variant.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-    const trailingBoundary = variant.includes("_") ? "[\\-./_]" : "[\\-./]";
+    const trailingBoundary = "[\\-./]";
     const matcher = new RegExp(`(^|[\\-./_])${escapedVariant}(?=$|${trailingBoundary})`, "i");
     const normalizedPath = filePath.replace(/\\/g, "/");
     const segments = normalizedPath.split("/").filter(Boolean);
@@ -104345,6 +104349,10 @@ function matchesQuantizationVariant({ filePath, variant }) {
     }
     return segments.slice(0, -1).some(segment => matcher.test(segment));
 }
+const DOWNLOAD_PROGRESS_TIMEOUT = 60000;
+const DOWNLOAD_RETRY_ATTEMPTS_FULL = 3;
+const DOWNLOAD_RETRY_ATTEMPTS_RANGE = 10;
 async function downloadModelViaHuggingFace({ format, huggingFaceToken, modelSlug: rawModelSlug, onProgress, progressFilePath, targetDirectory }) {
     // Sanitise model ID
     const [modelSlugWithRevision, variant = null] = rawModelSlug.split(":");
@@ -112782,7 +112790,7 @@ async function findQuantizedModelTarget({ model, path }) {
         // Just return the first
         return modelFiles[0];
     }
-    const matches = modelFiles.filter(fileName => fileName.toLowerCase().includes(variant.toLowerCase()));
+    const matches = modelFiles.filter(fileName => matchesQuantizationVariant({ filePath: fileName, variant: variant ?? "" }));
     if (matches.length === 0) {
         throw new Error(`No model found for format and variant: ${model.format} / ${variant}`);
     }
@@ -112817,7 +112825,11 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
 }
 // 2 hours
-const ENGINE_FETCH_TIMEOUT_MS = 7200000;
+const ENGINE_FETCH_TIMEOUT_MS$1 = 7200000;
+const ENGINE_AGENT = new undiciExports.Agent({
+    bodyTimeout: ENGINE_FETCH_TIMEOUT_MS$1,
+    headersTimeout: ENGINE_FETCH_TIMEOUT_MS$1
+});
 class ModelManager extends EventEmitter {
     engine;
     enginePort;
@@ -112870,19 +112882,28 @@ class ModelManager extends EventEmitter {
                 const controller = new AbortController();
                 const timeout = setTimeout(() => {
                     controller.abort(new Error("Inference request timeout"));
-                }, ENGINE_FETCH_TIMEOUT_MS);
+                }, ENGINE_FETCH_TIMEOUT_MS$1);
                 const effectiveSignal = callerSignal
                     ? AbortSignal.any([callerSignal, controller.signal])
                     : controller.signal;
                 try {
-                    return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
+                    const fetchStartedAt = Date.now();
+                    const response = await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
                         ...opts,
+                        dispatcher: ENGINE_AGENT,
                         headers: {
                             ...opts?.headers,
                             Connection: "keep-alive"
                         },
                         signal: effectiveSignal
                     });
+                    const fetchElapsedMs = Date.now() - fetchStartedAt;
+                    this.logger.debug("Engine responded", {
+                        elapsedMs: fetchElapsedMs,
+                        requestUrl: path,
+                        statusCode: response.status
+                    });
+                    return response;
                 }
                 finally {
                     clearTimeout(timeout);
@@ -112979,6 +113000,9 @@ class ModelManager extends EventEmitter {
                 message: "Cannot stop LLM engine: already stopping"
             });
         }
+        if (this.lifecycleState === "stopped") {
+            return;
+        }
         if (this.lifecycleState !== "running" &&
             this.lifecycleState !== "starting" &&
             this.lifecycleState !== "errored") {
@@ -113355,6 +113379,12 @@ function isEngineUsageChunk(value) {
 function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
     const startedAt = requestStartedAt ?? Date.now();
     const passThrough = new PassThrough();
+    passThrough.on("error", (error) => {
+        logger.error("Engine response stream error", {
+            error: asError(error),
+            requestUrl: requestPath
+        });
+    });
     let responseBytes = 0;
     let firstChunkAt = null;
     let usage = null;
@@ -113545,6 +113575,12 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
     const maxUsageCaptureBytes = 1024 * 1024;
     const startedAt = requestStartedAt ?? Date.now();
     const passThrough = new PassThrough();
+    passThrough.on("error", (error) => {
+        logger.error("Engine response stream error", {
+            error: asError(error),
+            requestUrl: requestPath
+        });
+    });
     let responseBytes = 0;
     let firstChunkAt = null;
     let usage = null;
@@ -113830,21 +113866,20 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
     });
     const responseStatusText = response.statusText ?? "Upstream request failed";
     if (!response.ok) {
-        const responseBody = await response.text().catch(() => null);
-        const responseError = new Error(responseBody
-            ? `Upstream error response: ${responseBody}`
-            : "Upstream error response: empty body");
-        logger.error("LLM engine request failed", {
-            error: responseError,
-            requestUrl: path,
-            statusCode: response.status,
-            statusText: responseStatusText
-        });
         if (!response.body) {
-            return {
-                status: response.status,
+            logger.error("LLM engine request failed (no body)", {
+                error: new Error("Upstream error response: empty body"),
+                requestUrl: path,
+                statusCode: response.status,
                 statusText: responseStatusText
-            };
+            });
+        }
+        else {
+            logger.error("LLM engine request failed", {
+                requestUrl: path,
+                statusCode: response.status,
+                statusText: responseStatusText
+            });
         }
     }
     if (!response.body) {
@@ -113926,9 +113961,15 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
                 const modelManager = getModelManager();
                 const abortController = new AbortController();
                 res.on("close", () => {
+                    logger.debug("Express route client disconnected, aborting", {
+                        requestUrl: "/v1/chat/completions"
+                    });
                     abortController.abort();
                 });
-                return proxyOpenAIStreamingRoute({
+                logger.debug("Express route handler entered, awaiting engine", {
+                    requestUrl: "/v1/chat/completions"
+                });
+                const result = await proxyOpenAIStreamingRoute({
                     body,
                     configuration,
                     endpointId: extractEndpointId$1(req),
@@ -113939,6 +113980,11 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
                     reportMetrics: apiClient.reportPromptMetrics,
                     signal: abortController.signal
                 });
+                logger.debug("Express route handler returning response", {
+                    requestUrl: "/v1/chat/completions",
+                    statusCode: "status" in result ? result.status : 0
+                });
+                return result;
             }
         },
         "/v1/completions": {
@@ -114688,7 +114734,7 @@ async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
             sequence: payload.sequence
         });
         chunks.push(Buffer.from(chunk + "\n"));
-        if (chunks.length >= 10) {
+        if (payload.data === null || chunks.length >= 10) {
             await flushChunks();
         }
     };
@@ -114782,10 +114828,12 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
     return Math.round(tokensPerSecond);
 }
-/**
- * Proxy server requests to the local inference HTTP server.
- */
-async function proxyRequest({ configuration, request, signal }) {
+const ENGINE_FETCH_TIMEOUT_MS = 7_200_000;
+const LOOPBACK_AGENT = new undiciExports.Agent({
+    bodyTimeout: ENGINE_FETCH_TIMEOUT_MS,
+    headersTimeout: ENGINE_FETCH_TIMEOUT_MS
+});
+async function proxyRequest({ configuration, logger, request, signal }) {
     let finalPath = request.path;
     if (request.parameters) {
         Object.entries(request.parameters).forEach(([key, value]) => {
@@ -114799,6 +114847,7 @@ async function proxyRequest({ configuration, request, signal }) {
         }
     }
     const fetchOptions = {
+        dispatcher: LOOPBACK_AGENT,
         method: request.method,
         headers: {
             ...request.headers,
@@ -114814,7 +114863,18 @@ async function proxyRequest({ configuration, request, signal }) {
         fetchOptions.body =
             typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
     }
+    logger.debug("Loopback proxy request starting", {
+        requestMethod: request.method,
+        requestUrl: finalPath
+    });
+    const startedAt = Date.now();
     const response = await undiciExports.fetch(url, fetchOptions);
+    logger.debug("Loopback proxy response received", {
+        elapsedMs: Date.now() - startedAt,
+        requestMethod: request.method,
+        requestUrl: finalPath,
+        statusCode: response.status
+    });
     return {
         body: response.body ? Readable.fromWeb(response.body) : null,
         headers: Object.fromEntries(response.headers.entries()),
@@ -124435,7 +124495,20 @@ async function collectMachineMetadata() {
     return machineMetadata;
 }
+const CONTAINER_ID_RE = /^[0-9a-f]{12}$/;
+function ensureDockerValidEnv() {
+    if (!existsSync("/.dockerenv")) {
+        return;
+    }
+    const name = hostname$2();
+    if (CONTAINER_ID_RE.test(name)) {
+        throw new Error(`Container hostname is a default Docker container ID ("${name}"). ` +
+            `Set 'hostname' in your compose service definition.`);
+    }
+}
 async function createApplication({ abortController, apiClient, configuration, logger }) {
+    ensureDockerValidEnv();
     logger.info("Fetching conduit configuration");
     let conduitConfiguration = await apiClient.getConduitConfiguration();
     logger.info("Received configuration", {
@@ -124758,6 +124831,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
         onRequest: async ({ request, signal }) => {
             return proxyRequest({
                 configuration,
+                logger,
                 request,
                 signal
             });

package/dist/modelManagement/ModelManager.d.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import { LLMEngine, LLMModel } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import EventEmitter from "eventemitter3";
-import { RequestInit, Response } from "undici";
+import { Response } from "undici";
+import type { RequestInit } from "undici";
 import { type ModelDownloadProgressUpdate } from "./download.js";
 interface ModelManagerEvents {
     engineError: (error: Error) => void;

package/dist/modelManagement/quantization.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export declare function matchesQuantizationVariant({ filePath, variant }: {
+    filePath: string;
+    variant: string;
+}): boolean;

package/dist/sse/requestProxy.d.ts CHANGED Viewed

@@ -1,10 +1,9 @@
 import type { APIResponse, ServerToClientAPIRequest } from "@infersec/definitions";
+import { Logger } from "@infersec/logger";
 import type { Configuration } from "../configuration.js";
-/**
- * Proxy server requests to the local inference HTTP server.
- */
-export declare function proxyRequest({ configuration, request, signal }: {
+export declare function proxyRequest({ configuration, logger, request, signal }: {
     configuration: Configuration;
+    logger: Logger;
     request: ServerToClientAPIRequest;
     signal?: AbortSignal;
 }): Promise<APIResponse>;

package/dist/utils/docker.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function ensureDockerValidEnv(): void;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.52.0",
+  "version": "1.53.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },