npm - @infersec/conduit - Versions diffs - 1.17.5 → 1.19.0 - Mend

@infersec/conduit 1.17.5 → 1.19.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/dist/sse/handler.d.ts +4 -2
package/dist/{start-BDCrsqSt.js → start-CopKwPN6.js} +247 -60
package/dist/utils/engineMetrics.d.ts +6 -3
package/dist/utils/openai.d.ts +3 -2
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
 import { parseArgs } from 'node:util';
 import 'node:crypto';
-import { a as asError, s as startInferenceAgent } from './start-BDCrsqSt.js';
+import { a as asError, s as startInferenceAgent } from './start-CopKwPN6.js';
 import 'argon2';
 import 'node:child_process';
 import 'node:stream';

package/dist/index.js CHANGED Viewed

@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
 const __dirname = __pathDirname(__filename);
 import 'node:crypto';
-import { s as startInferenceAgent, a as asError } from './start-BDCrsqSt.js';
+import { s as startInferenceAgent, a as asError } from './start-CopKwPN6.js';
 import 'argon2';
 import 'node:child_process';
 import 'node:stream';

package/dist/sse/handler.d.ts CHANGED Viewed

@@ -1,12 +1,14 @@
-import { type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
+import { InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import { Configuration } from "../configuration.js";
-export declare function handleSSERequests({ apiURL, configuration, logger, onRequest, onRequestEnd, onRequestStart, signal }: {
+export declare function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
     apiURL: string;
     configuration: Configuration;
     logger: Logger;
+    modelID: ULID;
     onRequest: (request: ServerToClientAPIRequest) => Promise<APIResponse>;
     onRequestEnd?: (request: ServerToClientAPIRequest) => Promise<void> | void;
     onRequestStart?: (request: ServerToClientAPIRequest) => Promise<void> | void;
+    reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
     signal?: AbortSignal;
 }): Promise<void>;

package/dist/{start-BDCrsqSt.js → start-CopKwPN6.js} RENAMED Viewed

@@ -14659,11 +14659,54 @@ custom((data) => {
     return result.success;
 }, "Invalid API reference structure");
+const LLMEngineSchema = _enum(["llama.cpp", "vllm"]);
+const LLMModelFormatSchema = _enum([
+    // VLLM
+    "safetensors",
+    "pytorch",
+    "awq",
+    "gptq",
+    // Llama.cpp
+    "gguf"
+]);
+const LLMModelSchema = object({
+    format: LLMModelFormatSchema,
+    id: string$1().min(1),
+    multimodalEnabled: boolean$1(),
+    source: discriminatedUnion("type", [
+        object({
+            irid: IRIDSchema,
+            type: literal("storage")
+        }),
+        object({
+            modelSecret: string$1().min(1).nullable(),
+            slug: string$1().min(1),
+            type: literal("huggingface")
+        })
+    ])
+});
+const QuantizationFileSchema = object({
+    filePath: string$1().min(1),
+    sizeBytes: number$1().int().nonnegative().nullable()
+});
+array(QuantizationFileSchema);
 const InferenceAgentLLMMetricsPayloadSchema = object({
     bytes: number$1().int().nonnegative(),
     completionTokens: number$1().int().nonnegative(),
+    engine: LLMEngineSchema.nullable(),
+    endpointId: ULIDSchema.nullable(),
+    latencyMs: number$1().int().nonnegative(),
+    modelId: ULIDSchema.nullable(),
     promptTokens: number$1().int().nonnegative(),
+    requestBytes: number$1().int().nonnegative(),
+    requestId: ULIDSchema.nullable(),
+    requestMethod: string$1().nullable(),
+    requestPath: string$1().nullable(),
+    responseBytes: number$1().int().nonnegative(),
     successful: boolean$1(),
+    timeToFirstTokenMs: number$1().int().nonnegative().nullable(),
+    tokensPerSecond: number$1().int().nonnegative(),
     totalTokens: number$1().int().nonnegative()
 });
 const InferenceAgentMachineGPUSchema = object({
@@ -14765,38 +14808,6 @@ const ConduitState = z.preprocess(value => {
     return value;
 }, ConduitStateSchema);
-const LLMEngineSchema = _enum(["llama.cpp", "vllm"]);
-const LLMModelFormatSchema = _enum([
-    // VLLM
-    "safetensors",
-    "pytorch",
-    "awq",
-    "gptq",
-    // Llama.cpp
-    "gguf"
-]);
-const LLMModelSchema = object({
-    format: LLMModelFormatSchema,
-    id: string$1().min(1),
-    multimodalEnabled: boolean$1(),
-    source: discriminatedUnion("type", [
-        object({
-            irid: IRIDSchema,
-            type: literal("storage")
-        }),
-        object({
-            modelSecret: string$1().min(1).nullable(),
-            slug: string$1().min(1),
-            type: literal("huggingface")
-        })
-    ])
-});
-const QuantizationFileSchema = object({
-    filePath: string$1().min(1),
-    sizeBytes: number$1().int().nonnegative().nullable()
-});
-array(QuantizationFileSchema);
 const InferenceAgentConfigurationSchema = object({
     contextLength: number$1().int().positive().nullable(),
     inferenceSourceID: ULIDSchema,
@@ -108293,7 +108304,7 @@ function sleep(ms) {
     });
 }
-async function handleSSERequests({ apiURL, configuration, logger, onRequest, onRequestEnd, onRequestStart, signal }) {
+async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
     const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
     const maxReconnectDelayMs = 30000;
     let reconnectAttempt = 0;
@@ -108318,9 +108329,11 @@ async function handleSSERequests({ apiURL, configuration, logger, onRequest, onR
                         apiURL,
                         configuration,
                         logger,
+                        modelID,
                         onRequest,
                         onRequestEnd,
                         onRequestStart,
+                        reportMetrics,
                         request: payload
                     }).catch(error => {
                         logger.error("SSE request handler failed", {
@@ -108350,28 +108363,71 @@ async function handleSSERequests({ apiURL, configuration, logger, onRequest, onR
         await sleep(reconnectDelayMs);
     }
 }
-async function handleRequest({ apiURL, configuration, logger, onRequest, onRequestEnd, onRequestStart, request }) {
+async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request }) {
+    function reportMetricsSafe(payload) {
+        reportMetrics(payload).catch(error => {
+            logger.warn("Failed to upload LLM prompt metrics", {
+                error: asError(error),
+                requestUrl: request.path
+            });
+        });
+    }
+    const requestStartedAt = Date.now();
+    const requestBytes = calculateRequestBytes(request.body ?? null);
     try {
         await onRequestStart?.(request);
         const response = await onRequest(request);
-        await streamResponse({
+        const responseMetrics = await streamResponse({
             apiURL,
             configuration,
             logger,
             requestID: request.requestID,
+            requestStartedAt,
             response
         });
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        const totalTokens = 0;
+        const tokensPerSecond = calculateTokensPerSecond$1({
+            durationMs: latencyMs,
+            totalTokens
+        });
+        reportMetricsSafe({
+            bytes: requestBytes + responseMetrics.responseBytes,
+            completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes,
+            requestId: request.requestID,
+            requestMethod: request.method,
+            requestPath: request.path,
+            responseBytes: responseMetrics.responseBytes,
+            successful: responseMetrics.status < 400,
+            timeToFirstTokenMs: responseMetrics.timeToFirstTokenMs,
+            tokensPerSecond,
+            totalTokens
+        });
     }
     catch (error) {
         logger.error("SSE request failed", {
             error: asError(error),
             requestMethod: request.requestID
         });
+        const failureMessage = "Bad gateway\n\nProxying failed";
+        const failureBytes = Buffer.byteLength(failureMessage, "utf8");
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        const totalTokens = 0;
+        const tokensPerSecond = calculateTokensPerSecond$1({
+            durationMs: latencyMs,
+            totalTokens
+        });
         await postChunk({
             apiURL,
             configuration,
             payload: {
-                data: encodeTextChunk("Bad gateway\n\nProxying failed"),
+                data: encodeTextChunk(failureMessage),
                 sequence: 0,
                 status: 502
             },
@@ -108387,16 +108443,40 @@ async function handleRequest({ apiURL, configuration, logger, onRequest, onReque
             },
             requestID: request.requestID
         });
+        reportMetricsSafe({
+            bytes: requestBytes + failureBytes,
+            completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes,
+            requestId: request.requestID,
+            requestMethod: request.method,
+            requestPath: request.path,
+            responseBytes: failureBytes,
+            successful: false,
+            timeToFirstTokenMs: latencyMs,
+            tokensPerSecond,
+            totalTokens
+        });
     }
     finally {
         await onRequestEnd?.(request);
     }
 }
-async function streamResponse({ apiURL, configuration, logger, requestID, response }) {
+async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response }) {
     let sequence = 0;
+    let responseBytes = 0;
+    let timeToFirstTokenMs = null;
     if (response.body instanceof Readable) {
         for await (const chunk of response.body) {
             const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+            if (timeToFirstTokenMs === null) {
+                timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
+            }
+            responseBytes += buffer.length;
             await postChunk({
                 apiURL,
                 configuration,
@@ -108419,17 +108499,26 @@ async function streamResponse({ apiURL, configuration, logger, requestID, respon
             },
             requestID
         });
-        return;
+        return {
+            responseBytes,
+            status: response.status,
+            timeToFirstTokenMs
+        };
+    }
+    const responsePayload = response.body
+        ? typeof response.body === "string"
+            ? response.body
+            : JSON.stringify(response.body)
+        : "";
+    if (responsePayload.length > 0) {
+        responseBytes = Buffer.byteLength(responsePayload, "utf8");
+        timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
     }
     await postChunk({
         apiURL,
         configuration,
         payload: {
-            data: encodeTextChunk(response.body
-                ? typeof response.body === "string"
-                    ? response.body
-                    : JSON.stringify(response.body)
-                : ""),
+            data: encodeTextChunk(responsePayload),
             headers: response.headers,
             sequence,
             status: response.status
@@ -108449,6 +108538,11 @@ async function streamResponse({ apiURL, configuration, logger, requestID, respon
     logger.info("SSE response queued", {
         requestMethod: requestID
     });
+    return {
+        responseBytes,
+        status: response.status,
+        timeToFirstTokenMs
+    };
 }
 async function postChunk({ apiURL, configuration, payload, requestID }) {
     const response = ClientToServerAPIResponseSchema.parse({
@@ -108475,6 +108569,25 @@ function encodeTextChunk(chunk) {
     }
     return `data:text/plain;base64,${Buffer.from(chunk, "utf-8").toString("base64")}`;
 }
+function calculateRequestBytes(body) {
+    if (body === null || body === undefined) {
+        return 0;
+    }
+    if (typeof body === "string") {
+        return Buffer.byteLength(body, "utf8");
+    }
+    return Buffer.byteLength(JSON.stringify(body), "utf8");
+}
+function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
+    if (durationMs <= 0) {
+        return 0;
+    }
+    const tokensPerSecond = totalTokens / (durationMs / 1000);
+    if (!Number.isFinite(tokensPerSecond) || tokensPerSecond <= 0) {
+        return 0;
+    }
+    return Math.round(tokensPerSecond);
+}
 /**
  * Proxy server requests to the local inference HTTP server.
@@ -117837,9 +117950,11 @@ function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBy
     }
     logger[level](metricsMessage, attributes);
 }
-function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath }) {
+function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
+    const startedAt = requestStartedAt ?? Date.now();
     const passThrough = new PassThrough();
     let responseBytes = 0;
+    let firstChunkAt = null;
     let usage = null;
     let buffer = "";
     let completed = false;
@@ -117849,12 +117964,22 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
         }
         completed = true;
         if (onComplete) {
-            onComplete({
+            const completion = onComplete({
+                durationMs: Math.max(0, Date.now() - startedAt),
                 error,
                 requestBodyBytes,
                 responseBytes,
+                timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
                 usage
             });
+            if (completion && typeof completion.catch === "function") {
+                completion.catch(error => {
+                    logger.error("Engine metrics completion failed", {
+                        error: asError(error),
+                        requestUrl: requestPath
+                    });
+                });
+            }
         }
     }
     function parseUsageFromBuffer() {
@@ -117885,6 +118010,9 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
         }
     }
     body.on("data", (chunk) => {
+        if (firstChunkAt === null) {
+            firstChunkAt = Date.now();
+        }
         responseBytes += chunk.length;
         buffer += chunk.toString("utf8");
         parseUsageFromBuffer();
@@ -117970,25 +118098,33 @@ function serializeRequestBody(body) {
         payload
     };
 }
-async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelManager, path, reportMetrics }) {
+function calculateTokensPerSecond({ durationMs, totalTokens }) {
+    if (durationMs <= 0) {
+        return 0;
+    }
+    const tokensPerSecond = totalTokens / (durationMs / 1000);
+    if (!Number.isFinite(tokensPerSecond) || tokensPerSecond <= 0) {
+        return 0;
+    }
+    return Math.round(tokensPerSecond);
+}
+async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
     function normalizeTokenCount(value) {
         if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
             return value;
         }
         return 0;
     }
-    async function safeReportMetrics(payload) {
-        try {
-            await reportMetrics(payload);
-        }
-        catch (error) {
+    function reportMetricsSafe(payload) {
+        reportMetrics(payload).catch(error => {
             logger.warn("Failed to upload LLM prompt metrics", {
-                error: error,
+                error: asError(error),
                 requestUrl: path
             });
-        }
+        });
     }
     const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
+    const requestStartedAt = Date.now();
     const response = await modelManager
         .fetchOpenAI(path, {
         body: serializedBody,
@@ -118008,11 +118144,23 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelMan
             responseBytes: 0,
             usage: null
         });
-        void safeReportMetrics({
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        reportMetricsSafe({
             bytes: requestBodyBytes,
             completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
             promptTokens: 0,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: path,
+            responseBytes: 0,
             successful: false,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: 0,
             totalTokens: 0
         });
         throw error;
@@ -118027,10 +118175,17 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelMan
             error: responseError,
             requestUrl: path,
             statusCode: response.status,
-            statusText: responseStatusText
+            statusText: responseStatusText,
+            responseBody: responseBody ?? undefined
         });
+        if (!response.body) {
+            return {
+                status: response.status,
+                statusText: responseStatusText
+            };
+        }
     }
-    if (!response.body || !response.ok) {
+    if (!response.body) {
         logEngineMetrics({
             agentEngineType: configuration.agentEngineType,
             level: response.ok ? "info" : "error",
@@ -118040,11 +118195,23 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelMan
             responseBytes: 0,
             usage: null
         });
-        void safeReportMetrics({
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        reportMetricsSafe({
             bytes: requestBodyBytes,
             completionTokens: 0,
+            engine: configuration.agentEngineType,
+            endpointId: null,
+            latencyMs,
+            modelId: modelID,
             promptTokens: 0,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: path,
+            responseBytes: 0,
             successful: false,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: 0,
             totalTokens: 0
         });
         return {
@@ -118056,20 +118223,36 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelMan
         agentEngineType: configuration.agentEngineType,
         body: Readable.fromWeb(response.body),
         logger,
-        onComplete: ({ error, responseBytes, usage }) => {
+        onComplete: ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
             const completionTokens = normalizeTokenCount(usage?.completionTokens);
             const promptTokens = normalizeTokenCount(usage?.promptTokens);
             const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
-            void safeReportMetrics({
+            const latencyMs = Math.max(0, durationMs);
+            reportMetricsSafe({
                 bytes: requestBodyBytes + responseBytes,
                 completionTokens,
+                engine: configuration.agentEngineType,
+                endpointId: null,
+                latencyMs,
+                modelId: modelID,
                 promptTokens,
+                requestBytes: requestBodyBytes,
+                requestId: null,
+                requestMethod: "POST",
+                requestPath: path,
+                responseBytes,
                 successful: !error,
+                timeToFirstTokenMs,
+                tokensPerSecond: calculateTokensPerSecond({
+                    durationMs: latencyMs,
+                    totalTokens
+                }),
                 totalTokens
             });
         },
         requestBodyBytes,
-        requestPath: path
+        requestPath: path,
+        requestStartedAt
     });
     return {
         body: monitoredResponse.stream,
@@ -118176,6 +118359,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
                         body,
                         configuration,
                         logger,
+                        modelID: conduitConfiguration.targetModel.id,
                         modelManager,
                         path: "/v1/chat/completions",
                         reportMetrics: apiClient.reportPromptMetrics
@@ -118188,6 +118372,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
                         body,
                         configuration,
                         logger,
+                        modelID: conduitConfiguration.targetModel.id,
                         modelManager,
                         path: "/v1/completions",
                         reportMetrics: apiClient.reportPromptMetrics
@@ -118247,6 +118432,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
         apiURL: configuration.apiURL,
         configuration,
         logger,
+        modelID: conduitConfiguration.targetModel.id,
         onRequest: async (request) => {
             return proxyRequest({
                 configuration,
@@ -118265,6 +118451,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
                 setOnlineState();
             }
         },
+        reportMetrics: apiClient.reportPromptMetrics,
         signal: abortController.signal
     }).catch(error => {
         logger.error("SSE handler failed", {

package/dist/utils/engineMetrics.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { Readable } from "node:stream";
-import { Logger } from "@infersec/logger";
 import { LLMEngine } from "@infersec/definitions";
+import { Logger } from "@infersec/logger";
 export interface EngineUsageMetrics {
     completionTokens: number | null;
     promptTokens: number | null;
@@ -13,14 +13,17 @@ interface EngineMetricsLoggerOptions {
     requestPath: string;
 }
 interface EngineMetricsCompletion {
+    durationMs: number;
     error: Error | null;
     requestBodyBytes: number;
     responseBytes: number;
+    timeToFirstTokenMs: number | null;
     usage: EngineUsageMetrics | null;
 }
 interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
     body: Readable;
-    onComplete?: (result: EngineMetricsCompletion) => void;
+    onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
+    requestStartedAt?: number;
 }
 interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
     error?: Error;
@@ -32,5 +35,5 @@ interface MonitorEngineResponseResult {
     stream: Readable;
 }
 export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
-export declare function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
+export declare function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
 export {};

package/dist/utils/openai.d.ts CHANGED Viewed

@@ -1,12 +1,13 @@
 import { Readable } from "node:stream";
-import { InferenceAgentLLMMetricsPayload } from "@infersec/definitions";
+import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
 import { Logger } from "@infersec/logger";
 import { Configuration } from "../configuration.js";
 import { ModelManager } from "../modelManagement/ModelManager.js";
-export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelManager, path, reportMetrics }: {
+export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }: {
     body: unknown;
     configuration: Configuration;
     logger: Logger;
+    modelID: ULID;
     modelManager: ModelManager;
     path: "/v1/chat/completions" | "/v1/completions";
     reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.17.5",
+  "version": "1.19.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },