npm - @infersec/conduit - Version diff - 1.73.0 → 1.74.0 - Mend

@infersec/conduit 1.73.0 → 1.74.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (4) hide show

package/dist/cli.js +258 -13
package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts +30 -0
package/dist/utils/openai.d.ts +17 -0
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -19921,6 +19921,7 @@ const LLMModelFormatSchema = _enum([
     // Llama.cpp
     "gguf"
 ]);
+const LLMModelTaskTypeSchema = _enum(["text-generation", "embeddings"]);
 const LLMModelSchema = object({
     format: LLMModelFormatSchema,
     id: string$1().min(1),
@@ -19935,7 +19936,8 @@ const LLMModelSchema = object({
             slug: string$1().min(1),
             type: literal("huggingface")
         })
-    ])
+    ]),
+    taskType: LLMModelTaskTypeSchema
 });
 object({
     filePath: string$1().min(1),
@@ -20643,6 +20645,34 @@ const CompletionCreateParamsSchema = object({
     top_p: number$1().min(0).max(1).nullable().optional(),
     user: string$1().optional()
 });
+// ==================== EMBEDDINGS ====================
+const EmbeddingCreateParamsSchema = object({
+    dimensions: number$1().int().positive().nullable().optional(),
+    encoding_format: _enum(["float", "base64"]).nullable().optional(),
+    input: union([
+        string$1(),
+        array(string$1()),
+        array(number$1()),
+        array(array(number$1()))
+    ]),
+    model: string$1(),
+    user: string$1().optional()
+});
+const EmbeddingUsageSchema = object({
+    prompt_tokens: number$1(),
+    total_tokens: number$1()
+});
+const EmbeddingDataSchema = object({
+    embedding: array(number$1()),
+    index: number$1(),
+    object: literal("embedding")
+});
+object({
+    data: array(EmbeddingDataSchema),
+    model: string$1(),
+    object: literal("list"),
+    usage: EmbeddingUsageSchema
+});
 const API_CLIENT_CONDUIT_GENERAL_REFERENCE = {
     "/conduit/engine/start": {
@@ -20708,6 +20738,17 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
             }
         }
     },
+    "/v1/embeddings": {
+        POST: {
+            auth: {
+                type: "shared-secret"
+            },
+            body: EmbeddingCreateParamsSchema,
+            response: {
+                type: "text-stream"
+            }
+        }
+    },
     "/v1/models": {
         GET: {
             auth: {
@@ -20743,6 +20784,12 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
                 endpointID: ULIDSchema.describe("Endpoint identifier")
             }}
     },
+    "/api/inferencing/:endpointID/oai/v1/embeddings": {
+        POST: {
+            parameters: {
+                endpointID: ULIDSchema.describe("Endpoint identifier")
+            }}
+    },
     "/api/inferencing/:endpointID/oai/v1/models": {
         GET: {
             parameters: {
@@ -20771,7 +20818,8 @@ object({
         .min(3)
         .refine(value => value.includes("/"), {
         message: "Slug must be fully qualified (owner/repo)"
-    })
+    }),
+    taskType: LLMModelTaskTypeSchema.optional()
 });
 object({
     results: array(object({
@@ -20782,6 +20830,7 @@ object({
         name: string$1(),
         provider: _enum(["storage", "huggingface"]),
         providerSlug: string$1(),
+        taskType: LLMModelTaskTypeSchema,
         updated: string$1()
     }))
 });
@@ -20802,11 +20851,13 @@ object({
         name: string$1(),
         updated: string$1()
     })),
+    taskType: LLMModelTaskTypeSchema,
     updated: string$1()
 });
 object({
+    multimodalEnabled: boolean$1().optional(),
     name: ResourceNameSchema.optional(),
-    multimodalEnabled: boolean$1().optional()
+    taskType: LLMModelTaskTypeSchema.optional()
 });
 object({
     success: literal(true)
@@ -20851,7 +20902,8 @@ object({
         modelFormat: LLMModelFormatSchema,
         name: string$1(),
         provider: _enum(["storage", "huggingface"]),
-        providerSlug: string$1()
+        providerSlug: string$1(),
+        taskType: LLMModelTaskTypeSchema
     })
         .nullable(),
     modelQuantizationLabel: string$1().nullable(),
@@ -114830,6 +114882,9 @@ async function startVLLM({ enginePort, targetDirectory }) {
         "--tensor-parallel-size",
         String(tensorParallelSize)
     ];
+    if (this.model.taskType === "embeddings") {
+        args.push("--task", "embed");
+    }
     if (device) {
         args.push("--device", device);
     }
@@ -116583,6 +116638,9 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
         "--ctx-size",
         String(contextLength)
     ];
+    if (this.model.taskType === "embeddings") {
+        args.push("--embedding");
+    }
     const gpuLayers = typeof engineConfig?.gpuLayers === "number"
         ? engineConfig.gpuLayers
         : Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
@@ -117688,6 +117746,153 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
     }
     return Math.round(tokensPerSecond);
 }
+async function proxyEmbeddingsRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
+    function normalizeTokenCount(value) {
+        if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
+            return value;
+        }
+        return 0;
+    }
+    function reportMetricsSafe(payload) {
+        reportMetrics(payload).catch(error => {
+            logger.warn("Failed to upload LLM prompt metrics", {
+                error: asError(error),
+                requestUrl: "/v1/embeddings"
+            });
+        });
+    }
+    const engineType = conduitConfiguration.engineConfig?.type ?? null;
+    const engineConfig = conduitConfiguration.engineConfig?.config ?? null;
+    const serializedBody = isPlainObject$2(body)
+        ? JSON.stringify(body)
+        : typeof body === "string"
+            ? body
+            : JSON.stringify(body);
+    const requestBodyBytes = Buffer.byteLength(serializedBody, "utf8");
+    const requestStartedAt = Date.now();
+    let upstreamResponseOk = true;
+    const onMonitoringComplete = ({ durationMs, error, responseBytes, usage }) => {
+        const promptTokens = normalizeTokenCount(usage?.promptTokens);
+        const totalTokens = normalizeTokenCount(usage?.totalTokens ?? promptTokens);
+        const latencyMs = Math.max(0, durationMs);
+        reportMetricsSafe({
+            bytes: requestBodyBytes + responseBytes,
+            completionTokens: 0,
+            engine: engineType,
+            endpointId: endpointId ?? null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: "/v1/embeddings",
+            responseBytes,
+            successful: upstreamResponseOk && !error,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: calculateTokensPerSecond$2({
+                durationMs: latencyMs,
+                totalTokens
+            }),
+            totalTokens
+        });
+    };
+    const response = await modelManager
+        .fetchOpenAI("/v1/embeddings", {
+        body: serializedBody,
+        headers: {
+            "Content-Type": "application/json"
+        },
+        method: "POST",
+        signal
+    })
+        .catch(error => {
+        const err = asError(error);
+        logEngineMetrics({
+            agentEngineType: engineType ?? "unknown",
+            error: err,
+            level: "error",
+            logger,
+            requestBodyBytes,
+            requestPath: "/v1/embeddings",
+            responseBytes: 0,
+            usage: null
+        });
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        reportMetricsSafe({
+            bytes: requestBodyBytes,
+            completionTokens: 0,
+            engine: engineType,
+            endpointId: endpointId ?? null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: "/v1/embeddings",
+            responseBytes: 0,
+            successful: false,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: 0,
+            totalTokens: 0
+        });
+        throw err;
+    });
+    upstreamResponseOk = response.ok;
+    const responseStatusText = response.statusText ?? "Upstream request failed";
+    if (!response.body) {
+        logEngineMetrics({
+            agentEngineType: engineType ?? "unknown",
+            level: response.ok ? "info" : "error",
+            logger,
+            requestBodyBytes,
+            requestPath: "/v1/embeddings",
+            responseBytes: 0,
+            usage: null
+        });
+        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
+        reportMetricsSafe({
+            bytes: requestBodyBytes,
+            completionTokens: 0,
+            engine: engineType,
+            endpointId: endpointId ?? null,
+            latencyMs,
+            modelId: modelID,
+            promptTokens: 0,
+            requestBytes: requestBodyBytes,
+            requestId: null,
+            requestMethod: "POST",
+            requestPath: "/v1/embeddings",
+            responseBytes: 0,
+            successful: false,
+            timeToFirstTokenMs: null,
+            tokensPerSecond: 0,
+            totalTokens: 0
+        });
+        return {
+            status: response.status,
+            statusText: responseStatusText
+        };
+    }
+    const monitoredResponse = monitorEngineResponseSingle({
+        agentEngineType: engineType ?? "unknown",
+        body: Readable.fromWeb(response.body),
+        contextLength: modelManager.contextLength,
+        engineConfig,
+        engineType: engineType ?? "unknown",
+        logger,
+        onComplete: onMonitoringComplete,
+        requestBodyBytes,
+        requestPath: "/v1/embeddings",
+        requestStartedAt
+    });
+    return {
+        body: monitoredResponse.stream,
+        headers: Object.fromEntries(response.headers.entries()),
+        status: response.status
+    };
+}
 async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
     function normalizeTokenCount(value) {
         if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
@@ -117710,6 +117915,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
     const requestStartedAt = Date.now();
     const requestBody = JSON.parse(serializedBody);
     const streamRequested = requestBody.stream === true;
+    let upstreamResponseOk = true;
     const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
         const completionTokens = normalizeTokenCount(usage?.completionTokens);
         const promptTokens = normalizeTokenCount(usage?.promptTokens);
@@ -117728,7 +117934,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
             requestMethod: "POST",
             requestPath: path,
             responseBytes,
-            successful: !error,
+            successful: upstreamResponseOk && !error,
             timeToFirstTokenMs,
             tokensPerSecond: calculateTokensPerSecond$2({
                 durationMs: latencyMs,
@@ -117779,6 +117985,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
         });
         throw err;
     });
+    upstreamResponseOk = response.ok;
     const responseStatusText = response.statusText ?? "Upstream request failed";
     if (!response.ok) {
         if (!response.body) {
@@ -117923,6 +118130,26 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfigurati
                 });
             }
         },
+        "/v1/embeddings": {
+            POST: async ({ body, req, res }) => {
+                const modelID = getModelID();
+                const modelManager = getModelManager();
+                const abortController = new AbortController();
+                res.on("close", () => {
+                    abortController.abort();
+                });
+                return proxyEmbeddingsRoute({
+                    body,
+                    conduitConfiguration: conduitConfiguration(),
+                    endpointId: extractEndpointId$1(req),
+                    logger,
+                    modelID,
+                    modelManager,
+                    reportMetrics: apiClient.reportPromptMetrics,
+                    signal: abortController.signal
+                });
+            }
+        },
         "/v1/models": {
             GET: async () => {
                 const modelManager = getModelManager();
@@ -117962,6 +118189,9 @@ function createPostChatCompletionsHandler(options) {
 function createPostCompletionsHandler(options) {
     return createConduitOpenAIAPIReferenceHandlers(options)["/v1/completions"].POST;
 }
+function createPostEmbeddingsHandler(options) {
+    return createConduitOpenAIAPIReferenceHandlers(options)["/v1/embeddings"].POST;
+}
 function isPlainObject$1(value) {
     return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -128707,6 +128937,17 @@ async function createApplication({ abortController, apiClient, configuration, lo
                     startup
                 })
             },
+            "/v1/embeddings": {
+                POST: createPostEmbeddingsHandler({
+                    apiClient,
+                    conduitConfiguration: () => conduitConfiguration,
+                    configuration,
+                    getModelID: () => conduitConfiguration.targetModel.id,
+                    getModelManager: () => modelManager,
+                    logger,
+                    startup
+                })
+            },
             "/v1/models": {
                 GET: createGetModelsHandler({
                     apiClient,
@@ -129711,8 +129952,9 @@ class HuggingFaceClient {
                 }
             }
         }
-        const seenIds = new Set();
-        const models = [];
+        const taskPriority = new Map();
+        pipelineTasks.forEach((task, index) => taskPriority.set(task, index));
+        const modelsById = new Map();
         await Promise.all(queries.map(async ({ task, tag }) => {
             const searchParams = {
                 accessToken: this.apiKey ?? undefined,
@@ -129724,9 +129966,6 @@ class HuggingFaceClient {
                 }
             };
             for await (const entry of executeListWithRetry(searchParams)) {
-                if (seenIds.has(entry.id)) {
-                    continue;
-                }
                 const entryForUtils = {
                     config: entry.config,
                     gated: entry.gated,
@@ -129742,10 +129981,15 @@ class HuggingFaceClient {
                 if (targetFormats.length > 0 && !targetFormats.includes(format)) {
                     continue;
                 }
-                seenIds.add(entry.id);
+                const existing = modelsById.get(entry.id);
+                if (existing &&
+                    (taskPriority.get(task) ?? Number.MAX_SAFE_INTEGER) >=
+                        (taskPriority.get(existing.pipelineTask) ?? Number.MAX_SAFE_INTEGER)) {
+                    continue;
+                }
                 const parameterCount = parseParameterCount(entry.id, entry.safetensors?.parameters);
                 const slug = entry.name?.trim() || entry.id;
-                models.push({
+                modelsById.set(entry.id, {
                     downloads: entry.downloads,
                     format,
                     gated: entry.gated || false,
@@ -129753,13 +129997,14 @@ class HuggingFaceClient {
                     likes: entry.likes,
                     name: entry.name || entry.id,
                     parameterCount,
+                    pipelineTask: task,
                     quantization: extractQuantization(entryForUtils),
                     slug,
                     updatedAt: entry.updatedAt
                 });
             }
         }));
-        return models;
+        return Array.from(modelsById.values());
     }
 }

package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts CHANGED Viewed

@@ -209,4 +209,34 @@ export declare function createPostCompletionsHandler(options: {
     status: number;
     statusText: string;
 }>;
+export declare function createPostEmbeddingsHandler(options: {
+    apiClient: APIClient;
+    conduitConfiguration: () => InferenceAgentConfiguration;
+    configuration: Configuration;
+    getModelID: () => string;
+    getModelManager: () => ModelManager;
+    logger: Logger;
+    startup: number;
+}): (params: {
+    req: APIRequest;
+    res: import("@infersec/fetch").APIResponse;
+    parameters: Record<string, never>;
+    query: Record<string, never>;
+    body: {
+        input: string | number[] | string[] | number[][];
+        model: string;
+        dimensions?: number | null | undefined;
+        encoding_format?: "base64" | "float" | null | undefined;
+        user?: string | undefined;
+    };
+    responseSchema: undefined;
+}) => Promise<{
+    body: import("stream").Readable;
+    headers?: Record<string, string>;
+    status: number;
+} | {
+    headers?: Record<string, string>;
+    status: number;
+    statusText: string;
+}>;
 export {};

package/dist/utils/openai.d.ts CHANGED Viewed

@@ -3,6 +3,23 @@ import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID
 import { Logger } from "@infersec/logger";
 import { Configuration } from "../configuration.js";
 import { ModelManager } from "../modelManagement/ModelManager.js";
+export declare function proxyEmbeddingsRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
+    body: unknown;
+    conduitConfiguration: InferenceAgentConfiguration;
+    endpointId?: ULID | null;
+    logger: Logger;
+    modelID: ULID;
+    modelManager: ModelManager;
+    reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
+    signal?: AbortSignal;
+}): Promise<{
+    body: Readable;
+    headers: Record<string, string>;
+    status: number;
+} | {
+    status: number;
+    statusText: string;
+}>;
 export declare function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
     body: unknown;
     conduitConfiguration: InferenceAgentConfiguration;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.73.0",
+  "version": "1.74.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },