@infersec/conduit 1.73.0 → 1.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -19921,6 +19921,7 @@ const LLMModelFormatSchema = _enum([
19921
19921
  // Llama.cpp
19922
19922
  "gguf"
19923
19923
  ]);
19924
+ const LLMModelTaskTypeSchema = _enum(["text-generation", "embeddings"]);
19924
19925
  const LLMModelSchema = object({
19925
19926
  format: LLMModelFormatSchema,
19926
19927
  id: string$1().min(1),
@@ -19935,7 +19936,8 @@ const LLMModelSchema = object({
19935
19936
  slug: string$1().min(1),
19936
19937
  type: literal("huggingface")
19937
19938
  })
19938
- ])
19939
+ ]),
19940
+ taskType: LLMModelTaskTypeSchema
19939
19941
  });
19940
19942
  object({
19941
19943
  filePath: string$1().min(1),
@@ -20643,6 +20645,34 @@ const CompletionCreateParamsSchema = object({
20643
20645
  top_p: number$1().min(0).max(1).nullable().optional(),
20644
20646
  user: string$1().optional()
20645
20647
  });
20648
+ // ==================== EMBEDDINGS ====================
20649
+ const EmbeddingCreateParamsSchema = object({
20650
+ dimensions: number$1().int().positive().nullable().optional(),
20651
+ encoding_format: _enum(["float", "base64"]).nullable().optional(),
20652
+ input: union([
20653
+ string$1(),
20654
+ array(string$1()),
20655
+ array(number$1()),
20656
+ array(array(number$1()))
20657
+ ]),
20658
+ model: string$1(),
20659
+ user: string$1().optional()
20660
+ });
20661
+ const EmbeddingUsageSchema = object({
20662
+ prompt_tokens: number$1(),
20663
+ total_tokens: number$1()
20664
+ });
20665
+ const EmbeddingDataSchema = object({
20666
+ embedding: array(number$1()),
20667
+ index: number$1(),
20668
+ object: literal("embedding")
20669
+ });
20670
+ object({
20671
+ data: array(EmbeddingDataSchema),
20672
+ model: string$1(),
20673
+ object: literal("list"),
20674
+ usage: EmbeddingUsageSchema
20675
+ });
20646
20676
 
20647
20677
  const API_CLIENT_CONDUIT_GENERAL_REFERENCE = {
20648
20678
  "/conduit/engine/start": {
@@ -20708,6 +20738,17 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
20708
20738
  }
20709
20739
  }
20710
20740
  },
20741
+ "/v1/embeddings": {
20742
+ POST: {
20743
+ auth: {
20744
+ type: "shared-secret"
20745
+ },
20746
+ body: EmbeddingCreateParamsSchema,
20747
+ response: {
20748
+ type: "text-stream"
20749
+ }
20750
+ }
20751
+ },
20711
20752
  "/v1/models": {
20712
20753
  GET: {
20713
20754
  auth: {
@@ -20743,6 +20784,12 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
20743
20784
  endpointID: ULIDSchema.describe("Endpoint identifier")
20744
20785
  }}
20745
20786
  },
20787
+ "/api/inferencing/:endpointID/oai/v1/embeddings": {
20788
+ POST: {
20789
+ parameters: {
20790
+ endpointID: ULIDSchema.describe("Endpoint identifier")
20791
+ }}
20792
+ },
20746
20793
  "/api/inferencing/:endpointID/oai/v1/models": {
20747
20794
  GET: {
20748
20795
  parameters: {
@@ -20771,7 +20818,8 @@ object({
20771
20818
  .min(3)
20772
20819
  .refine(value => value.includes("/"), {
20773
20820
  message: "Slug must be fully qualified (owner/repo)"
20774
- })
20821
+ }),
20822
+ taskType: LLMModelTaskTypeSchema.optional()
20775
20823
  });
20776
20824
  object({
20777
20825
  results: array(object({
@@ -20782,6 +20830,7 @@ object({
20782
20830
  name: string$1(),
20783
20831
  provider: _enum(["storage", "huggingface"]),
20784
20832
  providerSlug: string$1(),
20833
+ taskType: LLMModelTaskTypeSchema,
20785
20834
  updated: string$1()
20786
20835
  }))
20787
20836
  });
@@ -20802,11 +20851,13 @@ object({
20802
20851
  name: string$1(),
20803
20852
  updated: string$1()
20804
20853
  })),
20854
+ taskType: LLMModelTaskTypeSchema,
20805
20855
  updated: string$1()
20806
20856
  });
20807
20857
  object({
20858
+ multimodalEnabled: boolean$1().optional(),
20808
20859
  name: ResourceNameSchema.optional(),
20809
- multimodalEnabled: boolean$1().optional()
20860
+ taskType: LLMModelTaskTypeSchema.optional()
20810
20861
  });
20811
20862
  object({
20812
20863
  success: literal(true)
@@ -20851,7 +20902,8 @@ object({
20851
20902
  modelFormat: LLMModelFormatSchema,
20852
20903
  name: string$1(),
20853
20904
  provider: _enum(["storage", "huggingface"]),
20854
- providerSlug: string$1()
20905
+ providerSlug: string$1(),
20906
+ taskType: LLMModelTaskTypeSchema
20855
20907
  })
20856
20908
  .nullable(),
20857
20909
  modelQuantizationLabel: string$1().nullable(),
@@ -114830,6 +114882,9 @@ async function startVLLM({ enginePort, targetDirectory }) {
114830
114882
  "--tensor-parallel-size",
114831
114883
  String(tensorParallelSize)
114832
114884
  ];
114885
+ if (this.model.taskType === "embeddings") {
114886
+ args.push("--task", "embed");
114887
+ }
114833
114888
  if (device) {
114834
114889
  args.push("--device", device);
114835
114890
  }
@@ -116583,6 +116638,9 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
116583
116638
  "--ctx-size",
116584
116639
  String(contextLength)
116585
116640
  ];
116641
+ if (this.model.taskType === "embeddings") {
116642
+ args.push("--embedding");
116643
+ }
116586
116644
  const gpuLayers = typeof engineConfig?.gpuLayers === "number"
116587
116645
  ? engineConfig.gpuLayers
116588
116646
  : Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
@@ -117688,6 +117746,153 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
117688
117746
  }
117689
117747
  return Math.round(tokensPerSecond);
117690
117748
  }
117749
+ async function proxyEmbeddingsRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
117750
+ function normalizeTokenCount(value) {
117751
+ if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
117752
+ return value;
117753
+ }
117754
+ return 0;
117755
+ }
117756
+ function reportMetricsSafe(payload) {
117757
+ reportMetrics(payload).catch(error => {
117758
+ logger.warn("Failed to upload LLM prompt metrics", {
117759
+ error: asError(error),
117760
+ requestUrl: "/v1/embeddings"
117761
+ });
117762
+ });
117763
+ }
117764
+ const engineType = conduitConfiguration.engineConfig?.type ?? null;
117765
+ const engineConfig = conduitConfiguration.engineConfig?.config ?? null;
117766
+ const serializedBody = isPlainObject$2(body)
117767
+ ? JSON.stringify(body)
117768
+ : typeof body === "string"
117769
+ ? body
117770
+ : JSON.stringify(body);
117771
+ const requestBodyBytes = Buffer.byteLength(serializedBody, "utf8");
117772
+ const requestStartedAt = Date.now();
117773
+ let upstreamResponseOk = true;
117774
+ const onMonitoringComplete = ({ durationMs, error, responseBytes, usage }) => {
117775
+ const promptTokens = normalizeTokenCount(usage?.promptTokens);
117776
+ const totalTokens = normalizeTokenCount(usage?.totalTokens ?? promptTokens);
117777
+ const latencyMs = Math.max(0, durationMs);
117778
+ reportMetricsSafe({
117779
+ bytes: requestBodyBytes + responseBytes,
117780
+ completionTokens: 0,
117781
+ engine: engineType,
117782
+ endpointId: endpointId ?? null,
117783
+ latencyMs,
117784
+ modelId: modelID,
117785
+ promptTokens,
117786
+ requestBytes: requestBodyBytes,
117787
+ requestId: null,
117788
+ requestMethod: "POST",
117789
+ requestPath: "/v1/embeddings",
117790
+ responseBytes,
117791
+ successful: upstreamResponseOk && !error,
117792
+ timeToFirstTokenMs: null,
117793
+ tokensPerSecond: calculateTokensPerSecond$2({
117794
+ durationMs: latencyMs,
117795
+ totalTokens
117796
+ }),
117797
+ totalTokens
117798
+ });
117799
+ };
117800
+ const response = await modelManager
117801
+ .fetchOpenAI("/v1/embeddings", {
117802
+ body: serializedBody,
117803
+ headers: {
117804
+ "Content-Type": "application/json"
117805
+ },
117806
+ method: "POST",
117807
+ signal
117808
+ })
117809
+ .catch(error => {
117810
+ const err = asError(error);
117811
+ logEngineMetrics({
117812
+ agentEngineType: engineType ?? "unknown",
117813
+ error: err,
117814
+ level: "error",
117815
+ logger,
117816
+ requestBodyBytes,
117817
+ requestPath: "/v1/embeddings",
117818
+ responseBytes: 0,
117819
+ usage: null
117820
+ });
117821
+ const latencyMs = Math.max(0, Date.now() - requestStartedAt);
117822
+ reportMetricsSafe({
117823
+ bytes: requestBodyBytes,
117824
+ completionTokens: 0,
117825
+ engine: engineType,
117826
+ endpointId: endpointId ?? null,
117827
+ latencyMs,
117828
+ modelId: modelID,
117829
+ promptTokens: 0,
117830
+ requestBytes: requestBodyBytes,
117831
+ requestId: null,
117832
+ requestMethod: "POST",
117833
+ requestPath: "/v1/embeddings",
117834
+ responseBytes: 0,
117835
+ successful: false,
117836
+ timeToFirstTokenMs: null,
117837
+ tokensPerSecond: 0,
117838
+ totalTokens: 0
117839
+ });
117840
+ throw err;
117841
+ });
117842
+ upstreamResponseOk = response.ok;
117843
+ const responseStatusText = response.statusText ?? "Upstream request failed";
117844
+ if (!response.body) {
117845
+ logEngineMetrics({
117846
+ agentEngineType: engineType ?? "unknown",
117847
+ level: response.ok ? "info" : "error",
117848
+ logger,
117849
+ requestBodyBytes,
117850
+ requestPath: "/v1/embeddings",
117851
+ responseBytes: 0,
117852
+ usage: null
117853
+ });
117854
+ const latencyMs = Math.max(0, Date.now() - requestStartedAt);
117855
+ reportMetricsSafe({
117856
+ bytes: requestBodyBytes,
117857
+ completionTokens: 0,
117858
+ engine: engineType,
117859
+ endpointId: endpointId ?? null,
117860
+ latencyMs,
117861
+ modelId: modelID,
117862
+ promptTokens: 0,
117863
+ requestBytes: requestBodyBytes,
117864
+ requestId: null,
117865
+ requestMethod: "POST",
117866
+ requestPath: "/v1/embeddings",
117867
+ responseBytes: 0,
117868
+ successful: false,
117869
+ timeToFirstTokenMs: null,
117870
+ tokensPerSecond: 0,
117871
+ totalTokens: 0
117872
+ });
117873
+ return {
117874
+ status: response.status,
117875
+ statusText: responseStatusText
117876
+ };
117877
+ }
117878
+ const monitoredResponse = monitorEngineResponseSingle({
117879
+ agentEngineType: engineType ?? "unknown",
117880
+ body: Readable.fromWeb(response.body),
117881
+ contextLength: modelManager.contextLength,
117882
+ engineConfig,
117883
+ engineType: engineType ?? "unknown",
117884
+ logger,
117885
+ onComplete: onMonitoringComplete,
117886
+ requestBodyBytes,
117887
+ requestPath: "/v1/embeddings",
117888
+ requestStartedAt
117889
+ });
117890
+ return {
117891
+ body: monitoredResponse.stream,
117892
+ headers: Object.fromEntries(response.headers.entries()),
117893
+ status: response.status
117894
+ };
117895
+ }
117691
117896
  async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
117692
117897
  function normalizeTokenCount(value) {
117693
117898
  if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
@@ -117710,6 +117915,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
117710
117915
  const requestStartedAt = Date.now();
117711
117916
  const requestBody = JSON.parse(serializedBody);
117712
117917
  const streamRequested = requestBody.stream === true;
117918
+ let upstreamResponseOk = true;
117713
117919
  const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
117714
117920
  const completionTokens = normalizeTokenCount(usage?.completionTokens);
117715
117921
  const promptTokens = normalizeTokenCount(usage?.promptTokens);
@@ -117728,7 +117934,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
117728
117934
  requestMethod: "POST",
117729
117935
  requestPath: path,
117730
117936
  responseBytes,
117731
- successful: !error,
117937
+ successful: upstreamResponseOk && !error,
117732
117938
  timeToFirstTokenMs,
117733
117939
  tokensPerSecond: calculateTokensPerSecond$2({
117734
117940
  durationMs: latencyMs,
@@ -117779,6 +117985,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
117779
117985
  });
117780
117986
  throw err;
117781
117987
  });
117988
+ upstreamResponseOk = response.ok;
117782
117989
  const responseStatusText = response.statusText ?? "Upstream request failed";
117783
117990
  if (!response.ok) {
117784
117991
  if (!response.body) {
@@ -117923,6 +118130,26 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfigurati
117923
118130
  });
117924
118131
  }
117925
118132
  },
118133
+ "/v1/embeddings": {
118134
+ POST: async ({ body, req, res }) => {
118135
+ const modelID = getModelID();
118136
+ const modelManager = getModelManager();
118137
+ const abortController = new AbortController();
118138
+ res.on("close", () => {
118139
+ abortController.abort();
118140
+ });
118141
+ return proxyEmbeddingsRoute({
118142
+ body,
118143
+ conduitConfiguration: conduitConfiguration(),
118144
+ endpointId: extractEndpointId$1(req),
118145
+ logger,
118146
+ modelID,
118147
+ modelManager,
118148
+ reportMetrics: apiClient.reportPromptMetrics,
118149
+ signal: abortController.signal
118150
+ });
118151
+ }
118152
+ },
117926
118153
  "/v1/models": {
117927
118154
  GET: async () => {
117928
118155
  const modelManager = getModelManager();
@@ -117962,6 +118189,9 @@ function createPostChatCompletionsHandler(options) {
117962
118189
  function createPostCompletionsHandler(options) {
117963
118190
  return createConduitOpenAIAPIReferenceHandlers(options)["/v1/completions"].POST;
117964
118191
  }
118192
+ function createPostEmbeddingsHandler(options) {
118193
+ return createConduitOpenAIAPIReferenceHandlers(options)["/v1/embeddings"].POST;
118194
+ }
117965
118195
 
117966
118196
  function isPlainObject$1(value) {
117967
118197
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -128707,6 +128937,17 @@ async function createApplication({ abortController, apiClient, configuration, lo
128707
128937
  startup
128708
128938
  })
128709
128939
  },
128940
+ "/v1/embeddings": {
128941
+ POST: createPostEmbeddingsHandler({
128942
+ apiClient,
128943
+ conduitConfiguration: () => conduitConfiguration,
128944
+ configuration,
128945
+ getModelID: () => conduitConfiguration.targetModel.id,
128946
+ getModelManager: () => modelManager,
128947
+ logger,
128948
+ startup
128949
+ })
128950
+ },
128710
128951
  "/v1/models": {
128711
128952
  GET: createGetModelsHandler({
128712
128953
  apiClient,
@@ -129711,8 +129952,9 @@ class HuggingFaceClient {
129711
129952
  }
129712
129953
  }
129713
129954
  }
129714
- const seenIds = new Set();
129715
- const models = [];
129955
+ const taskPriority = new Map();
129956
+ pipelineTasks.forEach((task, index) => taskPriority.set(task, index));
129957
+ const modelsById = new Map();
129716
129958
  await Promise.all(queries.map(async ({ task, tag }) => {
129717
129959
  const searchParams = {
129718
129960
  accessToken: this.apiKey ?? undefined,
@@ -129724,9 +129966,6 @@ class HuggingFaceClient {
129724
129966
  }
129725
129967
  };
129726
129968
  for await (const entry of executeListWithRetry(searchParams)) {
129727
- if (seenIds.has(entry.id)) {
129728
- continue;
129729
- }
129730
129969
  const entryForUtils = {
129731
129970
  config: entry.config,
129732
129971
  gated: entry.gated,
@@ -129742,10 +129981,15 @@ class HuggingFaceClient {
129742
129981
  if (targetFormats.length > 0 && !targetFormats.includes(format)) {
129743
129982
  continue;
129744
129983
  }
129745
- seenIds.add(entry.id);
129984
+ const existing = modelsById.get(entry.id);
129985
+ if (existing &&
129986
+ (taskPriority.get(task) ?? Number.MAX_SAFE_INTEGER) >=
129987
+ (taskPriority.get(existing.pipelineTask) ?? Number.MAX_SAFE_INTEGER)) {
129988
+ continue;
129989
+ }
129746
129990
  const parameterCount = parseParameterCount(entry.id, entry.safetensors?.parameters);
129747
129991
  const slug = entry.name?.trim() || entry.id;
129748
- models.push({
129992
+ modelsById.set(entry.id, {
129749
129993
  downloads: entry.downloads,
129750
129994
  format,
129751
129995
  gated: entry.gated || false,
@@ -129753,13 +129997,14 @@ class HuggingFaceClient {
129753
129997
  likes: entry.likes,
129754
129998
  name: entry.name || entry.id,
129755
129999
  parameterCount,
130000
+ pipelineTask: task,
129756
130001
  quantization: extractQuantization(entryForUtils),
129757
130002
  slug,
129758
130003
  updatedAt: entry.updatedAt
129759
130004
  });
129760
130005
  }
129761
130006
  }));
129762
- return models;
130007
+ return Array.from(modelsById.values());
129763
130008
  }
129764
130009
  }
129765
130010
 
@@ -209,4 +209,34 @@ export declare function createPostCompletionsHandler(options: {
209
209
  status: number;
210
210
  statusText: string;
211
211
  }>;
212
+ export declare function createPostEmbeddingsHandler(options: {
213
+ apiClient: APIClient;
214
+ conduitConfiguration: () => InferenceAgentConfiguration;
215
+ configuration: Configuration;
216
+ getModelID: () => string;
217
+ getModelManager: () => ModelManager;
218
+ logger: Logger;
219
+ startup: number;
220
+ }): (params: {
221
+ req: APIRequest;
222
+ res: import("@infersec/fetch").APIResponse;
223
+ parameters: Record<string, never>;
224
+ query: Record<string, never>;
225
+ body: {
226
+ input: string | number[] | string[] | number[][];
227
+ model: string;
228
+ dimensions?: number | null | undefined;
229
+ encoding_format?: "base64" | "float" | null | undefined;
230
+ user?: string | undefined;
231
+ };
232
+ responseSchema: undefined;
233
+ }) => Promise<{
234
+ body: import("stream").Readable;
235
+ headers?: Record<string, string>;
236
+ status: number;
237
+ } | {
238
+ headers?: Record<string, string>;
239
+ status: number;
240
+ statusText: string;
241
+ }>;
212
242
  export {};
@@ -3,6 +3,23 @@ import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
+ export declare function proxyEmbeddingsRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
7
+ body: unknown;
8
+ conduitConfiguration: InferenceAgentConfiguration;
9
+ endpointId?: ULID | null;
10
+ logger: Logger;
11
+ modelID: ULID;
12
+ modelManager: ModelManager;
13
+ reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
14
+ signal?: AbortSignal;
15
+ }): Promise<{
16
+ body: Readable;
17
+ headers: Record<string, string>;
18
+ status: number;
19
+ } | {
20
+ status: number;
21
+ statusText: string;
22
+ }>;
6
23
  export declare function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
7
24
  body: unknown;
8
25
  conduitConfiguration: InferenceAgentConfiguration;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.73.0",
4
+ "version": "1.74.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },