@infersec/conduit 1.52.0 → 1.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -52,7 +52,7 @@ import 'fs/promises';
52
52
  import 'stream/promises';
53
53
  import { fileURLToPath } from 'node:url';
54
54
  import { StringDecoder } from 'node:string_decoder';
55
- import os, { constants as constants$5 } from 'node:os';
55
+ import os, { constants as constants$5, hostname as hostname$2 } from 'node:os';
56
56
  import tty from 'node:tty';
57
57
  import require$$0$j from 'child_process';
58
58
  import { setTimeout as setTimeout$1, scheduler, setImmediate as setImmediate$1 } from 'node:timers/promises';
@@ -18497,6 +18497,8 @@ const RawPasswordSchema = string$1()
18497
18497
  .refine(raw => /[\p{P}\p{S}]/u.test(raw), {
18498
18498
  message: "Invalid password: Missing character variant: Symbols"
18499
18499
  });
18500
+ const RESOURCE_NAME_MAX_LENGTH = 256;
18501
+ string$1().trim().min(1).max(RESOURCE_NAME_MAX_LENGTH);
18500
18502
  const ULIDSchema = string$1().refine(isValid, { message: "Invalid ULID" });
18501
18503
  // IRID Format:
18502
18504
  //
@@ -57224,7 +57226,7 @@ var undiciExports = requireUndici();
57224
57226
 
57225
57227
  function createFetchWithHeaders({ fetchFn = undiciExports.fetch, headers }) {
57226
57228
  function fetchWithHeaders(url, options) {
57227
- const mergedHeaders = new Headers(options.headers);
57229
+ const mergedHeaders = new undiciExports.Headers(options.headers);
57228
57230
  for (const [key, value] of Object.entries(headers)) {
57229
57231
  mergedHeaders.set(key, value);
57230
57232
  }
@@ -57252,7 +57254,7 @@ async function fetchByReference(options) {
57252
57254
  for (const [key, value] of Object.entries(query || {})) {
57253
57255
  targetURL.searchParams.set(key, String(value));
57254
57256
  }
57255
- const headers = new Headers();
57257
+ const headers = new undiciExports.Headers();
57256
57258
  const fetchOptions = {
57257
57259
  headers,
57258
57260
  method: options.method
@@ -103234,25 +103236,30 @@ const VLLM_EXECUTABLE = "python3";
103234
103236
  const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
103235
103237
  async function startVLLM({ enginePort, targetDirectory }) {
103236
103238
  const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
103239
+ const device = process.env.VLLM_DEVICE;
103240
+ const dtype = process.env.VLLM_DTYPE;
103241
+ const args = [
103242
+ ...VLLM_START_ARGS,
103243
+ "--port",
103244
+ String(enginePort),
103245
+ "--model",
103246
+ targetDirectory,
103247
+ "--served-model-name",
103248
+ this.model.id,
103249
+ "--max-model-len",
103250
+ String(contextLength),
103251
+ "--tensor-parallel-size",
103252
+ "1"
103253
+ ];
103254
+ if (device) {
103255
+ args.push("--device", device);
103256
+ }
103257
+ if (dtype) {
103258
+ args.push("--dtype", dtype);
103259
+ }
103237
103260
  const processManager = new ProcessManager({
103238
103261
  command: VLLM_EXECUTABLE,
103239
- args: [
103240
- ...VLLM_START_ARGS,
103241
- "--port",
103242
- String(enginePort),
103243
- "--model",
103244
- targetDirectory,
103245
- "--served-model-name",
103246
- this.model.id,
103247
- "--device",
103248
- "cpu", // Force CPU mode
103249
- "--dtype",
103250
- "float16", // Use float16 to save memory on CPU
103251
- "--max-model-len",
103252
- String(contextLength),
103253
- "--tensor-parallel-size",
103254
- "1"
103255
- ]
103262
+ args
103256
103263
  });
103257
103264
  await processManager.start();
103258
103265
  return processManager;
@@ -104324,15 +104331,12 @@ const ModelDownloadProgressSchema = object({
104324
104331
  completedFiles: array(string$1().min(1))
104325
104332
  });
104326
104333
 
104327
- const DOWNLOAD_PROGRESS_TIMEOUT = 60000;
104328
- const DOWNLOAD_RETRY_ATTEMPTS_FULL = 3;
104329
- const DOWNLOAD_RETRY_ATTEMPTS_RANGE = 10;
104330
104334
  function matchesQuantizationVariant({ filePath, variant }) {
104331
104335
  if (!variant) {
104332
104336
  return false;
104333
104337
  }
104334
104338
  const escapedVariant = variant.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
104335
- const trailingBoundary = variant.includes("_") ? "[\\-./_]" : "[\\-./]";
104339
+ const trailingBoundary = "[\\-./]";
104336
104340
  const matcher = new RegExp(`(^|[\\-./_])${escapedVariant}(?=$|${trailingBoundary})`, "i");
104337
104341
  const normalizedPath = filePath.replace(/\\/g, "/");
104338
104342
  const segments = normalizedPath.split("/").filter(Boolean);
@@ -104345,6 +104349,10 @@ function matchesQuantizationVariant({ filePath, variant }) {
104345
104349
  }
104346
104350
  return segments.slice(0, -1).some(segment => matcher.test(segment));
104347
104351
  }
104352
+
104353
+ const DOWNLOAD_PROGRESS_TIMEOUT = 60000;
104354
+ const DOWNLOAD_RETRY_ATTEMPTS_FULL = 3;
104355
+ const DOWNLOAD_RETRY_ATTEMPTS_RANGE = 10;
104348
104356
  async function downloadModelViaHuggingFace({ format, huggingFaceToken, modelSlug: rawModelSlug, onProgress, progressFilePath, targetDirectory }) {
104349
104357
  // Sanitise model ID
104350
104358
  const [modelSlugWithRevision, variant = null] = rawModelSlug.split(":");
@@ -112782,7 +112790,7 @@ async function findQuantizedModelTarget({ model, path }) {
112782
112790
  // Just return the first
112783
112791
  return modelFiles[0];
112784
112792
  }
112785
- const matches = modelFiles.filter(fileName => fileName.toLowerCase().includes(variant.toLowerCase()));
112793
+ const matches = modelFiles.filter(fileName => matchesQuantizationVariant({ filePath: fileName, variant: variant ?? "" }));
112786
112794
  if (matches.length === 0) {
112787
112795
  throw new Error(`No model found for format and variant: ${model.format} / ${variant}`);
112788
112796
  }
@@ -112817,7 +112825,11 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
112817
112825
  }
112818
112826
 
112819
112827
  // 2 hours
112820
- const ENGINE_FETCH_TIMEOUT_MS = 7200000;
112828
+ const ENGINE_FETCH_TIMEOUT_MS$1 = 7200000;
112829
+ const ENGINE_AGENT = new undiciExports.Agent({
112830
+ bodyTimeout: ENGINE_FETCH_TIMEOUT_MS$1,
112831
+ headersTimeout: ENGINE_FETCH_TIMEOUT_MS$1
112832
+ });
112821
112833
  class ModelManager extends EventEmitter {
112822
112834
  engine;
112823
112835
  enginePort;
@@ -112870,19 +112882,28 @@ class ModelManager extends EventEmitter {
112870
112882
  const controller = new AbortController();
112871
112883
  const timeout = setTimeout(() => {
112872
112884
  controller.abort(new Error("Inference request timeout"));
112873
- }, ENGINE_FETCH_TIMEOUT_MS);
112885
+ }, ENGINE_FETCH_TIMEOUT_MS$1);
112874
112886
  const effectiveSignal = callerSignal
112875
112887
  ? AbortSignal.any([callerSignal, controller.signal])
112876
112888
  : controller.signal;
112877
112889
  try {
112878
- return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
112890
+ const fetchStartedAt = Date.now();
112891
+ const response = await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
112879
112892
  ...opts,
112893
+ dispatcher: ENGINE_AGENT,
112880
112894
  headers: {
112881
112895
  ...opts?.headers,
112882
112896
  Connection: "keep-alive"
112883
112897
  },
112884
112898
  signal: effectiveSignal
112885
112899
  });
112900
+ const fetchElapsedMs = Date.now() - fetchStartedAt;
112901
+ this.logger.debug("Engine responded", {
112902
+ elapsedMs: fetchElapsedMs,
112903
+ requestUrl: path,
112904
+ statusCode: response.status
112905
+ });
112906
+ return response;
112886
112907
  }
112887
112908
  finally {
112888
112909
  clearTimeout(timeout);
@@ -112979,6 +113000,9 @@ class ModelManager extends EventEmitter {
112979
113000
  message: "Cannot stop LLM engine: already stopping"
112980
113001
  });
112981
113002
  }
113003
+ if (this.lifecycleState === "stopped") {
113004
+ return;
113005
+ }
112982
113006
  if (this.lifecycleState !== "running" &&
112983
113007
  this.lifecycleState !== "starting" &&
112984
113008
  this.lifecycleState !== "errored") {
@@ -113355,6 +113379,12 @@ function isEngineUsageChunk(value) {
113355
113379
  function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
113356
113380
  const startedAt = requestStartedAt ?? Date.now();
113357
113381
  const passThrough = new PassThrough();
113382
+ passThrough.on("error", (error) => {
113383
+ logger.error("Engine response stream error", {
113384
+ error: asError(error),
113385
+ requestUrl: requestPath
113386
+ });
113387
+ });
113358
113388
  let responseBytes = 0;
113359
113389
  let firstChunkAt = null;
113360
113390
  let usage = null;
@@ -113545,6 +113575,12 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
113545
113575
  const maxUsageCaptureBytes = 1024 * 1024;
113546
113576
  const startedAt = requestStartedAt ?? Date.now();
113547
113577
  const passThrough = new PassThrough();
113578
+ passThrough.on("error", (error) => {
113579
+ logger.error("Engine response stream error", {
113580
+ error: asError(error),
113581
+ requestUrl: requestPath
113582
+ });
113583
+ });
113548
113584
  let responseBytes = 0;
113549
113585
  let firstChunkAt = null;
113550
113586
  let usage = null;
@@ -113830,21 +113866,20 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
113830
113866
  });
113831
113867
  const responseStatusText = response.statusText ?? "Upstream request failed";
113832
113868
  if (!response.ok) {
113833
- const responseBody = await response.text().catch(() => null);
113834
- const responseError = new Error(responseBody
113835
- ? `Upstream error response: ${responseBody}`
113836
- : "Upstream error response: empty body");
113837
- logger.error("LLM engine request failed", {
113838
- error: responseError,
113839
- requestUrl: path,
113840
- statusCode: response.status,
113841
- statusText: responseStatusText
113842
- });
113843
113869
  if (!response.body) {
113844
- return {
113845
- status: response.status,
113870
+ logger.error("LLM engine request failed (no body)", {
113871
+ error: new Error("Upstream error response: empty body"),
113872
+ requestUrl: path,
113873
+ statusCode: response.status,
113846
113874
  statusText: responseStatusText
113847
- };
113875
+ });
113876
+ }
113877
+ else {
113878
+ logger.error("LLM engine request failed", {
113879
+ requestUrl: path,
113880
+ statusCode: response.status,
113881
+ statusText: responseStatusText
113882
+ });
113848
113883
  }
113849
113884
  }
113850
113885
  if (!response.body) {
@@ -113926,9 +113961,15 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
113926
113961
  const modelManager = getModelManager();
113927
113962
  const abortController = new AbortController();
113928
113963
  res.on("close", () => {
113964
+ logger.debug("Express route client disconnected, aborting", {
113965
+ requestUrl: "/v1/chat/completions"
113966
+ });
113929
113967
  abortController.abort();
113930
113968
  });
113931
- return proxyOpenAIStreamingRoute({
113969
+ logger.debug("Express route handler entered, awaiting engine", {
113970
+ requestUrl: "/v1/chat/completions"
113971
+ });
113972
+ const result = await proxyOpenAIStreamingRoute({
113932
113973
  body,
113933
113974
  configuration,
113934
113975
  endpointId: extractEndpointId$1(req),
@@ -113939,6 +113980,11 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
113939
113980
  reportMetrics: apiClient.reportPromptMetrics,
113940
113981
  signal: abortController.signal
113941
113982
  });
113983
+ logger.debug("Express route handler returning response", {
113984
+ requestUrl: "/v1/chat/completions",
113985
+ statusCode: "status" in result ? result.status : 0
113986
+ });
113987
+ return result;
113942
113988
  }
113943
113989
  },
113944
113990
  "/v1/completions": {
@@ -114688,7 +114734,7 @@ async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
114688
114734
  sequence: payload.sequence
114689
114735
  });
114690
114736
  chunks.push(Buffer.from(chunk + "\n"));
114691
- if (chunks.length >= 10) {
114737
+ if (payload.data === null || chunks.length >= 10) {
114692
114738
  await flushChunks();
114693
114739
  }
114694
114740
  };
@@ -114782,10 +114828,12 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
114782
114828
  return Math.round(tokensPerSecond);
114783
114829
  }
114784
114830
 
114785
- /**
114786
- * Proxy server requests to the local inference HTTP server.
114787
- */
114788
- async function proxyRequest({ configuration, request, signal }) {
114831
+ const ENGINE_FETCH_TIMEOUT_MS = 7_200_000;
114832
+ const LOOPBACK_AGENT = new undiciExports.Agent({
114833
+ bodyTimeout: ENGINE_FETCH_TIMEOUT_MS,
114834
+ headersTimeout: ENGINE_FETCH_TIMEOUT_MS
114835
+ });
114836
+ async function proxyRequest({ configuration, logger, request, signal }) {
114789
114837
  let finalPath = request.path;
114790
114838
  if (request.parameters) {
114791
114839
  Object.entries(request.parameters).forEach(([key, value]) => {
@@ -114799,6 +114847,7 @@ async function proxyRequest({ configuration, request, signal }) {
114799
114847
  }
114800
114848
  }
114801
114849
  const fetchOptions = {
114850
+ dispatcher: LOOPBACK_AGENT,
114802
114851
  method: request.method,
114803
114852
  headers: {
114804
114853
  ...request.headers,
@@ -114814,7 +114863,18 @@ async function proxyRequest({ configuration, request, signal }) {
114814
114863
  fetchOptions.body =
114815
114864
  typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
114816
114865
  }
114866
+ logger.debug("Loopback proxy request starting", {
114867
+ requestMethod: request.method,
114868
+ requestUrl: finalPath
114869
+ });
114870
+ const startedAt = Date.now();
114817
114871
  const response = await undiciExports.fetch(url, fetchOptions);
114872
+ logger.debug("Loopback proxy response received", {
114873
+ elapsedMs: Date.now() - startedAt,
114874
+ requestMethod: request.method,
114875
+ requestUrl: finalPath,
114876
+ statusCode: response.status
114877
+ });
114818
114878
  return {
114819
114879
  body: response.body ? Readable.fromWeb(response.body) : null,
114820
114880
  headers: Object.fromEntries(response.headers.entries()),
@@ -124435,7 +124495,20 @@ async function collectMachineMetadata() {
124435
124495
  return machineMetadata;
124436
124496
  }
124437
124497
 
124498
+ const CONTAINER_ID_RE = /^[0-9a-f]{12}$/;
124499
+ function ensureDockerValidEnv() {
124500
+ if (!existsSync("/.dockerenv")) {
124501
+ return;
124502
+ }
124503
+ const name = hostname$2();
124504
+ if (CONTAINER_ID_RE.test(name)) {
124505
+ throw new Error(`Container hostname is a default Docker container ID ("${name}"). ` +
124506
+ `Set 'hostname' in your compose service definition.`);
124507
+ }
124508
+ }
124509
+
124438
124510
  async function createApplication({ abortController, apiClient, configuration, logger }) {
124511
+ ensureDockerValidEnv();
124439
124512
  logger.info("Fetching conduit configuration");
124440
124513
  let conduitConfiguration = await apiClient.getConduitConfiguration();
124441
124514
  logger.info("Received configuration", {
@@ -124758,6 +124831,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
124758
124831
  onRequest: async ({ request, signal }) => {
124759
124832
  return proxyRequest({
124760
124833
  configuration,
124834
+ logger,
124761
124835
  request,
124762
124836
  signal
124763
124837
  });
@@ -1,7 +1,8 @@
1
1
  import { LLMEngine, LLMModel } from "@infersec/definitions";
2
2
  import { Logger } from "@infersec/logger";
3
3
  import EventEmitter from "eventemitter3";
4
- import { RequestInit, Response } from "undici";
4
+ import { Response } from "undici";
5
+ import type { RequestInit } from "undici";
5
6
  import { type ModelDownloadProgressUpdate } from "./download.js";
6
7
  interface ModelManagerEvents {
7
8
  engineError: (error: Error) => void;
@@ -0,0 +1,4 @@
1
+ export declare function matchesQuantizationVariant({ filePath, variant }: {
2
+ filePath: string;
3
+ variant: string;
4
+ }): boolean;
@@ -1,10 +1,9 @@
1
1
  import type { APIResponse, ServerToClientAPIRequest } from "@infersec/definitions";
2
+ import { Logger } from "@infersec/logger";
2
3
  import type { Configuration } from "../configuration.js";
3
- /**
4
- * Proxy server requests to the local inference HTTP server.
5
- */
6
- export declare function proxyRequest({ configuration, request, signal }: {
4
+ export declare function proxyRequest({ configuration, logger, request, signal }: {
7
5
  configuration: Configuration;
6
+ logger: Logger;
8
7
  request: ServerToClientAPIRequest;
9
8
  signal?: AbortSignal;
10
9
  }): Promise<APIResponse>;
@@ -0,0 +1 @@
1
+ export declare function ensureDockerValidEnv(): void;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.52.0",
4
+ "version": "1.53.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },