@infersec/conduit 1.67.0 → 1.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
- import type { LLMEngine, ULID } from "@infersec/definitions";
1
+ import type { ULID } from "@infersec/definitions";
2
2
  import type { Logger } from "@infersec/logger";
3
3
  export interface ConduitConnectionOptions {
4
4
  apiKey: string;
5
5
  apiURL: string;
6
- engine: LLMEngine;
7
6
  enginePort: number;
8
7
  logger: Logger;
9
8
  port: number;
package/dist/cli.js CHANGED
@@ -4344,18 +4344,15 @@ function ulid$3(seedTime, prng) {
4344
4344
  return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
4345
4345
  }
4346
4346
 
4347
- /**
4348
- * Calculates the effective context length per slot, accounting for
4349
- * parallelism when using llama.cpp. For llama.cpp, the total context
4350
- * window is divided across parallel slots; for other engines, the
4351
- * full context length is used.
4352
- */
4353
- function getEffectiveContextLength({ contextLength, engine, parallelism }) {
4347
+ function getEffectiveContextLength({ contextLength, engineConfig, engineType }) {
4354
4348
  if (contextLength === null || contextLength <= 0) {
4355
4349
  return null;
4356
4350
  }
4357
- if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
4358
- return contextLength / parallelism;
4351
+ if (engineType === "llama.cpp" && engineConfig) {
4352
+ const parallelism = engineConfig?.parallelism;
4353
+ if (typeof parallelism === "number" && parallelism > 0) {
4354
+ return contextLength / parallelism;
4355
+ }
4359
4356
  }
4360
4357
  return contextLength;
4361
4358
  }
@@ -19893,6 +19890,28 @@ object$1({
19893
19890
  });
19894
19891
 
19895
19892
  const LLMEngineSchema = _enum$1(["llama.cpp", "vllm"]);
19893
+ const LlamacppEngineConfigSchema = object$1({
19894
+ batchSize: number$1().int().positive().nullable().optional(),
19895
+ cacheTypeK: string$3().nullable().optional(),
19896
+ cacheTypeV: string$3().nullable().optional(),
19897
+ extraArgs: array$2(string$3()).optional(),
19898
+ flashAttn: boolean$1().optional(),
19899
+ gpuLayers: number$1().int().min(0).optional(),
19900
+ mainGpu: number$1().int().min(0).nullable().optional(),
19901
+ parallelism: number$1().int().positive().optional(),
19902
+ tensorSplit: string$3().nullable().optional(),
19903
+ ubatchSize: number$1().int().positive().nullable().optional()
19904
+ });
19905
+ const VLLMEngineConfigSchema = object$1({
19906
+ device: string$3().optional(),
19907
+ dtype: string$3().optional(),
19908
+ extraArgs: array$2(string$3()).optional(),
19909
+ tensorParallelSize: number$1().int().positive().optional()
19910
+ });
19911
+ const EngineConfigSchema = discriminatedUnion("type", [
19912
+ object$1({ config: LlamacppEngineConfigSchema, type: literal("llama.cpp") }),
19913
+ object$1({ config: VLLMEngineConfigSchema, type: literal("vllm") })
19914
+ ]);
19896
19915
  const LLMModelFormatSchema = _enum$1([
19897
19916
  // VLLM
19898
19917
  "safetensors",
@@ -20045,8 +20064,8 @@ const ConduitState = z.preprocess(value => {
20045
20064
 
20046
20065
  const InferenceAgentConfigurationSchema = object$1({
20047
20066
  contextLength: number$1().int().positive().nullable(),
20067
+ engineConfig: EngineConfigSchema.nullable(),
20048
20068
  inferenceSourceID: ULIDSchema,
20049
- parallelism: number$1().int().positive().nullable(),
20050
20069
  targetModel: LLMModelSchema
20051
20070
  });
20052
20071
 
@@ -20775,7 +20794,8 @@ object$1({
20775
20794
  provider: _enum$1(["storage", "huggingface"]),
20776
20795
  providerSlug: string$3(),
20777
20796
  sources: array$2(object$1({
20778
- engine: LLMEngineSchema,
20797
+ engine: LLMEngineSchema.nullable(),
20798
+ engineId: ULIDSchema.nullable(),
20779
20799
  id: ULIDSchema,
20780
20800
  lastState: ConduitState.nullable(),
20781
20801
  lastStateTimestamp: string$3().nullable(),
@@ -20796,17 +20816,17 @@ object$1({
20796
20816
  });
20797
20817
  object$1({
20798
20818
  contextLength: number$1().int().positive().max(1048576).optional(),
20799
- engine: LLMEngineSchema,
20819
+ engineId: ULIDSchema,
20800
20820
  modelID: ULIDSchema,
20801
20821
  name: ResourceNameSchema,
20802
- parallelism: number$1().int().positive().optional(),
20803
20822
  quantizationLabel: string$3().min(1).max(128).optional()
20804
20823
  });
20805
20824
  object$1({
20806
20825
  results: array$2(object$1({
20807
20826
  contextLength: number$1().int().positive().nullable(),
20808
20827
  created: string$3(),
20809
- engine: LLMEngineSchema,
20828
+ engine: LLMEngineSchema.nullable(),
20829
+ engineId: ULIDSchema.nullable(),
20810
20830
  id: ULIDSchema,
20811
20831
  lastState: ConduitState.nullable(),
20812
20832
  lastStateTimestamp: string$3().nullable(),
@@ -20819,7 +20839,10 @@ object$1({
20819
20839
  object$1({
20820
20840
  contextLength: number$1().int().positive().nullable(),
20821
20841
  created: string$3(),
20822
- engine: LLMEngineSchema,
20842
+ engine: LLMEngineSchema.nullable(),
20843
+ engineConfig: unknown$1().nullable(),
20844
+ engineId: ULIDSchema.nullable(),
20845
+ engineName: string$3().nullable(),
20823
20846
  id: ULIDSchema,
20824
20847
  lastState: ConduitState.nullable(),
20825
20848
  lastStateTimestamp: string$3().nullable(),
@@ -20832,15 +20855,13 @@ object$1({
20832
20855
  }),
20833
20856
  modelQuantizationLabel: string$3().nullable(),
20834
20857
  name: string$3(),
20835
- parallelism: number$1().int().positive().nullable(),
20836
20858
  updated: string$3()
20837
20859
  });
20838
20860
  object$1({
20839
20861
  contextLength: number$1().int().positive().nullable().optional(),
20840
- engine: LLMEngineSchema.optional(),
20862
+ engineId: ULIDSchema.nullable().optional(),
20841
20863
  modelID: ULIDSchema.optional(),
20842
20864
  name: ResourceNameSchema.optional(),
20843
- parallelism: number$1().int().positive().nullable().optional(),
20844
20865
  quantizationLabel: string$3().min(1).max(128).nullable().optional()
20845
20866
  });
20846
20867
  object$1({
@@ -20870,7 +20891,8 @@ object$1({
20870
20891
  name: string$3(),
20871
20892
  routingMethod: nativeEnum(RoutingMethod),
20872
20893
  sources: array$2(object$1({
20873
- engine: LLMEngineSchema,
20894
+ engine: LLMEngineSchema.nullable(),
20895
+ engineId: ULIDSchema.nullable(),
20874
20896
  id: ULIDSchema,
20875
20897
  modelName: string$3(),
20876
20898
  name: string$3()
@@ -116803,8 +116825,10 @@ async function startVLLM({ enginePort, targetDirectory }) {
116803
116825
  if (this.model.format === "gguf") {
116804
116826
  modelPath = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
116805
116827
  }
116806
- const device = process.env.VLLM_DEVICE;
116807
- const dtype = process.env.VLLM_DTYPE;
116828
+ const engineConfig = this.engineConfig;
116829
+ const device = typeof engineConfig?.device === "string" ? engineConfig.device : process.env.VLLM_DEVICE;
116830
+ const dtype = typeof engineConfig?.dtype === "string" ? engineConfig.dtype : process.env.VLLM_DTYPE;
116831
+ const tensorParallelSize = typeof engineConfig?.tensorParallelSize === "number" ? engineConfig.tensorParallelSize : 1;
116808
116832
  const args = [
116809
116833
  ...VLLM_START_ARGS,
116810
116834
  "--port",
@@ -116816,7 +116840,7 @@ async function startVLLM({ enginePort, targetDirectory }) {
116816
116840
  "--max-model-len",
116817
116841
  String(contextLength),
116818
116842
  "--tensor-parallel-size",
116819
- "1"
116843
+ String(tensorParallelSize)
116820
116844
  ];
116821
116845
  if (device) {
116822
116846
  args.push("--device", device);
@@ -116824,6 +116848,10 @@ async function startVLLM({ enginePort, targetDirectory }) {
116824
116848
  if (dtype) {
116825
116849
  args.push("--dtype", dtype);
116826
116850
  }
116851
+ const extraArgs = engineConfig?.extraArgs;
116852
+ if (Array.isArray(extraArgs) && extraArgs.every((v) => typeof v === "string")) {
116853
+ args.push(...extraArgs);
116854
+ }
116827
116855
  const processManager = new ProcessManager({
116828
116856
  command: VLLM_EXECUTABLE,
116829
116857
  args
@@ -122775,7 +122803,8 @@ const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
122775
122803
  async function startLlamacpp({ enginePort, targetDirectory }) {
122776
122804
  const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
122777
122805
  const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
122778
- const parallelism = this.parallelism;
122806
+ const engineConfig = this.engineConfig;
122807
+ const parallelism = typeof engineConfig?.parallelism === "number" ? engineConfig.parallelism : null;
122779
122808
  const args = [
122780
122809
  ...LLAMACPP_START_ARGS,
122781
122810
  "--port",
@@ -122785,13 +122814,47 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
122785
122814
  "--ctx-size",
122786
122815
  String(contextLength)
122787
122816
  ];
122788
- const gpuLayers = Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
122817
+ const gpuLayers = typeof engineConfig?.gpuLayers === "number"
122818
+ ? engineConfig.gpuLayers
122819
+ : Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
122789
122820
  if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
122790
122821
  args.push("--n-gpu-layers", String(gpuLayers));
122791
122822
  }
122792
122823
  if (typeof parallelism === "number") {
122793
122824
  args.push("--parallel", String(Math.max(1, parallelism)));
122794
122825
  }
122826
+ const flashAttn = engineConfig?.flashAttn;
122827
+ if (flashAttn === true || flashAttn === undefined) {
122828
+ args.push("--flash-attn", "on");
122829
+ }
122830
+ const cacheTypeK = typeof engineConfig?.cacheTypeK === "string" ? engineConfig.cacheTypeK : null;
122831
+ if (cacheTypeK) {
122832
+ args.push("--cache-type-k", cacheTypeK);
122833
+ }
122834
+ const cacheTypeV = typeof engineConfig?.cacheTypeV === "string" ? engineConfig.cacheTypeV : null;
122835
+ if (cacheTypeV) {
122836
+ args.push("--cache-type-v", cacheTypeV);
122837
+ }
122838
+ const batchSize = typeof engineConfig?.batchSize === "number" ? engineConfig.batchSize : null;
122839
+ if (batchSize !== null) {
122840
+ args.push("--batch-size", String(batchSize));
122841
+ }
122842
+ const ubatchSize = typeof engineConfig?.ubatchSize === "number" ? engineConfig.ubatchSize : null;
122843
+ if (ubatchSize !== null) {
122844
+ args.push("--ubatch-size", String(ubatchSize));
122845
+ }
122846
+ const tensorSplit = typeof engineConfig?.tensorSplit === "string" ? engineConfig.tensorSplit : null;
122847
+ if (tensorSplit) {
122848
+ args.push("--tensor-split", tensorSplit);
122849
+ }
122850
+ const mainGpu = typeof engineConfig?.mainGpu === "number" ? engineConfig.mainGpu : null;
122851
+ if (mainGpu !== null) {
122852
+ args.push("--main-gpu", String(mainGpu));
122853
+ }
122854
+ const extraArgs = engineConfig?.extraArgs;
122855
+ if (Array.isArray(extraArgs) && extraArgs.every((v) => typeof v === "string")) {
122856
+ args.push(...extraArgs);
122857
+ }
122795
122858
  const processManager = new ProcessManager({
122796
122859
  command: LLAMACPP_EXECUTABLE,
122797
122860
  args
@@ -122824,11 +122887,8 @@ function createModelStorageKey(model) {
122824
122887
  return `${model.source.type}${SEPARATOR}${sanitizeSegment(identifier)}`;
122825
122888
  }
122826
122889
 
122827
- // 2 hours
122828
122890
  const ENGINE_FETCH_TIMEOUT_MS$1 = 7200000;
122829
- // 20 minutes
122830
122891
  const DOWNLOAD_LOCK_TIMEOUT_MS = 20 * 60 * 1000;
122831
- // 5 seconds
122832
122892
  const DOWNLOAD_LOCK_POLL_INTERVAL_MS = 5000;
122833
122893
  const ENGINE_AGENT = new undiciExports.Agent({
122834
122894
  bodyTimeout: ENGINE_FETCH_TIMEOUT_MS$1,
@@ -122836,9 +122896,9 @@ const ENGINE_AGENT = new undiciExports.Agent({
122836
122896
  });
122837
122897
  class ModelManager extends EventEmitter {
122838
122898
  engine;
122899
+ engineConfig;
122839
122900
  enginePort;
122840
122901
  model;
122841
- parallelism;
122842
122902
  uniqueName;
122843
122903
  contextLength;
122844
122904
  logger;
@@ -122847,27 +122907,13 @@ class ModelManager extends EventEmitter {
122847
122907
  lifecycleState = "stopped";
122848
122908
  stopRequested = false;
122849
122909
  modelsDirectory;
122850
- constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }) {
122910
+ constructor({ contextLength, engineConfig, enginePort, engineType, logger, model, root }) {
122851
122911
  super();
122852
- // const models = getModels();
122853
- // const targetModel = models.find(model => model.id === modelID);
122854
- // if (!targetModel) {
122855
- // throw new ConfigurationInvalidError({
122856
- // message: `No model found for ID: ${modelID}`
122857
- // });
122858
- // }
122859
- // const source = targetModel.sources.find(source => source.engine === engine);
122860
- // if (!source) {
122861
- // throw new ConfigurationInvalidError({
122862
- // message: `Model does not support current engine: ${modelID} has no support for engine: ${engine}`
122863
- // });
122864
- // }
122865
- this.engine = engine;
122912
+ this.engine = engineType;
122913
+ this.engineConfig = engineConfig ?? null;
122866
122914
  this.enginePort = enginePort;
122867
122915
  this.model = model;
122868
122916
  this.contextLength = typeof contextLength === "number" ? contextLength : null;
122869
- this.parallelism = typeof parallelism === "number" ? parallelism : null;
122870
- // this.providerSlugentifier = source.identifier;
122871
122917
  this.logger = logger;
122872
122918
  this.uniqueName = createModelStorageKey(this.model);
122873
122919
  this.modelsDirectory = join(root, "models");
@@ -122908,12 +122954,6 @@ class ModelManager extends EventEmitter {
122908
122954
  clearTimeout(timeout);
122909
122955
  }
122910
122956
  }
122911
- // case "ollama":
122912
- // console.log("FETCH", path, opts);
122913
- // return fetch(
122914
- // joinURL("http://localhost:11434", path),
122915
- // opts
122916
- // );
122917
122957
  default: {
122918
122958
  const engineType = this.engine;
122919
122959
  throw new ConfigurationInvalidError({
@@ -122948,15 +122988,6 @@ class ModelManager extends EventEmitter {
122948
122988
  await this.releaseDownloadLock();
122949
122989
  }
122950
122990
  break;
122951
- // case "ollama":
122952
- // this.logger.info("Loading model", {
122953
- // modelID: this.model.id
122954
- // });
122955
- // await loadCurrentOllamaModel.call(this);
122956
- // this.logger.info("Loaded model", {
122957
- // modelID: this.model.id
122958
- // });
122959
- // return;
122960
122991
  default: {
122961
122992
  const engineType = this.engine;
122962
122993
  throw new ConfigurationInvalidError({
@@ -123392,12 +123423,6 @@ function createPostStopEngineHandler(options) {
123392
123423
  return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/stop"].POST;
123393
123424
  }
123394
123425
 
123395
- /**
123396
- * Coerce non-string tool_calls function.arguments to JSON strings.
123397
- * Some LLM backends return arguments as parsed objects instead of
123398
- * JSON strings, violating the OpenAI spec. This mutates in place
123399
- * and returns true if any coercion was performed.
123400
- */
123401
123426
  function coerceToolCallArguments(parsed) {
123402
123427
  const choices = parsed.choices;
123403
123428
  if (!Array.isArray(choices))
@@ -123438,7 +123463,7 @@ function isEngineUsageChunk(value) {
123438
123463
  }
123439
123464
  return true;
123440
123465
  }
123441
- function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
123466
+ function monitorEngineResponseStream({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
123442
123467
  const startedAt = requestStartedAt ?? Date.now();
123443
123468
  const passThrough = new PassThrough();
123444
123469
  passThrough.on("error", (error) => {
@@ -123477,8 +123502,8 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
123477
123502
  const usageChunk = parsed.usage;
123478
123503
  const effectiveContext = getEffectiveContextLength({
123479
123504
  contextLength,
123480
- engine,
123481
- parallelism
123505
+ engineConfig,
123506
+ engineType
123482
123507
  });
123483
123508
  if (usageChunk.context_usage === undefined &&
123484
123509
  usageChunk.prompt_tokens !== undefined &&
@@ -123520,8 +123545,8 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
123520
123545
  let contextUsage = parsed.usage?.context_usage ?? null;
123521
123546
  const effectiveContextForUsage = getEffectiveContextLength({
123522
123547
  contextLength,
123523
- engine,
123524
- parallelism
123548
+ engineConfig,
123549
+ engineType
123525
123550
  });
123526
123551
  if (contextUsage === null &&
123527
123552
  promptTokens !== null &&
@@ -123590,7 +123615,9 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
123590
123615
  passThrough.destroy(err);
123591
123616
  });
123592
123617
  body.once("end", () => {
123593
- parseUsageFromBuffer();
123618
+ if (buffer.length > 0) {
123619
+ parseUsageFromBuffer();
123620
+ }
123594
123621
  logEngineMetrics({
123595
123622
  agentEngineType,
123596
123623
  level: "info",
@@ -123633,7 +123660,7 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
123633
123660
  stream: passThrough
123634
123661
  };
123635
123662
  }
123636
- function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
123663
+ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
123637
123664
  const maxUsageCaptureBytes = 1024 * 1024;
123638
123665
  const startedAt = requestStartedAt ?? Date.now();
123639
123666
  const passThrough = new PassThrough();
@@ -123719,8 +123746,8 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
123719
123746
  let contextUsage = usageChunk.context_usage ?? null;
123720
123747
  const effectiveContext = getEffectiveContextLength({
123721
123748
  contextLength,
123722
- engine,
123723
- parallelism
123749
+ engineConfig,
123750
+ engineType
123724
123751
  });
123725
123752
  if (contextUsage === null &&
123726
123753
  promptTokens !== null &&
@@ -123839,7 +123866,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
123839
123866
  }
123840
123867
  return Math.round(tokensPerSecond);
123841
123868
  }
123842
- async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
123869
+ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
123843
123870
  function normalizeTokenCount(value) {
123844
123871
  if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
123845
123872
  return value;
@@ -123854,6 +123881,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123854
123881
  });
123855
123882
  });
123856
123883
  }
123884
+ const engineType = conduitConfiguration.engineConfig?.type ?? null;
123885
+ const engineConfig = conduitConfiguration.engineConfig?.config ?? null;
123857
123886
  const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody$1(body);
123858
123887
  const requestStartedAt = Date.now();
123859
123888
  const requestBody = JSON.parse(serializedBody);
@@ -123866,7 +123895,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123866
123895
  reportMetricsSafe({
123867
123896
  bytes: requestBodyBytes + responseBytes,
123868
123897
  completionTokens,
123869
- engine: configuration.agentEngineType,
123898
+ engine: engineType,
123870
123899
  endpointId: endpointId ?? null,
123871
123900
  latencyMs,
123872
123901
  modelId: modelID,
@@ -123895,9 +123924,10 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123895
123924
  signal
123896
123925
  })
123897
123926
  .catch(error => {
123927
+ const err = asError(error);
123898
123928
  logEngineMetrics({
123899
- agentEngineType: configuration.agentEngineType,
123900
- error: error,
123929
+ agentEngineType: engineType ?? "unknown",
123930
+ error: err,
123901
123931
  level: "error",
123902
123932
  logger,
123903
123933
  requestBodyBytes,
@@ -123909,7 +123939,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123909
123939
  reportMetricsSafe({
123910
123940
  bytes: requestBodyBytes,
123911
123941
  completionTokens: 0,
123912
- engine: configuration.agentEngineType,
123942
+ engine: engineType,
123913
123943
  endpointId: endpointId ?? null,
123914
123944
  latencyMs,
123915
123945
  modelId: modelID,
@@ -123924,7 +123954,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123924
123954
  tokensPerSecond: 0,
123925
123955
  totalTokens: 0
123926
123956
  });
123927
- throw error;
123957
+ throw err;
123928
123958
  });
123929
123959
  const responseStatusText = response.statusText ?? "Upstream request failed";
123930
123960
  if (!response.ok) {
@@ -123946,7 +123976,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123946
123976
  }
123947
123977
  if (!response.body) {
123948
123978
  logEngineMetrics({
123949
- agentEngineType: configuration.agentEngineType,
123979
+ agentEngineType: engineType ?? "unknown",
123950
123980
  level: response.ok ? "info" : "error",
123951
123981
  logger,
123952
123982
  requestBodyBytes,
@@ -123958,7 +123988,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123958
123988
  reportMetricsSafe({
123959
123989
  bytes: requestBodyBytes,
123960
123990
  completionTokens: 0,
123961
- engine: configuration.agentEngineType,
123991
+ engine: engineType,
123962
123992
  endpointId: endpointId ?? null,
123963
123993
  latencyMs,
123964
123994
  modelId: modelID,
@@ -123980,25 +124010,25 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
123980
124010
  }
123981
124011
  const monitoredResponse = streamRequested
123982
124012
  ? monitorEngineResponseStream({
123983
- agentEngineType: configuration.agentEngineType,
124013
+ agentEngineType: engineType ?? "unknown",
123984
124014
  body: Readable.fromWeb(response.body),
123985
124015
  contextLength: modelManager.contextLength,
123986
- engine: configuration.agentEngineType,
124016
+ engineConfig,
124017
+ engineType: engineType ?? "unknown",
123987
124018
  logger,
123988
124019
  onComplete: onMonitoringComplete,
123989
- parallelism: modelManager.parallelism,
123990
124020
  requestBodyBytes,
123991
124021
  requestPath: path,
123992
124022
  requestStartedAt
123993
124023
  })
123994
124024
  : monitorEngineResponseSingle({
123995
- agentEngineType: configuration.agentEngineType,
124025
+ agentEngineType: engineType ?? "unknown",
123996
124026
  body: Readable.fromWeb(response.body),
123997
124027
  contextLength: modelManager.contextLength,
123998
- engine: configuration.agentEngineType,
124028
+ engineConfig,
124029
+ engineType: engineType ?? "unknown",
123999
124030
  logger,
124000
124031
  onComplete: onMonitoringComplete,
124001
- parallelism: modelManager.parallelism,
124002
124032
  requestBodyBytes,
124003
124033
  requestPath: path,
124004
124034
  requestStartedAt
@@ -124015,7 +124045,7 @@ function extractEndpointId$1(req) {
124015
124045
  const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
124016
124046
  return raw && isValid(raw) ? raw : null;
124017
124047
  }
124018
- function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
124048
+ function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger, startup }) {
124019
124049
  return {
124020
124050
  "/v1/chat/completions": {
124021
124051
  POST: async ({ body, req, res }) => {
@@ -124033,7 +124063,7 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
124033
124063
  });
124034
124064
  const result = await proxyOpenAIStreamingRoute({
124035
124065
  body,
124036
- configuration,
124066
+ conduitConfiguration: conduitConfiguration(),
124037
124067
  endpointId: extractEndpointId$1(req),
124038
124068
  logger,
124039
124069
  modelID,
@@ -124059,7 +124089,7 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
124059
124089
  });
124060
124090
  return proxyOpenAIStreamingRoute({
124061
124091
  body,
124062
- configuration,
124092
+ conduitConfiguration: conduitConfiguration(),
124063
124093
  endpointId: extractEndpointId$1(req),
124064
124094
  logger,
124065
124095
  modelID,
@@ -124073,10 +124103,11 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
124073
124103
  "/v1/models": {
124074
124104
  GET: async () => {
124075
124105
  const modelManager = getModelManager();
124106
+ const currentConfig = conduitConfiguration();
124076
124107
  const effectiveContextLength = getEffectiveContextLength({
124077
124108
  contextLength: modelManager.contextLength,
124078
- engine: configuration.agentEngineType,
124079
- parallelism: modelManager.parallelism
124109
+ engineConfig: currentConfig.engineConfig?.config ?? null,
124110
+ engineType: currentConfig.engineConfig?.type ?? null
124080
124111
  });
124081
124112
  return {
124082
124113
  body: {
@@ -124179,7 +124210,7 @@ function extractAnthropicNonStreamUsage(body) {
124179
124210
  return null;
124180
124211
  }
124181
124212
  }
124182
- async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
124213
+ async function proxyAnthropicStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
124183
124214
  function reportMetricsSafe(payload) {
124184
124215
  reportMetrics(payload).catch(error => {
124185
124216
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -124188,6 +124219,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124188
124219
  });
124189
124220
  });
124190
124221
  }
124222
+ const engineType = conduitConfiguration.engineConfig?.type ?? null;
124191
124223
  const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
124192
124224
  const requestStartedAt = Date.now();
124193
124225
  const requestBody = JSON.parse(serializedBody);
@@ -124200,7 +124232,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124200
124232
  reportMetricsSafe({
124201
124233
  bytes: requestBodyBytes + responseBytes,
124202
124234
  completionTokens,
124203
- engine: configuration.agentEngineType,
124235
+ engine: engineType,
124204
124236
  endpointId: endpointId ?? null,
124205
124237
  latencyMs,
124206
124238
  modelId: modelID,
@@ -124230,7 +124262,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124230
124262
  })
124231
124263
  .catch(error => {
124232
124264
  logEngineMetrics({
124233
- agentEngineType: configuration.agentEngineType,
124265
+ agentEngineType: engineType ?? "unknown",
124234
124266
  error: asError(error),
124235
124267
  level: "error",
124236
124268
  logger,
@@ -124243,7 +124275,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124243
124275
  reportMetricsSafe({
124244
124276
  bytes: requestBodyBytes,
124245
124277
  completionTokens: 0,
124246
- engine: configuration.agentEngineType,
124278
+ engine: engineType,
124247
124279
  endpointId: endpointId ?? null,
124248
124280
  latencyMs,
124249
124281
  modelId: modelID,
@@ -124276,7 +124308,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124276
124308
  }
124277
124309
  if (!response.body) {
124278
124310
  logEngineMetrics({
124279
- agentEngineType: configuration.agentEngineType,
124311
+ agentEngineType: engineType ?? "unknown",
124280
124312
  level: response.ok ? "info" : "error",
124281
124313
  logger,
124282
124314
  requestBodyBytes,
@@ -124288,7 +124320,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124288
124320
  reportMetricsSafe({
124289
124321
  bytes: requestBodyBytes,
124290
124322
  completionTokens: 0,
124291
- engine: configuration.agentEngineType,
124323
+ engine: engineType,
124292
124324
  endpointId: endpointId ?? null,
124293
124325
  latencyMs,
124294
124326
  modelId: modelID,
@@ -124349,7 +124381,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124349
124381
  rawBody.once("error", err => {
124350
124382
  const normalizedError = asError(err);
124351
124383
  logEngineMetrics({
124352
- agentEngineType: configuration.agentEngineType,
124384
+ agentEngineType: engineType ?? "unknown",
124353
124385
  error: normalizedError,
124354
124386
  level: "error",
124355
124387
  logger,
@@ -124363,7 +124395,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124363
124395
  });
124364
124396
  rawBody.once("end", () => {
124365
124397
  logEngineMetrics({
124366
- agentEngineType: configuration.agentEngineType,
124398
+ agentEngineType: engineType ?? "unknown",
124367
124399
  level: upstreamError ? "error" : "info",
124368
124400
  logger,
124369
124401
  requestBodyBytes,
@@ -124382,7 +124414,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124382
124414
  }
124383
124415
  const closeError = new Error("Engine response stream closed before completion");
124384
124416
  logEngineMetrics({
124385
- agentEngineType: configuration.agentEngineType,
124417
+ agentEngineType: engineType ?? "unknown",
124386
124418
  error: closeError,
124387
124419
  level: "error",
124388
124420
  logger,
@@ -124407,7 +124439,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124407
124439
  rawBody.once("error", err => {
124408
124440
  const normalizedError = asError(err);
124409
124441
  logEngineMetrics({
124410
- agentEngineType: configuration.agentEngineType,
124442
+ agentEngineType: engineType ?? "unknown",
124411
124443
  error: normalizedError,
124412
124444
  level: "error",
124413
124445
  logger,
@@ -124427,7 +124459,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124427
124459
  usage.outputTokens = extractedUsage.outputTokens;
124428
124460
  }
124429
124461
  logEngineMetrics({
124430
- agentEngineType: configuration.agentEngineType,
124462
+ agentEngineType: engineType ?? "unknown",
124431
124463
  level: upstreamError ? "error" : "info",
124432
124464
  logger,
124433
124465
  requestBodyBytes,
@@ -124446,7 +124478,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
124446
124478
  }
124447
124479
  const closeError = new Error("Engine response stream closed before completion");
124448
124480
  logEngineMetrics({
124449
- agentEngineType: configuration.agentEngineType,
124481
+ agentEngineType: engineType ?? "unknown",
124450
124482
  error: closeError,
124451
124483
  level: "error",
124452
124484
  logger,
@@ -124472,7 +124504,7 @@ function extractEndpointId(req) {
124472
124504
  const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
124473
124505
  return raw && isValid(raw) ? raw : null;
124474
124506
  }
124475
- function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
124507
+ function createConduitAnthropicAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger }) {
124476
124508
  return {
124477
124509
  "/v1/messages": {
124478
124510
  POST: async ({ body, req, res }) => {
@@ -124484,7 +124516,7 @@ function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration,
124484
124516
  });
124485
124517
  return proxyAnthropicStreamingRoute({
124486
124518
  body,
124487
- configuration,
124519
+ conduitConfiguration: conduitConfiguration(),
124488
124520
  endpointId: extractEndpointId(req),
124489
124521
  logger,
124490
124522
  modelID,
@@ -124506,7 +124538,7 @@ function createHealthHandler() {
124506
124538
  };
124507
124539
  }
124508
124540
 
124509
- async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
124541
+ async function handleSSERequests({ apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
124510
124542
  const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
124511
124543
  const maxReconnectDelayMs = 30000;
124512
124544
  let reconnectAttempt = 0;
@@ -124548,6 +124580,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
124548
124580
  handleRequest({
124549
124581
  activeRequests,
124550
124582
  apiURL,
124583
+ conduitConfiguration,
124551
124584
  configuration,
124552
124585
  logger,
124553
124586
  modelID,
@@ -124591,7 +124624,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
124591
124624
  }
124592
124625
  }
124593
124626
  }
124594
- async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
124627
+ async function handleRequest({ activeRequests, apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
124595
124628
  function reportMetricsSafe(payload) {
124596
124629
  reportMetrics(payload).catch(error => {
124597
124630
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -124600,6 +124633,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
124600
124633
  });
124601
124634
  });
124602
124635
  }
124636
+ const engineType = conduitConfiguration().engineConfig?.type ?? null;
124603
124637
  const endpointId = request.parameters?.endpointID ?? null;
124604
124638
  const requestStartedAt = Date.now();
124605
124639
  const requestBytes = calculateRequestBytes(request.body ?? null);
@@ -124624,7 +124658,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
124624
124658
  reportMetricsSafe({
124625
124659
  bytes: requestBytes + responseMetrics.responseBytes,
124626
124660
  completionTokens: 0,
124627
- engine: configuration.agentEngineType,
124661
+ engine: engineType,
124628
124662
  endpointId,
124629
124663
  latencyMs,
124630
124664
  modelId: modelID,
@@ -124678,7 +124712,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
124678
124712
  reportMetricsSafe({
124679
124713
  bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
124680
124714
  completionTokens: 0,
124681
- engine: configuration.agentEngineType,
124715
+ engine: engineType,
124682
124716
  endpointId,
124683
124717
  latencyMs,
124684
124718
  modelId: modelID,
@@ -134618,15 +134652,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134618
134652
  let modelFileName = getConduitModelFileName(conduitConfiguration);
134619
134653
  let modelName = getConduitModelName(conduitConfiguration);
134620
134654
  const startup = Date.now();
134621
- let modelManager = new ModelManager({
134622
- contextLength: conduitConfiguration.contextLength ?? null,
134623
- engine: configuration.agentEngineType,
134624
- enginePort: configuration.enginePort,
134625
- logger,
134626
- model: conduitConfiguration.targetModel,
134627
- parallelism: conduitConfiguration.parallelism ?? null,
134628
- root: configuration.rootDirectory
134629
- });
134655
+ let modelManager = createModelManagerFromConfig(conduitConfiguration, configuration, logger);
134630
134656
  const conduitStateReportManager = new ConduitStateReportManager({
134631
134657
  apiClient,
134632
134658
  collectMachineMetadata: collectMachineMetadata,
@@ -134765,15 +134791,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134765
134791
  conduitConfiguration = newConduitConfiguration;
134766
134792
  modelFileName = getConduitModelFileName(conduitConfiguration);
134767
134793
  modelName = getConduitModelName(conduitConfiguration);
134768
- modelManager = new ModelManager({
134769
- contextLength: conduitConfiguration.contextLength ?? null,
134770
- engine: configuration.agentEngineType,
134771
- enginePort: configuration.enginePort,
134772
- logger,
134773
- model: conduitConfiguration.targetModel,
134774
- parallelism: conduitConfiguration.parallelism ?? null,
134775
- root: configuration.rootDirectory
134776
- });
134794
+ modelManager = createModelManagerFromConfig(conduitConfiguration, configuration, logger);
134777
134795
  attachLifecycleListeners();
134778
134796
  if (sourceState === "idle") {
134779
134797
  logger.info("Restarting engine from idle");
@@ -134847,6 +134865,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134847
134865
  "/v1/chat/completions": {
134848
134866
  POST: createPostChatCompletionsHandler({
134849
134867
  apiClient,
134868
+ conduitConfiguration: () => conduitConfiguration,
134850
134869
  configuration,
134851
134870
  getModelID: () => conduitConfiguration.targetModel.id,
134852
134871
  getModelManager: () => modelManager,
@@ -134857,6 +134876,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134857
134876
  "/v1/completions": {
134858
134877
  POST: createPostCompletionsHandler({
134859
134878
  apiClient,
134879
+ conduitConfiguration: () => conduitConfiguration,
134860
134880
  configuration,
134861
134881
  getModelID: () => conduitConfiguration.targetModel.id,
134862
134882
  getModelManager: () => modelManager,
@@ -134867,6 +134887,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134867
134887
  "/v1/models": {
134868
134888
  GET: createGetModelsHandler({
134869
134889
  apiClient,
134890
+ conduitConfiguration: () => conduitConfiguration,
134870
134891
  configuration,
134871
134892
  getModelID: () => conduitConfiguration.targetModel.id,
134872
134893
  getModelManager: () => modelManager,
@@ -134884,6 +134905,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134884
134905
  "/v1/messages": {
134885
134906
  POST: createPostMessagesHandler({
134886
134907
  apiClient,
134908
+ conduitConfiguration: () => conduitConfiguration,
134887
134909
  configuration,
134888
134910
  getModelID: () => conduitConfiguration.targetModel.id,
134889
134911
  getModelManager: () => modelManager,
@@ -134897,6 +134919,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
134897
134919
  });
134898
134920
  handleSSERequests({
134899
134921
  apiURL: configuration.apiURL,
134922
+ conduitConfiguration: () => conduitConfiguration,
134900
134923
  configuration,
134901
134924
  logger,
134902
134925
  modelID: conduitConfiguration.targetModel.id,
@@ -134973,6 +134996,18 @@ async function createApplication({ abortController, apiClient, configuration, lo
134973
134996
  shutdown
134974
134997
  };
134975
134998
  }
134999
+ function createModelManagerFromConfig(conduitConfiguration, configuration, logger) {
135000
+ const engineConfig = conduitConfiguration.engineConfig;
135001
+ return new ModelManager({
135002
+ contextLength: conduitConfiguration.contextLength ?? null,
135003
+ engineConfig: engineConfig?.config ?? null,
135004
+ enginePort: configuration.enginePort,
135005
+ engineType: engineConfig?.type ?? "llama.cpp",
135006
+ logger,
135007
+ model: conduitConfiguration.targetModel,
135008
+ root: configuration.rootDirectory
135009
+ });
135010
+ }
134976
135011
  function getConduitModelFileName(configuration) {
134977
135012
  const { source } = configuration.targetModel;
134978
135013
  return source.type === "huggingface" ? source.slug : source.irid;
@@ -134983,8 +135018,6 @@ function getConduitModelName(configuration) {
134983
135018
 
134984
135019
  const StartModeSchema = _enum(["auto", "idle"]);
134985
135020
  function getConfiguration({ overrides } = {}) {
134986
- const agentEngineTypeValue = overrides?.agentEngineType ?? readEnvString("ENGINE");
134987
- const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
134988
135021
  const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
134989
135022
  const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
134990
135023
  const enginePort = overrides?.enginePort ??
@@ -135005,7 +135038,6 @@ function getConfiguration({ overrides } = {}) {
135005
135038
  const startModeValue = overrides?.startMode ?? readEnvStringOptional("START_MODE", "auto");
135006
135039
  const startMode = StartModeSchema.parse(startModeValue);
135007
135040
  return {
135008
- agentEngineType,
135009
135041
  apiKey,
135010
135042
  apiURL,
135011
135043
  enginePort,
@@ -135031,7 +135063,6 @@ class ConduitConnection {
135031
135063
  this.enginePort = options.enginePort;
135032
135064
  this.configuration = getConfiguration({
135033
135065
  overrides: {
135034
- agentEngineType: options.engine,
135035
135066
  apiKey: options.apiKey,
135036
135067
  apiURL: options.apiURL,
135037
135068
  enginePort: options.enginePort,
@@ -135091,13 +135122,10 @@ async function startInferenceAgent({ configurationOverrides }) {
135091
135122
  const logger = createLogger({
135092
135123
  name: "infersec-conduit"
135093
135124
  });
135094
- logger.info("Application starting", {
135095
- agentEngineType: configuration.agentEngineType
135096
- });
135125
+ logger.info("Application starting");
135097
135126
  const connection = new ConduitConnection({
135098
135127
  apiKey: configuration.apiKey,
135099
135128
  apiURL: configuration.apiURL,
135100
- engine: configuration.agentEngineType,
135101
135129
  enginePort: configuration.enginePort,
135102
135130
  logger,
135103
135131
  port: configuration.port,
@@ -135136,7 +135164,6 @@ function registerInferenceCommands({ program }) {
135136
135164
  .command("start")
135137
135165
  .description("Start the inference agent")
135138
135166
  .option("--api-url <url>", "API base URL (or API_URL env)")
135139
- .option("--engine <type>", "Engine type (or ENGINE env)")
135140
135167
  .option("--engine-port <number>", "Engine port (or ENGINE_PORT env)")
135141
135168
  .option("--key <value>", "API key (or API_KEY env)")
135142
135169
  .option("--port <number>", "Port to listen on (or PORT env)")
@@ -135148,9 +135175,6 @@ function registerInferenceCommands({ program }) {
135148
135175
  if (options["api-url"]) {
135149
135176
  configurationOverrides.apiURL = options["api-url"];
135150
135177
  }
135151
- if (options.engine) {
135152
- configurationOverrides.agentEngineType = options.engine;
135153
- }
135154
135178
  if (options["engine-port"]) {
135155
135179
  const enginePort = Number.parseInt(options["engine-port"], 10);
135156
135180
  if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
@@ -161889,7 +161913,6 @@ async function runSingleBenchmark(options) {
161889
161913
  const conn = new ConduitConnection({
161890
161914
  apiKey,
161891
161915
  apiURL: apiUrl,
161892
- engine: entry.engine,
161893
161916
  enginePort,
161894
161917
  logger,
161895
161918
  port,
@@ -1,4 +1,4 @@
1
- import { LLMEngine, ULID } from "@infersec/definitions";
1
+ import { ULID } from "@infersec/definitions";
2
2
  import { z } from "zod";
3
3
  declare const StartModeSchema: z.ZodEnum<{
4
4
  idle: "idle";
@@ -6,7 +6,6 @@ declare const StartModeSchema: z.ZodEnum<{
6
6
  }>;
7
7
  export type StartMode = z.infer<typeof StartModeSchema>;
8
8
  export interface Configuration {
9
- agentEngineType: LLMEngine;
10
9
  apiKey: string;
11
10
  apiURL: string;
12
11
  enginePort: number;
@@ -16,7 +15,6 @@ export interface Configuration {
16
15
  startMode: StartMode;
17
16
  }
18
17
  export interface ConfigurationOverrides {
19
- agentEngineType?: string;
20
18
  apiKey?: string;
21
19
  apiURL?: string;
22
20
  enginePort?: number;
@@ -1,4 +1,4 @@
1
- import { LLMEngine, LLMModel } from "@infersec/definitions";
1
+ import { LLMModel } from "@infersec/definitions";
2
2
  import { Logger } from "@infersec/logger";
3
3
  import EventEmitter from "eventemitter3";
4
4
  import { Response } from "undici";
@@ -11,10 +11,10 @@ interface ModelManagerEvents {
11
11
  }
12
12
  type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
13
13
  export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
14
- readonly engine: LLMEngine;
14
+ readonly engine: string;
15
+ readonly engineConfig: Record<string, unknown> | null;
15
16
  readonly enginePort: number;
16
17
  readonly model: LLMModel;
17
- readonly parallelism: number | null;
18
18
  private uniqueName;
19
19
  readonly contextLength: number | null;
20
20
  protected readonly logger: Logger;
@@ -23,13 +23,13 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
23
23
  private lifecycleState;
24
24
  private stopRequested;
25
25
  protected readonly modelsDirectory: string;
26
- constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }: {
26
+ constructor({ contextLength, engineConfig, enginePort, engineType, logger, model, root }: {
27
27
  contextLength?: number | null;
28
- engine: LLMEngine;
28
+ engineConfig?: Record<string, unknown> | null;
29
29
  enginePort: number;
30
+ engineType: string;
30
31
  logger: Logger;
31
32
  model: LLMModel;
32
- parallelism?: number | null;
33
33
  root: string;
34
34
  });
35
35
  fetchOpenAI(path: string, opts?: RequestInit): Promise<Response>;
@@ -1,12 +1,13 @@
1
- import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
1
+ import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE, type InferenceAgentConfiguration } from "@infersec/definitions";
2
2
  import { implementAPIReference, type APIRequest } from "@infersec/fetch";
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { APIClient } from "../apiClient/index.js";
5
5
  import { Configuration } from "../configuration.js";
6
6
  import { ModelManager } from "../modelManagement/ModelManager.js";
7
7
  type ConduitAnthropicAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE>>[0]["api"];
8
- export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }: {
8
+ export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger }: {
9
9
  apiClient: APIClient;
10
+ conduitConfiguration: () => InferenceAgentConfiguration;
10
11
  configuration: Configuration;
11
12
  getModelID: () => string;
12
13
  getModelManager: () => ModelManager;
@@ -14,6 +15,7 @@ export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient,
14
15
  }): ConduitAnthropicAPIReferenceHandlers;
15
16
  export declare function createPostMessagesHandler(options: {
16
17
  apiClient: APIClient;
18
+ conduitConfiguration: () => InferenceAgentConfiguration;
17
19
  configuration: Configuration;
18
20
  getModelID: () => string;
19
21
  getModelManager: () => ModelManager;
@@ -1,12 +1,13 @@
1
- import { API_CLIENT_CONDUIT_OPENAI_REFERENCE } from "@infersec/definitions";
1
+ import { API_CLIENT_CONDUIT_OPENAI_REFERENCE, type InferenceAgentConfiguration } from "@infersec/definitions";
2
2
  import { implementAPIReference, type APIRequest } from "@infersec/fetch";
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { APIClient } from "../apiClient/index.js";
5
5
  import { Configuration } from "../configuration.js";
6
6
  import { ModelManager } from "../modelManagement/ModelManager.js";
7
7
  type ConduitOpenAIAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_OPENAI_REFERENCE>>[0]["api"];
8
- export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }: {
8
+ export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger, startup }: {
9
9
  apiClient: APIClient;
10
+ conduitConfiguration: () => InferenceAgentConfiguration;
10
11
  configuration: Configuration;
11
12
  getModelID: () => string;
12
13
  getModelManager: () => ModelManager;
@@ -15,6 +16,7 @@ export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, con
15
16
  }): ConduitOpenAIAPIReferenceHandlers;
16
17
  export declare function createGetModelsHandler(options: {
17
18
  apiClient: APIClient;
19
+ conduitConfiguration: () => InferenceAgentConfiguration;
18
20
  configuration: Configuration;
19
21
  getModelID: () => string;
20
22
  getModelManager: () => ModelManager;
@@ -60,6 +62,7 @@ export declare function createGetModelsHandler(options: {
60
62
  }>;
61
63
  export declare function createPostChatCompletionsHandler(options: {
62
64
  apiClient: APIClient;
65
+ conduitConfiguration: () => InferenceAgentConfiguration;
63
66
  configuration: Configuration;
64
67
  getModelID: () => string;
65
68
  getModelManager: () => ModelManager;
@@ -166,6 +169,7 @@ export declare function createPostChatCompletionsHandler(options: {
166
169
  }>;
167
170
  export declare function createPostCompletionsHandler(options: {
168
171
  apiClient: APIClient;
172
+ conduitConfiguration: () => InferenceAgentConfiguration;
169
173
  configuration: Configuration;
170
174
  getModelID: () => string;
171
175
  getModelManager: () => ModelManager;
@@ -1,8 +1,9 @@
1
- import { InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
1
+ import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
2
2
  import { Logger } from "@infersec/logger";
3
3
  import { Configuration } from "../configuration.js";
4
- export declare function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
4
+ export declare function handleSSERequests({ apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
5
5
  apiURL: string;
6
+ conduitConfiguration: () => InferenceAgentConfiguration;
6
7
  configuration: Configuration;
7
8
  logger: Logger;
8
9
  modelID: ULID;
@@ -1,10 +1,11 @@
1
1
  import { Readable } from "node:stream";
2
- import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
2
+ import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
- export declare function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
6
+ export declare function proxyAnthropicStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
7
7
  body: unknown;
8
+ conduitConfiguration: InferenceAgentConfiguration;
8
9
  configuration: Configuration;
9
10
  endpointId?: ULID | null;
10
11
  logger: Logger;
@@ -1,5 +1,4 @@
1
1
  import { Readable } from "node:stream";
2
- import { LLMEngine } from "@infersec/definitions";
3
2
  import { Logger } from "@infersec/logger";
4
3
  export interface EngineUsageMetrics {
5
4
  completionTokens: number | null;
@@ -8,7 +7,7 @@ export interface EngineUsageMetrics {
8
7
  totalTokens: number | null;
9
8
  }
10
9
  interface EngineMetricsLoggerOptions {
11
- agentEngineType: LLMEngine;
10
+ agentEngineType: string;
12
11
  logger: Logger;
13
12
  requestBodyBytes: number;
14
13
  requestPath: string;
@@ -24,9 +23,9 @@ interface EngineMetricsCompletion {
24
23
  interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
25
24
  body: Readable;
26
25
  contextLength: number | null;
27
- engine: LLMEngine;
26
+ engineConfig: Record<string, unknown> | null;
27
+ engineType: string;
28
28
  onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
29
- parallelism: number | null;
30
29
  requestStartedAt?: number;
31
30
  }
32
31
  interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
@@ -38,7 +37,7 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
38
37
  interface MonitorEngineResponseResult {
39
38
  stream: Readable;
40
39
  }
41
- export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
42
- export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
40
+ export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
41
+ export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
43
42
  export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
44
43
  export {};
@@ -1,10 +1,11 @@
1
1
  import { Readable } from "node:stream";
2
- import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
2
+ import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
- export declare function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
6
+ export declare function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
7
7
  body: unknown;
8
+ conduitConfiguration: InferenceAgentConfiguration;
8
9
  configuration: Configuration;
9
10
  endpointId?: ULID | null;
10
11
  logger: Logger;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.67.0",
4
+ "version": "1.68.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },