@infersec/conduit 1.67.0 → 1.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ConduitConnection.d.ts +1 -2
- package/dist/cli.js +166 -143
- package/dist/configuration.d.ts +1 -3
- package/dist/modelManagement/ModelManager.d.ts +6 -6
- package/dist/requestHandlers/createConduitAnthropicAPIReferenceHandlers.d.ts +4 -2
- package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts +6 -2
- package/dist/sse/handler.d.ts +3 -2
- package/dist/utils/anthropic.d.ts +3 -2
- package/dist/utils/engineMetrics.d.ts +5 -6
- package/dist/utils/openai.d.ts +3 -2
- package/package.json +1 -1
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ULID } from "@infersec/definitions";
|
|
2
2
|
import type { Logger } from "@infersec/logger";
|
|
3
3
|
export interface ConduitConnectionOptions {
|
|
4
4
|
apiKey: string;
|
|
5
5
|
apiURL: string;
|
|
6
|
-
engine: LLMEngine;
|
|
7
6
|
enginePort: number;
|
|
8
7
|
logger: Logger;
|
|
9
8
|
port: number;
|
package/dist/cli.js
CHANGED
|
@@ -4344,18 +4344,15 @@ function ulid$3(seedTime, prng) {
|
|
|
4344
4344
|
return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
|
|
4345
4345
|
}
|
|
4346
4346
|
|
|
4347
|
-
|
|
4348
|
-
* Calculates the effective context length per slot, accounting for
|
|
4349
|
-
* parallelism when using llama.cpp. For llama.cpp, the total context
|
|
4350
|
-
* window is divided across parallel slots; for other engines, the
|
|
4351
|
-
* full context length is used.
|
|
4352
|
-
*/
|
|
4353
|
-
function getEffectiveContextLength({ contextLength, engine, parallelism }) {
|
|
4347
|
+
function getEffectiveContextLength({ contextLength, engineConfig, engineType }) {
|
|
4354
4348
|
if (contextLength === null || contextLength <= 0) {
|
|
4355
4349
|
return null;
|
|
4356
4350
|
}
|
|
4357
|
-
if (
|
|
4358
|
-
|
|
4351
|
+
if (engineType === "llama.cpp" && engineConfig) {
|
|
4352
|
+
const parallelism = engineConfig?.parallelism;
|
|
4353
|
+
if (typeof parallelism === "number" && parallelism > 0) {
|
|
4354
|
+
return contextLength / parallelism;
|
|
4355
|
+
}
|
|
4359
4356
|
}
|
|
4360
4357
|
return contextLength;
|
|
4361
4358
|
}
|
|
@@ -19893,6 +19890,28 @@ object$1({
|
|
|
19893
19890
|
});
|
|
19894
19891
|
|
|
19895
19892
|
const LLMEngineSchema = _enum$1(["llama.cpp", "vllm"]);
|
|
19893
|
+
const LlamacppEngineConfigSchema = object$1({
|
|
19894
|
+
batchSize: number$1().int().positive().nullable().optional(),
|
|
19895
|
+
cacheTypeK: string$3().nullable().optional(),
|
|
19896
|
+
cacheTypeV: string$3().nullable().optional(),
|
|
19897
|
+
extraArgs: array$2(string$3()).optional(),
|
|
19898
|
+
flashAttn: boolean$1().optional(),
|
|
19899
|
+
gpuLayers: number$1().int().min(0).optional(),
|
|
19900
|
+
mainGpu: number$1().int().min(0).nullable().optional(),
|
|
19901
|
+
parallelism: number$1().int().positive().optional(),
|
|
19902
|
+
tensorSplit: string$3().nullable().optional(),
|
|
19903
|
+
ubatchSize: number$1().int().positive().nullable().optional()
|
|
19904
|
+
});
|
|
19905
|
+
const VLLMEngineConfigSchema = object$1({
|
|
19906
|
+
device: string$3().optional(),
|
|
19907
|
+
dtype: string$3().optional(),
|
|
19908
|
+
extraArgs: array$2(string$3()).optional(),
|
|
19909
|
+
tensorParallelSize: number$1().int().positive().optional()
|
|
19910
|
+
});
|
|
19911
|
+
const EngineConfigSchema = discriminatedUnion("type", [
|
|
19912
|
+
object$1({ config: LlamacppEngineConfigSchema, type: literal("llama.cpp") }),
|
|
19913
|
+
object$1({ config: VLLMEngineConfigSchema, type: literal("vllm") })
|
|
19914
|
+
]);
|
|
19896
19915
|
const LLMModelFormatSchema = _enum$1([
|
|
19897
19916
|
// VLLM
|
|
19898
19917
|
"safetensors",
|
|
@@ -20045,8 +20064,8 @@ const ConduitState = z.preprocess(value => {
|
|
|
20045
20064
|
|
|
20046
20065
|
const InferenceAgentConfigurationSchema = object$1({
|
|
20047
20066
|
contextLength: number$1().int().positive().nullable(),
|
|
20067
|
+
engineConfig: EngineConfigSchema.nullable(),
|
|
20048
20068
|
inferenceSourceID: ULIDSchema,
|
|
20049
|
-
parallelism: number$1().int().positive().nullable(),
|
|
20050
20069
|
targetModel: LLMModelSchema
|
|
20051
20070
|
});
|
|
20052
20071
|
|
|
@@ -20775,7 +20794,8 @@ object$1({
|
|
|
20775
20794
|
provider: _enum$1(["storage", "huggingface"]),
|
|
20776
20795
|
providerSlug: string$3(),
|
|
20777
20796
|
sources: array$2(object$1({
|
|
20778
|
-
engine: LLMEngineSchema,
|
|
20797
|
+
engine: LLMEngineSchema.nullable(),
|
|
20798
|
+
engineId: ULIDSchema.nullable(),
|
|
20779
20799
|
id: ULIDSchema,
|
|
20780
20800
|
lastState: ConduitState.nullable(),
|
|
20781
20801
|
lastStateTimestamp: string$3().nullable(),
|
|
@@ -20796,17 +20816,17 @@ object$1({
|
|
|
20796
20816
|
});
|
|
20797
20817
|
object$1({
|
|
20798
20818
|
contextLength: number$1().int().positive().max(1048576).optional(),
|
|
20799
|
-
|
|
20819
|
+
engineId: ULIDSchema,
|
|
20800
20820
|
modelID: ULIDSchema,
|
|
20801
20821
|
name: ResourceNameSchema,
|
|
20802
|
-
parallelism: number$1().int().positive().optional(),
|
|
20803
20822
|
quantizationLabel: string$3().min(1).max(128).optional()
|
|
20804
20823
|
});
|
|
20805
20824
|
object$1({
|
|
20806
20825
|
results: array$2(object$1({
|
|
20807
20826
|
contextLength: number$1().int().positive().nullable(),
|
|
20808
20827
|
created: string$3(),
|
|
20809
|
-
engine: LLMEngineSchema,
|
|
20828
|
+
engine: LLMEngineSchema.nullable(),
|
|
20829
|
+
engineId: ULIDSchema.nullable(),
|
|
20810
20830
|
id: ULIDSchema,
|
|
20811
20831
|
lastState: ConduitState.nullable(),
|
|
20812
20832
|
lastStateTimestamp: string$3().nullable(),
|
|
@@ -20819,7 +20839,10 @@ object$1({
|
|
|
20819
20839
|
object$1({
|
|
20820
20840
|
contextLength: number$1().int().positive().nullable(),
|
|
20821
20841
|
created: string$3(),
|
|
20822
|
-
engine: LLMEngineSchema,
|
|
20842
|
+
engine: LLMEngineSchema.nullable(),
|
|
20843
|
+
engineConfig: unknown$1().nullable(),
|
|
20844
|
+
engineId: ULIDSchema.nullable(),
|
|
20845
|
+
engineName: string$3().nullable(),
|
|
20823
20846
|
id: ULIDSchema,
|
|
20824
20847
|
lastState: ConduitState.nullable(),
|
|
20825
20848
|
lastStateTimestamp: string$3().nullable(),
|
|
@@ -20832,15 +20855,13 @@ object$1({
|
|
|
20832
20855
|
}),
|
|
20833
20856
|
modelQuantizationLabel: string$3().nullable(),
|
|
20834
20857
|
name: string$3(),
|
|
20835
|
-
parallelism: number$1().int().positive().nullable(),
|
|
20836
20858
|
updated: string$3()
|
|
20837
20859
|
});
|
|
20838
20860
|
object$1({
|
|
20839
20861
|
contextLength: number$1().int().positive().nullable().optional(),
|
|
20840
|
-
|
|
20862
|
+
engineId: ULIDSchema.nullable().optional(),
|
|
20841
20863
|
modelID: ULIDSchema.optional(),
|
|
20842
20864
|
name: ResourceNameSchema.optional(),
|
|
20843
|
-
parallelism: number$1().int().positive().nullable().optional(),
|
|
20844
20865
|
quantizationLabel: string$3().min(1).max(128).nullable().optional()
|
|
20845
20866
|
});
|
|
20846
20867
|
object$1({
|
|
@@ -20870,7 +20891,8 @@ object$1({
|
|
|
20870
20891
|
name: string$3(),
|
|
20871
20892
|
routingMethod: nativeEnum(RoutingMethod),
|
|
20872
20893
|
sources: array$2(object$1({
|
|
20873
|
-
engine: LLMEngineSchema,
|
|
20894
|
+
engine: LLMEngineSchema.nullable(),
|
|
20895
|
+
engineId: ULIDSchema.nullable(),
|
|
20874
20896
|
id: ULIDSchema,
|
|
20875
20897
|
modelName: string$3(),
|
|
20876
20898
|
name: string$3()
|
|
@@ -116803,8 +116825,10 @@ async function startVLLM({ enginePort, targetDirectory }) {
|
|
|
116803
116825
|
if (this.model.format === "gguf") {
|
|
116804
116826
|
modelPath = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
|
|
116805
116827
|
}
|
|
116806
|
-
const
|
|
116807
|
-
const
|
|
116828
|
+
const engineConfig = this.engineConfig;
|
|
116829
|
+
const device = typeof engineConfig?.device === "string" ? engineConfig.device : process.env.VLLM_DEVICE;
|
|
116830
|
+
const dtype = typeof engineConfig?.dtype === "string" ? engineConfig.dtype : process.env.VLLM_DTYPE;
|
|
116831
|
+
const tensorParallelSize = typeof engineConfig?.tensorParallelSize === "number" ? engineConfig.tensorParallelSize : 1;
|
|
116808
116832
|
const args = [
|
|
116809
116833
|
...VLLM_START_ARGS,
|
|
116810
116834
|
"--port",
|
|
@@ -116816,7 +116840,7 @@ async function startVLLM({ enginePort, targetDirectory }) {
|
|
|
116816
116840
|
"--max-model-len",
|
|
116817
116841
|
String(contextLength),
|
|
116818
116842
|
"--tensor-parallel-size",
|
|
116819
|
-
|
|
116843
|
+
String(tensorParallelSize)
|
|
116820
116844
|
];
|
|
116821
116845
|
if (device) {
|
|
116822
116846
|
args.push("--device", device);
|
|
@@ -116824,6 +116848,10 @@ async function startVLLM({ enginePort, targetDirectory }) {
|
|
|
116824
116848
|
if (dtype) {
|
|
116825
116849
|
args.push("--dtype", dtype);
|
|
116826
116850
|
}
|
|
116851
|
+
const extraArgs = engineConfig?.extraArgs;
|
|
116852
|
+
if (Array.isArray(extraArgs) && extraArgs.every((v) => typeof v === "string")) {
|
|
116853
|
+
args.push(...extraArgs);
|
|
116854
|
+
}
|
|
116827
116855
|
const processManager = new ProcessManager({
|
|
116828
116856
|
command: VLLM_EXECUTABLE,
|
|
116829
116857
|
args
|
|
@@ -122775,7 +122803,8 @@ const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
|
|
|
122775
122803
|
async function startLlamacpp({ enginePort, targetDirectory }) {
|
|
122776
122804
|
const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
|
|
122777
122805
|
const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
|
|
122778
|
-
const
|
|
122806
|
+
const engineConfig = this.engineConfig;
|
|
122807
|
+
const parallelism = typeof engineConfig?.parallelism === "number" ? engineConfig.parallelism : null;
|
|
122779
122808
|
const args = [
|
|
122780
122809
|
...LLAMACPP_START_ARGS,
|
|
122781
122810
|
"--port",
|
|
@@ -122785,13 +122814,47 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
|
|
|
122785
122814
|
"--ctx-size",
|
|
122786
122815
|
String(contextLength)
|
|
122787
122816
|
];
|
|
122788
|
-
const gpuLayers =
|
|
122817
|
+
const gpuLayers = typeof engineConfig?.gpuLayers === "number"
|
|
122818
|
+
? engineConfig.gpuLayers
|
|
122819
|
+
: Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
|
|
122789
122820
|
if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
|
|
122790
122821
|
args.push("--n-gpu-layers", String(gpuLayers));
|
|
122791
122822
|
}
|
|
122792
122823
|
if (typeof parallelism === "number") {
|
|
122793
122824
|
args.push("--parallel", String(Math.max(1, parallelism)));
|
|
122794
122825
|
}
|
|
122826
|
+
const flashAttn = engineConfig?.flashAttn;
|
|
122827
|
+
if (flashAttn === true || flashAttn === undefined) {
|
|
122828
|
+
args.push("--flash-attn", "on");
|
|
122829
|
+
}
|
|
122830
|
+
const cacheTypeK = typeof engineConfig?.cacheTypeK === "string" ? engineConfig.cacheTypeK : null;
|
|
122831
|
+
if (cacheTypeK) {
|
|
122832
|
+
args.push("--cache-type-k", cacheTypeK);
|
|
122833
|
+
}
|
|
122834
|
+
const cacheTypeV = typeof engineConfig?.cacheTypeV === "string" ? engineConfig.cacheTypeV : null;
|
|
122835
|
+
if (cacheTypeV) {
|
|
122836
|
+
args.push("--cache-type-v", cacheTypeV);
|
|
122837
|
+
}
|
|
122838
|
+
const batchSize = typeof engineConfig?.batchSize === "number" ? engineConfig.batchSize : null;
|
|
122839
|
+
if (batchSize !== null) {
|
|
122840
|
+
args.push("--batch-size", String(batchSize));
|
|
122841
|
+
}
|
|
122842
|
+
const ubatchSize = typeof engineConfig?.ubatchSize === "number" ? engineConfig.ubatchSize : null;
|
|
122843
|
+
if (ubatchSize !== null) {
|
|
122844
|
+
args.push("--ubatch-size", String(ubatchSize));
|
|
122845
|
+
}
|
|
122846
|
+
const tensorSplit = typeof engineConfig?.tensorSplit === "string" ? engineConfig.tensorSplit : null;
|
|
122847
|
+
if (tensorSplit) {
|
|
122848
|
+
args.push("--tensor-split", tensorSplit);
|
|
122849
|
+
}
|
|
122850
|
+
const mainGpu = typeof engineConfig?.mainGpu === "number" ? engineConfig.mainGpu : null;
|
|
122851
|
+
if (mainGpu !== null) {
|
|
122852
|
+
args.push("--main-gpu", String(mainGpu));
|
|
122853
|
+
}
|
|
122854
|
+
const extraArgs = engineConfig?.extraArgs;
|
|
122855
|
+
if (Array.isArray(extraArgs) && extraArgs.every((v) => typeof v === "string")) {
|
|
122856
|
+
args.push(...extraArgs);
|
|
122857
|
+
}
|
|
122795
122858
|
const processManager = new ProcessManager({
|
|
122796
122859
|
command: LLAMACPP_EXECUTABLE,
|
|
122797
122860
|
args
|
|
@@ -122824,11 +122887,8 @@ function createModelStorageKey(model) {
|
|
|
122824
122887
|
return `${model.source.type}${SEPARATOR}${sanitizeSegment(identifier)}`;
|
|
122825
122888
|
}
|
|
122826
122889
|
|
|
122827
|
-
// 2 hours
|
|
122828
122890
|
const ENGINE_FETCH_TIMEOUT_MS$1 = 7200000;
|
|
122829
|
-
// 20 minutes
|
|
122830
122891
|
const DOWNLOAD_LOCK_TIMEOUT_MS = 20 * 60 * 1000;
|
|
122831
|
-
// 5 seconds
|
|
122832
122892
|
const DOWNLOAD_LOCK_POLL_INTERVAL_MS = 5000;
|
|
122833
122893
|
const ENGINE_AGENT = new undiciExports.Agent({
|
|
122834
122894
|
bodyTimeout: ENGINE_FETCH_TIMEOUT_MS$1,
|
|
@@ -122836,9 +122896,9 @@ const ENGINE_AGENT = new undiciExports.Agent({
|
|
|
122836
122896
|
});
|
|
122837
122897
|
class ModelManager extends EventEmitter {
|
|
122838
122898
|
engine;
|
|
122899
|
+
engineConfig;
|
|
122839
122900
|
enginePort;
|
|
122840
122901
|
model;
|
|
122841
|
-
parallelism;
|
|
122842
122902
|
uniqueName;
|
|
122843
122903
|
contextLength;
|
|
122844
122904
|
logger;
|
|
@@ -122847,27 +122907,13 @@ class ModelManager extends EventEmitter {
|
|
|
122847
122907
|
lifecycleState = "stopped";
|
|
122848
122908
|
stopRequested = false;
|
|
122849
122909
|
modelsDirectory;
|
|
122850
|
-
constructor({ contextLength,
|
|
122910
|
+
constructor({ contextLength, engineConfig, enginePort, engineType, logger, model, root }) {
|
|
122851
122911
|
super();
|
|
122852
|
-
|
|
122853
|
-
|
|
122854
|
-
// if (!targetModel) {
|
|
122855
|
-
// throw new ConfigurationInvalidError({
|
|
122856
|
-
// message: `No model found for ID: ${modelID}`
|
|
122857
|
-
// });
|
|
122858
|
-
// }
|
|
122859
|
-
// const source = targetModel.sources.find(source => source.engine === engine);
|
|
122860
|
-
// if (!source) {
|
|
122861
|
-
// throw new ConfigurationInvalidError({
|
|
122862
|
-
// message: `Model does not support current engine: ${modelID} has no support for engine: ${engine}`
|
|
122863
|
-
// });
|
|
122864
|
-
// }
|
|
122865
|
-
this.engine = engine;
|
|
122912
|
+
this.engine = engineType;
|
|
122913
|
+
this.engineConfig = engineConfig ?? null;
|
|
122866
122914
|
this.enginePort = enginePort;
|
|
122867
122915
|
this.model = model;
|
|
122868
122916
|
this.contextLength = typeof contextLength === "number" ? contextLength : null;
|
|
122869
|
-
this.parallelism = typeof parallelism === "number" ? parallelism : null;
|
|
122870
|
-
// this.providerSlugentifier = source.identifier;
|
|
122871
122917
|
this.logger = logger;
|
|
122872
122918
|
this.uniqueName = createModelStorageKey(this.model);
|
|
122873
122919
|
this.modelsDirectory = join(root, "models");
|
|
@@ -122908,12 +122954,6 @@ class ModelManager extends EventEmitter {
|
|
|
122908
122954
|
clearTimeout(timeout);
|
|
122909
122955
|
}
|
|
122910
122956
|
}
|
|
122911
|
-
// case "ollama":
|
|
122912
|
-
// console.log("FETCH", path, opts);
|
|
122913
|
-
// return fetch(
|
|
122914
|
-
// joinURL("http://localhost:11434", path),
|
|
122915
|
-
// opts
|
|
122916
|
-
// );
|
|
122917
122957
|
default: {
|
|
122918
122958
|
const engineType = this.engine;
|
|
122919
122959
|
throw new ConfigurationInvalidError({
|
|
@@ -122948,15 +122988,6 @@ class ModelManager extends EventEmitter {
|
|
|
122948
122988
|
await this.releaseDownloadLock();
|
|
122949
122989
|
}
|
|
122950
122990
|
break;
|
|
122951
|
-
// case "ollama":
|
|
122952
|
-
// this.logger.info("Loading model", {
|
|
122953
|
-
// modelID: this.model.id
|
|
122954
|
-
// });
|
|
122955
|
-
// await loadCurrentOllamaModel.call(this);
|
|
122956
|
-
// this.logger.info("Loaded model", {
|
|
122957
|
-
// modelID: this.model.id
|
|
122958
|
-
// });
|
|
122959
|
-
// return;
|
|
122960
122991
|
default: {
|
|
122961
122992
|
const engineType = this.engine;
|
|
122962
122993
|
throw new ConfigurationInvalidError({
|
|
@@ -123392,12 +123423,6 @@ function createPostStopEngineHandler(options) {
|
|
|
123392
123423
|
return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/stop"].POST;
|
|
123393
123424
|
}
|
|
123394
123425
|
|
|
123395
|
-
/**
|
|
123396
|
-
* Coerce non-string tool_calls function.arguments to JSON strings.
|
|
123397
|
-
* Some LLM backends return arguments as parsed objects instead of
|
|
123398
|
-
* JSON strings, violating the OpenAI spec. This mutates in place
|
|
123399
|
-
* and returns true if any coercion was performed.
|
|
123400
|
-
*/
|
|
123401
123426
|
function coerceToolCallArguments(parsed) {
|
|
123402
123427
|
const choices = parsed.choices;
|
|
123403
123428
|
if (!Array.isArray(choices))
|
|
@@ -123438,7 +123463,7 @@ function isEngineUsageChunk(value) {
|
|
|
123438
123463
|
}
|
|
123439
123464
|
return true;
|
|
123440
123465
|
}
|
|
123441
|
-
function monitorEngineResponseStream({ agentEngineType, body, contextLength,
|
|
123466
|
+
function monitorEngineResponseStream({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
123442
123467
|
const startedAt = requestStartedAt ?? Date.now();
|
|
123443
123468
|
const passThrough = new PassThrough();
|
|
123444
123469
|
passThrough.on("error", (error) => {
|
|
@@ -123477,8 +123502,8 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
123477
123502
|
const usageChunk = parsed.usage;
|
|
123478
123503
|
const effectiveContext = getEffectiveContextLength({
|
|
123479
123504
|
contextLength,
|
|
123480
|
-
|
|
123481
|
-
|
|
123505
|
+
engineConfig,
|
|
123506
|
+
engineType
|
|
123482
123507
|
});
|
|
123483
123508
|
if (usageChunk.context_usage === undefined &&
|
|
123484
123509
|
usageChunk.prompt_tokens !== undefined &&
|
|
@@ -123520,8 +123545,8 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
123520
123545
|
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
123521
123546
|
const effectiveContextForUsage = getEffectiveContextLength({
|
|
123522
123547
|
contextLength,
|
|
123523
|
-
|
|
123524
|
-
|
|
123548
|
+
engineConfig,
|
|
123549
|
+
engineType
|
|
123525
123550
|
});
|
|
123526
123551
|
if (contextUsage === null &&
|
|
123527
123552
|
promptTokens !== null &&
|
|
@@ -123590,7 +123615,9 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
123590
123615
|
passThrough.destroy(err);
|
|
123591
123616
|
});
|
|
123592
123617
|
body.once("end", () => {
|
|
123593
|
-
|
|
123618
|
+
if (buffer.length > 0) {
|
|
123619
|
+
parseUsageFromBuffer();
|
|
123620
|
+
}
|
|
123594
123621
|
logEngineMetrics({
|
|
123595
123622
|
agentEngineType,
|
|
123596
123623
|
level: "info",
|
|
@@ -123633,7 +123660,7 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
123633
123660
|
stream: passThrough
|
|
123634
123661
|
};
|
|
123635
123662
|
}
|
|
123636
|
-
function monitorEngineResponseSingle({ agentEngineType, body, contextLength,
|
|
123663
|
+
function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
123637
123664
|
const maxUsageCaptureBytes = 1024 * 1024;
|
|
123638
123665
|
const startedAt = requestStartedAt ?? Date.now();
|
|
123639
123666
|
const passThrough = new PassThrough();
|
|
@@ -123719,8 +123746,8 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
|
|
|
123719
123746
|
let contextUsage = usageChunk.context_usage ?? null;
|
|
123720
123747
|
const effectiveContext = getEffectiveContextLength({
|
|
123721
123748
|
contextLength,
|
|
123722
|
-
|
|
123723
|
-
|
|
123749
|
+
engineConfig,
|
|
123750
|
+
engineType
|
|
123724
123751
|
});
|
|
123725
123752
|
if (contextUsage === null &&
|
|
123726
123753
|
promptTokens !== null &&
|
|
@@ -123839,7 +123866,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
|
|
|
123839
123866
|
}
|
|
123840
123867
|
return Math.round(tokensPerSecond);
|
|
123841
123868
|
}
|
|
123842
|
-
async function proxyOpenAIStreamingRoute({ body,
|
|
123869
|
+
async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
|
|
123843
123870
|
function normalizeTokenCount(value) {
|
|
123844
123871
|
if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
|
|
123845
123872
|
return value;
|
|
@@ -123854,6 +123881,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123854
123881
|
});
|
|
123855
123882
|
});
|
|
123856
123883
|
}
|
|
123884
|
+
const engineType = conduitConfiguration.engineConfig?.type ?? null;
|
|
123885
|
+
const engineConfig = conduitConfiguration.engineConfig?.config ?? null;
|
|
123857
123886
|
const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody$1(body);
|
|
123858
123887
|
const requestStartedAt = Date.now();
|
|
123859
123888
|
const requestBody = JSON.parse(serializedBody);
|
|
@@ -123866,7 +123895,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123866
123895
|
reportMetricsSafe({
|
|
123867
123896
|
bytes: requestBodyBytes + responseBytes,
|
|
123868
123897
|
completionTokens,
|
|
123869
|
-
engine:
|
|
123898
|
+
engine: engineType,
|
|
123870
123899
|
endpointId: endpointId ?? null,
|
|
123871
123900
|
latencyMs,
|
|
123872
123901
|
modelId: modelID,
|
|
@@ -123895,9 +123924,10 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123895
123924
|
signal
|
|
123896
123925
|
})
|
|
123897
123926
|
.catch(error => {
|
|
123927
|
+
const err = asError(error);
|
|
123898
123928
|
logEngineMetrics({
|
|
123899
|
-
agentEngineType:
|
|
123900
|
-
error:
|
|
123929
|
+
agentEngineType: engineType ?? "unknown",
|
|
123930
|
+
error: err,
|
|
123901
123931
|
level: "error",
|
|
123902
123932
|
logger,
|
|
123903
123933
|
requestBodyBytes,
|
|
@@ -123909,7 +123939,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123909
123939
|
reportMetricsSafe({
|
|
123910
123940
|
bytes: requestBodyBytes,
|
|
123911
123941
|
completionTokens: 0,
|
|
123912
|
-
engine:
|
|
123942
|
+
engine: engineType,
|
|
123913
123943
|
endpointId: endpointId ?? null,
|
|
123914
123944
|
latencyMs,
|
|
123915
123945
|
modelId: modelID,
|
|
@@ -123924,7 +123954,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123924
123954
|
tokensPerSecond: 0,
|
|
123925
123955
|
totalTokens: 0
|
|
123926
123956
|
});
|
|
123927
|
-
throw
|
|
123957
|
+
throw err;
|
|
123928
123958
|
});
|
|
123929
123959
|
const responseStatusText = response.statusText ?? "Upstream request failed";
|
|
123930
123960
|
if (!response.ok) {
|
|
@@ -123946,7 +123976,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123946
123976
|
}
|
|
123947
123977
|
if (!response.body) {
|
|
123948
123978
|
logEngineMetrics({
|
|
123949
|
-
agentEngineType:
|
|
123979
|
+
agentEngineType: engineType ?? "unknown",
|
|
123950
123980
|
level: response.ok ? "info" : "error",
|
|
123951
123981
|
logger,
|
|
123952
123982
|
requestBodyBytes,
|
|
@@ -123958,7 +123988,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123958
123988
|
reportMetricsSafe({
|
|
123959
123989
|
bytes: requestBodyBytes,
|
|
123960
123990
|
completionTokens: 0,
|
|
123961
|
-
engine:
|
|
123991
|
+
engine: engineType,
|
|
123962
123992
|
endpointId: endpointId ?? null,
|
|
123963
123993
|
latencyMs,
|
|
123964
123994
|
modelId: modelID,
|
|
@@ -123980,25 +124010,25 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
123980
124010
|
}
|
|
123981
124011
|
const monitoredResponse = streamRequested
|
|
123982
124012
|
? monitorEngineResponseStream({
|
|
123983
|
-
agentEngineType:
|
|
124013
|
+
agentEngineType: engineType ?? "unknown",
|
|
123984
124014
|
body: Readable.fromWeb(response.body),
|
|
123985
124015
|
contextLength: modelManager.contextLength,
|
|
123986
|
-
|
|
124016
|
+
engineConfig,
|
|
124017
|
+
engineType: engineType ?? "unknown",
|
|
123987
124018
|
logger,
|
|
123988
124019
|
onComplete: onMonitoringComplete,
|
|
123989
|
-
parallelism: modelManager.parallelism,
|
|
123990
124020
|
requestBodyBytes,
|
|
123991
124021
|
requestPath: path,
|
|
123992
124022
|
requestStartedAt
|
|
123993
124023
|
})
|
|
123994
124024
|
: monitorEngineResponseSingle({
|
|
123995
|
-
agentEngineType:
|
|
124025
|
+
agentEngineType: engineType ?? "unknown",
|
|
123996
124026
|
body: Readable.fromWeb(response.body),
|
|
123997
124027
|
contextLength: modelManager.contextLength,
|
|
123998
|
-
|
|
124028
|
+
engineConfig,
|
|
124029
|
+
engineType: engineType ?? "unknown",
|
|
123999
124030
|
logger,
|
|
124000
124031
|
onComplete: onMonitoringComplete,
|
|
124001
|
-
parallelism: modelManager.parallelism,
|
|
124002
124032
|
requestBodyBytes,
|
|
124003
124033
|
requestPath: path,
|
|
124004
124034
|
requestStartedAt
|
|
@@ -124015,7 +124045,7 @@ function extractEndpointId$1(req) {
|
|
|
124015
124045
|
const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
|
|
124016
124046
|
return raw && isValid(raw) ? raw : null;
|
|
124017
124047
|
}
|
|
124018
|
-
function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
|
|
124048
|
+
function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger, startup }) {
|
|
124019
124049
|
return {
|
|
124020
124050
|
"/v1/chat/completions": {
|
|
124021
124051
|
POST: async ({ body, req, res }) => {
|
|
@@ -124033,7 +124063,7 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
124033
124063
|
});
|
|
124034
124064
|
const result = await proxyOpenAIStreamingRoute({
|
|
124035
124065
|
body,
|
|
124036
|
-
|
|
124066
|
+
conduitConfiguration: conduitConfiguration(),
|
|
124037
124067
|
endpointId: extractEndpointId$1(req),
|
|
124038
124068
|
logger,
|
|
124039
124069
|
modelID,
|
|
@@ -124059,7 +124089,7 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
124059
124089
|
});
|
|
124060
124090
|
return proxyOpenAIStreamingRoute({
|
|
124061
124091
|
body,
|
|
124062
|
-
|
|
124092
|
+
conduitConfiguration: conduitConfiguration(),
|
|
124063
124093
|
endpointId: extractEndpointId$1(req),
|
|
124064
124094
|
logger,
|
|
124065
124095
|
modelID,
|
|
@@ -124073,10 +124103,11 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
124073
124103
|
"/v1/models": {
|
|
124074
124104
|
GET: async () => {
|
|
124075
124105
|
const modelManager = getModelManager();
|
|
124106
|
+
const currentConfig = conduitConfiguration();
|
|
124076
124107
|
const effectiveContextLength = getEffectiveContextLength({
|
|
124077
124108
|
contextLength: modelManager.contextLength,
|
|
124078
|
-
|
|
124079
|
-
|
|
124109
|
+
engineConfig: currentConfig.engineConfig?.config ?? null,
|
|
124110
|
+
engineType: currentConfig.engineConfig?.type ?? null
|
|
124080
124111
|
});
|
|
124081
124112
|
return {
|
|
124082
124113
|
body: {
|
|
@@ -124179,7 +124210,7 @@ function extractAnthropicNonStreamUsage(body) {
|
|
|
124179
124210
|
return null;
|
|
124180
124211
|
}
|
|
124181
124212
|
}
|
|
124182
|
-
async function proxyAnthropicStreamingRoute({ body,
|
|
124213
|
+
async function proxyAnthropicStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
|
|
124183
124214
|
function reportMetricsSafe(payload) {
|
|
124184
124215
|
reportMetrics(payload).catch(error => {
|
|
124185
124216
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -124188,6 +124219,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124188
124219
|
});
|
|
124189
124220
|
});
|
|
124190
124221
|
}
|
|
124222
|
+
const engineType = conduitConfiguration.engineConfig?.type ?? null;
|
|
124191
124223
|
const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
|
|
124192
124224
|
const requestStartedAt = Date.now();
|
|
124193
124225
|
const requestBody = JSON.parse(serializedBody);
|
|
@@ -124200,7 +124232,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124200
124232
|
reportMetricsSafe({
|
|
124201
124233
|
bytes: requestBodyBytes + responseBytes,
|
|
124202
124234
|
completionTokens,
|
|
124203
|
-
engine:
|
|
124235
|
+
engine: engineType,
|
|
124204
124236
|
endpointId: endpointId ?? null,
|
|
124205
124237
|
latencyMs,
|
|
124206
124238
|
modelId: modelID,
|
|
@@ -124230,7 +124262,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124230
124262
|
})
|
|
124231
124263
|
.catch(error => {
|
|
124232
124264
|
logEngineMetrics({
|
|
124233
|
-
agentEngineType:
|
|
124265
|
+
agentEngineType: engineType ?? "unknown",
|
|
124234
124266
|
error: asError(error),
|
|
124235
124267
|
level: "error",
|
|
124236
124268
|
logger,
|
|
@@ -124243,7 +124275,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124243
124275
|
reportMetricsSafe({
|
|
124244
124276
|
bytes: requestBodyBytes,
|
|
124245
124277
|
completionTokens: 0,
|
|
124246
|
-
engine:
|
|
124278
|
+
engine: engineType,
|
|
124247
124279
|
endpointId: endpointId ?? null,
|
|
124248
124280
|
latencyMs,
|
|
124249
124281
|
modelId: modelID,
|
|
@@ -124276,7 +124308,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124276
124308
|
}
|
|
124277
124309
|
if (!response.body) {
|
|
124278
124310
|
logEngineMetrics({
|
|
124279
|
-
agentEngineType:
|
|
124311
|
+
agentEngineType: engineType ?? "unknown",
|
|
124280
124312
|
level: response.ok ? "info" : "error",
|
|
124281
124313
|
logger,
|
|
124282
124314
|
requestBodyBytes,
|
|
@@ -124288,7 +124320,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124288
124320
|
reportMetricsSafe({
|
|
124289
124321
|
bytes: requestBodyBytes,
|
|
124290
124322
|
completionTokens: 0,
|
|
124291
|
-
engine:
|
|
124323
|
+
engine: engineType,
|
|
124292
124324
|
endpointId: endpointId ?? null,
|
|
124293
124325
|
latencyMs,
|
|
124294
124326
|
modelId: modelID,
|
|
@@ -124349,7 +124381,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124349
124381
|
rawBody.once("error", err => {
|
|
124350
124382
|
const normalizedError = asError(err);
|
|
124351
124383
|
logEngineMetrics({
|
|
124352
|
-
agentEngineType:
|
|
124384
|
+
agentEngineType: engineType ?? "unknown",
|
|
124353
124385
|
error: normalizedError,
|
|
124354
124386
|
level: "error",
|
|
124355
124387
|
logger,
|
|
@@ -124363,7 +124395,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124363
124395
|
});
|
|
124364
124396
|
rawBody.once("end", () => {
|
|
124365
124397
|
logEngineMetrics({
|
|
124366
|
-
agentEngineType:
|
|
124398
|
+
agentEngineType: engineType ?? "unknown",
|
|
124367
124399
|
level: upstreamError ? "error" : "info",
|
|
124368
124400
|
logger,
|
|
124369
124401
|
requestBodyBytes,
|
|
@@ -124382,7 +124414,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124382
124414
|
}
|
|
124383
124415
|
const closeError = new Error("Engine response stream closed before completion");
|
|
124384
124416
|
logEngineMetrics({
|
|
124385
|
-
agentEngineType:
|
|
124417
|
+
agentEngineType: engineType ?? "unknown",
|
|
124386
124418
|
error: closeError,
|
|
124387
124419
|
level: "error",
|
|
124388
124420
|
logger,
|
|
@@ -124407,7 +124439,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124407
124439
|
rawBody.once("error", err => {
|
|
124408
124440
|
const normalizedError = asError(err);
|
|
124409
124441
|
logEngineMetrics({
|
|
124410
|
-
agentEngineType:
|
|
124442
|
+
agentEngineType: engineType ?? "unknown",
|
|
124411
124443
|
error: normalizedError,
|
|
124412
124444
|
level: "error",
|
|
124413
124445
|
logger,
|
|
@@ -124427,7 +124459,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124427
124459
|
usage.outputTokens = extractedUsage.outputTokens;
|
|
124428
124460
|
}
|
|
124429
124461
|
logEngineMetrics({
|
|
124430
|
-
agentEngineType:
|
|
124462
|
+
agentEngineType: engineType ?? "unknown",
|
|
124431
124463
|
level: upstreamError ? "error" : "info",
|
|
124432
124464
|
logger,
|
|
124433
124465
|
requestBodyBytes,
|
|
@@ -124446,7 +124478,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, l
|
|
|
124446
124478
|
}
|
|
124447
124479
|
const closeError = new Error("Engine response stream closed before completion");
|
|
124448
124480
|
logEngineMetrics({
|
|
124449
|
-
agentEngineType:
|
|
124481
|
+
agentEngineType: engineType ?? "unknown",
|
|
124450
124482
|
error: closeError,
|
|
124451
124483
|
level: "error",
|
|
124452
124484
|
logger,
|
|
@@ -124472,7 +124504,7 @@ function extractEndpointId(req) {
|
|
|
124472
124504
|
const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
|
|
124473
124505
|
return raw && isValid(raw) ? raw : null;
|
|
124474
124506
|
}
|
|
124475
|
-
function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
|
|
124507
|
+
function createConduitAnthropicAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger }) {
|
|
124476
124508
|
return {
|
|
124477
124509
|
"/v1/messages": {
|
|
124478
124510
|
POST: async ({ body, req, res }) => {
|
|
@@ -124484,7 +124516,7 @@ function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration,
|
|
|
124484
124516
|
});
|
|
124485
124517
|
return proxyAnthropicStreamingRoute({
|
|
124486
124518
|
body,
|
|
124487
|
-
|
|
124519
|
+
conduitConfiguration: conduitConfiguration(),
|
|
124488
124520
|
endpointId: extractEndpointId(req),
|
|
124489
124521
|
logger,
|
|
124490
124522
|
modelID,
|
|
@@ -124506,7 +124538,7 @@ function createHealthHandler() {
|
|
|
124506
124538
|
};
|
|
124507
124539
|
}
|
|
124508
124540
|
|
|
124509
|
-
async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
|
|
124541
|
+
async function handleSSERequests({ apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
|
|
124510
124542
|
const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
|
|
124511
124543
|
const maxReconnectDelayMs = 30000;
|
|
124512
124544
|
let reconnectAttempt = 0;
|
|
@@ -124548,6 +124580,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
124548
124580
|
handleRequest({
|
|
124549
124581
|
activeRequests,
|
|
124550
124582
|
apiURL,
|
|
124583
|
+
conduitConfiguration,
|
|
124551
124584
|
configuration,
|
|
124552
124585
|
logger,
|
|
124553
124586
|
modelID,
|
|
@@ -124591,7 +124624,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
124591
124624
|
}
|
|
124592
124625
|
}
|
|
124593
124626
|
}
|
|
124594
|
-
async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
124627
|
+
async function handleRequest({ activeRequests, apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
124595
124628
|
function reportMetricsSafe(payload) {
|
|
124596
124629
|
reportMetrics(payload).catch(error => {
|
|
124597
124630
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -124600,6 +124633,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
|
|
|
124600
124633
|
});
|
|
124601
124634
|
});
|
|
124602
124635
|
}
|
|
124636
|
+
const engineType = conduitConfiguration().engineConfig?.type ?? null;
|
|
124603
124637
|
const endpointId = request.parameters?.endpointID ?? null;
|
|
124604
124638
|
const requestStartedAt = Date.now();
|
|
124605
124639
|
const requestBytes = calculateRequestBytes(request.body ?? null);
|
|
@@ -124624,7 +124658,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
|
|
|
124624
124658
|
reportMetricsSafe({
|
|
124625
124659
|
bytes: requestBytes + responseMetrics.responseBytes,
|
|
124626
124660
|
completionTokens: 0,
|
|
124627
|
-
engine:
|
|
124661
|
+
engine: engineType,
|
|
124628
124662
|
endpointId,
|
|
124629
124663
|
latencyMs,
|
|
124630
124664
|
modelId: modelID,
|
|
@@ -124678,7 +124712,7 @@ async function handleRequest({ activeRequests, apiURL, configuration, logger, mo
|
|
|
124678
124712
|
reportMetricsSafe({
|
|
124679
124713
|
bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
|
|
124680
124714
|
completionTokens: 0,
|
|
124681
|
-
engine:
|
|
124715
|
+
engine: engineType,
|
|
124682
124716
|
endpointId,
|
|
124683
124717
|
latencyMs,
|
|
124684
124718
|
modelId: modelID,
|
|
@@ -134618,15 +134652,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134618
134652
|
let modelFileName = getConduitModelFileName(conduitConfiguration);
|
|
134619
134653
|
let modelName = getConduitModelName(conduitConfiguration);
|
|
134620
134654
|
const startup = Date.now();
|
|
134621
|
-
let modelManager =
|
|
134622
|
-
contextLength: conduitConfiguration.contextLength ?? null,
|
|
134623
|
-
engine: configuration.agentEngineType,
|
|
134624
|
-
enginePort: configuration.enginePort,
|
|
134625
|
-
logger,
|
|
134626
|
-
model: conduitConfiguration.targetModel,
|
|
134627
|
-
parallelism: conduitConfiguration.parallelism ?? null,
|
|
134628
|
-
root: configuration.rootDirectory
|
|
134629
|
-
});
|
|
134655
|
+
let modelManager = createModelManagerFromConfig(conduitConfiguration, configuration, logger);
|
|
134630
134656
|
const conduitStateReportManager = new ConduitStateReportManager({
|
|
134631
134657
|
apiClient,
|
|
134632
134658
|
collectMachineMetadata: collectMachineMetadata,
|
|
@@ -134765,15 +134791,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134765
134791
|
conduitConfiguration = newConduitConfiguration;
|
|
134766
134792
|
modelFileName = getConduitModelFileName(conduitConfiguration);
|
|
134767
134793
|
modelName = getConduitModelName(conduitConfiguration);
|
|
134768
|
-
modelManager =
|
|
134769
|
-
contextLength: conduitConfiguration.contextLength ?? null,
|
|
134770
|
-
engine: configuration.agentEngineType,
|
|
134771
|
-
enginePort: configuration.enginePort,
|
|
134772
|
-
logger,
|
|
134773
|
-
model: conduitConfiguration.targetModel,
|
|
134774
|
-
parallelism: conduitConfiguration.parallelism ?? null,
|
|
134775
|
-
root: configuration.rootDirectory
|
|
134776
|
-
});
|
|
134794
|
+
modelManager = createModelManagerFromConfig(conduitConfiguration, configuration, logger);
|
|
134777
134795
|
attachLifecycleListeners();
|
|
134778
134796
|
if (sourceState === "idle") {
|
|
134779
134797
|
logger.info("Restarting engine from idle");
|
|
@@ -134847,6 +134865,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134847
134865
|
"/v1/chat/completions": {
|
|
134848
134866
|
POST: createPostChatCompletionsHandler({
|
|
134849
134867
|
apiClient,
|
|
134868
|
+
conduitConfiguration: () => conduitConfiguration,
|
|
134850
134869
|
configuration,
|
|
134851
134870
|
getModelID: () => conduitConfiguration.targetModel.id,
|
|
134852
134871
|
getModelManager: () => modelManager,
|
|
@@ -134857,6 +134876,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134857
134876
|
"/v1/completions": {
|
|
134858
134877
|
POST: createPostCompletionsHandler({
|
|
134859
134878
|
apiClient,
|
|
134879
|
+
conduitConfiguration: () => conduitConfiguration,
|
|
134860
134880
|
configuration,
|
|
134861
134881
|
getModelID: () => conduitConfiguration.targetModel.id,
|
|
134862
134882
|
getModelManager: () => modelManager,
|
|
@@ -134867,6 +134887,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134867
134887
|
"/v1/models": {
|
|
134868
134888
|
GET: createGetModelsHandler({
|
|
134869
134889
|
apiClient,
|
|
134890
|
+
conduitConfiguration: () => conduitConfiguration,
|
|
134870
134891
|
configuration,
|
|
134871
134892
|
getModelID: () => conduitConfiguration.targetModel.id,
|
|
134872
134893
|
getModelManager: () => modelManager,
|
|
@@ -134884,6 +134905,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134884
134905
|
"/v1/messages": {
|
|
134885
134906
|
POST: createPostMessagesHandler({
|
|
134886
134907
|
apiClient,
|
|
134908
|
+
conduitConfiguration: () => conduitConfiguration,
|
|
134887
134909
|
configuration,
|
|
134888
134910
|
getModelID: () => conduitConfiguration.targetModel.id,
|
|
134889
134911
|
getModelManager: () => modelManager,
|
|
@@ -134897,6 +134919,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134897
134919
|
});
|
|
134898
134920
|
handleSSERequests({
|
|
134899
134921
|
apiURL: configuration.apiURL,
|
|
134922
|
+
conduitConfiguration: () => conduitConfiguration,
|
|
134900
134923
|
configuration,
|
|
134901
134924
|
logger,
|
|
134902
134925
|
modelID: conduitConfiguration.targetModel.id,
|
|
@@ -134973,6 +134996,18 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
134973
134996
|
shutdown
|
|
134974
134997
|
};
|
|
134975
134998
|
}
|
|
134999
|
+
function createModelManagerFromConfig(conduitConfiguration, configuration, logger) {
|
|
135000
|
+
const engineConfig = conduitConfiguration.engineConfig;
|
|
135001
|
+
return new ModelManager({
|
|
135002
|
+
contextLength: conduitConfiguration.contextLength ?? null,
|
|
135003
|
+
engineConfig: engineConfig?.config ?? null,
|
|
135004
|
+
enginePort: configuration.enginePort,
|
|
135005
|
+
engineType: engineConfig?.type ?? "llama.cpp",
|
|
135006
|
+
logger,
|
|
135007
|
+
model: conduitConfiguration.targetModel,
|
|
135008
|
+
root: configuration.rootDirectory
|
|
135009
|
+
});
|
|
135010
|
+
}
|
|
134976
135011
|
function getConduitModelFileName(configuration) {
|
|
134977
135012
|
const { source } = configuration.targetModel;
|
|
134978
135013
|
return source.type === "huggingface" ? source.slug : source.irid;
|
|
@@ -134983,8 +135018,6 @@ function getConduitModelName(configuration) {
|
|
|
134983
135018
|
|
|
134984
135019
|
const StartModeSchema = _enum(["auto", "idle"]);
|
|
134985
135020
|
function getConfiguration({ overrides } = {}) {
|
|
134986
|
-
const agentEngineTypeValue = overrides?.agentEngineType ?? readEnvString("ENGINE");
|
|
134987
|
-
const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
|
|
134988
135021
|
const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
|
|
134989
135022
|
const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
|
|
134990
135023
|
const enginePort = overrides?.enginePort ??
|
|
@@ -135005,7 +135038,6 @@ function getConfiguration({ overrides } = {}) {
|
|
|
135005
135038
|
const startModeValue = overrides?.startMode ?? readEnvStringOptional("START_MODE", "auto");
|
|
135006
135039
|
const startMode = StartModeSchema.parse(startModeValue);
|
|
135007
135040
|
return {
|
|
135008
|
-
agentEngineType,
|
|
135009
135041
|
apiKey,
|
|
135010
135042
|
apiURL,
|
|
135011
135043
|
enginePort,
|
|
@@ -135031,7 +135063,6 @@ class ConduitConnection {
|
|
|
135031
135063
|
this.enginePort = options.enginePort;
|
|
135032
135064
|
this.configuration = getConfiguration({
|
|
135033
135065
|
overrides: {
|
|
135034
|
-
agentEngineType: options.engine,
|
|
135035
135066
|
apiKey: options.apiKey,
|
|
135036
135067
|
apiURL: options.apiURL,
|
|
135037
135068
|
enginePort: options.enginePort,
|
|
@@ -135091,13 +135122,10 @@ async function startInferenceAgent({ configurationOverrides }) {
|
|
|
135091
135122
|
const logger = createLogger({
|
|
135092
135123
|
name: "infersec-conduit"
|
|
135093
135124
|
});
|
|
135094
|
-
logger.info("Application starting"
|
|
135095
|
-
agentEngineType: configuration.agentEngineType
|
|
135096
|
-
});
|
|
135125
|
+
logger.info("Application starting");
|
|
135097
135126
|
const connection = new ConduitConnection({
|
|
135098
135127
|
apiKey: configuration.apiKey,
|
|
135099
135128
|
apiURL: configuration.apiURL,
|
|
135100
|
-
engine: configuration.agentEngineType,
|
|
135101
135129
|
enginePort: configuration.enginePort,
|
|
135102
135130
|
logger,
|
|
135103
135131
|
port: configuration.port,
|
|
@@ -135136,7 +135164,6 @@ function registerInferenceCommands({ program }) {
|
|
|
135136
135164
|
.command("start")
|
|
135137
135165
|
.description("Start the inference agent")
|
|
135138
135166
|
.option("--api-url <url>", "API base URL (or API_URL env)")
|
|
135139
|
-
.option("--engine <type>", "Engine type (or ENGINE env)")
|
|
135140
135167
|
.option("--engine-port <number>", "Engine port (or ENGINE_PORT env)")
|
|
135141
135168
|
.option("--key <value>", "API key (or API_KEY env)")
|
|
135142
135169
|
.option("--port <number>", "Port to listen on (or PORT env)")
|
|
@@ -135148,9 +135175,6 @@ function registerInferenceCommands({ program }) {
|
|
|
135148
135175
|
if (options["api-url"]) {
|
|
135149
135176
|
configurationOverrides.apiURL = options["api-url"];
|
|
135150
135177
|
}
|
|
135151
|
-
if (options.engine) {
|
|
135152
|
-
configurationOverrides.agentEngineType = options.engine;
|
|
135153
|
-
}
|
|
135154
135178
|
if (options["engine-port"]) {
|
|
135155
135179
|
const enginePort = Number.parseInt(options["engine-port"], 10);
|
|
135156
135180
|
if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
|
|
@@ -161889,7 +161913,6 @@ async function runSingleBenchmark(options) {
|
|
|
161889
161913
|
const conn = new ConduitConnection({
|
|
161890
161914
|
apiKey,
|
|
161891
161915
|
apiURL: apiUrl,
|
|
161892
|
-
engine: entry.engine,
|
|
161893
161916
|
enginePort,
|
|
161894
161917
|
logger,
|
|
161895
161918
|
port,
|
package/dist/configuration.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ULID } from "@infersec/definitions";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
declare const StartModeSchema: z.ZodEnum<{
|
|
4
4
|
idle: "idle";
|
|
@@ -6,7 +6,6 @@ declare const StartModeSchema: z.ZodEnum<{
|
|
|
6
6
|
}>;
|
|
7
7
|
export type StartMode = z.infer<typeof StartModeSchema>;
|
|
8
8
|
export interface Configuration {
|
|
9
|
-
agentEngineType: LLMEngine;
|
|
10
9
|
apiKey: string;
|
|
11
10
|
apiURL: string;
|
|
12
11
|
enginePort: number;
|
|
@@ -16,7 +15,6 @@ export interface Configuration {
|
|
|
16
15
|
startMode: StartMode;
|
|
17
16
|
}
|
|
18
17
|
export interface ConfigurationOverrides {
|
|
19
|
-
agentEngineType?: string;
|
|
20
18
|
apiKey?: string;
|
|
21
19
|
apiURL?: string;
|
|
22
20
|
enginePort?: number;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LLMModel } from "@infersec/definitions";
|
|
2
2
|
import { Logger } from "@infersec/logger";
|
|
3
3
|
import EventEmitter from "eventemitter3";
|
|
4
4
|
import { Response } from "undici";
|
|
@@ -11,10 +11,10 @@ interface ModelManagerEvents {
|
|
|
11
11
|
}
|
|
12
12
|
type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
|
|
13
13
|
export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
|
|
14
|
-
readonly engine:
|
|
14
|
+
readonly engine: string;
|
|
15
|
+
readonly engineConfig: Record<string, unknown> | null;
|
|
15
16
|
readonly enginePort: number;
|
|
16
17
|
readonly model: LLMModel;
|
|
17
|
-
readonly parallelism: number | null;
|
|
18
18
|
private uniqueName;
|
|
19
19
|
readonly contextLength: number | null;
|
|
20
20
|
protected readonly logger: Logger;
|
|
@@ -23,13 +23,13 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
|
|
|
23
23
|
private lifecycleState;
|
|
24
24
|
private stopRequested;
|
|
25
25
|
protected readonly modelsDirectory: string;
|
|
26
|
-
constructor({ contextLength,
|
|
26
|
+
constructor({ contextLength, engineConfig, enginePort, engineType, logger, model, root }: {
|
|
27
27
|
contextLength?: number | null;
|
|
28
|
-
|
|
28
|
+
engineConfig?: Record<string, unknown> | null;
|
|
29
29
|
enginePort: number;
|
|
30
|
+
engineType: string;
|
|
30
31
|
logger: Logger;
|
|
31
32
|
model: LLMModel;
|
|
32
|
-
parallelism?: number | null;
|
|
33
33
|
root: string;
|
|
34
34
|
});
|
|
35
35
|
fetchOpenAI(path: string, opts?: RequestInit): Promise<Response>;
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
|
|
1
|
+
import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE, type InferenceAgentConfiguration } from "@infersec/definitions";
|
|
2
2
|
import { implementAPIReference, type APIRequest } from "@infersec/fetch";
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { APIClient } from "../apiClient/index.js";
|
|
5
5
|
import { Configuration } from "../configuration.js";
|
|
6
6
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
7
7
|
type ConduitAnthropicAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE>>[0]["api"];
|
|
8
|
-
export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }: {
|
|
8
|
+
export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger }: {
|
|
9
9
|
apiClient: APIClient;
|
|
10
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
10
11
|
configuration: Configuration;
|
|
11
12
|
getModelID: () => string;
|
|
12
13
|
getModelManager: () => ModelManager;
|
|
@@ -14,6 +15,7 @@ export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient,
|
|
|
14
15
|
}): ConduitAnthropicAPIReferenceHandlers;
|
|
15
16
|
export declare function createPostMessagesHandler(options: {
|
|
16
17
|
apiClient: APIClient;
|
|
18
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
17
19
|
configuration: Configuration;
|
|
18
20
|
getModelID: () => string;
|
|
19
21
|
getModelManager: () => ModelManager;
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import { API_CLIENT_CONDUIT_OPENAI_REFERENCE } from "@infersec/definitions";
|
|
1
|
+
import { API_CLIENT_CONDUIT_OPENAI_REFERENCE, type InferenceAgentConfiguration } from "@infersec/definitions";
|
|
2
2
|
import { implementAPIReference, type APIRequest } from "@infersec/fetch";
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { APIClient } from "../apiClient/index.js";
|
|
5
5
|
import { Configuration } from "../configuration.js";
|
|
6
6
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
7
7
|
type ConduitOpenAIAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_OPENAI_REFERENCE>>[0]["api"];
|
|
8
|
-
export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }: {
|
|
8
|
+
export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfiguration, configuration, getModelID, getModelManager, logger, startup }: {
|
|
9
9
|
apiClient: APIClient;
|
|
10
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
10
11
|
configuration: Configuration;
|
|
11
12
|
getModelID: () => string;
|
|
12
13
|
getModelManager: () => ModelManager;
|
|
@@ -15,6 +16,7 @@ export declare function createConduitOpenAIAPIReferenceHandlers({ apiClient, con
|
|
|
15
16
|
}): ConduitOpenAIAPIReferenceHandlers;
|
|
16
17
|
export declare function createGetModelsHandler(options: {
|
|
17
18
|
apiClient: APIClient;
|
|
19
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
18
20
|
configuration: Configuration;
|
|
19
21
|
getModelID: () => string;
|
|
20
22
|
getModelManager: () => ModelManager;
|
|
@@ -60,6 +62,7 @@ export declare function createGetModelsHandler(options: {
|
|
|
60
62
|
}>;
|
|
61
63
|
export declare function createPostChatCompletionsHandler(options: {
|
|
62
64
|
apiClient: APIClient;
|
|
65
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
63
66
|
configuration: Configuration;
|
|
64
67
|
getModelID: () => string;
|
|
65
68
|
getModelManager: () => ModelManager;
|
|
@@ -166,6 +169,7 @@ export declare function createPostChatCompletionsHandler(options: {
|
|
|
166
169
|
}>;
|
|
167
170
|
export declare function createPostCompletionsHandler(options: {
|
|
168
171
|
apiClient: APIClient;
|
|
172
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
169
173
|
configuration: Configuration;
|
|
170
174
|
getModelID: () => string;
|
|
171
175
|
getModelManager: () => ModelManager;
|
package/dist/sse/handler.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import { InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
|
|
1
|
+
import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID, type APIResponse, type ServerToClientAPIRequest } from "@infersec/definitions";
|
|
2
2
|
import { Logger } from "@infersec/logger";
|
|
3
3
|
import { Configuration } from "../configuration.js";
|
|
4
|
-
export declare function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
|
|
4
|
+
export declare function handleSSERequests({ apiURL, conduitConfiguration, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }: {
|
|
5
5
|
apiURL: string;
|
|
6
|
+
conduitConfiguration: () => InferenceAgentConfiguration;
|
|
6
7
|
configuration: Configuration;
|
|
7
8
|
logger: Logger;
|
|
8
9
|
modelID: ULID;
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { Readable } from "node:stream";
|
|
2
|
-
import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
|
|
2
|
+
import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
-
export declare function proxyAnthropicStreamingRoute({ body,
|
|
6
|
+
export declare function proxyAnthropicStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
|
|
7
7
|
body: unknown;
|
|
8
|
+
conduitConfiguration: InferenceAgentConfiguration;
|
|
8
9
|
configuration: Configuration;
|
|
9
10
|
endpointId?: ULID | null;
|
|
10
11
|
logger: Logger;
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { Readable } from "node:stream";
|
|
2
|
-
import { LLMEngine } from "@infersec/definitions";
|
|
3
2
|
import { Logger } from "@infersec/logger";
|
|
4
3
|
export interface EngineUsageMetrics {
|
|
5
4
|
completionTokens: number | null;
|
|
@@ -8,7 +7,7 @@ export interface EngineUsageMetrics {
|
|
|
8
7
|
totalTokens: number | null;
|
|
9
8
|
}
|
|
10
9
|
interface EngineMetricsLoggerOptions {
|
|
11
|
-
agentEngineType:
|
|
10
|
+
agentEngineType: string;
|
|
12
11
|
logger: Logger;
|
|
13
12
|
requestBodyBytes: number;
|
|
14
13
|
requestPath: string;
|
|
@@ -24,9 +23,9 @@ interface EngineMetricsCompletion {
|
|
|
24
23
|
interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
|
|
25
24
|
body: Readable;
|
|
26
25
|
contextLength: number | null;
|
|
27
|
-
|
|
26
|
+
engineConfig: Record<string, unknown> | null;
|
|
27
|
+
engineType: string;
|
|
28
28
|
onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
|
|
29
|
-
parallelism: number | null;
|
|
30
29
|
requestStartedAt?: number;
|
|
31
30
|
}
|
|
32
31
|
interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
|
|
@@ -38,7 +37,7 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
|
|
|
38
37
|
interface MonitorEngineResponseResult {
|
|
39
38
|
stream: Readable;
|
|
40
39
|
}
|
|
41
|
-
export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength,
|
|
42
|
-
export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength,
|
|
40
|
+
export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
41
|
+
export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engineConfig, engineType, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
43
42
|
export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
|
|
44
43
|
export {};
|
package/dist/utils/openai.d.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { Readable } from "node:stream";
|
|
2
|
-
import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
|
|
2
|
+
import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
-
export declare function proxyOpenAIStreamingRoute({ body,
|
|
6
|
+
export declare function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
|
|
7
7
|
body: unknown;
|
|
8
|
+
conduitConfiguration: InferenceAgentConfiguration;
|
|
8
9
|
configuration: Configuration;
|
|
9
10
|
endpointId?: ULID | null;
|
|
10
11
|
logger: Logger;
|