@infersec/conduit 1.51.0 → 1.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -52,7 +52,7 @@ import 'fs/promises';
|
|
|
52
52
|
import 'stream/promises';
|
|
53
53
|
import { fileURLToPath } from 'node:url';
|
|
54
54
|
import { StringDecoder } from 'node:string_decoder';
|
|
55
|
-
import os, { constants as constants$5 } from 'node:os';
|
|
55
|
+
import os, { constants as constants$5, hostname as hostname$2 } from 'node:os';
|
|
56
56
|
import tty from 'node:tty';
|
|
57
57
|
import require$$0$j from 'child_process';
|
|
58
58
|
import { setTimeout as setTimeout$1, scheduler, setImmediate as setImmediate$1 } from 'node:timers/promises';
|
|
@@ -18497,6 +18497,8 @@ const RawPasswordSchema = string$1()
|
|
|
18497
18497
|
.refine(raw => /[\p{P}\p{S}]/u.test(raw), {
|
|
18498
18498
|
message: "Invalid password: Missing character variant: Symbols"
|
|
18499
18499
|
});
|
|
18500
|
+
const RESOURCE_NAME_MAX_LENGTH = 256;
|
|
18501
|
+
string$1().trim().min(1).max(RESOURCE_NAME_MAX_LENGTH);
|
|
18500
18502
|
const ULIDSchema = string$1().refine(isValid, { message: "Invalid ULID" });
|
|
18501
18503
|
// IRID Format:
|
|
18502
18504
|
//
|
|
@@ -57224,7 +57226,7 @@ var undiciExports = requireUndici();
|
|
|
57224
57226
|
|
|
57225
57227
|
function createFetchWithHeaders({ fetchFn = undiciExports.fetch, headers }) {
|
|
57226
57228
|
function fetchWithHeaders(url, options) {
|
|
57227
|
-
const mergedHeaders = new Headers(options.headers);
|
|
57229
|
+
const mergedHeaders = new undiciExports.Headers(options.headers);
|
|
57228
57230
|
for (const [key, value] of Object.entries(headers)) {
|
|
57229
57231
|
mergedHeaders.set(key, value);
|
|
57230
57232
|
}
|
|
@@ -57252,7 +57254,7 @@ async function fetchByReference(options) {
|
|
|
57252
57254
|
for (const [key, value] of Object.entries(query || {})) {
|
|
57253
57255
|
targetURL.searchParams.set(key, String(value));
|
|
57254
57256
|
}
|
|
57255
|
-
const headers = new Headers();
|
|
57257
|
+
const headers = new undiciExports.Headers();
|
|
57256
57258
|
const fetchOptions = {
|
|
57257
57259
|
headers,
|
|
57258
57260
|
method: options.method
|
|
@@ -103234,25 +103236,30 @@ const VLLM_EXECUTABLE = "python3";
|
|
|
103234
103236
|
const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
|
|
103235
103237
|
async function startVLLM({ enginePort, targetDirectory }) {
|
|
103236
103238
|
const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
|
|
103239
|
+
const device = process.env.VLLM_DEVICE;
|
|
103240
|
+
const dtype = process.env.VLLM_DTYPE;
|
|
103241
|
+
const args = [
|
|
103242
|
+
...VLLM_START_ARGS,
|
|
103243
|
+
"--port",
|
|
103244
|
+
String(enginePort),
|
|
103245
|
+
"--model",
|
|
103246
|
+
targetDirectory,
|
|
103247
|
+
"--served-model-name",
|
|
103248
|
+
this.model.id,
|
|
103249
|
+
"--max-model-len",
|
|
103250
|
+
String(contextLength),
|
|
103251
|
+
"--tensor-parallel-size",
|
|
103252
|
+
"1"
|
|
103253
|
+
];
|
|
103254
|
+
if (device) {
|
|
103255
|
+
args.push("--device", device);
|
|
103256
|
+
}
|
|
103257
|
+
if (dtype) {
|
|
103258
|
+
args.push("--dtype", dtype);
|
|
103259
|
+
}
|
|
103237
103260
|
const processManager = new ProcessManager({
|
|
103238
103261
|
command: VLLM_EXECUTABLE,
|
|
103239
|
-
args
|
|
103240
|
-
...VLLM_START_ARGS,
|
|
103241
|
-
"--port",
|
|
103242
|
-
String(enginePort),
|
|
103243
|
-
"--model",
|
|
103244
|
-
targetDirectory,
|
|
103245
|
-
"--served-model-name",
|
|
103246
|
-
this.model.id,
|
|
103247
|
-
"--device",
|
|
103248
|
-
"cpu", // Force CPU mode
|
|
103249
|
-
"--dtype",
|
|
103250
|
-
"float16", // Use float16 to save memory on CPU
|
|
103251
|
-
"--max-model-len",
|
|
103252
|
-
String(contextLength),
|
|
103253
|
-
"--tensor-parallel-size",
|
|
103254
|
-
"1"
|
|
103255
|
-
]
|
|
103262
|
+
args
|
|
103256
103263
|
});
|
|
103257
103264
|
await processManager.start();
|
|
103258
103265
|
return processManager;
|
|
@@ -104324,15 +104331,12 @@ const ModelDownloadProgressSchema = object({
|
|
|
104324
104331
|
completedFiles: array(string$1().min(1))
|
|
104325
104332
|
});
|
|
104326
104333
|
|
|
104327
|
-
const DOWNLOAD_PROGRESS_TIMEOUT = 60000;
|
|
104328
|
-
const DOWNLOAD_RETRY_ATTEMPTS_FULL = 3;
|
|
104329
|
-
const DOWNLOAD_RETRY_ATTEMPTS_RANGE = 10;
|
|
104330
104334
|
function matchesQuantizationVariant({ filePath, variant }) {
|
|
104331
104335
|
if (!variant) {
|
|
104332
104336
|
return false;
|
|
104333
104337
|
}
|
|
104334
104338
|
const escapedVariant = variant.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
104335
|
-
const trailingBoundary =
|
|
104339
|
+
const trailingBoundary = "[\\-./]";
|
|
104336
104340
|
const matcher = new RegExp(`(^|[\\-./_])${escapedVariant}(?=$|${trailingBoundary})`, "i");
|
|
104337
104341
|
const normalizedPath = filePath.replace(/\\/g, "/");
|
|
104338
104342
|
const segments = normalizedPath.split("/").filter(Boolean);
|
|
@@ -104345,6 +104349,10 @@ function matchesQuantizationVariant({ filePath, variant }) {
|
|
|
104345
104349
|
}
|
|
104346
104350
|
return segments.slice(0, -1).some(segment => matcher.test(segment));
|
|
104347
104351
|
}
|
|
104352
|
+
|
|
104353
|
+
const DOWNLOAD_PROGRESS_TIMEOUT = 60000;
|
|
104354
|
+
const DOWNLOAD_RETRY_ATTEMPTS_FULL = 3;
|
|
104355
|
+
const DOWNLOAD_RETRY_ATTEMPTS_RANGE = 10;
|
|
104348
104356
|
async function downloadModelViaHuggingFace({ format, huggingFaceToken, modelSlug: rawModelSlug, onProgress, progressFilePath, targetDirectory }) {
|
|
104349
104357
|
// Sanitise model ID
|
|
104350
104358
|
const [modelSlugWithRevision, variant = null] = rawModelSlug.split(":");
|
|
@@ -112782,7 +112790,7 @@ async function findQuantizedModelTarget({ model, path }) {
|
|
|
112782
112790
|
// Just return the first
|
|
112783
112791
|
return modelFiles[0];
|
|
112784
112792
|
}
|
|
112785
|
-
const matches = modelFiles.filter(fileName => fileName
|
|
112793
|
+
const matches = modelFiles.filter(fileName => matchesQuantizationVariant({ filePath: fileName, variant: variant ?? "" }));
|
|
112786
112794
|
if (matches.length === 0) {
|
|
112787
112795
|
throw new Error(`No model found for format and variant: ${model.format} / ${variant}`);
|
|
112788
112796
|
}
|
|
@@ -112817,7 +112825,11 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
|
|
|
112817
112825
|
}
|
|
112818
112826
|
|
|
112819
112827
|
// 2 hours
|
|
112820
|
-
const ENGINE_FETCH_TIMEOUT_MS = 7200000;
|
|
112828
|
+
const ENGINE_FETCH_TIMEOUT_MS$1 = 7200000;
|
|
112829
|
+
const ENGINE_AGENT = new undiciExports.Agent({
|
|
112830
|
+
bodyTimeout: ENGINE_FETCH_TIMEOUT_MS$1,
|
|
112831
|
+
headersTimeout: ENGINE_FETCH_TIMEOUT_MS$1
|
|
112832
|
+
});
|
|
112821
112833
|
class ModelManager extends EventEmitter {
|
|
112822
112834
|
engine;
|
|
112823
112835
|
enginePort;
|
|
@@ -112870,19 +112882,28 @@ class ModelManager extends EventEmitter {
|
|
|
112870
112882
|
const controller = new AbortController();
|
|
112871
112883
|
const timeout = setTimeout(() => {
|
|
112872
112884
|
controller.abort(new Error("Inference request timeout"));
|
|
112873
|
-
}, ENGINE_FETCH_TIMEOUT_MS);
|
|
112885
|
+
}, ENGINE_FETCH_TIMEOUT_MS$1);
|
|
112874
112886
|
const effectiveSignal = callerSignal
|
|
112875
112887
|
? AbortSignal.any([callerSignal, controller.signal])
|
|
112876
112888
|
: controller.signal;
|
|
112877
112889
|
try {
|
|
112878
|
-
|
|
112890
|
+
const fetchStartedAt = Date.now();
|
|
112891
|
+
const response = await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
|
|
112879
112892
|
...opts,
|
|
112893
|
+
dispatcher: ENGINE_AGENT,
|
|
112880
112894
|
headers: {
|
|
112881
112895
|
...opts?.headers,
|
|
112882
112896
|
Connection: "keep-alive"
|
|
112883
112897
|
},
|
|
112884
112898
|
signal: effectiveSignal
|
|
112885
112899
|
});
|
|
112900
|
+
const fetchElapsedMs = Date.now() - fetchStartedAt;
|
|
112901
|
+
this.logger.debug("Engine responded", {
|
|
112902
|
+
elapsedMs: fetchElapsedMs,
|
|
112903
|
+
requestUrl: path,
|
|
112904
|
+
statusCode: response.status
|
|
112905
|
+
});
|
|
112906
|
+
return response;
|
|
112886
112907
|
}
|
|
112887
112908
|
finally {
|
|
112888
112909
|
clearTimeout(timeout);
|
|
@@ -112979,6 +113000,9 @@ class ModelManager extends EventEmitter {
|
|
|
112979
113000
|
message: "Cannot stop LLM engine: already stopping"
|
|
112980
113001
|
});
|
|
112981
113002
|
}
|
|
113003
|
+
if (this.lifecycleState === "stopped") {
|
|
113004
|
+
return;
|
|
113005
|
+
}
|
|
112982
113006
|
if (this.lifecycleState !== "running" &&
|
|
112983
113007
|
this.lifecycleState !== "starting" &&
|
|
112984
113008
|
this.lifecycleState !== "errored") {
|
|
@@ -113355,6 +113379,12 @@ function isEngineUsageChunk(value) {
|
|
|
113355
113379
|
function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
113356
113380
|
const startedAt = requestStartedAt ?? Date.now();
|
|
113357
113381
|
const passThrough = new PassThrough();
|
|
113382
|
+
passThrough.on("error", (error) => {
|
|
113383
|
+
logger.error("Engine response stream error", {
|
|
113384
|
+
error: asError(error),
|
|
113385
|
+
requestUrl: requestPath
|
|
113386
|
+
});
|
|
113387
|
+
});
|
|
113358
113388
|
let responseBytes = 0;
|
|
113359
113389
|
let firstChunkAt = null;
|
|
113360
113390
|
let usage = null;
|
|
@@ -113545,6 +113575,12 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
|
|
|
113545
113575
|
const maxUsageCaptureBytes = 1024 * 1024;
|
|
113546
113576
|
const startedAt = requestStartedAt ?? Date.now();
|
|
113547
113577
|
const passThrough = new PassThrough();
|
|
113578
|
+
passThrough.on("error", (error) => {
|
|
113579
|
+
logger.error("Engine response stream error", {
|
|
113580
|
+
error: asError(error),
|
|
113581
|
+
requestUrl: requestPath
|
|
113582
|
+
});
|
|
113583
|
+
});
|
|
113548
113584
|
let responseBytes = 0;
|
|
113549
113585
|
let firstChunkAt = null;
|
|
113550
113586
|
let usage = null;
|
|
@@ -113830,21 +113866,20 @@ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logg
|
|
|
113830
113866
|
});
|
|
113831
113867
|
const responseStatusText = response.statusText ?? "Upstream request failed";
|
|
113832
113868
|
if (!response.ok) {
|
|
113833
|
-
const responseBody = await response.text().catch(() => null);
|
|
113834
|
-
const responseError = new Error(responseBody
|
|
113835
|
-
? `Upstream error response: ${responseBody}`
|
|
113836
|
-
: "Upstream error response: empty body");
|
|
113837
|
-
logger.error("LLM engine request failed", {
|
|
113838
|
-
error: responseError,
|
|
113839
|
-
requestUrl: path,
|
|
113840
|
-
statusCode: response.status,
|
|
113841
|
-
statusText: responseStatusText
|
|
113842
|
-
});
|
|
113843
113869
|
if (!response.body) {
|
|
113844
|
-
|
|
113845
|
-
|
|
113870
|
+
logger.error("LLM engine request failed (no body)", {
|
|
113871
|
+
error: new Error("Upstream error response: empty body"),
|
|
113872
|
+
requestUrl: path,
|
|
113873
|
+
statusCode: response.status,
|
|
113846
113874
|
statusText: responseStatusText
|
|
113847
|
-
};
|
|
113875
|
+
});
|
|
113876
|
+
}
|
|
113877
|
+
else {
|
|
113878
|
+
logger.error("LLM engine request failed", {
|
|
113879
|
+
requestUrl: path,
|
|
113880
|
+
statusCode: response.status,
|
|
113881
|
+
statusText: responseStatusText
|
|
113882
|
+
});
|
|
113848
113883
|
}
|
|
113849
113884
|
}
|
|
113850
113885
|
if (!response.body) {
|
|
@@ -113926,9 +113961,15 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
113926
113961
|
const modelManager = getModelManager();
|
|
113927
113962
|
const abortController = new AbortController();
|
|
113928
113963
|
res.on("close", () => {
|
|
113964
|
+
logger.debug("Express route client disconnected, aborting", {
|
|
113965
|
+
requestUrl: "/v1/chat/completions"
|
|
113966
|
+
});
|
|
113929
113967
|
abortController.abort();
|
|
113930
113968
|
});
|
|
113931
|
-
|
|
113969
|
+
logger.debug("Express route handler entered, awaiting engine", {
|
|
113970
|
+
requestUrl: "/v1/chat/completions"
|
|
113971
|
+
});
|
|
113972
|
+
const result = await proxyOpenAIStreamingRoute({
|
|
113932
113973
|
body,
|
|
113933
113974
|
configuration,
|
|
113934
113975
|
endpointId: extractEndpointId$1(req),
|
|
@@ -113939,6 +113980,11 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
113939
113980
|
reportMetrics: apiClient.reportPromptMetrics,
|
|
113940
113981
|
signal: abortController.signal
|
|
113941
113982
|
});
|
|
113983
|
+
logger.debug("Express route handler returning response", {
|
|
113984
|
+
requestUrl: "/v1/chat/completions",
|
|
113985
|
+
statusCode: "status" in result ? result.status : 0
|
|
113986
|
+
});
|
|
113987
|
+
return result;
|
|
113942
113988
|
}
|
|
113943
113989
|
},
|
|
113944
113990
|
"/v1/completions": {
|
|
@@ -114688,7 +114734,7 @@ async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
|
|
|
114688
114734
|
sequence: payload.sequence
|
|
114689
114735
|
});
|
|
114690
114736
|
chunks.push(Buffer.from(chunk + "\n"));
|
|
114691
|
-
if (chunks.length >= 10) {
|
|
114737
|
+
if (payload.data === null || chunks.length >= 10) {
|
|
114692
114738
|
await flushChunks();
|
|
114693
114739
|
}
|
|
114694
114740
|
};
|
|
@@ -114782,10 +114828,12 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
|
|
|
114782
114828
|
return Math.round(tokensPerSecond);
|
|
114783
114829
|
}
|
|
114784
114830
|
|
|
114785
|
-
|
|
114786
|
-
|
|
114787
|
-
|
|
114788
|
-
|
|
114831
|
+
const ENGINE_FETCH_TIMEOUT_MS = 7_200_000;
|
|
114832
|
+
const LOOPBACK_AGENT = new undiciExports.Agent({
|
|
114833
|
+
bodyTimeout: ENGINE_FETCH_TIMEOUT_MS,
|
|
114834
|
+
headersTimeout: ENGINE_FETCH_TIMEOUT_MS
|
|
114835
|
+
});
|
|
114836
|
+
async function proxyRequest({ configuration, logger, request, signal }) {
|
|
114789
114837
|
let finalPath = request.path;
|
|
114790
114838
|
if (request.parameters) {
|
|
114791
114839
|
Object.entries(request.parameters).forEach(([key, value]) => {
|
|
@@ -114799,6 +114847,7 @@ async function proxyRequest({ configuration, request, signal }) {
|
|
|
114799
114847
|
}
|
|
114800
114848
|
}
|
|
114801
114849
|
const fetchOptions = {
|
|
114850
|
+
dispatcher: LOOPBACK_AGENT,
|
|
114802
114851
|
method: request.method,
|
|
114803
114852
|
headers: {
|
|
114804
114853
|
...request.headers,
|
|
@@ -114814,7 +114863,18 @@ async function proxyRequest({ configuration, request, signal }) {
|
|
|
114814
114863
|
fetchOptions.body =
|
|
114815
114864
|
typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
|
|
114816
114865
|
}
|
|
114866
|
+
logger.debug("Loopback proxy request starting", {
|
|
114867
|
+
requestMethod: request.method,
|
|
114868
|
+
requestUrl: finalPath
|
|
114869
|
+
});
|
|
114870
|
+
const startedAt = Date.now();
|
|
114817
114871
|
const response = await undiciExports.fetch(url, fetchOptions);
|
|
114872
|
+
logger.debug("Loopback proxy response received", {
|
|
114873
|
+
elapsedMs: Date.now() - startedAt,
|
|
114874
|
+
requestMethod: request.method,
|
|
114875
|
+
requestUrl: finalPath,
|
|
114876
|
+
statusCode: response.status
|
|
114877
|
+
});
|
|
114818
114878
|
return {
|
|
114819
114879
|
body: response.body ? Readable.fromWeb(response.body) : null,
|
|
114820
114880
|
headers: Object.fromEntries(response.headers.entries()),
|
|
@@ -124435,7 +124495,20 @@ async function collectMachineMetadata() {
|
|
|
124435
124495
|
return machineMetadata;
|
|
124436
124496
|
}
|
|
124437
124497
|
|
|
124498
|
+
const CONTAINER_ID_RE = /^[0-9a-f]{12}$/;
|
|
124499
|
+
function ensureDockerValidEnv() {
|
|
124500
|
+
if (!existsSync("/.dockerenv")) {
|
|
124501
|
+
return;
|
|
124502
|
+
}
|
|
124503
|
+
const name = hostname$2();
|
|
124504
|
+
if (CONTAINER_ID_RE.test(name)) {
|
|
124505
|
+
throw new Error(`Container hostname is a default Docker container ID ("${name}"). ` +
|
|
124506
|
+
`Set 'hostname' in your compose service definition.`);
|
|
124507
|
+
}
|
|
124508
|
+
}
|
|
124509
|
+
|
|
124438
124510
|
async function createApplication({ abortController, apiClient, configuration, logger }) {
|
|
124511
|
+
ensureDockerValidEnv();
|
|
124439
124512
|
logger.info("Fetching conduit configuration");
|
|
124440
124513
|
let conduitConfiguration = await apiClient.getConduitConfiguration();
|
|
124441
124514
|
logger.info("Received configuration", {
|
|
@@ -124758,6 +124831,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
124758
124831
|
onRequest: async ({ request, signal }) => {
|
|
124759
124832
|
return proxyRequest({
|
|
124760
124833
|
configuration,
|
|
124834
|
+
logger,
|
|
124761
124835
|
request,
|
|
124762
124836
|
signal
|
|
124763
124837
|
});
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { LLMEngine, LLMModel } from "@infersec/definitions";
|
|
2
2
|
import { Logger } from "@infersec/logger";
|
|
3
3
|
import EventEmitter from "eventemitter3";
|
|
4
|
-
import {
|
|
4
|
+
import { Response } from "undici";
|
|
5
|
+
import type { RequestInit } from "undici";
|
|
5
6
|
import { type ModelDownloadProgressUpdate } from "./download.js";
|
|
6
7
|
interface ModelManagerEvents {
|
|
7
8
|
engineError: (error: Error) => void;
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import type { APIResponse, ServerToClientAPIRequest } from "@infersec/definitions";
|
|
2
|
+
import { Logger } from "@infersec/logger";
|
|
2
3
|
import type { Configuration } from "../configuration.js";
|
|
3
|
-
|
|
4
|
-
* Proxy server requests to the local inference HTTP server.
|
|
5
|
-
*/
|
|
6
|
-
export declare function proxyRequest({ configuration, request, signal }: {
|
|
4
|
+
export declare function proxyRequest({ configuration, logger, request, signal }: {
|
|
7
5
|
configuration: Configuration;
|
|
6
|
+
logger: Logger;
|
|
8
7
|
request: ServerToClientAPIRequest;
|
|
9
8
|
signal?: AbortSignal;
|
|
10
9
|
}): Promise<APIResponse>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function ensureDockerValidEnv(): void;
|