@infersec/conduit 1.31.0 → 1.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -13
- package/dist/cli.js +12 -1
- package/dist/configuration.d.ts +2 -0
- package/dist/index.js +1 -1
- package/dist/modelManagement/ModelManager.d.ts +3 -1
- package/dist/modelManagement/llamacpp.d.ts +3 -2
- package/dist/modelManagement/vllm.d.ts +2 -1
- package/dist/{start-CC3HzuZU.js → start-BXwggMaM.js} +66 -23
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,28 +10,31 @@ npx @infersec/conduit --engine <type> --key <api-key> --source <source-id>
|
|
|
10
10
|
|
|
11
11
|
### Flags
|
|
12
12
|
|
|
13
|
-
| Flag
|
|
14
|
-
|
|
|
15
|
-
| `--engine`
|
|
16
|
-
| `--
|
|
17
|
-
| `--
|
|
18
|
-
| `--
|
|
19
|
-
| `--
|
|
20
|
-
| `--
|
|
21
|
-
| `--
|
|
13
|
+
| Flag | Required | Default | Notes |
|
|
14
|
+
| --------------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------- |
|
|
15
|
+
| `--engine` | Yes | - | Engine type (matches `ENGINE`). |
|
|
16
|
+
| `--engine-port` | No | `8000` | Engine port (matches `ENGINE_PORT`). |
|
|
17
|
+
| `--key` | Yes | - | API key (matches `API_KEY`). |
|
|
18
|
+
| `--port` | No | `9505` | Port to listen on (matches `PORT`). |
|
|
19
|
+
| `--root` | No | `$HOME/.cache/infersec/iagent` | Root directory (matches `ROOT_DIRECTORY`). |
|
|
20
|
+
| `--source` | Yes | - | Inference source ID (matches `SOURCE`). |
|
|
21
|
+
| `--api-url` | No | `https://api.infersec.ai` | API base URL (matches `API_URL`). |
|
|
22
|
+
| `--start-mode` | No | `auto` | Startup mode (matches `START_MODE`): `auto` starts engine, `idle` leaves conduit idle. |
|
|
22
23
|
|
|
23
24
|
### Examples
|
|
24
25
|
|
|
25
|
-
| Scenario
|
|
26
|
-
|
|
|
27
|
-
| Required only
|
|
28
|
-
| Custom root/port
|
|
26
|
+
| Scenario | Command |
|
|
27
|
+
| ------------------ | ------------------------------------------------------------------------------------------------------------ |
|
|
28
|
+
| Required only | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id>` |
|
|
29
|
+
| Custom root/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --root /data/infersec --port 9506` |
|
|
30
|
+
| Custom engine/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --port 9506 --engine-port 8989` |
|
|
29
31
|
|
|
30
32
|
## Environment variables
|
|
31
33
|
|
|
32
34
|
| Variable | Required | Default | Notes |
|
|
33
35
|
| ---------------- | -------- | ------------------------------ | ----------------------------------------- |
|
|
34
36
|
| `ENGINE` | Yes | - | Engine type (matches `--engine`). |
|
|
37
|
+
| `ENGINE_PORT` | No | `8000` | Engine port (matches `--engine-port`). |
|
|
35
38
|
| `API_KEY` | Yes | - | API key (matches `--key`). |
|
|
36
39
|
| `SOURCE` | Yes | - | Inference source ID (matches `--source`). |
|
|
37
40
|
| `API_URL` | No | `https://api.infersec.ai` | API base URL (matches `--api-url`). |
|
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-BXwggMaM.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
|
@@ -65,6 +65,7 @@ Options:
|
|
|
65
65
|
--api-key <value> API key (or API_KEY)
|
|
66
66
|
--api-url <url> API base URL (or API_URL)
|
|
67
67
|
--engine <type> Engine type (or ENGINE)
|
|
68
|
+
--engine-port <number> Engine port (or ENGINE_PORT)
|
|
68
69
|
--key <value> API key (or API_KEY)
|
|
69
70
|
--port <number> Port to listen on (or PORT)
|
|
70
71
|
--root <path> Root directory (or ROOT_DIRECTORY)
|
|
@@ -82,6 +83,9 @@ async function run() {
|
|
|
82
83
|
engine: {
|
|
83
84
|
type: "string"
|
|
84
85
|
},
|
|
86
|
+
"engine-port": {
|
|
87
|
+
type: "string"
|
|
88
|
+
},
|
|
85
89
|
help: {
|
|
86
90
|
short: "h",
|
|
87
91
|
type: "boolean"
|
|
@@ -133,6 +137,13 @@ async function run() {
|
|
|
133
137
|
}
|
|
134
138
|
configurationOverrides.port = port;
|
|
135
139
|
}
|
|
140
|
+
if (values["engine-port"]) {
    // Validate the CLI value strictly. Number.parseInt() alone stops at the
    // first non-digit, so inputs such as "8000abc" or "80.5" would silently
    // be accepted as 8000 / 80. Requiring an all-digit string keeps the flag
    // consistent with the digits-only check applied to environment integers.
    const rawEnginePort = values["engine-port"].trim();
    const enginePort = /^\d+$/.test(rawEnginePort)
        ? Number.parseInt(rawEnginePort, 10)
        : Number.NaN;
    // Reject anything that is not a valid TCP port number (1-65535).
    if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
        throw new Error(`Invalid engine port: ${values["engine-port"]}`);
    }
    configurationOverrides.enginePort = enginePort;
}
|
|
136
147
|
await startInferenceAgent({
|
|
137
148
|
configurationOverrides
|
|
138
149
|
});
|
package/dist/configuration.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export interface Configuration {
|
|
|
9
9
|
agentEngineType: LLMEngine;
|
|
10
10
|
apiKey: string;
|
|
11
11
|
apiURL: string;
|
|
12
|
+
enginePort: number;
|
|
12
13
|
inferenceSourceID: ULID;
|
|
13
14
|
port: number;
|
|
14
15
|
rootDirectory: string;
|
|
@@ -18,6 +19,7 @@ export interface ConfigurationOverrides {
|
|
|
18
19
|
agentEngineType?: string;
|
|
19
20
|
apiKey?: string;
|
|
20
21
|
apiURL?: string;
|
|
22
|
+
enginePort?: number;
|
|
21
23
|
inferenceSourceID?: ULID;
|
|
22
24
|
port?: number;
|
|
23
25
|
rootDirectory?: string;
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-BXwggMaM.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -11,6 +11,7 @@ interface ModelManagerEvents {
|
|
|
11
11
|
type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
|
|
12
12
|
export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
|
|
13
13
|
readonly engine: LLMEngine;
|
|
14
|
+
readonly enginePort: number;
|
|
14
15
|
readonly model: LLMModel;
|
|
15
16
|
readonly parallelism: number | null;
|
|
16
17
|
private uniqueName;
|
|
@@ -20,9 +21,10 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
|
|
|
20
21
|
private lifecycleState;
|
|
21
22
|
private stopRequested;
|
|
22
23
|
protected readonly modelsDirectory: string;
|
|
23
|
-
constructor({ contextLength, engine, logger, model, parallelism, root }: {
|
|
24
|
+
constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }: {
|
|
24
25
|
contextLength?: number | null;
|
|
25
26
|
engine: LLMEngine;
|
|
27
|
+
enginePort: number;
|
|
26
28
|
logger: Logger;
|
|
27
29
|
model: LLMModel;
|
|
28
30
|
parallelism?: number | null;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { ProcessManager } from "@infersec/utils";
|
|
2
2
|
import { ModelManager } from "./ModelManager.js";
|
|
3
|
-
export declare function startLlamacpp(this: ModelManager, { targetDirectory }: {
|
|
3
|
+
export declare function startLlamacpp(this: ModelManager, { enginePort, targetDirectory }: {
|
|
4
|
+
enginePort: number;
|
|
4
5
|
targetDirectory: string;
|
|
5
6
|
}): Promise<ProcessManager>;
|
|
6
|
-
export declare function checkLlamacppHealth(): Promise<boolean>;
|
|
7
|
+
export declare function checkLlamacppHealth(port?: number | string): Promise<boolean>;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { ProcessManager } from "@infersec/utils";
|
|
2
2
|
import type { ModelManager } from "./ModelManager.js";
|
|
3
|
-
export declare function startVLLM(this: ModelManager, { targetDirectory }: {
|
|
3
|
+
export declare function startVLLM(this: ModelManager, { enginePort, targetDirectory }: {
|
|
4
|
+
enginePort: number;
|
|
4
5
|
targetDirectory: string;
|
|
5
6
|
}): Promise<ProcessManager>;
|
|
@@ -15468,6 +15468,13 @@ object({
|
|
|
15468
15468
|
websiteIconPath: string$1()
|
|
15469
15469
|
});
|
|
15470
15470
|
|
|
15471
|
+
/**
 * Resolves an integer setting from the environment variable `name`.
 *
 * The stringified `defaultValue` is handed to readEnvStringOptional as its
 * fallback argument (presumably used when the variable is unset - confirm
 * against that helper). The resulting text is trimmed and must consist solely
 * of decimal digits, so signs, decimals and suffixes are rejected.
 *
 * @param {{ defaultValue: number, name: string }} options
 * @returns {number} the value parsed in base 10
 * @throws {Error} when the resolved text is not made up only of digits
 */
function readEnvIntegerOptional({ defaultValue, name }) {
    const fallbackText = `${defaultValue}`;
    const rawText = readEnvStringOptional(name, fallbackText).trim();
    const isDigitsOnly = /^\d+$/.test(rawText);
    if (isDigitsOnly) {
        return parseInt(rawText, 10);
    }
    throw new Error(`Invalid integer value for environment variable '${name}': ${rawText}`);
}
|
|
15471
15478
|
function readEnvString(name) {
|
|
15472
15479
|
if (typeof process.env[name] !== "string") {
|
|
15473
15480
|
throw new Error(`Expected environment variable was not present: ${name}`);
|
|
@@ -98842,26 +98849,19 @@ function requireExpressPromiseRouter () {
|
|
|
98842
98849
|
var expressPromiseRouterExports = requireExpressPromiseRouter();
|
|
98843
98850
|
var createRouter = /*@__PURE__*/getDefaultExportFromCjs(expressPromiseRouterExports);
|
|
98844
98851
|
|
|
98845
|
-
const VLLM_START_ARGS = [
|
|
98846
|
-
"-m",
|
|
98847
|
-
"vllm.entrypoints.openai.api_server",
|
|
98848
|
-
"--host",
|
|
98849
|
-
"0.0.0.0",
|
|
98850
|
-
"--port",
|
|
98851
|
-
"8000"
|
|
98852
|
-
];
|
|
98852
|
+
const VLLM_START_ARGS = ["-m", "vllm.entrypoints.openai.api_server", "--host", "0.0.0.0"];
|
|
98853
98853
|
const VLLM_EXECUTABLE = "python3";
|
|
98854
98854
|
const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
|
|
98855
|
-
async function startVLLM({ targetDirectory }) {
|
|
98855
|
+
async function startVLLM({ enginePort, targetDirectory }) {
|
|
98856
98856
|
const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
|
|
98857
98857
|
const processManager = new ProcessManager({
|
|
98858
98858
|
command: VLLM_EXECUTABLE,
|
|
98859
98859
|
args: [
|
|
98860
98860
|
...VLLM_START_ARGS,
|
|
98861
|
+
"--port",
|
|
98862
|
+
String(enginePort),
|
|
98861
98863
|
"--model",
|
|
98862
98864
|
targetDirectory,
|
|
98863
|
-
// "--host", "0.0.0.0",
|
|
98864
|
-
// "--port", "8000",
|
|
98865
98865
|
"--served-model-name",
|
|
98866
98866
|
this.model.id,
|
|
98867
98867
|
"--device",
|
|
@@ -98870,7 +98870,6 @@ async function startVLLM({ targetDirectory }) {
|
|
|
98870
98870
|
"float16", // Use float16 to save memory on CPU
|
|
98871
98871
|
"--max-model-len",
|
|
98872
98872
|
String(contextLength),
|
|
98873
|
-
// Add GPU/ROCm specific args if needed
|
|
98874
98873
|
"--tensor-parallel-size",
|
|
98875
98874
|
"1"
|
|
98876
98875
|
]
|
|
@@ -108377,7 +108376,7 @@ const glob = Object.assign(glob_, {
|
|
|
108377
108376
|
glob.glob = glob;
|
|
108378
108377
|
|
|
108379
108378
|
const DEFAULT_LLAMACPP_GPU_LAYERS = 999;
|
|
108380
|
-
const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--
|
|
108379
|
+
const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--jinja"];
|
|
108381
108380
|
const LLAMACPP_EXECUTABLE = process.env.LLAMACPP_EXECUTABLE ?? "llama-server";
|
|
108382
108381
|
const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
|
|
108383
108382
|
async function findQuantizedModelTarget({ model, path }) {
|
|
@@ -108409,11 +108408,19 @@ async function findQuantizedModelTarget({ model, path }) {
|
|
|
108409
108408
|
}
|
|
108410
108409
|
return matches[0];
|
|
108411
108410
|
}
|
|
108412
|
-
async function startLlamacpp({ targetDirectory }) {
|
|
108411
|
+
async function startLlamacpp({ enginePort, targetDirectory }) {
|
|
108413
108412
|
const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
|
|
108414
108413
|
const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
|
|
108415
108414
|
const parallelism = this.parallelism;
|
|
108416
|
-
const args = [
|
|
108415
|
+
const args = [
|
|
108416
|
+
...LLAMACPP_START_ARGS,
|
|
108417
|
+
"--port",
|
|
108418
|
+
String(enginePort),
|
|
108419
|
+
"--model",
|
|
108420
|
+
target,
|
|
108421
|
+
"--ctx-size",
|
|
108422
|
+
String(contextLength)
|
|
108423
|
+
];
|
|
108417
108424
|
const gpuLayers = Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
|
|
108418
108425
|
if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
|
|
108419
108426
|
args.push("--n-gpu-layers", String(gpuLayers));
|
|
@@ -108429,8 +108436,11 @@ async function startLlamacpp({ targetDirectory }) {
|
|
|
108429
108436
|
return processManager;
|
|
108430
108437
|
}
|
|
108431
108438
|
|
|
108439
|
+
// 2 hours
|
|
108440
|
+
const ENGINE_FETCH_TIMEOUT_MS = 7200000;
|
|
108432
108441
|
class ModelManager extends EventEmitter {
|
|
108433
108442
|
engine;
|
|
108443
|
+
enginePort;
|
|
108434
108444
|
model;
|
|
108435
108445
|
parallelism;
|
|
108436
108446
|
uniqueName;
|
|
@@ -108440,7 +108450,7 @@ class ModelManager extends EventEmitter {
|
|
|
108440
108450
|
lifecycleState = "stopped";
|
|
108441
108451
|
stopRequested = false;
|
|
108442
108452
|
modelsDirectory;
|
|
108443
|
-
constructor({ contextLength, engine, logger, model, parallelism, root }) {
|
|
108453
|
+
constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }) {
|
|
108444
108454
|
super();
|
|
108445
108455
|
// const models = getModels();
|
|
108446
108456
|
// const targetModel = models.find(model => model.id === modelID);
|
|
@@ -108456,6 +108466,7 @@ class ModelManager extends EventEmitter {
|
|
|
108456
108466
|
// });
|
|
108457
108467
|
// }
|
|
108458
108468
|
this.engine = engine;
|
|
108469
|
+
this.enginePort = enginePort;
|
|
108459
108470
|
this.model = model;
|
|
108460
108471
|
this.contextLength = typeof contextLength === "number" ? contextLength : null;
|
|
108461
108472
|
this.parallelism = typeof parallelism === "number" ? parallelism : null;
|
|
@@ -108472,9 +108483,28 @@ class ModelManager extends EventEmitter {
|
|
|
108472
108483
|
async fetchOpenAI(path, opts) {
|
|
108473
108484
|
switch (this.engine) {
|
|
108474
108485
|
case "llama.cpp":
|
|
108475
|
-
case "vllm":
|
|
108476
|
-
|
|
108477
|
-
|
|
108486
|
+
case "vllm": {
|
|
108487
|
+
this.logger.debug(`Fetching from engine: ${path}`);
|
|
108488
|
+
const callerSignal = opts?.signal;
|
|
108489
|
+
const controller = new AbortController();
|
|
108490
|
+
const timeout = setTimeout(() => {
|
|
108491
|
+
controller.abort(new Error("Inference request timeout"));
|
|
108492
|
+
}, ENGINE_FETCH_TIMEOUT_MS);
|
|
108493
|
+
const effectiveSignal = callerSignal ?? controller.signal;
|
|
108494
|
+
try {
|
|
108495
|
+
return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
|
|
108496
|
+
...opts,
|
|
108497
|
+
headers: {
|
|
108498
|
+
...opts?.headers,
|
|
108499
|
+
Connection: "keep-alive"
|
|
108500
|
+
},
|
|
108501
|
+
signal: effectiveSignal
|
|
108502
|
+
});
|
|
108503
|
+
}
|
|
108504
|
+
finally {
|
|
108505
|
+
clearTimeout(timeout);
|
|
108506
|
+
}
|
|
108507
|
+
}
|
|
108478
108508
|
// case "ollama":
|
|
108479
108509
|
// console.log("FETCH", path, opts);
|
|
108480
108510
|
// return fetch(
|
|
@@ -108683,10 +108713,12 @@ class ModelManager extends EventEmitter {
|
|
|
108683
108713
|
switch (this.engine) {
|
|
108684
108714
|
case "llama.cpp":
|
|
108685
108715
|
return startLlamacpp.call(this, {
|
|
108716
|
+
enginePort: this.enginePort,
|
|
108686
108717
|
targetDirectory: join(this.modelsDirectory, this.uniqueName)
|
|
108687
108718
|
});
|
|
108688
108719
|
case "vllm":
|
|
108689
108720
|
return startVLLM.call(this, {
|
|
108721
|
+
enginePort: this.enginePort,
|
|
108690
108722
|
targetDirectory: join(this.modelsDirectory, this.uniqueName)
|
|
108691
108723
|
});
|
|
108692
108724
|
default: {
|
|
@@ -119255,6 +119287,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119255
119287
|
let modelManager = new ModelManager({
|
|
119256
119288
|
contextLength: conduitConfiguration.contextLength ?? null,
|
|
119257
119289
|
engine: configuration.agentEngineType,
|
|
119290
|
+
enginePort: configuration.enginePort,
|
|
119258
119291
|
logger,
|
|
119259
119292
|
model: conduitConfiguration.targetModel,
|
|
119260
119293
|
parallelism: conduitConfiguration.parallelism ?? null,
|
|
@@ -119400,6 +119433,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119400
119433
|
modelManager = new ModelManager({
|
|
119401
119434
|
contextLength: conduitConfiguration.contextLength ?? null,
|
|
119402
119435
|
engine: configuration.agentEngineType,
|
|
119436
|
+
enginePort: configuration.enginePort,
|
|
119403
119437
|
logger,
|
|
119404
119438
|
model: conduitConfiguration.targetModel,
|
|
119405
119439
|
parallelism: conduitConfiguration.parallelism ?? null,
|
|
@@ -119598,11 +119632,18 @@ function getConfiguration({ overrides } = {}) {
|
|
|
119598
119632
|
const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
|
|
119599
119633
|
const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
|
|
119600
119634
|
const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
|
|
119635
|
+
const enginePort = overrides?.enginePort ??
|
|
119636
|
+
readEnvIntegerOptional({ defaultValue: 8000, name: "ENGINE_PORT" });
|
|
119637
|
+
if (enginePort < 1 || enginePort > 65535) {
|
|
119638
|
+
throw new Error(`Invalid engine port: ${enginePort}`);
|
|
119639
|
+
}
|
|
119601
119640
|
const inferenceSourceID = overrides?.inferenceSourceID ?? readEnvString("SOURCE");
|
|
119602
|
-
const
|
|
119603
|
-
|
|
119604
|
-
|
|
119605
|
-
|
|
119641
|
+
const port = overrides?.port ?? readEnvIntegerOptional({ defaultValue: 9505, name: "PORT" });
|
|
119642
|
+
if (port < 1 || port > 65535) {
|
|
119643
|
+
throw new Error(`Invalid port: ${port}`);
|
|
119644
|
+
}
|
|
119645
|
+
if (port === enginePort) {
|
|
119646
|
+
throw new Error("Port and engine port cannot be the same");
|
|
119606
119647
|
}
|
|
119607
119648
|
const defaultRootDirectory = join(process.env.HOME ?? "/tmp", ".cache", "infersec", "iagent");
|
|
119608
119649
|
const rootDirectory = overrides?.rootDirectory ?? readEnvStringOptional("ROOT_DIRECTORY", defaultRootDirectory);
|
|
@@ -119612,6 +119653,7 @@ function getConfiguration({ overrides } = {}) {
|
|
|
119612
119653
|
agentEngineType,
|
|
119613
119654
|
apiKey,
|
|
119614
119655
|
apiURL,
|
|
119656
|
+
enginePort,
|
|
119615
119657
|
inferenceSourceID,
|
|
119616
119658
|
port,
|
|
119617
119659
|
rootDirectory,
|
|
@@ -119648,6 +119690,7 @@ async function startInferenceAgent({ configurationOverrides }) {
|
|
|
119648
119690
|
resolve();
|
|
119649
119691
|
});
|
|
119650
119692
|
});
|
|
119693
|
+
logger.info("Engine port", { port: configuration.enginePort });
|
|
119651
119694
|
process.on("SIGINT", createSignalShutdown({ logger, shutdown }));
|
|
119652
119695
|
process.on("SIGTERM", createSignalShutdown({ logger, shutdown }));
|
|
119653
119696
|
abortController.signal.addEventListener("abort", () => {
|