@infersec/conduit 1.31.1 → 1.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -13
- package/dist/cli.js +12 -1
- package/dist/configuration.d.ts +2 -0
- package/dist/index.js +1 -1
- package/dist/modelManagement/ModelManager.d.ts +3 -1
- package/dist/modelManagement/llamacpp.d.ts +3 -2
- package/dist/modelManagement/vllm.d.ts +2 -1
- package/dist/{start-DvkkeCrj.js → start-BXwggMaM.js} +43 -21
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,28 +10,31 @@ npx @infersec/conduit --engine <type> --key <api-key> --source <source-id>
|
|
|
10
10
|
|
|
11
11
|
### Flags
|
|
12
12
|
|
|
13
|
-
| Flag
|
|
14
|
-
|
|
|
15
|
-
| `--engine`
|
|
16
|
-
| `--
|
|
17
|
-
| `--
|
|
18
|
-
| `--
|
|
19
|
-
| `--
|
|
20
|
-
| `--
|
|
21
|
-
| `--
|
|
13
|
+
| Flag | Required | Default | Notes |
|
|
14
|
+
| --------------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------- |
|
|
15
|
+
| `--engine` | Yes | - | Engine type (matches `ENGINE`). |
|
|
16
|
+
| `--engine-port` | No | `8000` | Engine port (matches `ENGINE_PORT`). |
|
|
17
|
+
| `--key` | Yes | - | API key (matches `API_KEY`). |
|
|
18
|
+
| `--port` | No | `9505` | Port to listen on (matches `PORT`). |
|
|
19
|
+
| `--root` | No | `$HOME/.cache/infersec/iagent` | Root directory (matches `ROOT_DIRECTORY`). |
|
|
20
|
+
| `--source` | Yes | - | Inference source ID (matches `SOURCE`). |
|
|
21
|
+
| `--api-url` | No | `https://api.infersec.ai` | API base URL (matches `API_URL`). |
|
|
22
|
+
| `--start-mode` | No | `auto` | Startup mode (matches `START_MODE`): `auto` starts engine, `idle` leaves conduit idle. |
|
|
22
23
|
|
|
23
24
|
### Examples
|
|
24
25
|
|
|
25
|
-
| Scenario
|
|
26
|
-
|
|
|
27
|
-
| Required only
|
|
28
|
-
| Custom root/port
|
|
26
|
+
| Scenario | Command |
|
|
27
|
+
| ------------------ | ------------------------------------------------------------------------------------------------------------ |
|
|
28
|
+
| Required only | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id>` |
|
|
29
|
+
| Custom root/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --root /data/infersec --port 9506` |
|
|
30
|
+
| Custom engine/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --port 9506 --engine-port 8989` |
|
|
29
31
|
|
|
30
32
|
## Environment variables
|
|
31
33
|
|
|
32
34
|
| Variable | Required | Default | Notes |
|
|
33
35
|
| ---------------- | -------- | ------------------------------ | ----------------------------------------- |
|
|
34
36
|
| `ENGINE` | Yes | - | Engine type (matches `--engine`). |
|
|
37
|
+
| `ENGINE_PORT` | No | `8000` | Engine port (matches `--engine-port`). |
|
|
35
38
|
| `API_KEY` | Yes | - | API key (matches `--key`). |
|
|
36
39
|
| `SOURCE` | Yes | - | Inference source ID (matches `--source`). |
|
|
37
40
|
| `API_URL` | No | `https://api.infersec.ai` | API base URL (matches `--api-url`). |
|
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-DvkkeCrj.js';
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-BXwggMaM.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
|
@@ -65,6 +65,7 @@ Options:
|
|
|
65
65
|
--api-key <value> API key (or API_KEY)
|
|
66
66
|
--api-url <url> API base URL (or API_URL)
|
|
67
67
|
--engine <type> Engine type (or ENGINE)
|
|
68
|
+
--engine-port <number> Engine port (or ENGINE_PORT)
|
|
68
69
|
--key <value> API key (or API_KEY)
|
|
69
70
|
--port <number> Port to listen on (or PORT)
|
|
70
71
|
--root <path> Root directory (or ROOT_DIRECTORY)
|
|
@@ -82,6 +83,9 @@ async function run() {
|
|
|
82
83
|
engine: {
|
|
83
84
|
type: "string"
|
|
84
85
|
},
|
|
86
|
+
"engine-port": {
|
|
87
|
+
type: "string"
|
|
88
|
+
},
|
|
85
89
|
help: {
|
|
86
90
|
short: "h",
|
|
87
91
|
type: "boolean"
|
|
@@ -133,6 +137,13 @@ async function run() {
|
|
|
133
137
|
}
|
|
134
138
|
configurationOverrides.port = port;
|
|
135
139
|
}
|
|
140
|
+
if (values["engine-port"]) {
|
|
141
|
+
const enginePort = Number.parseInt(values["engine-port"], 10);
|
|
142
|
+
if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
|
|
143
|
+
throw new Error(`Invalid engine port: ${values["engine-port"]}`);
|
|
144
|
+
}
|
|
145
|
+
configurationOverrides.enginePort = enginePort;
|
|
146
|
+
}
|
|
136
147
|
await startInferenceAgent({
|
|
137
148
|
configurationOverrides
|
|
138
149
|
});
|
package/dist/configuration.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export interface Configuration {
|
|
|
9
9
|
agentEngineType: LLMEngine;
|
|
10
10
|
apiKey: string;
|
|
11
11
|
apiURL: string;
|
|
12
|
+
enginePort: number;
|
|
12
13
|
inferenceSourceID: ULID;
|
|
13
14
|
port: number;
|
|
14
15
|
rootDirectory: string;
|
|
@@ -18,6 +19,7 @@ export interface ConfigurationOverrides {
|
|
|
18
19
|
agentEngineType?: string;
|
|
19
20
|
apiKey?: string;
|
|
20
21
|
apiURL?: string;
|
|
22
|
+
enginePort?: number;
|
|
21
23
|
inferenceSourceID?: ULID;
|
|
22
24
|
port?: number;
|
|
23
25
|
rootDirectory?: string;
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-DvkkeCrj.js';
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-BXwggMaM.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
package/dist/modelManagement/ModelManager.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ interface ModelManagerEvents {
|
|
|
11
11
|
type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
|
|
12
12
|
export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
|
|
13
13
|
readonly engine: LLMEngine;
|
|
14
|
+
readonly enginePort: number;
|
|
14
15
|
readonly model: LLMModel;
|
|
15
16
|
readonly parallelism: number | null;
|
|
16
17
|
private uniqueName;
|
|
@@ -20,9 +21,10 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
|
|
|
20
21
|
private lifecycleState;
|
|
21
22
|
private stopRequested;
|
|
22
23
|
protected readonly modelsDirectory: string;
|
|
23
|
-
constructor({ contextLength, engine, logger, model, parallelism, root }: {
|
|
24
|
+
constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }: {
|
|
24
25
|
contextLength?: number | null;
|
|
25
26
|
engine: LLMEngine;
|
|
27
|
+
enginePort: number;
|
|
26
28
|
logger: Logger;
|
|
27
29
|
model: LLMModel;
|
|
28
30
|
parallelism?: number | null;
|
|
package/dist/modelManagement/llamacpp.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { ProcessManager } from "@infersec/utils";
|
|
2
2
|
import { ModelManager } from "./ModelManager.js";
|
|
3
|
-
export declare function startLlamacpp(this: ModelManager, { targetDirectory }: {
|
|
3
|
+
export declare function startLlamacpp(this: ModelManager, { enginePort, targetDirectory }: {
|
|
4
|
+
enginePort: number;
|
|
4
5
|
targetDirectory: string;
|
|
5
6
|
}): Promise<ProcessManager>;
|
|
6
|
-
export declare function checkLlamacppHealth(): Promise<boolean>;
|
|
7
|
+
export declare function checkLlamacppHealth(port?: number | string): Promise<boolean>;
|
|
package/dist/modelManagement/vllm.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { ProcessManager } from "@infersec/utils";
|
|
2
2
|
import type { ModelManager } from "./ModelManager.js";
|
|
3
|
-
export declare function startVLLM(this: ModelManager, { targetDirectory }: {
|
|
3
|
+
export declare function startVLLM(this: ModelManager, { enginePort, targetDirectory }: {
|
|
4
|
+
enginePort: number;
|
|
4
5
|
targetDirectory: string;
|
|
5
6
|
}): Promise<ProcessManager>;
|
|
package/dist/{start-DvkkeCrj.js → start-BXwggMaM.js}
CHANGED
|
@@ -15468,6 +15468,13 @@ object({
|
|
|
15468
15468
|
websiteIconPath: string$1()
|
|
15469
15469
|
});
|
|
15470
15470
|
|
|
15471
|
+
function readEnvIntegerOptional({ defaultValue, name }) {
|
|
15472
|
+
const str = readEnvStringOptional(name, `${defaultValue}`).trim();
|
|
15473
|
+
if (!/^\d+$/.test(str)) {
|
|
15474
|
+
throw new Error(`Invalid integer value for environment variable '${name}': ${str}`);
|
|
15475
|
+
}
|
|
15476
|
+
return parseInt(str, 10);
|
|
15477
|
+
}
|
|
15471
15478
|
function readEnvString(name) {
|
|
15472
15479
|
if (typeof process.env[name] !== "string") {
|
|
15473
15480
|
throw new Error(`Expected environment variable was not present: ${name}`);
|
|
@@ -98842,26 +98849,19 @@ function requireExpressPromiseRouter () {
|
|
|
98842
98849
|
var expressPromiseRouterExports = requireExpressPromiseRouter();
|
|
98843
98850
|
var createRouter = /*@__PURE__*/getDefaultExportFromCjs(expressPromiseRouterExports);
|
|
98844
98851
|
|
|
98845
|
-
const VLLM_START_ARGS = [
|
|
98846
|
-
"-m",
|
|
98847
|
-
"vllm.entrypoints.openai.api_server",
|
|
98848
|
-
"--host",
|
|
98849
|
-
"0.0.0.0",
|
|
98850
|
-
"--port",
|
|
98851
|
-
"8000"
|
|
98852
|
-
];
|
|
98852
|
+
const VLLM_START_ARGS = ["-m", "vllm.entrypoints.openai.api_server", "--host", "0.0.0.0"];
|
|
98853
98853
|
const VLLM_EXECUTABLE = "python3";
|
|
98854
98854
|
const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
|
|
98855
|
-
async function startVLLM({ targetDirectory }) {
|
|
98855
|
+
async function startVLLM({ enginePort, targetDirectory }) {
|
|
98856
98856
|
const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
|
|
98857
98857
|
const processManager = new ProcessManager({
|
|
98858
98858
|
command: VLLM_EXECUTABLE,
|
|
98859
98859
|
args: [
|
|
98860
98860
|
...VLLM_START_ARGS,
|
|
98861
|
+
"--port",
|
|
98862
|
+
String(enginePort),
|
|
98861
98863
|
"--model",
|
|
98862
98864
|
targetDirectory,
|
|
98863
|
-
// "--host", "0.0.0.0",
|
|
98864
|
-
// "--port", "8000",
|
|
98865
98865
|
"--served-model-name",
|
|
98866
98866
|
this.model.id,
|
|
98867
98867
|
"--device",
|
|
@@ -98870,7 +98870,6 @@ async function startVLLM({ targetDirectory }) {
|
|
|
98870
98870
|
"float16", // Use float16 to save memory on CPU
|
|
98871
98871
|
"--max-model-len",
|
|
98872
98872
|
String(contextLength),
|
|
98873
|
-
// Add GPU/ROCm specific args if needed
|
|
98874
98873
|
"--tensor-parallel-size",
|
|
98875
98874
|
"1"
|
|
98876
98875
|
]
|
|
@@ -108377,7 +108376,7 @@ const glob = Object.assign(glob_, {
|
|
|
108377
108376
|
glob.glob = glob;
|
|
108378
108377
|
|
|
108379
108378
|
const DEFAULT_LLAMACPP_GPU_LAYERS = 999;
|
|
108380
|
-
const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--
|
|
108379
|
+
const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--jinja"];
|
|
108381
108380
|
const LLAMACPP_EXECUTABLE = process.env.LLAMACPP_EXECUTABLE ?? "llama-server";
|
|
108382
108381
|
const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
|
|
108383
108382
|
async function findQuantizedModelTarget({ model, path }) {
|
|
@@ -108409,11 +108408,19 @@ async function findQuantizedModelTarget({ model, path }) {
|
|
|
108409
108408
|
}
|
|
108410
108409
|
return matches[0];
|
|
108411
108410
|
}
|
|
108412
|
-
async function startLlamacpp({ targetDirectory }) {
|
|
108411
|
+
async function startLlamacpp({ enginePort, targetDirectory }) {
|
|
108413
108412
|
const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
|
|
108414
108413
|
const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
|
|
108415
108414
|
const parallelism = this.parallelism;
|
|
108416
|
-
const args = [
|
|
108415
|
+
const args = [
|
|
108416
|
+
...LLAMACPP_START_ARGS,
|
|
108417
|
+
"--port",
|
|
108418
|
+
String(enginePort),
|
|
108419
|
+
"--model",
|
|
108420
|
+
target,
|
|
108421
|
+
"--ctx-size",
|
|
108422
|
+
String(contextLength)
|
|
108423
|
+
];
|
|
108417
108424
|
const gpuLayers = Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
|
|
108418
108425
|
if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
|
|
108419
108426
|
args.push("--n-gpu-layers", String(gpuLayers));
|
|
@@ -108433,6 +108440,7 @@ async function startLlamacpp({ targetDirectory }) {
|
|
|
108433
108440
|
const ENGINE_FETCH_TIMEOUT_MS = 7200000;
|
|
108434
108441
|
class ModelManager extends EventEmitter {
|
|
108435
108442
|
engine;
|
|
108443
|
+
enginePort;
|
|
108436
108444
|
model;
|
|
108437
108445
|
parallelism;
|
|
108438
108446
|
uniqueName;
|
|
@@ -108442,7 +108450,7 @@ class ModelManager extends EventEmitter {
|
|
|
108442
108450
|
lifecycleState = "stopped";
|
|
108443
108451
|
stopRequested = false;
|
|
108444
108452
|
modelsDirectory;
|
|
108445
|
-
constructor({ contextLength, engine, logger, model, parallelism, root }) {
|
|
108453
|
+
constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }) {
|
|
108446
108454
|
super();
|
|
108447
108455
|
// const models = getModels();
|
|
108448
108456
|
// const targetModel = models.find(model => model.id === modelID);
|
|
@@ -108458,6 +108466,7 @@ class ModelManager extends EventEmitter {
|
|
|
108458
108466
|
// });
|
|
108459
108467
|
// }
|
|
108460
108468
|
this.engine = engine;
|
|
108469
|
+
this.enginePort = enginePort;
|
|
108461
108470
|
this.model = model;
|
|
108462
108471
|
this.contextLength = typeof contextLength === "number" ? contextLength : null;
|
|
108463
108472
|
this.parallelism = typeof parallelism === "number" ? parallelism : null;
|
|
@@ -108483,7 +108492,7 @@ class ModelManager extends EventEmitter {
|
|
|
108483
108492
|
}, ENGINE_FETCH_TIMEOUT_MS);
|
|
108484
108493
|
const effectiveSignal = callerSignal ?? controller.signal;
|
|
108485
108494
|
try {
|
|
108486
|
-
return await undiciExports.fetch(joinURL(
|
|
108495
|
+
return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
|
|
108487
108496
|
...opts,
|
|
108488
108497
|
headers: {
|
|
108489
108498
|
...opts?.headers,
|
|
@@ -108704,10 +108713,12 @@ class ModelManager extends EventEmitter {
|
|
|
108704
108713
|
switch (this.engine) {
|
|
108705
108714
|
case "llama.cpp":
|
|
108706
108715
|
return startLlamacpp.call(this, {
|
|
108716
|
+
enginePort: this.enginePort,
|
|
108707
108717
|
targetDirectory: join(this.modelsDirectory, this.uniqueName)
|
|
108708
108718
|
});
|
|
108709
108719
|
case "vllm":
|
|
108710
108720
|
return startVLLM.call(this, {
|
|
108721
|
+
enginePort: this.enginePort,
|
|
108711
108722
|
targetDirectory: join(this.modelsDirectory, this.uniqueName)
|
|
108712
108723
|
});
|
|
108713
108724
|
default: {
|
|
@@ -119276,6 +119287,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119276
119287
|
let modelManager = new ModelManager({
|
|
119277
119288
|
contextLength: conduitConfiguration.contextLength ?? null,
|
|
119278
119289
|
engine: configuration.agentEngineType,
|
|
119290
|
+
enginePort: configuration.enginePort,
|
|
119279
119291
|
logger,
|
|
119280
119292
|
model: conduitConfiguration.targetModel,
|
|
119281
119293
|
parallelism: conduitConfiguration.parallelism ?? null,
|
|
@@ -119421,6 +119433,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119421
119433
|
modelManager = new ModelManager({
|
|
119422
119434
|
contextLength: conduitConfiguration.contextLength ?? null,
|
|
119423
119435
|
engine: configuration.agentEngineType,
|
|
119436
|
+
enginePort: configuration.enginePort,
|
|
119424
119437
|
logger,
|
|
119425
119438
|
model: conduitConfiguration.targetModel,
|
|
119426
119439
|
parallelism: conduitConfiguration.parallelism ?? null,
|
|
@@ -119619,11 +119632,18 @@ function getConfiguration({ overrides } = {}) {
|
|
|
119619
119632
|
const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
|
|
119620
119633
|
const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
|
|
119621
119634
|
const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
|
|
119635
|
+
const enginePort = overrides?.enginePort ??
|
|
119636
|
+
readEnvIntegerOptional({ defaultValue: 8000, name: "ENGINE_PORT" });
|
|
119637
|
+
if (enginePort < 1 || enginePort > 65535) {
|
|
119638
|
+
throw new Error(`Invalid engine port: ${enginePort}`);
|
|
119639
|
+
}
|
|
119622
119640
|
const inferenceSourceID = overrides?.inferenceSourceID ?? readEnvString("SOURCE");
|
|
119623
|
-
const
|
|
119624
|
-
|
|
119625
|
-
|
|
119626
|
-
|
|
119641
|
+
const port = overrides?.port ?? readEnvIntegerOptional({ defaultValue: 9505, name: "PORT" });
|
|
119642
|
+
if (port < 1 || port > 65535) {
|
|
119643
|
+
throw new Error(`Invalid port: ${port}`);
|
|
119644
|
+
}
|
|
119645
|
+
if (port === enginePort) {
|
|
119646
|
+
throw new Error("Port and engine port cannot be the same");
|
|
119627
119647
|
}
|
|
119628
119648
|
const defaultRootDirectory = join(process.env.HOME ?? "/tmp", ".cache", "infersec", "iagent");
|
|
119629
119649
|
const rootDirectory = overrides?.rootDirectory ?? readEnvStringOptional("ROOT_DIRECTORY", defaultRootDirectory);
|
|
@@ -119633,6 +119653,7 @@ function getConfiguration({ overrides } = {}) {
|
|
|
119633
119653
|
agentEngineType,
|
|
119634
119654
|
apiKey,
|
|
119635
119655
|
apiURL,
|
|
119656
|
+
enginePort,
|
|
119636
119657
|
inferenceSourceID,
|
|
119637
119658
|
port,
|
|
119638
119659
|
rootDirectory,
|
|
@@ -119669,6 +119690,7 @@ async function startInferenceAgent({ configurationOverrides }) {
|
|
|
119669
119690
|
resolve();
|
|
119670
119691
|
});
|
|
119671
119692
|
});
|
|
119693
|
+
logger.info("Engine port", { port: configuration.enginePort });
|
|
119672
119694
|
process.on("SIGINT", createSignalShutdown({ logger, shutdown }));
|
|
119673
119695
|
process.on("SIGTERM", createSignalShutdown({ logger, shutdown }));
|
|
119674
119696
|
abortController.signal.addEventListener("abort", () => {
|