@infersec/conduit 1.31.0 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,28 +10,31 @@ npx @infersec/conduit --engine <type> --key <api-key> --source <source-id>
10
10
 
11
11
  ### Flags
12
12
 
13
- | Flag | Required | Default | Notes |
14
- | -------------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------- |
15
- | `--engine` | Yes | - | Engine type (matches `ENGINE`). |
16
- | `--key` | Yes | - | API key (matches `API_KEY`). |
17
- | `--source` | Yes | - | Inference source ID (matches `SOURCE`). |
18
- | `--api-url` | No | `https://api.infersec.ai` | API base URL (matches `API_URL`). |
19
- | `--port` | No | `9505` | Port to listen on (matches `PORT`). |
20
- | `--root` | No | `$HOME/.cache/infersec/iagent` | Root directory (matches `ROOT_DIRECTORY`). |
21
- | `--start-mode` | No | `auto` | Startup mode (matches `START_MODE`): `auto` starts engine, `idle` leaves conduit idle. |
13
+ | Flag | Required | Default | Notes |
14
+ | --------------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------- |
15
+ | `--engine` | Yes | - | Engine type (matches `ENGINE`). |
16
+ | `--engine-port` | No | `8000` | Engine port (matches `ENGINE_PORT`). |
17
+ | `--key` | Yes | - | API key (matches `API_KEY`). |
18
+ | `--port` | No | `9505` | Port to listen on (matches `PORT`). |
19
+ | `--root` | No | `$HOME/.cache/infersec/iagent` | Root directory (matches `ROOT_DIRECTORY`). |
20
+ | `--source` | Yes | - | Inference source ID (matches `SOURCE`). |
21
+ | `--api-url` | No | `https://api.infersec.ai` | API base URL (matches `API_URL`). |
22
+ | `--start-mode` | No | `auto` | Startup mode (matches `START_MODE`): `auto` starts engine, `idle` leaves conduit idle. |
22
23
 
23
24
  ### Examples
24
25
 
25
- | Scenario | Command |
26
- | ---------------- | ------------------------------------------------------------------------------------------------------------ |
27
- | Required only | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id>` |
28
- | Custom root/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --root /data/infersec --port 9506` |
26
+ | Scenario | Command |
27
+ | ------------------ | ------------------------------------------------------------------------------------------------------------ |
28
+ | Required only | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id>` |
29
+ | Custom root/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --root /data/infersec --port 9506` |
30
+ | Custom engine/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --port 9506 --engine-port 8989` |
29
31
 
30
32
  ## Environment variables
31
33
 
32
34
  | Variable | Required | Default | Notes |
33
35
  | ---------------- | -------- | ------------------------------ | ----------------------------------------- |
34
36
  | `ENGINE` | Yes | - | Engine type (matches `--engine`). |
37
+ | `ENGINE_PORT` | No | `8000` | Engine port (matches `--engine-port`). |
35
38
  | `API_KEY` | Yes | - | API key (matches `--key`). |
36
39
  | `SOURCE` | Yes | - | Inference source ID (matches `--source`). |
37
40
  | `API_URL` | No | `https://api.infersec.ai` | API base URL (matches `--api-url`). |
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-CC3HzuZU.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-BXwggMaM.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
@@ -65,6 +65,7 @@ Options:
65
65
  --api-key <value> API key (or API_KEY)
66
66
  --api-url <url> API base URL (or API_URL)
67
67
  --engine <type> Engine type (or ENGINE)
68
+ --engine-port <number> Engine port (or ENGINE_PORT)
68
69
  --key <value> API key (or API_KEY)
69
70
  --port <number> Port to listen on (or PORT)
70
71
  --root <path> Root directory (or ROOT_DIRECTORY)
@@ -82,6 +83,9 @@ async function run() {
82
83
  engine: {
83
84
  type: "string"
84
85
  },
86
+ "engine-port": {
87
+ type: "string"
88
+ },
85
89
  help: {
86
90
  short: "h",
87
91
  type: "boolean"
@@ -133,6 +137,13 @@ async function run() {
133
137
  }
134
138
  configurationOverrides.port = port;
135
139
  }
140
+ if (values["engine-port"]) {
141
+ const enginePort = Number.parseInt(values["engine-port"], 10);
142
+ if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
143
+ throw new Error(`Invalid engine port: ${values["engine-port"]}`);
144
+ }
145
+ configurationOverrides.enginePort = enginePort;
146
+ }
136
147
  await startInferenceAgent({
137
148
  configurationOverrides
138
149
  });
@@ -9,6 +9,7 @@ export interface Configuration {
9
9
  agentEngineType: LLMEngine;
10
10
  apiKey: string;
11
11
  apiURL: string;
12
+ enginePort: number;
12
13
  inferenceSourceID: ULID;
13
14
  port: number;
14
15
  rootDirectory: string;
@@ -18,6 +19,7 @@ export interface ConfigurationOverrides {
18
19
  agentEngineType?: string;
19
20
  apiKey?: string;
20
21
  apiURL?: string;
22
+ enginePort?: number;
21
23
  inferenceSourceID?: ULID;
22
24
  port?: number;
23
25
  rootDirectory?: string;
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-CC3HzuZU.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-BXwggMaM.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -11,6 +11,7 @@ interface ModelManagerEvents {
11
11
  type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
12
12
  export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
13
13
  readonly engine: LLMEngine;
14
+ readonly enginePort: number;
14
15
  readonly model: LLMModel;
15
16
  readonly parallelism: number | null;
16
17
  private uniqueName;
@@ -20,9 +21,10 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
20
21
  private lifecycleState;
21
22
  private stopRequested;
22
23
  protected readonly modelsDirectory: string;
23
- constructor({ contextLength, engine, logger, model, parallelism, root }: {
24
+ constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }: {
24
25
  contextLength?: number | null;
25
26
  engine: LLMEngine;
27
+ enginePort: number;
26
28
  logger: Logger;
27
29
  model: LLMModel;
28
30
  parallelism?: number | null;
@@ -1,6 +1,7 @@
1
1
  import { ProcessManager } from "@infersec/utils";
2
2
  import { ModelManager } from "./ModelManager.js";
3
- export declare function startLlamacpp(this: ModelManager, { targetDirectory }: {
3
+ export declare function startLlamacpp(this: ModelManager, { enginePort, targetDirectory }: {
4
+ enginePort: number;
4
5
  targetDirectory: string;
5
6
  }): Promise<ProcessManager>;
6
- export declare function checkLlamacppHealth(): Promise<boolean>;
7
+ export declare function checkLlamacppHealth(port?: number | string): Promise<boolean>;
@@ -1,5 +1,6 @@
1
1
  import { ProcessManager } from "@infersec/utils";
2
2
  import type { ModelManager } from "./ModelManager.js";
3
- export declare function startVLLM(this: ModelManager, { targetDirectory }: {
3
+ export declare function startVLLM(this: ModelManager, { enginePort, targetDirectory }: {
4
+ enginePort: number;
4
5
  targetDirectory: string;
5
6
  }): Promise<ProcessManager>;
@@ -15468,6 +15468,13 @@ object({
15468
15468
  websiteIconPath: string$1()
15469
15469
  });
15470
15470
 
15471
+ function readEnvIntegerOptional({ defaultValue, name }) {
15472
+ const str = readEnvStringOptional(name, `${defaultValue}`).trim();
15473
+ if (!/^\d+$/.test(str)) {
15474
+ throw new Error(`Invalid integer value for environment variable '${name}': ${str}`);
15475
+ }
15476
+ return parseInt(str, 10);
15477
+ }
15471
15478
  function readEnvString(name) {
15472
15479
  if (typeof process.env[name] !== "string") {
15473
15480
  throw new Error(`Expected environment variable was not present: ${name}`);
@@ -98842,26 +98849,19 @@ function requireExpressPromiseRouter () {
98842
98849
  var expressPromiseRouterExports = requireExpressPromiseRouter();
98843
98850
  var createRouter = /*@__PURE__*/getDefaultExportFromCjs(expressPromiseRouterExports);
98844
98851
 
98845
- const VLLM_START_ARGS = [
98846
- "-m",
98847
- "vllm.entrypoints.openai.api_server",
98848
- "--host",
98849
- "0.0.0.0",
98850
- "--port",
98851
- "8000"
98852
- ];
98852
+ const VLLM_START_ARGS = ["-m", "vllm.entrypoints.openai.api_server", "--host", "0.0.0.0"];
98853
98853
  const VLLM_EXECUTABLE = "python3";
98854
98854
  const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
98855
- async function startVLLM({ targetDirectory }) {
98855
+ async function startVLLM({ enginePort, targetDirectory }) {
98856
98856
  const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
98857
98857
  const processManager = new ProcessManager({
98858
98858
  command: VLLM_EXECUTABLE,
98859
98859
  args: [
98860
98860
  ...VLLM_START_ARGS,
98861
+ "--port",
98862
+ String(enginePort),
98861
98863
  "--model",
98862
98864
  targetDirectory,
98863
- // "--host", "0.0.0.0",
98864
- // "--port", "8000",
98865
98865
  "--served-model-name",
98866
98866
  this.model.id,
98867
98867
  "--device",
@@ -98870,7 +98870,6 @@ async function startVLLM({ targetDirectory }) {
98870
98870
  "float16", // Use float16 to save memory on CPU
98871
98871
  "--max-model-len",
98872
98872
  String(contextLength),
98873
- // Add GPU/ROCm specific args if needed
98874
98873
  "--tensor-parallel-size",
98875
98874
  "1"
98876
98875
  ]
@@ -108377,7 +108376,7 @@ const glob = Object.assign(glob_, {
108377
108376
  glob.glob = glob;
108378
108377
 
108379
108378
  const DEFAULT_LLAMACPP_GPU_LAYERS = 999;
108380
- const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--port", "8000", "--jinja"];
108379
+ const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--jinja"];
108381
108380
  const LLAMACPP_EXECUTABLE = process.env.LLAMACPP_EXECUTABLE ?? "llama-server";
108382
108381
  const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
108383
108382
  async function findQuantizedModelTarget({ model, path }) {
@@ -108409,11 +108408,19 @@ async function findQuantizedModelTarget({ model, path }) {
108409
108408
  }
108410
108409
  return matches[0];
108411
108410
  }
108412
- async function startLlamacpp({ targetDirectory }) {
108411
+ async function startLlamacpp({ enginePort, targetDirectory }) {
108413
108412
  const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
108414
108413
  const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
108415
108414
  const parallelism = this.parallelism;
108416
- const args = [...LLAMACPP_START_ARGS, "--model", target, "--ctx-size", String(contextLength)];
108415
+ const args = [
108416
+ ...LLAMACPP_START_ARGS,
108417
+ "--port",
108418
+ String(enginePort),
108419
+ "--model",
108420
+ target,
108421
+ "--ctx-size",
108422
+ String(contextLength)
108423
+ ];
108417
108424
  const gpuLayers = Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
108418
108425
  if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
108419
108426
  args.push("--n-gpu-layers", String(gpuLayers));
@@ -108429,8 +108436,11 @@ async function startLlamacpp({ targetDirectory }) {
108429
108436
  return processManager;
108430
108437
  }
108431
108438
 
108439
+ // 2 hours
108440
+ const ENGINE_FETCH_TIMEOUT_MS = 7200000;
108432
108441
  class ModelManager extends EventEmitter {
108433
108442
  engine;
108443
+ enginePort;
108434
108444
  model;
108435
108445
  parallelism;
108436
108446
  uniqueName;
@@ -108440,7 +108450,7 @@ class ModelManager extends EventEmitter {
108440
108450
  lifecycleState = "stopped";
108441
108451
  stopRequested = false;
108442
108452
  modelsDirectory;
108443
- constructor({ contextLength, engine, logger, model, parallelism, root }) {
108453
+ constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }) {
108444
108454
  super();
108445
108455
  // const models = getModels();
108446
108456
  // const targetModel = models.find(model => model.id === modelID);
@@ -108456,6 +108466,7 @@ class ModelManager extends EventEmitter {
108456
108466
  // });
108457
108467
  // }
108458
108468
  this.engine = engine;
108469
+ this.enginePort = enginePort;
108459
108470
  this.model = model;
108460
108471
  this.contextLength = typeof contextLength === "number" ? contextLength : null;
108461
108472
  this.parallelism = typeof parallelism === "number" ? parallelism : null;
@@ -108472,9 +108483,28 @@ class ModelManager extends EventEmitter {
108472
108483
  async fetchOpenAI(path, opts) {
108473
108484
  switch (this.engine) {
108474
108485
  case "llama.cpp":
108475
- case "vllm":
108476
- console.log("FETCH", path, opts);
108477
- return undiciExports.fetch(joinURL("http://localhost:8000", path), opts);
108486
+ case "vllm": {
108487
+ this.logger.debug(`Fetching from engine: ${path}`);
108488
+ const callerSignal = opts?.signal;
108489
+ const controller = new AbortController();
108490
+ const timeout = setTimeout(() => {
108491
+ controller.abort(new Error("Inference request timeout"));
108492
+ }, ENGINE_FETCH_TIMEOUT_MS);
108493
+ const effectiveSignal = callerSignal ?? controller.signal;
108494
+ try {
108495
+ return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
108496
+ ...opts,
108497
+ headers: {
108498
+ ...opts?.headers,
108499
+ Connection: "keep-alive"
108500
+ },
108501
+ signal: effectiveSignal
108502
+ });
108503
+ }
108504
+ finally {
108505
+ clearTimeout(timeout);
108506
+ }
108507
+ }
108478
108508
  // case "ollama":
108479
108509
  // console.log("FETCH", path, opts);
108480
108510
  // return fetch(
@@ -108683,10 +108713,12 @@ class ModelManager extends EventEmitter {
108683
108713
  switch (this.engine) {
108684
108714
  case "llama.cpp":
108685
108715
  return startLlamacpp.call(this, {
108716
+ enginePort: this.enginePort,
108686
108717
  targetDirectory: join(this.modelsDirectory, this.uniqueName)
108687
108718
  });
108688
108719
  case "vllm":
108689
108720
  return startVLLM.call(this, {
108721
+ enginePort: this.enginePort,
108690
108722
  targetDirectory: join(this.modelsDirectory, this.uniqueName)
108691
108723
  });
108692
108724
  default: {
@@ -119255,6 +119287,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
119255
119287
  let modelManager = new ModelManager({
119256
119288
  contextLength: conduitConfiguration.contextLength ?? null,
119257
119289
  engine: configuration.agentEngineType,
119290
+ enginePort: configuration.enginePort,
119258
119291
  logger,
119259
119292
  model: conduitConfiguration.targetModel,
119260
119293
  parallelism: conduitConfiguration.parallelism ?? null,
@@ -119400,6 +119433,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
119400
119433
  modelManager = new ModelManager({
119401
119434
  contextLength: conduitConfiguration.contextLength ?? null,
119402
119435
  engine: configuration.agentEngineType,
119436
+ enginePort: configuration.enginePort,
119403
119437
  logger,
119404
119438
  model: conduitConfiguration.targetModel,
119405
119439
  parallelism: conduitConfiguration.parallelism ?? null,
@@ -119598,11 +119632,18 @@ function getConfiguration({ overrides } = {}) {
119598
119632
  const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
119599
119633
  const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
119600
119634
  const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
119635
+ const enginePort = overrides?.enginePort ??
119636
+ readEnvIntegerOptional({ defaultValue: 8000, name: "ENGINE_PORT" });
119637
+ if (enginePort < 1 || enginePort > 65535) {
119638
+ throw new Error(`Invalid engine port: ${enginePort}`);
119639
+ }
119601
119640
  const inferenceSourceID = overrides?.inferenceSourceID ?? readEnvString("SOURCE");
119602
- const portValue = overrides?.port ?? readEnvStringOptional("PORT", "9505");
119603
- const port = Number.parseInt(String(portValue), 10);
119604
- if (Number.isNaN(port)) {
119605
- throw new Error(`Invalid port: ${portValue}`);
119641
+ const port = overrides?.port ?? readEnvIntegerOptional({ defaultValue: 9505, name: "PORT" });
119642
+ if (port < 1 || port > 65535) {
119643
+ throw new Error(`Invalid port: ${port}`);
119644
+ }
119645
+ if (port === enginePort) {
119646
+ throw new Error("Port and engine port cannot be the same");
119606
119647
  }
119607
119648
  const defaultRootDirectory = join(process.env.HOME ?? "/tmp", ".cache", "infersec", "iagent");
119608
119649
  const rootDirectory = overrides?.rootDirectory ?? readEnvStringOptional("ROOT_DIRECTORY", defaultRootDirectory);
@@ -119612,6 +119653,7 @@ function getConfiguration({ overrides } = {}) {
119612
119653
  agentEngineType,
119613
119654
  apiKey,
119614
119655
  apiURL,
119656
+ enginePort,
119615
119657
  inferenceSourceID,
119616
119658
  port,
119617
119659
  rootDirectory,
@@ -119648,6 +119690,7 @@ async function startInferenceAgent({ configurationOverrides }) {
119648
119690
  resolve();
119649
119691
  });
119650
119692
  });
119693
+ logger.info("Engine port", { port: configuration.enginePort });
119651
119694
  process.on("SIGINT", createSignalShutdown({ logger, shutdown }));
119652
119695
  process.on("SIGTERM", createSignalShutdown({ logger, shutdown }));
119653
119696
  abortController.signal.addEventListener("abort", () => {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.31.0",
4
+ "version": "1.32.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },