npm - @infersec/conduit - Versions diffs - 1.31.0 → 1.32.0 - Mend

@infersec/conduit 1.31.0 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +16 -13
package/dist/cli.js +12 -1
package/dist/configuration.d.ts +2 -0
package/dist/index.js +1 -1
package/dist/modelManagement/ModelManager.d.ts +3 -1
package/dist/modelManagement/llamacpp.d.ts +3 -2
package/dist/modelManagement/vllm.d.ts +2 -1
package/dist/{start-CC3HzuZU.js → start-BXwggMaM.js} +66 -23
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -10,28 +10,31 @@ npx @infersec/conduit --engine <type> --key <api-key> --source <source-id>
 ### Flags
-| Flag           | Required | Default                        | Notes                                                                                  |
-| -------------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------- |
-| `--engine`     | Yes      | -                              | Engine type (matches `ENGINE`).                                                        |
-| `--key`        | Yes      | -                              | API key (matches `API_KEY`).                                                           |
-| `--source`     | Yes      | -                              | Inference source ID (matches `SOURCE`).                                                |
-| `--api-url`    | No       | `https://api.infersec.ai`      | API base URL (matches `API_URL`).                                                      |
-| `--port`       | No       | `9505`                         | Port to listen on (matches `PORT`).                                                    |
-| `--root`       | No       | `$HOME/.cache/infersec/iagent` | Root directory (matches `ROOT_DIRECTORY`).                                             |
-| `--start-mode` | No       | `auto`                         | Startup mode (matches `START_MODE`): `auto` starts engine, `idle` leaves conduit idle. |
+| Flag            | Required | Default                        | Notes                                                                                  |
+| --------------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------- |
+| `--engine`      | Yes      | -                              | Engine type (matches `ENGINE`).                                                        |
+| `--engine-port` | No       | `8000`                         | Engine port (matches `ENGINE_PORT`).                                                   |
+| `--key`         | Yes      | -                              | API key (matches `API_KEY`).                                                           |
+| `--port`        | No       | `9505`                         | Port to listen on (matches `PORT`).                                                    |
+| `--root`        | No       | `$HOME/.cache/infersec/iagent` | Root directory (matches `ROOT_DIRECTORY`).                                             |
+| `--source`      | Yes      | -                              | Inference source ID (matches `SOURCE`).                                                |
+| `--api-url`     | No       | `https://api.infersec.ai`      | API base URL (matches `API_URL`).                                                      |
+| `--start-mode`  | No       | `auto`                         | Startup mode (matches `START_MODE`): `auto` starts engine, `idle` leaves conduit idle. |
 ### Examples
-| Scenario         | Command                                                                                                      |
-| ---------------- | ------------------------------------------------------------------------------------------------------------ |
-| Required only    | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id>`                                   |
-| Custom root/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --root /data/infersec --port 9506` |
+| Scenario           | Command                                                                                                      |
+| ------------------ | ------------------------------------------------------------------------------------------------------------ |
+| Required only      | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id>`                                   |
+| Custom root/port   | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --root /data/infersec --port 9506` |
+| Custom engine/port | `npx @infersec/conduit --engine vllm --key <api-key> --source <source-id> --port 9506 --engine-port 8989`    |
 ## Environment variables
 | Variable         | Required | Default                        | Notes                                     |
 | ---------------- | -------- | ------------------------------ | ----------------------------------------- |
 | `ENGINE`         | Yes      | -                              | Engine type (matches `--engine`).         |
+| `ENGINE_PORT`    | No       | `8000`                         | Engine port (matches `--engine-port`).    |
 | `API_KEY`        | Yes      | -                              | API key (matches `--key`).                |
 | `SOURCE`         | Yes      | -                              | Inference source ID (matches `--source`). |
 | `API_URL`        | No       | `https://api.infersec.ai`      | API base URL (matches `--api-url`).       |

package/dist/cli.js CHANGED Viewed

@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
 import { parseArgs } from 'node:util';
 import 'node:crypto';
-import { a as asError, s as startInferenceAgent } from './start-CC3HzuZU.js';
+import { a as asError, s as startInferenceAgent } from './start-BXwggMaM.js';
 import 'argon2';
 import 'node:child_process';
 import 'node:stream';
@@ -65,6 +65,7 @@ Options:
   --api-key <value>       API key (or API_KEY)
   --api-url <url>         API base URL (or API_URL)
   --engine <type>         Engine type (or ENGINE)
+  --engine-port <number>  Engine port (or ENGINE_PORT)
   --key <value>           API key (or API_KEY)
   --port <number>         Port to listen on (or PORT)
   --root <path>           Root directory (or ROOT_DIRECTORY)
@@ -82,6 +83,9 @@ async function run() {
             engine: {
                 type: "string"
             },
+            "engine-port": {
+                type: "string"
+            },
             help: {
                 short: "h",
                 type: "boolean"
@@ -133,6 +137,13 @@ async function run() {
         }
         configurationOverrides.port = port;
     }
+    if (values["engine-port"]) {
+        const enginePort = Number.parseInt(values["engine-port"], 10);
+        if (Number.isNaN(enginePort) || enginePort < 1 || enginePort > 65535) {
+            throw new Error(`Invalid engine port: ${values["engine-port"]}`);
+        }
+        configurationOverrides.enginePort = enginePort;
+    }
     await startInferenceAgent({
         configurationOverrides
     });

package/dist/configuration.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ export interface Configuration {
     agentEngineType: LLMEngine;
     apiKey: string;
     apiURL: string;
+    enginePort: number;
     inferenceSourceID: ULID;
     port: number;
     rootDirectory: string;
@@ -18,6 +19,7 @@ export interface ConfigurationOverrides {
     agentEngineType?: string;
     apiKey?: string;
     apiURL?: string;
+    enginePort?: number;
     inferenceSourceID?: ULID;
     port?: number;
     rootDirectory?: string;

package/dist/index.js CHANGED Viewed

@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
 const __dirname = __pathDirname(__filename);
 import 'node:crypto';
-import { s as startInferenceAgent, a as asError } from './start-CC3HzuZU.js';
+import { s as startInferenceAgent, a as asError } from './start-BXwggMaM.js';
 import 'argon2';
 import 'node:child_process';
 import 'node:stream';

package/dist/modelManagement/ModelManager.d.ts CHANGED Viewed

@@ -11,6 +11,7 @@ interface ModelManagerEvents {
 type EngineLifecycleState = "errored" | "running" | "starting" | "stopped" | "stopping";
 export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
     readonly engine: LLMEngine;
+    readonly enginePort: number;
     readonly model: LLMModel;
     readonly parallelism: number | null;
     private uniqueName;
@@ -20,9 +21,10 @@ export declare class ModelManager extends EventEmitter<ModelManagerEvents> {
     private lifecycleState;
     private stopRequested;
     protected readonly modelsDirectory: string;
-    constructor({ contextLength, engine, logger, model, parallelism, root }: {
+    constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }: {
         contextLength?: number | null;
         engine: LLMEngine;
+        enginePort: number;
         logger: Logger;
         model: LLMModel;
         parallelism?: number | null;

package/dist/modelManagement/llamacpp.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { ProcessManager } from "@infersec/utils";
 import { ModelManager } from "./ModelManager.js";
-export declare function startLlamacpp(this: ModelManager, { targetDirectory }: {
+export declare function startLlamacpp(this: ModelManager, { enginePort, targetDirectory }: {
+    enginePort: number;
     targetDirectory: string;
 }): Promise<ProcessManager>;
-export declare function checkLlamacppHealth(): Promise<boolean>;
+export declare function checkLlamacppHealth(port?: number | string): Promise<boolean>;

package/dist/modelManagement/vllm.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { ProcessManager } from "@infersec/utils";
 import type { ModelManager } from "./ModelManager.js";
-export declare function startVLLM(this: ModelManager, { targetDirectory }: {
+export declare function startVLLM(this: ModelManager, { enginePort, targetDirectory }: {
+    enginePort: number;
     targetDirectory: string;
 }): Promise<ProcessManager>;

package/dist/{start-CC3HzuZU.js → start-BXwggMaM.js} RENAMED Viewed

@@ -15468,6 +15468,13 @@ object({
     websiteIconPath: string$1()
 });
+function readEnvIntegerOptional({ defaultValue, name }) { // Read environment variable `name` as a non-negative base-10 integer; `defaultValue` is stringified and passed to readEnvStringOptional (presumably the fallback when the variable is unset — confirm against that helper).
+    const str = readEnvStringOptional(name, `${defaultValue}`).trim(); // Surrounding whitespace is dropped before validation.
+    if (!/^\d+$/.test(str)) { // Digits only: rejects empty strings, signs, decimals and trailing junk such as "80abc" that parseInt alone would accept.
+        throw new Error(`Invalid integer value for environment variable '${name}': ${str}`);
+    }
+    return parseInt(str, 10); // Explicit radix 10; no range check (e.g. valid port numbers) is done here.
+}
 function readEnvString(name) {
     if (typeof process.env[name] !== "string") {
         throw new Error(`Expected environment variable was not present: ${name}`);
@@ -98842,26 +98849,19 @@ function requireExpressPromiseRouter () {
 var expressPromiseRouterExports = requireExpressPromiseRouter();
 var createRouter = /*@__PURE__*/getDefaultExportFromCjs(expressPromiseRouterExports);
-const VLLM_START_ARGS = [
-    "-m",
-    "vllm.entrypoints.openai.api_server",
-    "--host",
-    "0.0.0.0",
-    "--port",
-    "8000"
-];
+const VLLM_START_ARGS = ["-m", "vllm.entrypoints.openai.api_server", "--host", "0.0.0.0"];
 const VLLM_EXECUTABLE = "python3";
 const DEFAULT_VLLM_CONTEXT_LENGTH = 2048;
-async function startVLLM({ targetDirectory }) {
+async function startVLLM({ enginePort, targetDirectory }) {
     const contextLength = Math.max(1, this.contextLength ?? DEFAULT_VLLM_CONTEXT_LENGTH);
     const processManager = new ProcessManager({
         command: VLLM_EXECUTABLE,
         args: [
             ...VLLM_START_ARGS,
+            "--port",
+            String(enginePort),
             "--model",
             targetDirectory,
-            // "--host", "0.0.0.0",
-            // "--port", "8000",
             "--served-model-name",
             this.model.id,
             "--device",
@@ -98870,7 +98870,6 @@ async function startVLLM({ targetDirectory }) {
             "float16", // Use float16 to save memory on CPU
             "--max-model-len",
             String(contextLength),
-            // Add GPU/ROCm specific args if needed
             "--tensor-parallel-size",
             "1"
         ]
@@ -108377,7 +108376,7 @@ const glob = Object.assign(glob_, {
 glob.glob = glob;
 const DEFAULT_LLAMACPP_GPU_LAYERS = 999;
-const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--port", "8000", "--jinja"];
+const LLAMACPP_START_ARGS = ["--host", "0.0.0.0", "--jinja"];
 const LLAMACPP_EXECUTABLE = process.env.LLAMACPP_EXECUTABLE ?? "llama-server";
 const DEFAULT_LLAMACPP_CONTEXT_LENGTH = 131072;
 async function findQuantizedModelTarget({ model, path }) {
@@ -108409,11 +108408,19 @@ async function findQuantizedModelTarget({ model, path }) {
     }
     return matches[0];
 }
-async function startLlamacpp({ targetDirectory }) {
+async function startLlamacpp({ enginePort, targetDirectory }) {
     const target = await findQuantizedModelTarget({ model: this.model, path: targetDirectory });
     const contextLength = Math.max(1, this.contextLength ?? DEFAULT_LLAMACPP_CONTEXT_LENGTH);
     const parallelism = this.parallelism;
-    const args = [...LLAMACPP_START_ARGS, "--model", target, "--ctx-size", String(contextLength)];
+    const args = [
+        ...LLAMACPP_START_ARGS,
+        "--port",
+        String(enginePort),
+        "--model",
+        target,
+        "--ctx-size",
+        String(contextLength)
+    ];
     const gpuLayers = Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
     if (Number.isFinite(gpuLayers) && gpuLayers > 0) {
         args.push("--n-gpu-layers", String(gpuLayers));
@@ -108429,8 +108436,11 @@ async function startLlamacpp({ targetDirectory }) {
     return processManager;
 }
+// 2 hours
+const ENGINE_FETCH_TIMEOUT_MS = 7200000;
 class ModelManager extends EventEmitter {
     engine;
+    enginePort;
     model;
     parallelism;
     uniqueName;
@@ -108440,7 +108450,7 @@ class ModelManager extends EventEmitter {
     lifecycleState = "stopped";
     stopRequested = false;
     modelsDirectory;
-    constructor({ contextLength, engine, logger, model, parallelism, root }) {
+    constructor({ contextLength, engine, enginePort, logger, model, parallelism, root }) {
         super();
         // const models = getModels();
         // const targetModel = models.find(model => model.id === modelID);
@@ -108456,6 +108466,7 @@ class ModelManager extends EventEmitter {
         //     });
         // }
         this.engine = engine;
+        this.enginePort = enginePort;
         this.model = model;
         this.contextLength = typeof contextLength === "number" ? contextLength : null;
         this.parallelism = typeof parallelism === "number" ? parallelism : null;
@@ -108472,9 +108483,28 @@ class ModelManager extends EventEmitter {
     async fetchOpenAI(path, opts) {
         switch (this.engine) {
             case "llama.cpp":
-            case "vllm":
-                console.log("FETCH", path, opts);
-                return undiciExports.fetch(joinURL("http://localhost:8000", path), opts);
+            case "vllm": {
+                this.logger.debug(`Fetching from engine: ${path}`);
+                const callerSignal = opts?.signal;
+                const controller = new AbortController();
+                const timeout = setTimeout(() => {
+                    controller.abort(new Error("Inference request timeout"));
+                }, ENGINE_FETCH_TIMEOUT_MS);
+                const effectiveSignal = callerSignal ?? controller.signal;
+                try {
+                    return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
+                        ...opts,
+                        headers: {
+                            ...opts?.headers,
+                            Connection: "keep-alive"
+                        },
+                        signal: effectiveSignal
+                    });
+                }
+                finally {
+                    clearTimeout(timeout);
+                }
+            }
             // case "ollama":
             //     console.log("FETCH", path, opts);
             //     return fetch(
@@ -108683,10 +108713,12 @@ class ModelManager extends EventEmitter {
         switch (this.engine) {
             case "llama.cpp":
                 return startLlamacpp.call(this, {
+                    enginePort: this.enginePort,
                     targetDirectory: join(this.modelsDirectory, this.uniqueName)
                 });
             case "vllm":
                 return startVLLM.call(this, {
+                    enginePort: this.enginePort,
                     targetDirectory: join(this.modelsDirectory, this.uniqueName)
                 });
             default: {
@@ -119255,6 +119287,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
     let modelManager = new ModelManager({
         contextLength: conduitConfiguration.contextLength ?? null,
         engine: configuration.agentEngineType,
+        enginePort: configuration.enginePort,
         logger,
         model: conduitConfiguration.targetModel,
         parallelism: conduitConfiguration.parallelism ?? null,
@@ -119400,6 +119433,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
         modelManager = new ModelManager({
             contextLength: conduitConfiguration.contextLength ?? null,
             engine: configuration.agentEngineType,
+            enginePort: configuration.enginePort,
             logger,
             model: conduitConfiguration.targetModel,
             parallelism: conduitConfiguration.parallelism ?? null,
@@ -119598,11 +119632,18 @@ function getConfiguration({ overrides } = {}) {
     const agentEngineType = LLMEngineSchema.parse(agentEngineTypeValue);
     const apiKey = overrides?.apiKey ?? readEnvString("API_KEY");
     const apiURL = overrides?.apiURL ?? readEnvStringOptional("API_URL", "https://api.infersec.ai");
+    const enginePort = overrides?.enginePort ??
+        readEnvIntegerOptional({ defaultValue: 8000, name: "ENGINE_PORT" });
+    if (enginePort < 1 || enginePort > 65535) {
+        throw new Error(`Invalid engine port: ${enginePort}`);
+    }
     const inferenceSourceID = overrides?.inferenceSourceID ?? readEnvString("SOURCE");
-    const portValue = overrides?.port ?? readEnvStringOptional("PORT", "9505");
-    const port = Number.parseInt(String(portValue), 10);
-    if (Number.isNaN(port)) {
-        throw new Error(`Invalid port: ${portValue}`);
+    const port = overrides?.port ?? readEnvIntegerOptional({ defaultValue: 9505, name: "PORT" });
+    if (port < 1 || port > 65535) {
+        throw new Error(`Invalid port: ${port}`);
+    }
+    if (port === enginePort) {
+        throw new Error("Port and engine port cannot be the same");
     }
     const defaultRootDirectory = join(process.env.HOME ?? "/tmp", ".cache", "infersec", "iagent");
     const rootDirectory = overrides?.rootDirectory ?? readEnvStringOptional("ROOT_DIRECTORY", defaultRootDirectory);
@@ -119612,6 +119653,7 @@ function getConfiguration({ overrides } = {}) {
         agentEngineType,
         apiKey,
         apiURL,
+        enginePort,
         inferenceSourceID,
         port,
         rootDirectory,
@@ -119648,6 +119690,7 @@ async function startInferenceAgent({ configurationOverrides }) {
             resolve();
         });
     });
+    logger.info("Engine port", { port: configuration.enginePort });
     process.on("SIGINT", createSignalShutdown({ logger, shutdown }));
     process.on("SIGTERM", createSignalShutdown({ logger, shutdown }));
     abortController.signal.addEventListener("abort", () => {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.31.0",
+  "version": "1.32.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },