npm - @infersec/conduit - Versions diffs - 1.22.8 → 1.24.0 - Mend

@infersec/conduit 1.22.8 → 1.24.0

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (5)

package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/dist/{start-Cqvc5hOj.js → start-CpPE5_K5.js} +54 -27
package/dist/utils/__tests__/engineMetrics.test.d.ts +1 -0
package/package.json +7 -4

package/dist/cli.js CHANGED Viewed

@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
 import { parseArgs } from 'node:util';
 import 'node:crypto';
-import { a as asError, s as startInferenceAgent } from './start-Cqvc5hOj.js';
+import { a as asError, s as startInferenceAgent } from './start-CpPE5_K5.js';
 import 'argon2';
 import 'node:child_process';
 import 'node:stream';

package/dist/index.js CHANGED Viewed

@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
 const __dirname = __pathDirname(__filename);
 import 'node:crypto';
-import { s as startInferenceAgent, a as asError } from './start-Cqvc5hOj.js';
+import { s as startInferenceAgent, a as asError } from './start-CpPE5_K5.js';
 import 'argon2';
 import 'node:child_process';
 import 'node:stream';

package/dist/{start-Cqvc5hOj.js → start-CpPE5_K5.js} RENAMED Viewed

@@ -199,6 +199,22 @@ function ulid$2(seedTime, prng) {
     return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
 }
+/**
+ * Calculates the effective context length per slot, accounting for
+ * parallelism when using llama.cpp. For llama.cpp, the total context
+ * window is divided across parallel slots; for other engines, the
+ * full context length is used.
+ */
+function getEffectiveContextLength({ contextLength, engine, parallelism }) {
+    if (contextLength === null || contextLength <= 0) {
+        return null;
+    }
+    if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
+        return contextLength / parallelism;
+    }
+    return contextLength;
+}
 function asError(error) {
     if (error instanceof Error) {
         return error;
@@ -14747,9 +14763,7 @@ const InferenceAgentMachineReportPayloadSchema = object({
     machine: InferenceAgentMachineMetadataSchema
 });
 const InferenceAgentResponseChunkPayloadSchema = object({
-    data: string$1()
-        .regex(/^data:text\/plain;base64,/)
-        .nullable(),
+    data: string$1().nullable(),
     headers: record(string$1(), string$1()).default({}).optional(),
     requestID: ULIDSchema,
     sequence: number$1().int().nonnegative(),
@@ -15105,7 +15119,11 @@ const ModelSchema = object({
     id: string$1(),
     object: literal("model"),
     created: number$1(),
-    owned_by: string$1()
+    owned_by: string$1(),
+    limit: object({
+        context: number$1().nullable()
+    })
+        .optional()
 });
 const ModelsPageSchema = object({
     object: literal("list"),
@@ -15258,9 +15276,7 @@ object({
     status: number$1().int().min(100).max(599)
 });
 const ClientToServerAPIResponseSchema = object({
-    data: string$1()
-        .regex(/^data:text\/plain;base64,/)
-        .nullable(),
+    data: string$1().nullable(),
     headers: record(string$1(), string$1()).default({}).optional(),
     requestID: ULIDSchema,
     status: number$1().int().min(100).max(599).default(200).optional()
@@ -108436,7 +108452,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
             apiURL,
             configuration,
             payload: {
-                data: encodeTextChunk(failureMessage),
+                data: encodeBinaryChunk(Buffer.from(failureMessage)),
                 sequence: 0,
                 status: 502
             },
@@ -108481,7 +108497,9 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
     let timeToFirstTokenMs = null;
     if (response.body instanceof Readable) {
         for await (const chunk of response.body) {
-            const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+            const buffer = Buffer.isBuffer(chunk)
+                ? chunk
+                : Buffer.from(chunk);
             if (timeToFirstTokenMs === null) {
                 timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
             }
@@ -108490,7 +108508,7 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
                 apiURL,
                 configuration,
                 payload: {
-                    data: encodeTextChunk(buffer),
+                    data: encodeBinaryChunk(buffer),
                     sequence,
                     status: response.status
                 },
@@ -108527,7 +108545,7 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
         apiURL,
         configuration,
         payload: {
-            data: encodeTextChunk(responsePayload),
+            data: encodeBinaryChunk(Buffer.from(responsePayload)),
             headers: response.headers,
             sequence,
             status: response.status
@@ -108572,11 +108590,8 @@ async function postChunk({ apiURL, configuration, payload, requestID }) {
         method: "POST"
     });
 }
-function encodeTextChunk(chunk) {
-    if (Buffer.isBuffer(chunk)) {
-        return `data:text/plain;base64,${chunk.toString("base64")}`;
-    }
-    return `data:text/plain;base64,${Buffer.from(chunk, "utf-8").toString("base64")}`;
+function encodeBinaryChunk(chunk) {
+    return chunk.toString("base64");
 }
 function calculateRequestBytes(body) {
     if (body === null || body === undefined) {
@@ -117967,15 +117982,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
                 const parsed = JSON.parse(payload);
                 if (parsed.usage) {
                     const usageChunk = parsed.usage;
+                    const effectiveContext = getEffectiveContextLength({
+                        contextLength,
+                        engine,
+                        parallelism
+                    });
                     if (usageChunk.context_usage === undefined &&
                         usageChunk.prompt_tokens !== undefined &&
-                        contextLength !== null &&
-                        contextLength > 0) {
-                        let totalContextSize = contextLength;
-                        if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
-                            totalContextSize = contextLength / parallelism;
-                        }
-                        usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
+                        effectiveContext !== null) {
+                        usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
                         modifiedLines.push("data: " + JSON.stringify(parsed));
                         continue;
                     }
@@ -118007,11 +118022,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
                     const promptTokens = parsed.usage?.prompt_tokens ?? null;
                     const totalTokens = parsed.usage?.total_tokens ?? null;
                     let contextUsage = parsed.usage?.context_usage ?? null;
+                    const effectiveContextForUsage = getEffectiveContextLength({
+                        contextLength,
+                        engine,
+                        parallelism
+                    });
                     if (contextUsage === null &&
                         promptTokens !== null &&
-                        contextLength !== null &&
-                        contextLength > 0) {
-                        contextUsage = promptTokens / contextLength;
+                        effectiveContextForUsage !== null) {
+                        contextUsage = promptTokens / effectiveContextForUsage;
                     }
                     usage = {
                         completionTokens,
@@ -118445,6 +118464,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
             },
             "/v1/models": {
                 GET: async () => {
+                    const effectiveContextLength = getEffectiveContextLength({
+                        contextLength: modelManager.contextLength,
+                        engine: configuration.agentEngineType,
+                        parallelism: modelManager.parallelism
+                    });
                     return {
                         body: {
                             object: "list",
@@ -118453,7 +118477,10 @@ async function createApplication({ abortController, apiClient, configuration, lo
                                     id: conduitConfiguration.targetModel.id,
                                     object: "model",
                                     created: startup / 1000,
-                                    owned_by: "infersec"
+                                    owned_by: "infersec",
+                                    limit: {
+                                        context: effectiveContextLength
+                                    }
                                 }
                             ]
                         },

package/dist/utils/__tests__/engineMetrics.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.22.8",
+  "version": "1.24.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },
@@ -23,10 +23,11 @@
     "format": "prettier --write .",
     "prepublishOnly": "npm run build",
     "start": "npm run build && node ./dist/index.js",
-    "test": "npm run test:types && npm run test:lint && npm run test:format",
+    "test": "npm run test:types && npm run test:lint && npm run test:format && npm run test:unit",
     "test:format": "prettier --check .",
     "test:lint": "eslint source/**/*.ts",
-    "test:types": "tsc -p tsconfig.json --noEmit"
+    "test:types": "tsc -p tsconfig.json --noEmit",
+    "test:unit": "vitest run"
   },
   "prettier": "@infersec/prettier",
   "publishConfig": {
@@ -46,8 +47,10 @@
     "@rollup/plugin-typescript": "^12.1.4",
     "@types/express": "^4.17.23",
     "@types/supertest": "^6.0.3",
+    "@vitest/coverage-v8": "^3.0.5",
     "rollup": "^4.46.2",
-    "tslib": "^2.8.1"
+    "tslib": "^2.8.1",
+    "vitest": "^3.0.5"
   },
   "dependencies": {
     "@huggingface/hub": "^2.5.2",