@infersec/conduit 1.22.8 → 1.24.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-Cqvc5hOj.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-CpPE5_K5.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-Cqvc5hOj.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-CpPE5_K5.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -199,6 +199,22 @@ function ulid$2(seedTime, prng) {
199
199
  return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
200
200
  }
201
201
 
/**
 * Calculates the effective context length available to a single request slot.
 *
 * For llama.cpp the total context window is shared between parallel slots,
 * and each slot holds a whole number of tokens, so the per-slot figure is
 * rounded down. For every other engine the full context length is used.
 *
 * @param {object} options
 * @param {number | null} options.contextLength - Total context window, in tokens.
 * @param {string} options.engine - Engine identifier; only "llama.cpp" is split per slot.
 * @param {number | null} options.parallelism - Number of parallel slots.
 * @returns {number | null} Tokens available per slot, or null when no usable
 *   value can be derived (missing, non-positive or NaN context length, or a
 *   window too small to give each slot at least one token).
 */
function getEffectiveContextLength({ contextLength, engine, parallelism }) {
  // Written as a negated `> 0` so that null, undefined and NaN are rejected
  // together with zero and negative values; callers divide by the result.
  if (!(contextLength > 0)) {
    return null;
  }
  if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
    const tokensPerSlot = Math.floor(contextLength / parallelism);
    // More slots than tokens would floor to 0 and make callers divide by zero.
    return tokensPerSlot > 0 ? tokensPerSlot : null;
  }
  return contextLength;
}
217
+
202
218
  function asError(error) {
203
219
  if (error instanceof Error) {
204
220
  return error;
@@ -14747,9 +14763,7 @@ const InferenceAgentMachineReportPayloadSchema = object({
14747
14763
  machine: InferenceAgentMachineMetadataSchema
14748
14764
  });
14749
14765
  const InferenceAgentResponseChunkPayloadSchema = object({
14750
- data: string$1()
14751
- .regex(/^data:text\/plain;base64,/)
14752
- .nullable(),
14766
+ data: string$1().nullable(),
14753
14767
  headers: record(string$1(), string$1()).default({}).optional(),
14754
14768
  requestID: ULIDSchema,
14755
14769
  sequence: number$1().int().nonnegative(),
@@ -15105,7 +15119,11 @@ const ModelSchema = object({
15105
15119
  id: string$1(),
15106
15120
  object: literal("model"),
15107
15121
  created: number$1(),
15108
- owned_by: string$1()
15122
+ owned_by: string$1(),
15123
+ limit: object({
15124
+ context: number$1().nullable()
15125
+ })
15126
+ .optional()
15109
15127
  });
15110
15128
  const ModelsPageSchema = object({
15111
15129
  object: literal("list"),
@@ -15258,9 +15276,7 @@ object({
15258
15276
  status: number$1().int().min(100).max(599)
15259
15277
  });
15260
15278
  const ClientToServerAPIResponseSchema = object({
15261
- data: string$1()
15262
- .regex(/^data:text\/plain;base64,/)
15263
- .nullable(),
15279
+ data: string$1().nullable(),
15264
15280
  headers: record(string$1(), string$1()).default({}).optional(),
15265
15281
  requestID: ULIDSchema,
15266
15282
  status: number$1().int().min(100).max(599).default(200).optional()
@@ -108436,7 +108452,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
108436
108452
  apiURL,
108437
108453
  configuration,
108438
108454
  payload: {
108439
- data: encodeTextChunk(failureMessage),
108455
+ data: encodeBinaryChunk(Buffer.from(failureMessage)),
108440
108456
  sequence: 0,
108441
108457
  status: 502
108442
108458
  },
@@ -108481,7 +108497,9 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
108481
108497
  let timeToFirstTokenMs = null;
108482
108498
  if (response.body instanceof Readable) {
108483
108499
  for await (const chunk of response.body) {
108484
- const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
108500
+ const buffer = Buffer.isBuffer(chunk)
108501
+ ? chunk
108502
+ : Buffer.from(chunk);
108485
108503
  if (timeToFirstTokenMs === null) {
108486
108504
  timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
108487
108505
  }
@@ -108490,7 +108508,7 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
108490
108508
  apiURL,
108491
108509
  configuration,
108492
108510
  payload: {
108493
- data: encodeTextChunk(buffer),
108511
+ data: encodeBinaryChunk(buffer),
108494
108512
  sequence,
108495
108513
  status: response.status
108496
108514
  },
@@ -108527,7 +108545,7 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
108527
108545
  apiURL,
108528
108546
  configuration,
108529
108547
  payload: {
108530
- data: encodeTextChunk(responsePayload),
108548
+ data: encodeBinaryChunk(Buffer.from(responsePayload)),
108531
108549
  headers: response.headers,
108532
108550
  sequence,
108533
108551
  status: response.status
@@ -108572,11 +108590,8 @@ async function postChunk({ apiURL, configuration, payload, requestID }) {
108572
108590
  method: "POST"
108573
108591
  });
108574
108592
  }
108575
- function encodeTextChunk(chunk) {
108576
- if (Buffer.isBuffer(chunk)) {
108577
- return `data:text/plain;base64,${chunk.toString("base64")}`;
108578
- }
108579
- return `data:text/plain;base64,${Buffer.from(chunk, "utf-8").toString("base64")}`;
/**
 * Encodes a response chunk as a bare base64 string (no data-URI prefix).
 *
 * @param {Buffer | Uint8Array | string} chunk - Bytes to encode; a string is
 *   converted to its UTF-8 bytes first.
 * @returns {string} Base64 representation of the chunk's bytes.
 */
function encodeBinaryChunk(chunk) {
  // Only Buffer#toString understands an encoding argument: a plain string
  // would be returned unencoded and a Uint8Array as a comma-joined list, so
  // anything that is not already a Buffer is normalised first.
  const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
  return bytes.toString("base64");
}
108581
108596
  function calculateRequestBytes(body) {
108582
108597
  if (body === null || body === undefined) {
@@ -117967,15 +117982,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
117967
117982
  const parsed = JSON.parse(payload);
117968
117983
  if (parsed.usage) {
117969
117984
  const usageChunk = parsed.usage;
117985
+ const effectiveContext = getEffectiveContextLength({
117986
+ contextLength,
117987
+ engine,
117988
+ parallelism
117989
+ });
117970
117990
  if (usageChunk.context_usage === undefined &&
117971
117991
  usageChunk.prompt_tokens !== undefined &&
117972
- contextLength !== null &&
117973
- contextLength > 0) {
117974
- let totalContextSize = contextLength;
117975
- if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
117976
- totalContextSize = contextLength / parallelism;
117977
- }
117978
- usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
117992
+ effectiveContext !== null) {
117993
+ usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
117979
117994
  modifiedLines.push("data: " + JSON.stringify(parsed));
117980
117995
  continue;
117981
117996
  }
@@ -118007,11 +118022,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
118007
118022
  const promptTokens = parsed.usage?.prompt_tokens ?? null;
118008
118023
  const totalTokens = parsed.usage?.total_tokens ?? null;
118009
118024
  let contextUsage = parsed.usage?.context_usage ?? null;
118025
+ const effectiveContextForUsage = getEffectiveContextLength({
118026
+ contextLength,
118027
+ engine,
118028
+ parallelism
118029
+ });
118010
118030
  if (contextUsage === null &&
118011
118031
  promptTokens !== null &&
118012
- contextLength !== null &&
118013
- contextLength > 0) {
118014
- contextUsage = promptTokens / contextLength;
118032
+ effectiveContextForUsage !== null) {
118033
+ contextUsage = promptTokens / effectiveContextForUsage;
118015
118034
  }
118016
118035
  usage = {
118017
118036
  completionTokens,
@@ -118445,6 +118464,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
118445
118464
  },
118446
118465
  "/v1/models": {
118447
118466
  GET: async () => {
118467
+ const effectiveContextLength = getEffectiveContextLength({
118468
+ contextLength: modelManager.contextLength,
118469
+ engine: configuration.agentEngineType,
118470
+ parallelism: modelManager.parallelism
118471
+ });
118448
118472
  return {
118449
118473
  body: {
118450
118474
  object: "list",
@@ -118453,7 +118477,10 @@ async function createApplication({ abortController, apiClient, configuration, lo
118453
118477
  id: conduitConfiguration.targetModel.id,
118454
118478
  object: "model",
118455
118479
  created: startup / 1000,
118456
- owned_by: "infersec"
118480
+ owned_by: "infersec",
118481
+ limit: {
118482
+ context: effectiveContextLength
118483
+ }
118457
118484
  }
118458
118485
  ]
118459
118486
  },
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.22.8",
4
+ "version": "1.24.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },
@@ -23,10 +23,11 @@
23
23
  "format": "prettier --write .",
24
24
  "prepublishOnly": "npm run build",
25
25
  "start": "npm run build && node ./dist/index.js",
26
- "test": "npm run test:types && npm run test:lint && npm run test:format",
26
+ "test": "npm run test:types && npm run test:lint && npm run test:format && npm run test:unit",
27
27
  "test:format": "prettier --check .",
28
28
  "test:lint": "eslint source/**/*.ts",
29
- "test:types": "tsc -p tsconfig.json --noEmit"
29
+ "test:types": "tsc -p tsconfig.json --noEmit",
30
+ "test:unit": "vitest run"
30
31
  },
31
32
  "prettier": "@infersec/prettier",
32
33
  "publishConfig": {
@@ -46,8 +47,10 @@
46
47
  "@rollup/plugin-typescript": "^12.1.4",
47
48
  "@types/express": "^4.17.23",
48
49
  "@types/supertest": "^6.0.3",
50
+ "@vitest/coverage-v8": "^3.0.5",
49
51
  "rollup": "^4.46.2",
50
- "tslib": "^2.8.1"
52
+ "tslib": "^2.8.1",
53
+ "vitest": "^3.0.5"
51
54
  },
52
55
  "dependencies": {
53
56
  "@huggingface/hub": "^2.5.2",