@infersec/conduit 1.23.0 → 1.24.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-MoDI8K51.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-CpPE5_K5.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-MoDI8K51.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-CpPE5_K5.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -199,6 +199,22 @@ function ulid$2(seedTime, prng) {
199
199
  return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
200
200
  }
201
201
 
202
+ /**
203
+ * Calculates the effective context length per slot, accounting for
204
+ * parallelism when using llama.cpp. For llama.cpp, the total context
205
+ * window is divided across parallel slots; for other engines, the
206
+ * full context length is used.
207
+ */
208
+ function getEffectiveContextLength({ contextLength, engine, parallelism }) {
209
+ if (contextLength === null || contextLength <= 0) {
210
+ return null;
211
+ }
212
+ if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
213
+ return contextLength / parallelism;
214
+ }
215
+ return contextLength;
216
+ }
217
+
202
218
  function asError(error) {
203
219
  if (error instanceof Error) {
204
220
  return error;
@@ -15103,7 +15119,11 @@ const ModelSchema = object({
15103
15119
  id: string$1(),
15104
15120
  object: literal("model"),
15105
15121
  created: number$1(),
15106
- owned_by: string$1()
15122
+ owned_by: string$1(),
15123
+ limit: object({
15124
+ context: number$1().nullable()
15125
+ })
15126
+ .optional()
15107
15127
  });
15108
15128
  const ModelsPageSchema = object({
15109
15129
  object: literal("list"),
@@ -117962,15 +117982,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
117962
117982
  const parsed = JSON.parse(payload);
117963
117983
  if (parsed.usage) {
117964
117984
  const usageChunk = parsed.usage;
117985
+ const effectiveContext = getEffectiveContextLength({
117986
+ contextLength,
117987
+ engine,
117988
+ parallelism
117989
+ });
117965
117990
  if (usageChunk.context_usage === undefined &&
117966
117991
  usageChunk.prompt_tokens !== undefined &&
117967
- contextLength !== null &&
117968
- contextLength > 0) {
117969
- let totalContextSize = contextLength;
117970
- if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
117971
- totalContextSize = contextLength / parallelism;
117972
- }
117973
- usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
117992
+ effectiveContext !== null) {
117993
+ usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
117974
117994
  modifiedLines.push("data: " + JSON.stringify(parsed));
117975
117995
  continue;
117976
117996
  }
@@ -118002,11 +118022,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
118002
118022
  const promptTokens = parsed.usage?.prompt_tokens ?? null;
118003
118023
  const totalTokens = parsed.usage?.total_tokens ?? null;
118004
118024
  let contextUsage = parsed.usage?.context_usage ?? null;
118025
+ const effectiveContextForUsage = getEffectiveContextLength({
118026
+ contextLength,
118027
+ engine,
118028
+ parallelism
118029
+ });
118005
118030
  if (contextUsage === null &&
118006
118031
  promptTokens !== null &&
118007
- contextLength !== null &&
118008
- contextLength > 0) {
118009
- contextUsage = promptTokens / contextLength;
118032
+ effectiveContextForUsage !== null) {
118033
+ contextUsage = promptTokens / effectiveContextForUsage;
118010
118034
  }
118011
118035
  usage = {
118012
118036
  completionTokens,
@@ -118440,6 +118464,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
118440
118464
  },
118441
118465
  "/v1/models": {
118442
118466
  GET: async () => {
118467
+ const effectiveContextLength = getEffectiveContextLength({
118468
+ contextLength: modelManager.contextLength,
118469
+ engine: configuration.agentEngineType,
118470
+ parallelism: modelManager.parallelism
118471
+ });
118443
118472
  return {
118444
118473
  body: {
118445
118474
  object: "list",
@@ -118448,7 +118477,10 @@ async function createApplication({ abortController, apiClient, configuration, lo
118448
118477
  id: conduitConfiguration.targetModel.id,
118449
118478
  object: "model",
118450
118479
  created: startup / 1000,
118451
- owned_by: "infersec"
118480
+ owned_by: "infersec",
118481
+ limit: {
118482
+ context: effectiveContextLength
118483
+ }
118452
118484
  }
118453
118485
  ]
118454
118486
  },
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.23.0",
4
+ "version": "1.24.1",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },
@@ -23,10 +23,11 @@
23
23
  "format": "prettier --write .",
24
24
  "prepublishOnly": "npm run build",
25
25
  "start": "npm run build && node ./dist/index.js",
26
- "test": "npm run test:types && npm run test:lint && npm run test:format",
26
+ "test": "npm run test:types && npm run test:lint && npm run test:format && npm run test:unit",
27
27
  "test:format": "prettier --check .",
28
28
  "test:lint": "eslint source/**/*.ts",
29
- "test:types": "tsc -p tsconfig.json --noEmit"
29
+ "test:types": "tsc -p tsconfig.json --noEmit",
30
+ "test:unit": "vitest run"
30
31
  },
31
32
  "prettier": "@infersec/prettier",
32
33
  "publishConfig": {
@@ -46,8 +47,10 @@
46
47
  "@rollup/plugin-typescript": "^12.1.4",
47
48
  "@types/express": "^4.17.23",
48
49
  "@types/supertest": "^6.0.3",
50
+ "@vitest/coverage-v8": "^3.0.5",
49
51
  "rollup": "^4.46.2",
50
- "tslib": "^2.8.1"
52
+ "tslib": "^2.8.1",
53
+ "vitest": "^3.0.5"
51
54
  },
52
55
  "dependencies": {
53
56
  "@huggingface/hub": "^2.5.2",