@infersec/conduit 1.23.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-CpPE5_K5.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-CpPE5_K5.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -199,6 +199,22 @@ function ulid$2(seedTime, prng) {
|
|
|
199
199
|
return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
|
|
200
200
|
}
|
|
201
201
|
|
|
202
|
+
/**
 * Calculates the effective context length available to a single request
 * slot.
 *
 * For llama.cpp the total context window is divided across the parallel
 * slots, so each slot receives a whole-token share (integer division) of
 * the configured window. For every other engine the full context length
 * is used unchanged.
 *
 * @param {object} options
 * @param {number | null | undefined} options.contextLength - Total context
 *   window configured for the model, in tokens.
 * @param {string} options.engine - Engine identifier; only "llama.cpp"
 *   triggers the per-slot split.
 * @param {number | null | undefined} options.parallelism - Number of
 *   parallel slots; ignored unless it is a positive number.
 * @returns {number | null} Tokens available per slot, or null when no
 *   usable context length can be determined.
 */
function getEffectiveContextLength({ contextLength, engine, parallelism }) {
    // Number.isFinite rejects null, undefined, NaN and ±Infinity in one
    // check, so callers that only test `!== null` never receive a value
    // that would turn a usage ratio into NaN.
    if (!Number.isFinite(contextLength) || contextLength <= 0) {
        return null;
    }
    if (engine === "llama.cpp" && parallelism != null && parallelism > 0) {
        // Token counts are whole numbers; floor so the advertised limit is
        // never a fractional number of tokens.
        const perSlot = Math.floor(contextLength / parallelism);
        // A share that rounds down to zero is unusable as a divisor.
        return perSlot > 0 ? perSlot : null;
    }
    return contextLength;
}
|
|
217
|
+
|
|
202
218
|
function asError(error) {
|
|
203
219
|
if (error instanceof Error) {
|
|
204
220
|
return error;
|
|
@@ -15103,7 +15119,11 @@ const ModelSchema = object({
|
|
|
15103
15119
|
id: string$1(),
|
|
15104
15120
|
object: literal("model"),
|
|
15105
15121
|
created: number$1(),
|
|
15106
|
-
owned_by: string$1()
|
|
15122
|
+
owned_by: string$1(),
|
|
15123
|
+
limit: object({
|
|
15124
|
+
context: number$1().nullable()
|
|
15125
|
+
})
|
|
15126
|
+
.optional()
|
|
15107
15127
|
});
|
|
15108
15128
|
const ModelsPageSchema = object({
|
|
15109
15129
|
object: literal("list"),
|
|
@@ -117962,15 +117982,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
117962
117982
|
const parsed = JSON.parse(payload);
|
|
117963
117983
|
if (parsed.usage) {
|
|
117964
117984
|
const usageChunk = parsed.usage;
|
|
117985
|
+
const effectiveContext = getEffectiveContextLength({
|
|
117986
|
+
contextLength,
|
|
117987
|
+
engine,
|
|
117988
|
+
parallelism
|
|
117989
|
+
});
|
|
117965
117990
|
if (usageChunk.context_usage === undefined &&
|
|
117966
117991
|
usageChunk.prompt_tokens !== undefined &&
|
|
117967
|
-
|
|
117968
|
-
|
|
117969
|
-
let totalContextSize = contextLength;
|
|
117970
|
-
if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
|
|
117971
|
-
totalContextSize = contextLength / parallelism;
|
|
117972
|
-
}
|
|
117973
|
-
usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
|
|
117992
|
+
effectiveContext !== null) {
|
|
117993
|
+
usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
|
|
117974
117994
|
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
117975
117995
|
continue;
|
|
117976
117996
|
}
|
|
@@ -118002,11 +118022,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
118002
118022
|
const promptTokens = parsed.usage?.prompt_tokens ?? null;
|
|
118003
118023
|
const totalTokens = parsed.usage?.total_tokens ?? null;
|
|
118004
118024
|
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
118025
|
+
const effectiveContextForUsage = getEffectiveContextLength({
|
|
118026
|
+
contextLength,
|
|
118027
|
+
engine,
|
|
118028
|
+
parallelism
|
|
118029
|
+
});
|
|
118005
118030
|
if (contextUsage === null &&
|
|
118006
118031
|
promptTokens !== null &&
|
|
118007
|
-
|
|
118008
|
-
|
|
118009
|
-
contextUsage = promptTokens / contextLength;
|
|
118032
|
+
effectiveContextForUsage !== null) {
|
|
118033
|
+
contextUsage = promptTokens / effectiveContextForUsage;
|
|
118010
118034
|
}
|
|
118011
118035
|
usage = {
|
|
118012
118036
|
completionTokens,
|
|
@@ -118440,6 +118464,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
118440
118464
|
},
|
|
118441
118465
|
"/v1/models": {
|
|
118442
118466
|
GET: async () => {
|
|
118467
|
+
const effectiveContextLength = getEffectiveContextLength({
|
|
118468
|
+
contextLength: modelManager.contextLength,
|
|
118469
|
+
engine: configuration.agentEngineType,
|
|
118470
|
+
parallelism: modelManager.parallelism
|
|
118471
|
+
});
|
|
118443
118472
|
return {
|
|
118444
118473
|
body: {
|
|
118445
118474
|
object: "list",
|
|
@@ -118448,7 +118477,10 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
118448
118477
|
id: conduitConfiguration.targetModel.id,
|
|
118449
118478
|
object: "model",
|
|
118450
118479
|
created: startup / 1000,
|
|
118451
|
-
owned_by: "infersec"
|
|
118480
|
+
owned_by: "infersec",
|
|
118481
|
+
limit: {
|
|
118482
|
+
context: effectiveContextLength
|
|
118483
|
+
}
|
|
118452
118484
|
}
|
|
118453
118485
|
]
|
|
118454
118486
|
},
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@infersec/conduit",
|
|
3
3
|
"description": "End user conduit agent for connecting local LLMs to the cloud.",
|
|
4
|
-
"version": "1.23.0",
|
|
4
|
+
"version": "1.24.0",
|
|
5
5
|
"bin": {
|
|
6
6
|
"infersec-conduit": "./dist/cli.js"
|
|
7
7
|
},
|
|
@@ -23,10 +23,11 @@
|
|
|
23
23
|
"format": "prettier --write .",
|
|
24
24
|
"prepublishOnly": "npm run build",
|
|
25
25
|
"start": "npm run build && node ./dist/index.js",
|
|
26
|
-
"test": "npm run test:types && npm run test:lint && npm run test:format",
|
|
26
|
+
"test": "npm run test:types && npm run test:lint && npm run test:format && npm run test:unit",
|
|
27
27
|
"test:format": "prettier --check .",
|
|
28
28
|
"test:lint": "eslint source/**/*.ts",
|
|
29
|
-
"test:types": "tsc -p tsconfig.json --noEmit"
|
|
29
|
+
"test:types": "tsc -p tsconfig.json --noEmit",
|
|
30
|
+
"test:unit": "vitest run"
|
|
30
31
|
},
|
|
31
32
|
"prettier": "@infersec/prettier",
|
|
32
33
|
"publishConfig": {
|
|
@@ -46,8 +47,10 @@
|
|
|
46
47
|
"@rollup/plugin-typescript": "^12.1.4",
|
|
47
48
|
"@types/express": "^4.17.23",
|
|
48
49
|
"@types/supertest": "^6.0.3",
|
|
50
|
+
"@vitest/coverage-v8": "^3.0.5",
|
|
49
51
|
"rollup": "^4.46.2",
|
|
50
|
-
"tslib": "^2.8.1"
|
|
52
|
+
"tslib": "^2.8.1",
|
|
53
|
+
"vitest": "^3.0.5"
|
|
51
54
|
},
|
|
52
55
|
"dependencies": {
|
|
53
56
|
"@huggingface/hub": "^2.5.2",
|