@infersec/conduit 1.22.8 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-CpPE5_K5.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-CpPE5_K5.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -199,6 +199,22 @@ function ulid$2(seedTime, prng) {
|
|
|
199
199
|
return encodeTime(seed, TIME_LEN) + encodeRandom(RANDOM_LEN, currentPRNG);
|
|
200
200
|
}
|
|
201
201
|
|
|
202
|
+
/**
|
|
203
|
+
* Calculates the effective context length per slot, accounting for
|
|
204
|
+
* parallelism when using llama.cpp. For llama.cpp, the total context
|
|
205
|
+
* window is divided across parallel slots; for other engines, the
|
|
206
|
+
* full context length is used.
|
|
207
|
+
*/
|
|
208
|
+
function getEffectiveContextLength({ contextLength, engine, parallelism }) {
|
|
209
|
+
if (contextLength === null || contextLength <= 0) {
|
|
210
|
+
return null;
|
|
211
|
+
}
|
|
212
|
+
if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
|
|
213
|
+
return contextLength / parallelism;
|
|
214
|
+
}
|
|
215
|
+
return contextLength;
|
|
216
|
+
}
|
|
217
|
+
|
|
202
218
|
function asError(error) {
|
|
203
219
|
if (error instanceof Error) {
|
|
204
220
|
return error;
|
|
@@ -14747,9 +14763,7 @@ const InferenceAgentMachineReportPayloadSchema = object({
|
|
|
14747
14763
|
machine: InferenceAgentMachineMetadataSchema
|
|
14748
14764
|
});
|
|
14749
14765
|
const InferenceAgentResponseChunkPayloadSchema = object({
|
|
14750
|
-
data: string$1()
|
|
14751
|
-
.regex(/^data:text\/plain;base64,/)
|
|
14752
|
-
.nullable(),
|
|
14766
|
+
data: string$1().nullable(),
|
|
14753
14767
|
headers: record(string$1(), string$1()).default({}).optional(),
|
|
14754
14768
|
requestID: ULIDSchema,
|
|
14755
14769
|
sequence: number$1().int().nonnegative(),
|
|
@@ -15105,7 +15119,11 @@ const ModelSchema = object({
|
|
|
15105
15119
|
id: string$1(),
|
|
15106
15120
|
object: literal("model"),
|
|
15107
15121
|
created: number$1(),
|
|
15108
|
-
owned_by: string$1()
|
|
15122
|
+
owned_by: string$1(),
|
|
15123
|
+
limit: object({
|
|
15124
|
+
context: number$1().nullable()
|
|
15125
|
+
})
|
|
15126
|
+
.optional()
|
|
15109
15127
|
});
|
|
15110
15128
|
const ModelsPageSchema = object({
|
|
15111
15129
|
object: literal("list"),
|
|
@@ -15258,9 +15276,7 @@ object({
|
|
|
15258
15276
|
status: number$1().int().min(100).max(599)
|
|
15259
15277
|
});
|
|
15260
15278
|
const ClientToServerAPIResponseSchema = object({
|
|
15261
|
-
data: string$1()
|
|
15262
|
-
.regex(/^data:text\/plain;base64,/)
|
|
15263
|
-
.nullable(),
|
|
15279
|
+
data: string$1().nullable(),
|
|
15264
15280
|
headers: record(string$1(), string$1()).default({}).optional(),
|
|
15265
15281
|
requestID: ULIDSchema,
|
|
15266
15282
|
status: number$1().int().min(100).max(599).default(200).optional()
|
|
@@ -108436,7 +108452,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108436
108452
|
apiURL,
|
|
108437
108453
|
configuration,
|
|
108438
108454
|
payload: {
|
|
108439
|
-
data:
|
|
108455
|
+
data: encodeBinaryChunk(Buffer.from(failureMessage)),
|
|
108440
108456
|
sequence: 0,
|
|
108441
108457
|
status: 502
|
|
108442
108458
|
},
|
|
@@ -108481,7 +108497,9 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108481
108497
|
let timeToFirstTokenMs = null;
|
|
108482
108498
|
if (response.body instanceof Readable) {
|
|
108483
108499
|
for await (const chunk of response.body) {
|
|
108484
|
-
const buffer = Buffer.isBuffer(chunk)
|
|
108500
|
+
const buffer = Buffer.isBuffer(chunk)
|
|
108501
|
+
? chunk
|
|
108502
|
+
: Buffer.from(chunk);
|
|
108485
108503
|
if (timeToFirstTokenMs === null) {
|
|
108486
108504
|
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108487
108505
|
}
|
|
@@ -108490,7 +108508,7 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108490
108508
|
apiURL,
|
|
108491
108509
|
configuration,
|
|
108492
108510
|
payload: {
|
|
108493
|
-
data:
|
|
108511
|
+
data: encodeBinaryChunk(buffer),
|
|
108494
108512
|
sequence,
|
|
108495
108513
|
status: response.status
|
|
108496
108514
|
},
|
|
@@ -108527,7 +108545,7 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108527
108545
|
apiURL,
|
|
108528
108546
|
configuration,
|
|
108529
108547
|
payload: {
|
|
108530
|
-
data:
|
|
108548
|
+
data: encodeBinaryChunk(Buffer.from(responsePayload)),
|
|
108531
108549
|
headers: response.headers,
|
|
108532
108550
|
sequence,
|
|
108533
108551
|
status: response.status
|
|
@@ -108572,11 +108590,8 @@ async function postChunk({ apiURL, configuration, payload, requestID }) {
|
|
|
108572
108590
|
method: "POST"
|
|
108573
108591
|
});
|
|
108574
108592
|
}
|
|
108575
|
-
function
|
|
108576
|
-
|
|
108577
|
-
return `data:text/plain;base64,${chunk.toString("base64")}`;
|
|
108578
|
-
}
|
|
108579
|
-
return `data:text/plain;base64,${Buffer.from(chunk, "utf-8").toString("base64")}`;
|
|
108593
|
+
function encodeBinaryChunk(chunk) {
|
|
108594
|
+
return chunk.toString("base64");
|
|
108580
108595
|
}
|
|
108581
108596
|
function calculateRequestBytes(body) {
|
|
108582
108597
|
if (body === null || body === undefined) {
|
|
@@ -117967,15 +117982,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
117967
117982
|
const parsed = JSON.parse(payload);
|
|
117968
117983
|
if (parsed.usage) {
|
|
117969
117984
|
const usageChunk = parsed.usage;
|
|
117985
|
+
const effectiveContext = getEffectiveContextLength({
|
|
117986
|
+
contextLength,
|
|
117987
|
+
engine,
|
|
117988
|
+
parallelism
|
|
117989
|
+
});
|
|
117970
117990
|
if (usageChunk.context_usage === undefined &&
|
|
117971
117991
|
usageChunk.prompt_tokens !== undefined &&
|
|
117972
|
-
|
|
117973
|
-
|
|
117974
|
-
let totalContextSize = contextLength;
|
|
117975
|
-
if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
|
|
117976
|
-
totalContextSize = contextLength / parallelism;
|
|
117977
|
-
}
|
|
117978
|
-
usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
|
|
117992
|
+
effectiveContext !== null) {
|
|
117993
|
+
usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
|
|
117979
117994
|
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
117980
117995
|
continue;
|
|
117981
117996
|
}
|
|
@@ -118007,11 +118022,15 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
118007
118022
|
const promptTokens = parsed.usage?.prompt_tokens ?? null;
|
|
118008
118023
|
const totalTokens = parsed.usage?.total_tokens ?? null;
|
|
118009
118024
|
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
118025
|
+
const effectiveContextForUsage = getEffectiveContextLength({
|
|
118026
|
+
contextLength,
|
|
118027
|
+
engine,
|
|
118028
|
+
parallelism
|
|
118029
|
+
});
|
|
118010
118030
|
if (contextUsage === null &&
|
|
118011
118031
|
promptTokens !== null &&
|
|
118012
|
-
|
|
118013
|
-
|
|
118014
|
-
contextUsage = promptTokens / contextLength;
|
|
118032
|
+
effectiveContextForUsage !== null) {
|
|
118033
|
+
contextUsage = promptTokens / effectiveContextForUsage;
|
|
118015
118034
|
}
|
|
118016
118035
|
usage = {
|
|
118017
118036
|
completionTokens,
|
|
@@ -118445,6 +118464,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
118445
118464
|
},
|
|
118446
118465
|
"/v1/models": {
|
|
118447
118466
|
GET: async () => {
|
|
118467
|
+
const effectiveContextLength = getEffectiveContextLength({
|
|
118468
|
+
contextLength: modelManager.contextLength,
|
|
118469
|
+
engine: configuration.agentEngineType,
|
|
118470
|
+
parallelism: modelManager.parallelism
|
|
118471
|
+
});
|
|
118448
118472
|
return {
|
|
118449
118473
|
body: {
|
|
118450
118474
|
object: "list",
|
|
@@ -118453,7 +118477,10 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
118453
118477
|
id: conduitConfiguration.targetModel.id,
|
|
118454
118478
|
object: "model",
|
|
118455
118479
|
created: startup / 1000,
|
|
118456
|
-
owned_by: "infersec"
|
|
118480
|
+
owned_by: "infersec",
|
|
118481
|
+
limit: {
|
|
118482
|
+
context: effectiveContextLength
|
|
118483
|
+
}
|
|
118457
118484
|
}
|
|
118458
118485
|
]
|
|
118459
118486
|
},
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@infersec/conduit",
|
|
3
3
|
"description": "End user conduit agent for connecting local LLMs to the cloud.",
|
|
4
|
-
"version": "1.22.8",
|
|
4
|
+
"version": "1.24.0",
|
|
5
5
|
"bin": {
|
|
6
6
|
"infersec-conduit": "./dist/cli.js"
|
|
7
7
|
},
|
|
@@ -23,10 +23,11 @@
|
|
|
23
23
|
"format": "prettier --write .",
|
|
24
24
|
"prepublishOnly": "npm run build",
|
|
25
25
|
"start": "npm run build && node ./dist/index.js",
|
|
26
|
-
"test": "npm run test:types && npm run test:lint && npm run test:format",
|
|
26
|
+
"test": "npm run test:types && npm run test:lint && npm run test:format && npm run test:unit",
|
|
27
27
|
"test:format": "prettier --check .",
|
|
28
28
|
"test:lint": "eslint source/**/*.ts",
|
|
29
|
-
"test:types": "tsc -p tsconfig.json --noEmit"
|
|
29
|
+
"test:types": "tsc -p tsconfig.json --noEmit",
|
|
30
|
+
"test:unit": "vitest run"
|
|
30
31
|
},
|
|
31
32
|
"prettier": "@infersec/prettier",
|
|
32
33
|
"publishConfig": {
|
|
@@ -46,8 +47,10 @@
|
|
|
46
47
|
"@rollup/plugin-typescript": "^12.1.4",
|
|
47
48
|
"@types/express": "^4.17.23",
|
|
48
49
|
"@types/supertest": "^6.0.3",
|
|
50
|
+
"@vitest/coverage-v8": "^3.0.5",
|
|
49
51
|
"rollup": "^4.46.2",
|
|
50
|
-
"tslib": "^2.8.1"
|
|
52
|
+
"tslib": "^2.8.1",
|
|
53
|
+
"vitest": "^3.0.5"
|
|
51
54
|
},
|
|
52
55
|
"dependencies": {
|
|
53
56
|
"@huggingface/hub": "^2.5.2",
|