@infersec/conduit 1.24.3 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-DBk2G4SP.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-DBk2G4SP.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -14914,6 +14914,23 @@ const API_SERVICE_CONDUIT_API_REFERENCE = {
|
|
|
14914
14914
|
type: "rest"
|
|
14915
14915
|
}
|
|
14916
14916
|
}
|
|
14917
|
+
},
|
|
14918
|
+
"/conduit/api/v1/source/:sourceID/requests/:requestID/stream": {
|
|
14919
|
+
POST: {
|
|
14920
|
+
auth: {
|
|
14921
|
+
type: "api-key"
|
|
14922
|
+
},
|
|
14923
|
+
parameters: {
|
|
14924
|
+
requestID: ULIDSchema,
|
|
14925
|
+
sourceID: ULIDSchema
|
|
14926
|
+
},
|
|
14927
|
+
response: {
|
|
14928
|
+
schema: object({
|
|
14929
|
+
acknowledged: literal(true)
|
|
14930
|
+
}),
|
|
14931
|
+
type: "rest"
|
|
14932
|
+
}
|
|
14933
|
+
}
|
|
14917
14934
|
}
|
|
14918
14935
|
};
|
|
14919
14936
|
|
|
@@ -17601,6 +17618,9 @@ function createLogger({ attributes = {}, name }) {
|
|
|
17601
17618
|
child: (attributes) => {
|
|
17602
17619
|
return buildLogger(logger.child(processAttributes(attributes)));
|
|
17603
17620
|
},
|
|
17621
|
+
debug: (message, attributes) => {
|
|
17622
|
+
logger.debug(processAttributes(attributes ?? {}), message);
|
|
17623
|
+
},
|
|
17604
17624
|
error: (message, attributes) => {
|
|
17605
17625
|
logger.error(processAttributes(attributes ?? {}), message);
|
|
17606
17626
|
},
|
|
@@ -97840,7 +97860,6 @@ function implementSingleEndpoint({ endpoint, handler, method, mount, route }) {
|
|
|
97840
97860
|
: []), (async (req, res) => {
|
|
97841
97861
|
res.locals.requestID = ulid$2();
|
|
97842
97862
|
try {
|
|
97843
|
-
console.log("HANDLE REQ", method, route, req.params);
|
|
97844
97863
|
// Extract and validate parameters with proper type assertion
|
|
97845
97864
|
const parameters = endpoint.parameters
|
|
97846
97865
|
? validateAndExtract("params", req.params, endpoint.parameters)
|
|
@@ -97888,7 +97907,6 @@ function implementSingleEndpoint({ endpoint, handler, method, mount, route }) {
|
|
|
97888
97907
|
res.status(output.status).send(output.statusText);
|
|
97889
97908
|
return;
|
|
97890
97909
|
}
|
|
97891
|
-
console.log("GOT RESPONSE", method, route, output.status, typeof output.body);
|
|
97892
97910
|
res.status(output.status);
|
|
97893
97911
|
if (endpoint.response.type === "text-stream") {
|
|
97894
97912
|
if (!res.getHeader("content-type")) {
|
|
@@ -108360,7 +108378,8 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
108360
108378
|
onRequestEnd,
|
|
108361
108379
|
onRequestStart,
|
|
108362
108380
|
reportMetrics,
|
|
108363
|
-
request: payload
|
|
108381
|
+
request: payload,
|
|
108382
|
+
signal
|
|
108364
108383
|
}).catch(error => {
|
|
108365
108384
|
logger.error("SSE request handler failed", {
|
|
108366
108385
|
error: asError(error),
|
|
@@ -108394,7 +108413,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
108394
108413
|
}
|
|
108395
108414
|
}
|
|
108396
108415
|
}
|
|
108397
|
-
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request }) {
|
|
108416
|
+
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
108398
108417
|
function reportMetricsSafe(payload) {
|
|
108399
108418
|
reportMetrics(payload).catch(error => {
|
|
108400
108419
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -108414,7 +108433,8 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108414
108433
|
logger,
|
|
108415
108434
|
requestID: request.requestID,
|
|
108416
108435
|
requestStartedAt,
|
|
108417
|
-
response
|
|
108436
|
+
response,
|
|
108437
|
+
signal
|
|
108418
108438
|
});
|
|
108419
108439
|
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108420
108440
|
const totalTokens = 0;
|
|
@@ -108454,26 +108474,23 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108454
108474
|
durationMs: latencyMs,
|
|
108455
108475
|
totalTokens
|
|
108456
108476
|
});
|
|
108457
|
-
await postChunk({
|
|
108477
|
+
const streamHandler = await sendChunkStream({
|
|
108458
108478
|
apiURL,
|
|
108459
108479
|
configuration,
|
|
108460
|
-
|
|
108461
|
-
|
|
108462
|
-
sequence: 0,
|
|
108463
|
-
status: 502
|
|
108464
|
-
},
|
|
108465
|
-
requestID: request.requestID
|
|
108480
|
+
requestID: request.requestID,
|
|
108481
|
+
logger
|
|
108466
108482
|
});
|
|
108467
|
-
await postChunk({
|
|
108468
|
-
|
|
108469
|
-
|
|
108470
|
-
|
|
108471
|
-
|
|
108472
|
-
|
|
108473
|
-
|
|
108474
|
-
|
|
108475
|
-
|
|
108483
|
+
await streamHandler.sendChunk({
|
|
108484
|
+
data: encodeBinaryChunk(Buffer.from(failureMessage)),
|
|
108485
|
+
sequence: 0,
|
|
108486
|
+
status: 502
|
|
108487
|
+
});
|
|
108488
|
+
await streamHandler.sendChunk({
|
|
108489
|
+
data: null,
|
|
108490
|
+
sequence: 1,
|
|
108491
|
+
status: 502
|
|
108476
108492
|
});
|
|
108493
|
+
await streamHandler.end();
|
|
108477
108494
|
reportMetricsSafe({
|
|
108478
108495
|
bytes: requestBytes + failureBytes,
|
|
108479
108496
|
completionTokens: 0,
|
|
@@ -108497,12 +108514,22 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108497
108514
|
await onRequestEnd?.(request);
|
|
108498
108515
|
}
|
|
108499
108516
|
}
|
|
108500
|
-
async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response }) {
|
|
108517
|
+
async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response, signal }) {
|
|
108501
108518
|
let sequence = 0;
|
|
108502
108519
|
let responseBytes = 0;
|
|
108503
108520
|
let timeToFirstTokenMs = null;
|
|
108521
|
+
const streamHandler = await sendChunkStream({
|
|
108522
|
+
apiURL,
|
|
108523
|
+
configuration,
|
|
108524
|
+
requestID,
|
|
108525
|
+
logger
|
|
108526
|
+
});
|
|
108504
108527
|
if (response.body instanceof Readable) {
|
|
108505
108528
|
for await (const chunk of response.body) {
|
|
108529
|
+
if (signal?.aborted) {
|
|
108530
|
+
streamHandler.abort();
|
|
108531
|
+
throw new Error("Request cancelled");
|
|
108532
|
+
}
|
|
108506
108533
|
const buffer = Buffer.isBuffer(chunk)
|
|
108507
108534
|
? chunk
|
|
108508
108535
|
: Buffer.from(chunk);
|
|
@@ -108510,28 +108537,19 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108510
108537
|
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108511
108538
|
}
|
|
108512
108539
|
responseBytes += buffer.length;
|
|
108513
|
-
await postChunk({
|
|
108514
|
-
|
|
108515
|
-
|
|
108516
|
-
|
|
108517
|
-
data: encodeBinaryChunk(buffer),
|
|
108518
|
-
sequence,
|
|
108519
|
-
status: response.status
|
|
108520
|
-
},
|
|
108521
|
-
requestID
|
|
108540
|
+
await streamHandler.sendChunk({
|
|
108541
|
+
data: encodeBinaryChunk(buffer),
|
|
108542
|
+
sequence,
|
|
108543
|
+
status: response.status
|
|
108522
108544
|
});
|
|
108523
108545
|
sequence += 1;
|
|
108524
108546
|
}
|
|
108525
|
-
await postChunk({
|
|
108526
|
-
|
|
108527
|
-
|
|
108528
|
-
|
|
108529
|
-
data: null,
|
|
108530
|
-
sequence,
|
|
108531
|
-
status: response.status
|
|
108532
|
-
},
|
|
108533
|
-
requestID
|
|
108547
|
+
await streamHandler.sendChunk({
|
|
108548
|
+
data: null,
|
|
108549
|
+
sequence,
|
|
108550
|
+
status: response.status
|
|
108534
108551
|
});
|
|
108552
|
+
await streamHandler.end();
|
|
108535
108553
|
return {
|
|
108536
108554
|
responseBytes,
|
|
108537
108555
|
status: response.status,
|
|
@@ -108547,27 +108565,18 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108547
108565
|
responseBytes = Buffer.byteLength(responsePayload, "utf8");
|
|
108548
108566
|
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108549
108567
|
}
|
|
108550
|
-
await postChunk({
|
|
108551
|
-
|
|
108552
|
-
|
|
108553
|
-
|
|
108554
|
-
|
|
108555
|
-
headers: response.headers,
|
|
108556
|
-
sequence,
|
|
108557
|
-
status: response.status
|
|
108558
|
-
},
|
|
108559
|
-
requestID
|
|
108568
|
+
await streamHandler.sendChunk({
|
|
108569
|
+
data: encodeBinaryChunk(Buffer.from(responsePayload)),
|
|
108570
|
+
headers: response.headers,
|
|
108571
|
+
sequence,
|
|
108572
|
+
status: response.status
|
|
108560
108573
|
});
|
|
108561
|
-
await postChunk({
|
|
108562
|
-
|
|
108563
|
-
|
|
108564
|
-
|
|
108565
|
-
data: null,
|
|
108566
|
-
sequence: sequence + 1,
|
|
108567
|
-
status: response.status
|
|
108568
|
-
},
|
|
108569
|
-
requestID
|
|
108574
|
+
await streamHandler.sendChunk({
|
|
108575
|
+
data: null,
|
|
108576
|
+
sequence: sequence + 1,
|
|
108577
|
+
status: response.status
|
|
108570
108578
|
});
|
|
108579
|
+
await streamHandler.end();
|
|
108571
108580
|
logger.info("SSE response queued", {
|
|
108572
108581
|
requestMethod: requestID
|
|
108573
108582
|
});
|
|
@@ -108577,28 +108586,101 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108577
108586
|
timeToFirstTokenMs
|
|
108578
108587
|
};
|
|
108579
108588
|
}
|
|
108580
|
-
async function postChunk({ apiURL, configuration, payload, requestID }) {
|
|
108581
|
-
const response = ClientToServerAPIResponseSchema.parse({
|
|
108582
|
-
data: payload.data,
|
|
108583
|
-
headers: payload.headers,
|
|
108584
|
-
requestID,
|
|
108585
|
-
status: payload.status
|
|
108586
|
-
});
|
|
108587
|
-
await fetch(`${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/chunk`, {
|
|
108588
|
-
body: JSON.stringify({
|
|
108589
|
-
...response,
|
|
108590
|
-
sequence: payload.sequence
|
|
108591
|
-
}),
|
|
108592
|
-
headers: {
|
|
108593
|
-
"content-type": "application/json",
|
|
108594
|
-
"x-api-key": configuration.apiKey
|
|
108595
|
-
},
|
|
108596
|
-
method: "POST"
|
|
108597
|
-
});
|
|
108598
|
-
}
|
|
108599
108589
|
function encodeBinaryChunk(chunk) {
|
|
108600
108590
|
return chunk.toString("base64");
|
|
108601
108591
|
}
|
|
108592
|
+
async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
|
|
108593
|
+
const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
|
|
108594
|
+
const maxFlushAttempts = 3;
|
|
108595
|
+
let isAborted = false;
|
|
108596
|
+
let isClosed = false;
|
|
108597
|
+
let activeAbortController = null;
|
|
108598
|
+
const chunks = [];
|
|
108599
|
+
const sendChunk = async (payload) => {
|
|
108600
|
+
if (isAborted || isClosed) {
|
|
108601
|
+
return;
|
|
108602
|
+
}
|
|
108603
|
+
const response = ClientToServerAPIResponseSchema.parse({
|
|
108604
|
+
data: payload.data,
|
|
108605
|
+
headers: payload.headers,
|
|
108606
|
+
requestID,
|
|
108607
|
+
status: payload.status
|
|
108608
|
+
});
|
|
108609
|
+
const chunk = JSON.stringify({
|
|
108610
|
+
...response,
|
|
108611
|
+
sequence: payload.sequence
|
|
108612
|
+
});
|
|
108613
|
+
chunks.push(Buffer.from(chunk + "\n"));
|
|
108614
|
+
if (chunks.length >= 10) {
|
|
108615
|
+
await flushChunks();
|
|
108616
|
+
}
|
|
108617
|
+
};
|
|
108618
|
+
const flushChunks = async () => {
|
|
108619
|
+
if (chunks.length === 0 || isAborted) {
|
|
108620
|
+
return;
|
|
108621
|
+
}
|
|
108622
|
+
const batch = chunks.splice(0, chunks.length);
|
|
108623
|
+
const body = Buffer.concat(batch);
|
|
108624
|
+
for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
|
|
108625
|
+
try {
|
|
108626
|
+
activeAbortController = new AbortController();
|
|
108627
|
+
const response = await fetch(streamURL, {
|
|
108628
|
+
body: body.toString(),
|
|
108629
|
+
headers: {
|
|
108630
|
+
"content-type": "application/json",
|
|
108631
|
+
"x-api-key": configuration.apiKey
|
|
108632
|
+
},
|
|
108633
|
+
method: "POST",
|
|
108634
|
+
signal: activeAbortController.signal
|
|
108635
|
+
});
|
|
108636
|
+
if (!response.ok) {
|
|
108637
|
+
throw new Error(`Chunk stream flush failed with status ${response.status}`);
|
|
108638
|
+
}
|
|
108639
|
+
return;
|
|
108640
|
+
}
|
|
108641
|
+
catch (error) {
|
|
108642
|
+
if (isAborted) {
|
|
108643
|
+
return;
|
|
108644
|
+
}
|
|
108645
|
+
if (attempt >= maxFlushAttempts) {
|
|
108646
|
+
chunks.unshift(...batch);
|
|
108647
|
+
throw asError(error);
|
|
108648
|
+
}
|
|
108649
|
+
logger.warn("Failed to send chunk batch", {
|
|
108650
|
+
error: asError(error)
|
|
108651
|
+
});
|
|
108652
|
+
await sleep(100 * attempt);
|
|
108653
|
+
}
|
|
108654
|
+
finally {
|
|
108655
|
+
activeAbortController = null;
|
|
108656
|
+
}
|
|
108657
|
+
}
|
|
108658
|
+
};
|
|
108659
|
+
const end = async () => {
|
|
108660
|
+
if (isClosed || isAborted) {
|
|
108661
|
+
return;
|
|
108662
|
+
}
|
|
108663
|
+
await flushChunks();
|
|
108664
|
+
isClosed = true;
|
|
108665
|
+
};
|
|
108666
|
+
const abort = (error) => {
|
|
108667
|
+
isAborted = true;
|
|
108668
|
+
if (activeAbortController) {
|
|
108669
|
+
activeAbortController.abort();
|
|
108670
|
+
}
|
|
108671
|
+
chunks.length = 0;
|
|
108672
|
+
if (error) {
|
|
108673
|
+
logger.error("Chunk stream aborted", {
|
|
108674
|
+
error: asError(error)
|
|
108675
|
+
});
|
|
108676
|
+
}
|
|
108677
|
+
};
|
|
108678
|
+
return {
|
|
108679
|
+
sendChunk,
|
|
108680
|
+
end,
|
|
108681
|
+
abort
|
|
108682
|
+
};
|
|
108683
|
+
}
|
|
108602
108684
|
function calculateRequestBytes(body) {
|
|
108603
108685
|
if (body === null || body === undefined) {
|
|
108604
108686
|
return 0;
|
|
@@ -118119,13 +118201,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
118119
118201
|
}
|
|
118120
118202
|
}
|
|
118121
118203
|
body.on("data", (chunk) => {
|
|
118204
|
+
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
118122
118205
|
if (firstChunkAt === null) {
|
|
118123
118206
|
firstChunkAt = Date.now();
|
|
118124
118207
|
}
|
|
118125
|
-
responseBytes +=
|
|
118126
|
-
buffer +=
|
|
118208
|
+
responseBytes += chunkBuffer.length;
|
|
118209
|
+
buffer += chunkBuffer.toString("utf8");
|
|
118127
118210
|
parseUsageFromBuffer();
|
|
118128
|
-
passThrough.write(modifyChunkWithUsage(
|
|
118211
|
+
passThrough.write(modifyChunkWithUsage(chunkBuffer));
|
|
118129
118212
|
});
|
|
118130
118213
|
body.once("error", err => {
|
|
118131
118214
|
logEngineMetrics({
|
|
@@ -118182,6 +118265,148 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
118182
118265
|
stream: passThrough
|
|
118183
118266
|
};
|
|
118184
118267
|
}
|
|
118268
|
+
function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
118269
|
+
const maxUsageCaptureBytes = 1024 * 1024;
|
|
118270
|
+
const startedAt = requestStartedAt ?? Date.now();
|
|
118271
|
+
const passThrough = new PassThrough();
|
|
118272
|
+
let responseBytes = 0;
|
|
118273
|
+
let firstChunkAt = null;
|
|
118274
|
+
let usage = null;
|
|
118275
|
+
const usageChunks = [];
|
|
118276
|
+
let usageBytes = 0;
|
|
118277
|
+
let usageCaptureEnabled = true;
|
|
118278
|
+
let completed = false;
|
|
118279
|
+
function finalize(error) {
|
|
118280
|
+
if (completed) {
|
|
118281
|
+
return;
|
|
118282
|
+
}
|
|
118283
|
+
completed = true;
|
|
118284
|
+
if (onComplete) {
|
|
118285
|
+
const completion = onComplete({
|
|
118286
|
+
durationMs: Math.max(0, Date.now() - startedAt),
|
|
118287
|
+
error,
|
|
118288
|
+
requestBodyBytes,
|
|
118289
|
+
responseBytes,
|
|
118290
|
+
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
118291
|
+
usage
|
|
118292
|
+
});
|
|
118293
|
+
if (completion && typeof completion.catch === "function") {
|
|
118294
|
+
completion.catch(error => {
|
|
118295
|
+
logger.error("Engine metrics completion failed", {
|
|
118296
|
+
error: asError(error),
|
|
118297
|
+
requestUrl: requestPath
|
|
118298
|
+
});
|
|
118299
|
+
});
|
|
118300
|
+
}
|
|
118301
|
+
}
|
|
118302
|
+
}
|
|
118303
|
+
body.on("data", (chunk) => {
|
|
118304
|
+
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
118305
|
+
if (firstChunkAt === null) {
|
|
118306
|
+
firstChunkAt = Date.now();
|
|
118307
|
+
}
|
|
118308
|
+
responseBytes += chunkBuffer.length;
|
|
118309
|
+
if (usageCaptureEnabled) {
|
|
118310
|
+
const nextSize = usageBytes + chunkBuffer.length;
|
|
118311
|
+
if (nextSize <= maxUsageCaptureBytes) {
|
|
118312
|
+
usageChunks.push(chunkBuffer);
|
|
118313
|
+
usageBytes = nextSize;
|
|
118314
|
+
}
|
|
118315
|
+
else {
|
|
118316
|
+
usageCaptureEnabled = false;
|
|
118317
|
+
usageChunks.length = 0;
|
|
118318
|
+
}
|
|
118319
|
+
}
|
|
118320
|
+
passThrough.write(chunkBuffer);
|
|
118321
|
+
});
|
|
118322
|
+
body.once("error", err => {
|
|
118323
|
+
logEngineMetrics({
|
|
118324
|
+
agentEngineType,
|
|
118325
|
+
error: err,
|
|
118326
|
+
level: "error",
|
|
118327
|
+
logger,
|
|
118328
|
+
requestBodyBytes,
|
|
118329
|
+
requestPath,
|
|
118330
|
+
responseBytes,
|
|
118331
|
+
usage
|
|
118332
|
+
});
|
|
118333
|
+
finalize(err);
|
|
118334
|
+
passThrough.destroy(err);
|
|
118335
|
+
});
|
|
118336
|
+
body.once("end", () => {
|
|
118337
|
+
if (usageCaptureEnabled) {
|
|
118338
|
+
try {
|
|
118339
|
+
const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
|
|
118340
|
+
if (parsed.usage) {
|
|
118341
|
+
const usageChunk = parsed.usage;
|
|
118342
|
+
const completionTokens = usageChunk.completion_tokens ?? null;
|
|
118343
|
+
const promptTokens = usageChunk.prompt_tokens ?? null;
|
|
118344
|
+
const totalTokens = usageChunk.total_tokens ?? null;
|
|
118345
|
+
let contextUsage = usageChunk.context_usage ?? null;
|
|
118346
|
+
const effectiveContext = getEffectiveContextLength({
|
|
118347
|
+
contextLength,
|
|
118348
|
+
engine,
|
|
118349
|
+
parallelism
|
|
118350
|
+
});
|
|
118351
|
+
if (contextUsage === null &&
|
|
118352
|
+
promptTokens !== null &&
|
|
118353
|
+
effectiveContext !== null) {
|
|
118354
|
+
contextUsage = promptTokens / effectiveContext;
|
|
118355
|
+
}
|
|
118356
|
+
usage = {
|
|
118357
|
+
completionTokens,
|
|
118358
|
+
contextUsage,
|
|
118359
|
+
promptTokens,
|
|
118360
|
+
totalTokens
|
|
118361
|
+
};
|
|
118362
|
+
}
|
|
118363
|
+
}
|
|
118364
|
+
catch (error) {
|
|
118365
|
+
logger.error("Failed to parse engine response body", {
|
|
118366
|
+
error: asError(error),
|
|
118367
|
+
requestUrl: requestPath
|
|
118368
|
+
});
|
|
118369
|
+
}
|
|
118370
|
+
}
|
|
118371
|
+
logEngineMetrics({
|
|
118372
|
+
agentEngineType,
|
|
118373
|
+
level: "info",
|
|
118374
|
+
logger,
|
|
118375
|
+
requestBodyBytes,
|
|
118376
|
+
requestPath,
|
|
118377
|
+
responseBytes,
|
|
118378
|
+
usage
|
|
118379
|
+
});
|
|
118380
|
+
finalize(null);
|
|
118381
|
+
passThrough.end();
|
|
118382
|
+
});
|
|
118383
|
+
body.once("close", () => {
|
|
118384
|
+
if (completed) {
|
|
118385
|
+
if (!passThrough.writableEnded) {
|
|
118386
|
+
passThrough.end();
|
|
118387
|
+
}
|
|
118388
|
+
return;
|
|
118389
|
+
}
|
|
118390
|
+
const closeError = new Error("Engine response stream closed before completion");
|
|
118391
|
+
logEngineMetrics({
|
|
118392
|
+
agentEngineType,
|
|
118393
|
+
error: closeError,
|
|
118394
|
+
level: "error",
|
|
118395
|
+
logger,
|
|
118396
|
+
requestBodyBytes,
|
|
118397
|
+
requestPath,
|
|
118398
|
+
responseBytes,
|
|
118399
|
+
usage
|
|
118400
|
+
});
|
|
118401
|
+
finalize(closeError);
|
|
118402
|
+
if (!passThrough.writableEnded) {
|
|
118403
|
+
passThrough.end();
|
|
118404
|
+
}
|
|
118405
|
+
});
|
|
118406
|
+
return {
|
|
118407
|
+
stream: passThrough
|
|
118408
|
+
};
|
|
118409
|
+
}
|
|
118185
118410
|
function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
|
|
118186
118411
|
const metricsMessage = [
|
|
118187
118412
|
"LLM engine stream metrics",
|
|
@@ -118254,6 +118479,35 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118254
118479
|
}
|
|
118255
118480
|
const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
|
|
118256
118481
|
const requestStartedAt = Date.now();
|
|
118482
|
+
const requestBody = JSON.parse(serializedBody);
|
|
118483
|
+
const streamRequested = requestBody.stream === true;
|
|
118484
|
+
const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
|
|
118485
|
+
const completionTokens = normalizeTokenCount(usage?.completionTokens);
|
|
118486
|
+
const promptTokens = normalizeTokenCount(usage?.promptTokens);
|
|
118487
|
+
const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
|
|
118488
|
+
const latencyMs = Math.max(0, durationMs);
|
|
118489
|
+
reportMetricsSafe({
|
|
118490
|
+
bytes: requestBodyBytes + responseBytes,
|
|
118491
|
+
completionTokens,
|
|
118492
|
+
engine: configuration.agentEngineType,
|
|
118493
|
+
endpointId: null,
|
|
118494
|
+
latencyMs,
|
|
118495
|
+
modelId: modelID,
|
|
118496
|
+
promptTokens,
|
|
118497
|
+
requestBytes: requestBodyBytes,
|
|
118498
|
+
requestId: null,
|
|
118499
|
+
requestMethod: "POST",
|
|
118500
|
+
requestPath: path,
|
|
118501
|
+
responseBytes,
|
|
118502
|
+
successful: !error,
|
|
118503
|
+
timeToFirstTokenMs,
|
|
118504
|
+
tokensPerSecond: calculateTokensPerSecond({
|
|
118505
|
+
durationMs: latencyMs,
|
|
118506
|
+
totalTokens
|
|
118507
|
+
}),
|
|
118508
|
+
totalTokens
|
|
118509
|
+
});
|
|
118510
|
+
};
|
|
118257
118511
|
const response = await modelManager
|
|
118258
118512
|
.fetchOpenAI(path, {
|
|
118259
118513
|
body: serializedBody,
|
|
@@ -118348,44 +118602,31 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118348
118602
|
statusText: responseStatusText
|
|
118349
118603
|
};
|
|
118350
118604
|
}
|
|
118351
|
-
const monitoredResponse =
|
|
118352
|
-
|
|
118353
|
-
|
|
118354
|
-
|
|
118355
|
-
|
|
118356
|
-
|
|
118357
|
-
|
|
118358
|
-
|
|
118359
|
-
|
|
118360
|
-
|
|
118361
|
-
|
|
118362
|
-
|
|
118363
|
-
|
|
118364
|
-
|
|
118365
|
-
|
|
118366
|
-
|
|
118367
|
-
|
|
118368
|
-
|
|
118369
|
-
|
|
118370
|
-
|
|
118371
|
-
|
|
118372
|
-
|
|
118373
|
-
|
|
118374
|
-
|
|
118375
|
-
|
|
118376
|
-
timeToFirstTokenMs,
|
|
118377
|
-
tokensPerSecond: calculateTokensPerSecond({
|
|
118378
|
-
durationMs: latencyMs,
|
|
118379
|
-
totalTokens
|
|
118380
|
-
}),
|
|
118381
|
-
totalTokens
|
|
118382
|
-
});
|
|
118383
|
-
},
|
|
118384
|
-
parallelism: modelManager.parallelism,
|
|
118385
|
-
requestBodyBytes,
|
|
118386
|
-
requestPath: path,
|
|
118387
|
-
requestStartedAt
|
|
118388
|
-
});
|
|
118605
|
+
const monitoredResponse = streamRequested
|
|
118606
|
+
? monitorEngineResponseStream({
|
|
118607
|
+
agentEngineType: configuration.agentEngineType,
|
|
118608
|
+
body: Readable.fromWeb(response.body),
|
|
118609
|
+
contextLength: modelManager.contextLength,
|
|
118610
|
+
engine: configuration.agentEngineType,
|
|
118611
|
+
logger,
|
|
118612
|
+
onComplete: onMonitoringComplete,
|
|
118613
|
+
parallelism: modelManager.parallelism,
|
|
118614
|
+
requestBodyBytes,
|
|
118615
|
+
requestPath: path,
|
|
118616
|
+
requestStartedAt
|
|
118617
|
+
})
|
|
118618
|
+
: monitorEngineResponseSingle({
|
|
118619
|
+
agentEngineType: configuration.agentEngineType,
|
|
118620
|
+
body: Readable.fromWeb(response.body),
|
|
118621
|
+
contextLength: modelManager.contextLength,
|
|
118622
|
+
engine: configuration.agentEngineType,
|
|
118623
|
+
logger,
|
|
118624
|
+
onComplete: onMonitoringComplete,
|
|
118625
|
+
parallelism: modelManager.parallelism,
|
|
118626
|
+
requestBodyBytes,
|
|
118627
|
+
requestPath: path,
|
|
118628
|
+
requestStartedAt
|
|
118629
|
+
});
|
|
118389
118630
|
return {
|
|
118390
118631
|
body: monitoredResponse.stream,
|
|
118391
118632
|
headers: Object.fromEntries(response.headers.entries()),
|
|
@@ -39,5 +39,6 @@ interface MonitorEngineResponseResult {
|
|
|
39
39
|
stream: Readable;
|
|
40
40
|
}
|
|
41
41
|
export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
42
|
+
export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
42
43
|
export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
|
|
43
44
|
export {};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@infersec/conduit",
|
|
3
3
|
"description": "End user conduit agent for connecting local LLMs to the cloud.",
|
|
4
|
-
"version": "1.24.3",
|
|
4
|
+
"version": "1.25.0",
|
|
5
5
|
"bin": {
|
|
6
6
|
"infersec-conduit": "./dist/cli.js"
|
|
7
7
|
},
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"test:format": "prettier --check .",
|
|
28
28
|
"test:lint": "eslint source/**/*.ts",
|
|
29
29
|
"test:types": "tsc -p tsconfig.json --noEmit",
|
|
30
|
-
"test:unit": "vitest run"
|
|
30
|
+
"test:unit": "vitest -c vitest.config.ts run"
|
|
31
31
|
},
|
|
32
32
|
"prettier": "@infersec/prettier",
|
|
33
33
|
"publishConfig": {
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|