@infersec/conduit 1.24.2 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-DBk2G4SP.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-DBk2G4SP.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -14914,9 +14914,32 @@ const API_SERVICE_CONDUIT_API_REFERENCE = {
|
|
|
14914
14914
|
type: "rest"
|
|
14915
14915
|
}
|
|
14916
14916
|
}
|
|
14917
|
+
},
|
|
14918
|
+
"/conduit/api/v1/source/:sourceID/requests/:requestID/stream": {
|
|
14919
|
+
POST: {
|
|
14920
|
+
auth: {
|
|
14921
|
+
type: "api-key"
|
|
14922
|
+
},
|
|
14923
|
+
parameters: {
|
|
14924
|
+
requestID: ULIDSchema,
|
|
14925
|
+
sourceID: ULIDSchema
|
|
14926
|
+
},
|
|
14927
|
+
response: {
|
|
14928
|
+
schema: object({
|
|
14929
|
+
acknowledged: literal(true)
|
|
14930
|
+
}),
|
|
14931
|
+
type: "rest"
|
|
14932
|
+
}
|
|
14933
|
+
}
|
|
14917
14934
|
}
|
|
14918
14935
|
};
|
|
14919
14936
|
|
|
14937
|
+
/**
|
|
14938
|
+
* Coerce non-string values to JSON strings. Some LLM backends (e.g. llama.cpp)
|
|
14939
|
+
* return tool_calls arguments as parsed objects instead of JSON strings, which
|
|
14940
|
+
* violates the OpenAI spec. This schema field normalises them on parse.
|
|
14941
|
+
*/
|
|
14942
|
+
const jsonStringCoerced = preprocess(val => (typeof val === "string" ? val : JSON.stringify(val)), string$1());
|
|
14920
14943
|
// ==================== CHAT COMPLETION ROLES ====================
|
|
14921
14944
|
_enum([
|
|
14922
14945
|
"system",
|
|
@@ -14963,13 +14986,13 @@ const ChatCompletionAssistantMessageParamSchema = object({
|
|
|
14963
14986
|
type: literal("function"),
|
|
14964
14987
|
function: object({
|
|
14965
14988
|
name: string$1(),
|
|
14966
|
-
arguments: string$1()
|
|
14989
|
+
arguments: jsonStringCoerced
|
|
14967
14990
|
})
|
|
14968
14991
|
}))
|
|
14969
14992
|
.optional(),
|
|
14970
14993
|
function_call: object({
|
|
14971
14994
|
name: string$1(),
|
|
14972
|
-
arguments: string$1()
|
|
14995
|
+
arguments: jsonStringCoerced
|
|
14973
14996
|
})
|
|
14974
14997
|
.optional(),
|
|
14975
14998
|
refusal: string$1().nullable().optional()
|
|
@@ -15006,13 +15029,13 @@ const ChatCompletionMessageSchema = object({
|
|
|
15006
15029
|
type: literal("function"),
|
|
15007
15030
|
function: object({
|
|
15008
15031
|
name: string$1(),
|
|
15009
|
-
arguments: string$1()
|
|
15032
|
+
arguments: jsonStringCoerced
|
|
15010
15033
|
})
|
|
15011
15034
|
}))
|
|
15012
15035
|
.optional(),
|
|
15013
15036
|
function_call: object({
|
|
15014
15037
|
name: string$1(),
|
|
15015
|
-
arguments: string$1()
|
|
15038
|
+
arguments: jsonStringCoerced
|
|
15016
15039
|
})
|
|
15017
15040
|
.optional(),
|
|
15018
15041
|
refusal: string$1().nullable()
|
|
@@ -17595,6 +17618,9 @@ function createLogger({ attributes = {}, name }) {
|
|
|
17595
17618
|
child: (attributes) => {
|
|
17596
17619
|
return buildLogger(logger.child(processAttributes(attributes)));
|
|
17597
17620
|
},
|
|
17621
|
+
debug: (message, attributes) => {
|
|
17622
|
+
logger.debug(processAttributes(attributes ?? {}), message);
|
|
17623
|
+
},
|
|
17598
17624
|
error: (message, attributes) => {
|
|
17599
17625
|
logger.error(processAttributes(attributes ?? {}), message);
|
|
17600
17626
|
},
|
|
@@ -97834,7 +97860,6 @@ function implementSingleEndpoint({ endpoint, handler, method, mount, route }) {
|
|
|
97834
97860
|
: []), (async (req, res) => {
|
|
97835
97861
|
res.locals.requestID = ulid$2();
|
|
97836
97862
|
try {
|
|
97837
|
-
console.log("HANDLE REQ", method, route, req.params);
|
|
97838
97863
|
// Extract and validate parameters with proper type assertion
|
|
97839
97864
|
const parameters = endpoint.parameters
|
|
97840
97865
|
? validateAndExtract("params", req.params, endpoint.parameters)
|
|
@@ -97882,7 +97907,6 @@ function implementSingleEndpoint({ endpoint, handler, method, mount, route }) {
|
|
|
97882
97907
|
res.status(output.status).send(output.statusText);
|
|
97883
97908
|
return;
|
|
97884
97909
|
}
|
|
97885
|
-
console.log("GOT RESPONSE", method, route, output.status, typeof output.body);
|
|
97886
97910
|
res.status(output.status);
|
|
97887
97911
|
if (endpoint.response.type === "text-stream") {
|
|
97888
97912
|
if (!res.getHeader("content-type")) {
|
|
@@ -108354,7 +108378,8 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
108354
108378
|
onRequestEnd,
|
|
108355
108379
|
onRequestStart,
|
|
108356
108380
|
reportMetrics,
|
|
108357
|
-
request: payload
|
|
108381
|
+
request: payload,
|
|
108382
|
+
signal
|
|
108358
108383
|
}).catch(error => {
|
|
108359
108384
|
logger.error("SSE request handler failed", {
|
|
108360
108385
|
error: asError(error),
|
|
@@ -108388,7 +108413,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
108388
108413
|
}
|
|
108389
108414
|
}
|
|
108390
108415
|
}
|
|
108391
|
-
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request }) {
|
|
108416
|
+
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
108392
108417
|
function reportMetricsSafe(payload) {
|
|
108393
108418
|
reportMetrics(payload).catch(error => {
|
|
108394
108419
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -108408,7 +108433,8 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108408
108433
|
logger,
|
|
108409
108434
|
requestID: request.requestID,
|
|
108410
108435
|
requestStartedAt,
|
|
108411
|
-
response
|
|
108436
|
+
response,
|
|
108437
|
+
signal
|
|
108412
108438
|
});
|
|
108413
108439
|
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108414
108440
|
const totalTokens = 0;
|
|
@@ -108448,26 +108474,23 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108448
108474
|
durationMs: latencyMs,
|
|
108449
108475
|
totalTokens
|
|
108450
108476
|
});
|
|
108451
|
-
await postChunk({
|
|
108477
|
+
const streamHandler = await sendChunkStream({
|
|
108452
108478
|
apiURL,
|
|
108453
108479
|
configuration,
|
|
108454
|
-
|
|
108455
|
-
|
|
108456
|
-
sequence: 0,
|
|
108457
|
-
status: 502
|
|
108458
|
-
},
|
|
108459
|
-
requestID: request.requestID
|
|
108480
|
+
requestID: request.requestID,
|
|
108481
|
+
logger
|
|
108460
108482
|
});
|
|
108461
|
-
await postChunk({
|
|
108462
|
-
|
|
108463
|
-
|
|
108464
|
-
|
|
108465
|
-
|
|
108466
|
-
|
|
108467
|
-
|
|
108468
|
-
|
|
108469
|
-
|
|
108483
|
+
await streamHandler.sendChunk({
|
|
108484
|
+
data: encodeBinaryChunk(Buffer.from(failureMessage)),
|
|
108485
|
+
sequence: 0,
|
|
108486
|
+
status: 502
|
|
108487
|
+
});
|
|
108488
|
+
await streamHandler.sendChunk({
|
|
108489
|
+
data: null,
|
|
108490
|
+
sequence: 1,
|
|
108491
|
+
status: 502
|
|
108470
108492
|
});
|
|
108493
|
+
await streamHandler.end();
|
|
108471
108494
|
reportMetricsSafe({
|
|
108472
108495
|
bytes: requestBytes + failureBytes,
|
|
108473
108496
|
completionTokens: 0,
|
|
@@ -108491,12 +108514,22 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
108491
108514
|
await onRequestEnd?.(request);
|
|
108492
108515
|
}
|
|
108493
108516
|
}
|
|
108494
|
-
async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response }) {
|
|
108517
|
+
async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response, signal }) {
|
|
108495
108518
|
let sequence = 0;
|
|
108496
108519
|
let responseBytes = 0;
|
|
108497
108520
|
let timeToFirstTokenMs = null;
|
|
108521
|
+
const streamHandler = await sendChunkStream({
|
|
108522
|
+
apiURL,
|
|
108523
|
+
configuration,
|
|
108524
|
+
requestID,
|
|
108525
|
+
logger
|
|
108526
|
+
});
|
|
108498
108527
|
if (response.body instanceof Readable) {
|
|
108499
108528
|
for await (const chunk of response.body) {
|
|
108529
|
+
if (signal?.aborted) {
|
|
108530
|
+
streamHandler.abort();
|
|
108531
|
+
throw new Error("Request cancelled");
|
|
108532
|
+
}
|
|
108500
108533
|
const buffer = Buffer.isBuffer(chunk)
|
|
108501
108534
|
? chunk
|
|
108502
108535
|
: Buffer.from(chunk);
|
|
@@ -108504,28 +108537,19 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108504
108537
|
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108505
108538
|
}
|
|
108506
108539
|
responseBytes += buffer.length;
|
|
108507
|
-
await postChunk({
|
|
108508
|
-
|
|
108509
|
-
|
|
108510
|
-
|
|
108511
|
-
data: encodeBinaryChunk(buffer),
|
|
108512
|
-
sequence,
|
|
108513
|
-
status: response.status
|
|
108514
|
-
},
|
|
108515
|
-
requestID
|
|
108540
|
+
await streamHandler.sendChunk({
|
|
108541
|
+
data: encodeBinaryChunk(buffer),
|
|
108542
|
+
sequence,
|
|
108543
|
+
status: response.status
|
|
108516
108544
|
});
|
|
108517
108545
|
sequence += 1;
|
|
108518
108546
|
}
|
|
108519
|
-
await postChunk({
|
|
108520
|
-
|
|
108521
|
-
|
|
108522
|
-
|
|
108523
|
-
data: null,
|
|
108524
|
-
sequence,
|
|
108525
|
-
status: response.status
|
|
108526
|
-
},
|
|
108527
|
-
requestID
|
|
108547
|
+
await streamHandler.sendChunk({
|
|
108548
|
+
data: null,
|
|
108549
|
+
sequence,
|
|
108550
|
+
status: response.status
|
|
108528
108551
|
});
|
|
108552
|
+
await streamHandler.end();
|
|
108529
108553
|
return {
|
|
108530
108554
|
responseBytes,
|
|
108531
108555
|
status: response.status,
|
|
@@ -108541,27 +108565,18 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108541
108565
|
responseBytes = Buffer.byteLength(responsePayload, "utf8");
|
|
108542
108566
|
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108543
108567
|
}
|
|
108544
|
-
await postChunk({
|
|
108545
|
-
|
|
108546
|
-
|
|
108547
|
-
|
|
108548
|
-
|
|
108549
|
-
headers: response.headers,
|
|
108550
|
-
sequence,
|
|
108551
|
-
status: response.status
|
|
108552
|
-
},
|
|
108553
|
-
requestID
|
|
108568
|
+
await streamHandler.sendChunk({
|
|
108569
|
+
data: encodeBinaryChunk(Buffer.from(responsePayload)),
|
|
108570
|
+
headers: response.headers,
|
|
108571
|
+
sequence,
|
|
108572
|
+
status: response.status
|
|
108554
108573
|
});
|
|
108555
|
-
await postChunk({
|
|
108556
|
-
|
|
108557
|
-
|
|
108558
|
-
|
|
108559
|
-
data: null,
|
|
108560
|
-
sequence: sequence + 1,
|
|
108561
|
-
status: response.status
|
|
108562
|
-
},
|
|
108563
|
-
requestID
|
|
108574
|
+
await streamHandler.sendChunk({
|
|
108575
|
+
data: null,
|
|
108576
|
+
sequence: sequence + 1,
|
|
108577
|
+
status: response.status
|
|
108564
108578
|
});
|
|
108579
|
+
await streamHandler.end();
|
|
108565
108580
|
logger.info("SSE response queued", {
|
|
108566
108581
|
requestMethod: requestID
|
|
108567
108582
|
});
|
|
@@ -108571,28 +108586,101 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
|
|
|
108571
108586
|
timeToFirstTokenMs
|
|
108572
108587
|
};
|
|
108573
108588
|
}
|
|
108574
|
-
async function postChunk({ apiURL, configuration, payload, requestID }) {
|
|
108575
|
-
const response = ClientToServerAPIResponseSchema.parse({
|
|
108576
|
-
data: payload.data,
|
|
108577
|
-
headers: payload.headers,
|
|
108578
|
-
requestID,
|
|
108579
|
-
status: payload.status
|
|
108580
|
-
});
|
|
108581
|
-
await fetch(`${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/chunk`, {
|
|
108582
|
-
body: JSON.stringify({
|
|
108583
|
-
...response,
|
|
108584
|
-
sequence: payload.sequence
|
|
108585
|
-
}),
|
|
108586
|
-
headers: {
|
|
108587
|
-
"content-type": "application/json",
|
|
108588
|
-
"x-api-key": configuration.apiKey
|
|
108589
|
-
},
|
|
108590
|
-
method: "POST"
|
|
108591
|
-
});
|
|
108592
|
-
}
|
|
108593
108589
|
function encodeBinaryChunk(chunk) {
|
|
108594
108590
|
return chunk.toString("base64");
|
|
108595
108591
|
}
|
|
108592
|
+
async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
|
|
108593
|
+
const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
|
|
108594
|
+
const maxFlushAttempts = 3;
|
|
108595
|
+
let isAborted = false;
|
|
108596
|
+
let isClosed = false;
|
|
108597
|
+
let activeAbortController = null;
|
|
108598
|
+
const chunks = [];
|
|
108599
|
+
const sendChunk = async (payload) => {
|
|
108600
|
+
if (isAborted || isClosed) {
|
|
108601
|
+
return;
|
|
108602
|
+
}
|
|
108603
|
+
const response = ClientToServerAPIResponseSchema.parse({
|
|
108604
|
+
data: payload.data,
|
|
108605
|
+
headers: payload.headers,
|
|
108606
|
+
requestID,
|
|
108607
|
+
status: payload.status
|
|
108608
|
+
});
|
|
108609
|
+
const chunk = JSON.stringify({
|
|
108610
|
+
...response,
|
|
108611
|
+
sequence: payload.sequence
|
|
108612
|
+
});
|
|
108613
|
+
chunks.push(Buffer.from(chunk + "\n"));
|
|
108614
|
+
if (chunks.length >= 10) {
|
|
108615
|
+
await flushChunks();
|
|
108616
|
+
}
|
|
108617
|
+
};
|
|
108618
|
+
const flushChunks = async () => {
|
|
108619
|
+
if (chunks.length === 0 || isAborted) {
|
|
108620
|
+
return;
|
|
108621
|
+
}
|
|
108622
|
+
const batch = chunks.splice(0, chunks.length);
|
|
108623
|
+
const body = Buffer.concat(batch);
|
|
108624
|
+
for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
|
|
108625
|
+
try {
|
|
108626
|
+
activeAbortController = new AbortController();
|
|
108627
|
+
const response = await fetch(streamURL, {
|
|
108628
|
+
body: body.toString(),
|
|
108629
|
+
headers: {
|
|
108630
|
+
"content-type": "application/json",
|
|
108631
|
+
"x-api-key": configuration.apiKey
|
|
108632
|
+
},
|
|
108633
|
+
method: "POST",
|
|
108634
|
+
signal: activeAbortController.signal
|
|
108635
|
+
});
|
|
108636
|
+
if (!response.ok) {
|
|
108637
|
+
throw new Error(`Chunk stream flush failed with status ${response.status}`);
|
|
108638
|
+
}
|
|
108639
|
+
return;
|
|
108640
|
+
}
|
|
108641
|
+
catch (error) {
|
|
108642
|
+
if (isAborted) {
|
|
108643
|
+
return;
|
|
108644
|
+
}
|
|
108645
|
+
if (attempt >= maxFlushAttempts) {
|
|
108646
|
+
chunks.unshift(...batch);
|
|
108647
|
+
throw asError(error);
|
|
108648
|
+
}
|
|
108649
|
+
logger.warn("Failed to send chunk batch", {
|
|
108650
|
+
error: asError(error)
|
|
108651
|
+
});
|
|
108652
|
+
await sleep(100 * attempt);
|
|
108653
|
+
}
|
|
108654
|
+
finally {
|
|
108655
|
+
activeAbortController = null;
|
|
108656
|
+
}
|
|
108657
|
+
}
|
|
108658
|
+
};
|
|
108659
|
+
const end = async () => {
|
|
108660
|
+
if (isClosed || isAborted) {
|
|
108661
|
+
return;
|
|
108662
|
+
}
|
|
108663
|
+
await flushChunks();
|
|
108664
|
+
isClosed = true;
|
|
108665
|
+
};
|
|
108666
|
+
const abort = (error) => {
|
|
108667
|
+
isAborted = true;
|
|
108668
|
+
if (activeAbortController) {
|
|
108669
|
+
activeAbortController.abort();
|
|
108670
|
+
}
|
|
108671
|
+
chunks.length = 0;
|
|
108672
|
+
if (error) {
|
|
108673
|
+
logger.error("Chunk stream aborted", {
|
|
108674
|
+
error: asError(error)
|
|
108675
|
+
});
|
|
108676
|
+
}
|
|
108677
|
+
};
|
|
108678
|
+
return {
|
|
108679
|
+
sendChunk,
|
|
108680
|
+
end,
|
|
108681
|
+
abort
|
|
108682
|
+
};
|
|
108683
|
+
}
|
|
108596
108684
|
function calculateRequestBytes(body) {
|
|
108597
108685
|
if (body === null || body === undefined) {
|
|
108598
108686
|
return 0;
|
|
@@ -117945,6 +118033,42 @@ async function collectMachineMetadata() {
|
|
|
117945
118033
|
return machineMetadata;
|
|
117946
118034
|
}
|
|
117947
118035
|
|
|
118036
|
+
/**
|
|
118037
|
+
* Coerce non-string tool_calls function.arguments to JSON strings.
|
|
118038
|
+
* Some LLM backends return arguments as parsed objects instead of
|
|
118039
|
+
* JSON strings, violating the OpenAI spec. This mutates in place
|
|
118040
|
+
* and returns true if any coercion was performed.
|
|
118041
|
+
*/
|
|
118042
|
+
function coerceToolCallArguments(parsed) {
|
|
118043
|
+
const choices = parsed.choices;
|
|
118044
|
+
if (!Array.isArray(choices))
|
|
118045
|
+
return false;
|
|
118046
|
+
let modified = false;
|
|
118047
|
+
for (const choice of choices) {
|
|
118048
|
+
if (!choice || typeof choice !== "object")
|
|
118049
|
+
continue;
|
|
118050
|
+
const choiceRecord = choice;
|
|
118051
|
+
const msg = choiceRecord.delta ?? choiceRecord.message;
|
|
118052
|
+
if (!msg || typeof msg !== "object")
|
|
118053
|
+
continue;
|
|
118054
|
+
const toolCalls = msg.tool_calls;
|
|
118055
|
+
if (!Array.isArray(toolCalls))
|
|
118056
|
+
continue;
|
|
118057
|
+
for (const tc of toolCalls) {
|
|
118058
|
+
if (!tc || typeof tc !== "object")
|
|
118059
|
+
continue;
|
|
118060
|
+
const fn = tc.function;
|
|
118061
|
+
if (!fn || typeof fn !== "object")
|
|
118062
|
+
continue;
|
|
118063
|
+
const fnRecord = fn;
|
|
118064
|
+
if (fnRecord.arguments !== undefined && typeof fnRecord.arguments !== "string") {
|
|
118065
|
+
fnRecord.arguments = JSON.stringify(fnRecord.arguments);
|
|
118066
|
+
modified = true;
|
|
118067
|
+
}
|
|
118068
|
+
}
|
|
118069
|
+
}
|
|
118070
|
+
return modified;
|
|
118071
|
+
}
|
|
117948
118072
|
function isEngineUsageChunk(value) {
|
|
117949
118073
|
if (!value || typeof value !== "object") {
|
|
117950
118074
|
return false;
|
|
@@ -117980,6 +118104,10 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
117980
118104
|
}
|
|
117981
118105
|
try {
|
|
117982
118106
|
const parsed = JSON.parse(payload);
|
|
118107
|
+
let modified = false;
|
|
118108
|
+
if (coerceToolCallArguments(parsed)) {
|
|
118109
|
+
modified = true;
|
|
118110
|
+
}
|
|
117983
118111
|
if (parsed.usage) {
|
|
117984
118112
|
const usageChunk = parsed.usage;
|
|
117985
118113
|
const effectiveContext = getEffectiveContextLength({
|
|
@@ -117991,10 +118119,13 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
117991
118119
|
usageChunk.prompt_tokens !== undefined &&
|
|
117992
118120
|
effectiveContext !== null) {
|
|
117993
118121
|
usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
|
|
117994
|
-
|
|
117995
|
-
continue;
|
|
118122
|
+
modified = true;
|
|
117996
118123
|
}
|
|
117997
118124
|
}
|
|
118125
|
+
if (modified) {
|
|
118126
|
+
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
118127
|
+
continue;
|
|
118128
|
+
}
|
|
117998
118129
|
}
|
|
117999
118130
|
catch (_error) {
|
|
118000
118131
|
// Ignore malformed chunks
|
|
@@ -118070,13 +118201,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
118070
118201
|
}
|
|
118071
118202
|
}
|
|
118072
118203
|
body.on("data", (chunk) => {
|
|
118204
|
+
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
118073
118205
|
if (firstChunkAt === null) {
|
|
118074
118206
|
firstChunkAt = Date.now();
|
|
118075
118207
|
}
|
|
118076
|
-
responseBytes +=
|
|
118077
|
-
buffer +=
|
|
118208
|
+
responseBytes += chunkBuffer.length;
|
|
118209
|
+
buffer += chunkBuffer.toString("utf8");
|
|
118078
118210
|
parseUsageFromBuffer();
|
|
118079
|
-
passThrough.write(modifyChunkWithUsage(
|
|
118211
|
+
passThrough.write(modifyChunkWithUsage(chunkBuffer));
|
|
118080
118212
|
});
|
|
118081
118213
|
body.once("error", err => {
|
|
118082
118214
|
logEngineMetrics({
|
|
@@ -118133,6 +118265,148 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
118133
118265
|
stream: passThrough
|
|
118134
118266
|
};
|
|
118135
118267
|
}
|
|
118268
|
+
function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
118269
|
+
const maxUsageCaptureBytes = 1024 * 1024;
|
|
118270
|
+
const startedAt = requestStartedAt ?? Date.now();
|
|
118271
|
+
const passThrough = new PassThrough();
|
|
118272
|
+
let responseBytes = 0;
|
|
118273
|
+
let firstChunkAt = null;
|
|
118274
|
+
let usage = null;
|
|
118275
|
+
const usageChunks = [];
|
|
118276
|
+
let usageBytes = 0;
|
|
118277
|
+
let usageCaptureEnabled = true;
|
|
118278
|
+
let completed = false;
|
|
118279
|
+
function finalize(error) {
|
|
118280
|
+
if (completed) {
|
|
118281
|
+
return;
|
|
118282
|
+
}
|
|
118283
|
+
completed = true;
|
|
118284
|
+
if (onComplete) {
|
|
118285
|
+
const completion = onComplete({
|
|
118286
|
+
durationMs: Math.max(0, Date.now() - startedAt),
|
|
118287
|
+
error,
|
|
118288
|
+
requestBodyBytes,
|
|
118289
|
+
responseBytes,
|
|
118290
|
+
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
118291
|
+
usage
|
|
118292
|
+
});
|
|
118293
|
+
if (completion && typeof completion.catch === "function") {
|
|
118294
|
+
completion.catch(error => {
|
|
118295
|
+
logger.error("Engine metrics completion failed", {
|
|
118296
|
+
error: asError(error),
|
|
118297
|
+
requestUrl: requestPath
|
|
118298
|
+
});
|
|
118299
|
+
});
|
|
118300
|
+
}
|
|
118301
|
+
}
|
|
118302
|
+
}
|
|
118303
|
+
body.on("data", (chunk) => {
|
|
118304
|
+
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
118305
|
+
if (firstChunkAt === null) {
|
|
118306
|
+
firstChunkAt = Date.now();
|
|
118307
|
+
}
|
|
118308
|
+
responseBytes += chunkBuffer.length;
|
|
118309
|
+
if (usageCaptureEnabled) {
|
|
118310
|
+
const nextSize = usageBytes + chunkBuffer.length;
|
|
118311
|
+
if (nextSize <= maxUsageCaptureBytes) {
|
|
118312
|
+
usageChunks.push(chunkBuffer);
|
|
118313
|
+
usageBytes = nextSize;
|
|
118314
|
+
}
|
|
118315
|
+
else {
|
|
118316
|
+
usageCaptureEnabled = false;
|
|
118317
|
+
usageChunks.length = 0;
|
|
118318
|
+
}
|
|
118319
|
+
}
|
|
118320
|
+
passThrough.write(chunkBuffer);
|
|
118321
|
+
});
|
|
118322
|
+
body.once("error", err => {
|
|
118323
|
+
logEngineMetrics({
|
|
118324
|
+
agentEngineType,
|
|
118325
|
+
error: err,
|
|
118326
|
+
level: "error",
|
|
118327
|
+
logger,
|
|
118328
|
+
requestBodyBytes,
|
|
118329
|
+
requestPath,
|
|
118330
|
+
responseBytes,
|
|
118331
|
+
usage
|
|
118332
|
+
});
|
|
118333
|
+
finalize(err);
|
|
118334
|
+
passThrough.destroy(err);
|
|
118335
|
+
});
|
|
118336
|
+
body.once("end", () => {
|
|
118337
|
+
if (usageCaptureEnabled) {
|
|
118338
|
+
try {
|
|
118339
|
+
const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
|
|
118340
|
+
if (parsed.usage) {
|
|
118341
|
+
const usageChunk = parsed.usage;
|
|
118342
|
+
const completionTokens = usageChunk.completion_tokens ?? null;
|
|
118343
|
+
const promptTokens = usageChunk.prompt_tokens ?? null;
|
|
118344
|
+
const totalTokens = usageChunk.total_tokens ?? null;
|
|
118345
|
+
let contextUsage = usageChunk.context_usage ?? null;
|
|
118346
|
+
const effectiveContext = getEffectiveContextLength({
|
|
118347
|
+
contextLength,
|
|
118348
|
+
engine,
|
|
118349
|
+
parallelism
|
|
118350
|
+
});
|
|
118351
|
+
if (contextUsage === null &&
|
|
118352
|
+
promptTokens !== null &&
|
|
118353
|
+
effectiveContext !== null) {
|
|
118354
|
+
contextUsage = promptTokens / effectiveContext;
|
|
118355
|
+
}
|
|
118356
|
+
usage = {
|
|
118357
|
+
completionTokens,
|
|
118358
|
+
contextUsage,
|
|
118359
|
+
promptTokens,
|
|
118360
|
+
totalTokens
|
|
118361
|
+
};
|
|
118362
|
+
}
|
|
118363
|
+
}
|
|
118364
|
+
catch (error) {
|
|
118365
|
+
logger.error("Failed to parse engine response body", {
|
|
118366
|
+
error: asError(error),
|
|
118367
|
+
requestUrl: requestPath
|
|
118368
|
+
});
|
|
118369
|
+
}
|
|
118370
|
+
}
|
|
118371
|
+
logEngineMetrics({
|
|
118372
|
+
agentEngineType,
|
|
118373
|
+
level: "info",
|
|
118374
|
+
logger,
|
|
118375
|
+
requestBodyBytes,
|
|
118376
|
+
requestPath,
|
|
118377
|
+
responseBytes,
|
|
118378
|
+
usage
|
|
118379
|
+
});
|
|
118380
|
+
finalize(null);
|
|
118381
|
+
passThrough.end();
|
|
118382
|
+
});
|
|
118383
|
+
body.once("close", () => {
|
|
118384
|
+
if (completed) {
|
|
118385
|
+
if (!passThrough.writableEnded) {
|
|
118386
|
+
passThrough.end();
|
|
118387
|
+
}
|
|
118388
|
+
return;
|
|
118389
|
+
}
|
|
118390
|
+
const closeError = new Error("Engine response stream closed before completion");
|
|
118391
|
+
logEngineMetrics({
|
|
118392
|
+
agentEngineType,
|
|
118393
|
+
error: closeError,
|
|
118394
|
+
level: "error",
|
|
118395
|
+
logger,
|
|
118396
|
+
requestBodyBytes,
|
|
118397
|
+
requestPath,
|
|
118398
|
+
responseBytes,
|
|
118399
|
+
usage
|
|
118400
|
+
});
|
|
118401
|
+
finalize(closeError);
|
|
118402
|
+
if (!passThrough.writableEnded) {
|
|
118403
|
+
passThrough.end();
|
|
118404
|
+
}
|
|
118405
|
+
});
|
|
118406
|
+
return {
|
|
118407
|
+
stream: passThrough
|
|
118408
|
+
};
|
|
118409
|
+
}
|
|
118136
118410
|
function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
|
|
118137
118411
|
const metricsMessage = [
|
|
118138
118412
|
"LLM engine stream metrics",
|
|
@@ -118205,6 +118479,35 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118205
118479
|
}
|
|
118206
118480
|
const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
|
|
118207
118481
|
const requestStartedAt = Date.now();
|
|
118482
|
+
const requestBody = JSON.parse(serializedBody);
|
|
118483
|
+
const streamRequested = requestBody.stream === true;
|
|
118484
|
+
const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
|
|
118485
|
+
const completionTokens = normalizeTokenCount(usage?.completionTokens);
|
|
118486
|
+
const promptTokens = normalizeTokenCount(usage?.promptTokens);
|
|
118487
|
+
const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
|
|
118488
|
+
const latencyMs = Math.max(0, durationMs);
|
|
118489
|
+
reportMetricsSafe({
|
|
118490
|
+
bytes: requestBodyBytes + responseBytes,
|
|
118491
|
+
completionTokens,
|
|
118492
|
+
engine: configuration.agentEngineType,
|
|
118493
|
+
endpointId: null,
|
|
118494
|
+
latencyMs,
|
|
118495
|
+
modelId: modelID,
|
|
118496
|
+
promptTokens,
|
|
118497
|
+
requestBytes: requestBodyBytes,
|
|
118498
|
+
requestId: null,
|
|
118499
|
+
requestMethod: "POST",
|
|
118500
|
+
requestPath: path,
|
|
118501
|
+
responseBytes,
|
|
118502
|
+
successful: !error,
|
|
118503
|
+
timeToFirstTokenMs,
|
|
118504
|
+
tokensPerSecond: calculateTokensPerSecond({
|
|
118505
|
+
durationMs: latencyMs,
|
|
118506
|
+
totalTokens
|
|
118507
|
+
}),
|
|
118508
|
+
totalTokens
|
|
118509
|
+
});
|
|
118510
|
+
};
|
|
118208
118511
|
const response = await modelManager
|
|
118209
118512
|
.fetchOpenAI(path, {
|
|
118210
118513
|
body: serializedBody,
|
|
@@ -118299,44 +118602,31 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118299
118602
|
statusText: responseStatusText
|
|
118300
118603
|
};
|
|
118301
118604
|
}
|
|
118302
|
-
const monitoredResponse = monitorEngineResponseStream({
|
|
118303
|
-
|
|
118304
|
-
|
|
118305
|
-
|
|
118306
|
-
|
|
118307
|
-
|
|
118308
|
-
|
|
118309
|
-
|
|
118310
|
-
|
|
118311
|
-
|
|
118312
|
-
|
|
118313
|
-
|
|
118314
|
-
|
|
118315
|
-
|
|
118316
|
-
|
|
118317
|
-
|
|
118318
|
-
|
|
118319
|
-
|
|
118320
|
-
|
|
118321
|
-
|
|
118322
|
-
|
|
118323
|
-
|
|
118324
|
-
|
|
118325
|
-
|
|
118326
|
-
|
|
118327
|
-
timeToFirstTokenMs,
|
|
118328
|
-
tokensPerSecond: calculateTokensPerSecond({
|
|
118329
|
-
durationMs: latencyMs,
|
|
118330
|
-
totalTokens
|
|
118331
|
-
}),
|
|
118332
|
-
totalTokens
|
|
118333
|
-
});
|
|
118334
|
-
},
|
|
118335
|
-
parallelism: modelManager.parallelism,
|
|
118336
|
-
requestBodyBytes,
|
|
118337
|
-
requestPath: path,
|
|
118338
|
-
requestStartedAt
|
|
118339
|
-
});
|
|
118605
|
+
const monitoredResponse = streamRequested
|
|
118606
|
+
? monitorEngineResponseStream({
|
|
118607
|
+
agentEngineType: configuration.agentEngineType,
|
|
118608
|
+
body: Readable.fromWeb(response.body),
|
|
118609
|
+
contextLength: modelManager.contextLength,
|
|
118610
|
+
engine: configuration.agentEngineType,
|
|
118611
|
+
logger,
|
|
118612
|
+
onComplete: onMonitoringComplete,
|
|
118613
|
+
parallelism: modelManager.parallelism,
|
|
118614
|
+
requestBodyBytes,
|
|
118615
|
+
requestPath: path,
|
|
118616
|
+
requestStartedAt
|
|
118617
|
+
})
|
|
118618
|
+
: monitorEngineResponseSingle({
|
|
118619
|
+
agentEngineType: configuration.agentEngineType,
|
|
118620
|
+
body: Readable.fromWeb(response.body),
|
|
118621
|
+
contextLength: modelManager.contextLength,
|
|
118622
|
+
engine: configuration.agentEngineType,
|
|
118623
|
+
logger,
|
|
118624
|
+
onComplete: onMonitoringComplete,
|
|
118625
|
+
parallelism: modelManager.parallelism,
|
|
118626
|
+
requestBodyBytes,
|
|
118627
|
+
requestPath: path,
|
|
118628
|
+
requestStartedAt
|
|
118629
|
+
});
|
|
118340
118630
|
return {
|
|
118341
118631
|
body: monitoredResponse.stream,
|
|
118342
118632
|
headers: Object.fromEntries(response.headers.entries()),
|
|
@@ -39,5 +39,6 @@ interface MonitorEngineResponseResult {
|
|
|
39
39
|
stream: Readable;
|
|
40
40
|
}
|
|
41
41
|
export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
42
|
+
export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
42
43
|
export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
|
|
43
44
|
export {};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@infersec/conduit",
|
|
3
3
|
"description": "End user conduit agent for connecting local LLMs to the cloud.",
|
|
4
|
-
"version": "1.24.2",
|
|
4
|
+
"version": "1.25.0",
|
|
5
5
|
"bin": {
|
|
6
6
|
"infersec-conduit": "./dist/cli.js"
|
|
7
7
|
},
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"test:format": "prettier --check .",
|
|
28
28
|
"test:lint": "eslint source/**/*.ts",
|
|
29
29
|
"test:types": "tsc -p tsconfig.json --noEmit",
|
|
30
|
-
"test:unit": "vitest run"
|
|
30
|
+
"test:unit": "vitest -c vitest.config.ts run"
|
|
31
31
|
},
|
|
32
32
|
"prettier": "@infersec/prettier",
|
|
33
33
|
"publishConfig": {
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|