@infersec/conduit 1.20.2 → 1.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-Cqvc5hOj.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-Cqvc5hOj.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -15056,6 +15056,7 @@ const ChatCompletionCreateParamsSchema = object({
|
|
|
15056
15056
|
// ==================== USAGE AND CHOICE SCHEMAS ====================
|
|
15057
15057
|
const ChatCompletionUsageSchema = object({
|
|
15058
15058
|
completion_tokens: number$1(),
|
|
15059
|
+
context_usage: number$1().min(0).max(1).optional(),
|
|
15059
15060
|
prompt_tokens: number$1(),
|
|
15060
15061
|
total_tokens: number$1()
|
|
15061
15062
|
});
|
|
@@ -15775,6 +15776,9 @@ function joinURL(...parts) {
|
|
|
15775
15776
|
.replace("&", "?");
|
|
15776
15777
|
}
|
|
15777
15778
|
|
|
15779
|
+
function isTerminatedError(error) {
|
|
15780
|
+
return error instanceof Error && error.message === "terminated" && error.name === "TypeError";
|
|
15781
|
+
}
|
|
15778
15782
|
async function connectSSE(url, options) {
|
|
15779
15783
|
const response = await fetch(url, {
|
|
15780
15784
|
headers: {
|
|
@@ -108349,18 +108353,23 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
108349
108353
|
if (signal?.aborted) {
|
|
108350
108354
|
return;
|
|
108351
108355
|
}
|
|
108352
|
-
|
|
108353
|
-
|
|
108354
|
-
|
|
108355
|
-
|
|
108356
|
-
|
|
108357
|
-
|
|
108356
|
+
const isTerminated = isTerminatedError(error);
|
|
108357
|
+
if (!isTerminated) {
|
|
108358
|
+
logger.error("SSE connection failed", {
|
|
108359
|
+
error: asError(error)
|
|
108360
|
+
});
|
|
108361
|
+
}
|
|
108362
|
+
if (signal?.aborted) {
|
|
108363
|
+
return;
|
|
108364
|
+
}
|
|
108365
|
+
if (!isTerminated) {
|
|
108366
|
+
const connectionDurationMs = Date.now() - connectionStartedAt;
|
|
108367
|
+
reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
|
|
108368
|
+
const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, 1000 * 2 ** Math.min(6, reconnectAttempt)));
|
|
108369
|
+
logger.warn("SSE disconnected, retrying");
|
|
108370
|
+
await sleep(reconnectDelayMs);
|
|
108371
|
+
}
|
|
108358
108372
|
}
|
|
108359
|
-
const connectionDurationMs = Date.now() - connectionStartedAt;
|
|
108360
|
-
reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
|
|
108361
|
-
const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, 1000 * 2 ** Math.min(6, reconnectAttempt)));
|
|
108362
|
-
logger.warn("SSE disconnected, retrying");
|
|
108363
|
-
await sleep(reconnectDelayMs);
|
|
108364
108373
|
}
|
|
108365
108374
|
}
|
|
108366
108375
|
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request }) {
|
|
@@ -117931,26 +117940,7 @@ function isEngineUsageChunk(value) {
|
|
|
117931
117940
|
}
|
|
117932
117941
|
return true;
|
|
117933
117942
|
}
|
|
117934
|
-
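function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {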
|
|
117935
|
-
const metricsMessage = [
|
|
117936
|
-
"LLM engine stream metrics",
|
|
117937
|
-
`path=${requestPath}`,
|
|
117938
|
-
`bytesTo=${requestBodyBytes}`,
|
|
117939
|
-
`bytesFrom=${responseBytes}`,
|
|
117940
|
-
`promptTokens=${usage?.promptTokens ?? "n/a"}`,
|
|
117941
|
-
`completionTokens=${usage?.completionTokens ?? "n/a"}`,
|
|
117942
|
-
`totalTokens=${usage?.totalTokens ?? "n/a"}`
|
|
117943
|
-
].join(" ");
|
|
117944
|
-
const attributes = {
|
|
117945
|
-
agentEngineType,
|
|
117946
|
-
requestUrl: requestPath
|
|
117947
|
-
};
|
|
117948
|
-
if (error) {
|
|
117949
|
-
attributes.error = error;
|
|
117950
|
-
}
|
|
117951
|
-
logger[level](metricsMessage, attributes);
|
|
117952
|
-
}
|
|
117953
|
-
function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
117943
|
+
function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
117954
117944
|
const startedAt = requestStartedAt ?? Date.now();
|
|
117955
117945
|
const passThrough = new PassThrough();
|
|
117956
117946
|
let responseBytes = 0;
|
|
@@ -117958,29 +117948,45 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
117958
117948
|
let usage = null;
|
|
117959
117949
|
let buffer = "";
|
|
117960
117950
|
let completed = false;
|
|
117961
|
-
function finalize(error) {
|
|
117962
|
-
|
|
117963
|
-
|
|
117964
|
-
|
|
117965
|
-
|
|
117966
|
-
|
|
117967
|
-
|
|
117968
|
-
|
|
117969
|
-
|
|
117970
|
-
requestBodyBytes,
|
|
117971
|
-
responseBytes,
|
|
117972
|
-
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
117973
|
-
usage
|
|
117974
|
-
});
|
|
117975
|
-
if (completion && typeof completion.catch === "function") {
|
|
117976
|
-
completion.catch(error => {
|
|
117977
|
-
logger.error("Engine metrics completion failed", {
|
|
117978
|
-
error: asError(error),
|
|
117979
|
-
requestUrl: requestPath
|
|
117980
|
-
});
|
|
117981
|
-
});
|
|
117951
|
+
function modifyChunkWithUsage(chunk) {
|
|
117952
|
+
const text = chunk.toString("utf8");
|
|
117953
|
+
const lines = text.split("\n");
|
|
117954
|
+
const modifiedLines = [];
|
|
117955
|
+
for (const rawLine of lines) {
|
|
117956
|
+
const line = rawLine.trim();
|
|
117957
|
+
if (!line.startsWith("data:")) {
|
|
117958
|
+
modifiedLines.push(rawLine);
|
|
117959
|
+
continue;
|
|
117982
117960
|
}
|
|
117961
|
+
const payload = line.slice(5).trim();
|
|
117962
|
+
if (!payload || payload === "[DONE]") {
|
|
117963
|
+
modifiedLines.push(rawLine);
|
|
117964
|
+
continue;
|
|
117965
|
+
}
|
|
117966
|
+
try {
|
|
117967
|
+
const parsed = JSON.parse(payload);
|
|
117968
|
+
if (parsed.usage) {
|
|
117969
|
+
const usageChunk = parsed.usage;
|
|
117970
|
+
if (usageChunk.context_usage === undefined &&
|
|
117971
|
+
usageChunk.prompt_tokens !== undefined &&
|
|
117972
|
+
contextLength !== null &&
|
|
117973
|
+
contextLength > 0) {
|
|
117974
|
+
let totalContextSize = contextLength;
|
|
117975
|
+
if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
|
|
117976
|
+
totalContextSize = contextLength / parallelism;
|
|
117977
|
+
}
|
|
117978
|
+
usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
|
|
117979
|
+
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
117980
|
+
continue;
|
|
117981
|
+
}
|
|
117982
|
+
}
|
|
117983
|
+
}
|
|
117984
|
+
catch (_error) {
|
|
117985
|
+
// Ignore malformed chunks
|
|
117986
|
+
}
|
|
117987
|
+
modifiedLines.push(rawLine);
|
|
117983
117988
|
}
|
|
117989
|
+
return Buffer.from(modifiedLines.join("\n"), "utf8");
|
|
117984
117990
|
}
|
|
117985
117991
|
function parseUsageFromBuffer() {
|
|
117986
117992
|
const lines = buffer.split("\n");
|
|
@@ -117997,10 +118003,21 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
117997
118003
|
try {
|
|
117998
118004
|
const parsed = JSON.parse(payload);
|
|
117999
118005
|
if (isEngineUsageChunk(parsed)) {
|
|
118006
|
+
const completionTokens = parsed.usage?.completion_tokens ?? null;
|
|
118007
|
+
const promptTokens = parsed.usage?.prompt_tokens ?? null;
|
|
118008
|
+
const totalTokens = parsed.usage?.total_tokens ?? null;
|
|
118009
|
+
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
118010
|
+
if (contextUsage === null &&
|
|
118011
|
+
promptTokens !== null &&
|
|
118012
|
+
contextLength !== null &&
|
|
118013
|
+
contextLength > 0) {
|
|
118014
|
+
contextUsage = promptTokens / contextLength;
|
|
118015
|
+
}
|
|
118000
118016
|
usage = {
|
|
118001
|
-
completionTokens
|
|
118002
|
-
|
|
118003
|
-
|
|
118017
|
+
completionTokens,
|
|
118018
|
+
contextUsage,
|
|
118019
|
+
promptTokens,
|
|
118020
|
+
totalTokens
|
|
118004
118021
|
};
|
|
118005
118022
|
}
|
|
118006
118023
|
}
|
|
@@ -118009,6 +118026,30 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118009
118026
|
}
|
|
118010
118027
|
}
|
|
118011
118028
|
}
|
|
118029
|
+
function finalize(error) {
|
|
118030
|
+
if (completed) {
|
|
118031
|
+
return;
|
|
118032
|
+
}
|
|
118033
|
+
completed = true;
|
|
118034
|
+
if (onComplete) {
|
|
118035
|
+
const completion = onComplete({
|
|
118036
|
+
durationMs: Math.max(0, Date.now() - startedAt),
|
|
118037
|
+
error,
|
|
118038
|
+
requestBodyBytes,
|
|
118039
|
+
responseBytes,
|
|
118040
|
+
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
118041
|
+
usage
|
|
118042
|
+
});
|
|
118043
|
+
if (completion && typeof completion.catch === "function") {
|
|
118044
|
+
completion.catch(error => {
|
|
118045
|
+
logger.error("Engine metrics completion failed", {
|
|
118046
|
+
error: asError(error),
|
|
118047
|
+
requestUrl: requestPath
|
|
118048
|
+
});
|
|
118049
|
+
});
|
|
118050
|
+
}
|
|
118051
|
+
}
|
|
118052
|
+
}
|
|
118012
118053
|
body.on("data", (chunk) => {
|
|
118013
118054
|
if (firstChunkAt === null) {
|
|
118014
118055
|
firstChunkAt = Date.now();
|
|
@@ -118016,7 +118057,7 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118016
118057
|
responseBytes += chunk.length;
|
|
118017
118058
|
buffer += chunk.toString("utf8");
|
|
118018
118059
|
parseUsageFromBuffer();
|
|
118019
|
-
passThrough.write(chunk);
|
|
118060
|
+
passThrough.write(modifyChunkWithUsage(chunk));
|
|
118020
118061
|
});
|
|
118021
118062
|
body.once("error", err => {
|
|
118022
118063
|
logEngineMetrics({
|
|
@@ -118073,6 +118114,26 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118073
118114
|
stream: passThrough
|
|
118074
118115
|
};
|
|
118075
118116
|
}
|
|
118117
|
+
function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
|
|
118118
|
+
const metricsMessage = [
|
|
118119
|
+
"LLM engine stream metrics",
|
|
118120
|
+
`path=${requestPath}`,
|
|
118121
|
+
`bytesTo=${requestBodyBytes}`,
|
|
118122
|
+
`bytesFrom=${responseBytes}`,
|
|
118123
|
+
`promptTokens=${usage?.promptTokens ?? "n/a"}`,
|
|
118124
|
+
`completionTokens=${usage?.completionTokens ?? "n/a"}`,
|
|
118125
|
+
`totalTokens=${usage?.totalTokens ?? "n/a"}`,
|
|
118126
|
+
`contextUsage=${usage?.contextUsage ?? "n/a"}`
|
|
118127
|
+
].join(" ");
|
|
118128
|
+
const attributes = {
|
|
118129
|
+
agentEngineType,
|
|
118130
|
+
requestUrl: requestPath
|
|
118131
|
+
};
|
|
118132
|
+
if (error) {
|
|
118133
|
+
attributes.error = error;
|
|
118134
|
+
}
|
|
118135
|
+
logger[level](metricsMessage, attributes);
|
|
118136
|
+
}
|
|
118076
118137
|
|
|
118077
118138
|
function isPlainObject(value) {
|
|
118078
118139
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -118222,6 +118283,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118222
118283
|
const monitoredResponse = monitorEngineResponseStream({
|
|
118223
118284
|
agentEngineType: configuration.agentEngineType,
|
|
118224
118285
|
body: Readable.fromWeb(response.body),
|
|
118286
|
+
contextLength: modelManager.contextLength,
|
|
118287
|
+
engine: configuration.agentEngineType,
|
|
118225
118288
|
logger,
|
|
118226
118289
|
onComplete: ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
|
|
118227
118290
|
const completionTokens = normalizeTokenCount(usage?.completionTokens);
|
|
@@ -118250,6 +118313,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118250
118313
|
totalTokens
|
|
118251
118314
|
});
|
|
118252
118315
|
},
|
|
118316
|
+
parallelism: modelManager.parallelism,
|
|
118253
118317
|
requestBodyBytes,
|
|
118254
118318
|
requestPath: path,
|
|
118255
118319
|
requestStartedAt
|
|
@@ -3,6 +3,7 @@ import { LLMEngine } from "@infersec/definitions";
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
export interface EngineUsageMetrics {
|
|
5
5
|
completionTokens: number | null;
|
|
6
|
+
contextUsage: number | null;
|
|
6
7
|
promptTokens: number | null;
|
|
7
8
|
totalTokens: number | null;
|
|
8
9
|
}
|
|
@@ -22,7 +23,10 @@ interface EngineMetricsCompletion {
|
|
|
22
23
|
}
|
|
23
24
|
interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
|
|
24
25
|
body: Readable;
|
|
26
|
+
contextLength: number | null;
|
|
27
|
+
engine: LLMEngine;
|
|
25
28
|
onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
|
|
29
|
+
parallelism: number | null;
|
|
26
30
|
requestStartedAt?: number;
|
|
27
31
|
}
|
|
28
32
|
interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
|
|
@@ -34,6 +38,6 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
|
|
|
34
38
|
interface MonitorEngineResponseResult {
|
|
35
39
|
stream: Readable;
|
|
36
40
|
}
|
|
41
|
+
export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
37
42
|
export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
|
|
38
|
-
export declare function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
39
43
|
export {};
|