@infersec/conduit 1.20.3 → 1.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-Cqvc5hOj.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-Cqvc5hOj.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -15056,6 +15056,7 @@ const ChatCompletionCreateParamsSchema = object({
|
|
|
15056
15056
|
// ==================== USAGE AND CHOICE SCHEMAS ====================
|
|
15057
15057
|
/**
 * Schema for the `usage` object attached to a chat completion.
 *
 * `context_usage` is optional and, when present, must be a number in the
 * inclusive range 0..1. The three token counters are required numbers.
 */
const ChatCompletionUsageSchema = object({
    completion_tokens: number$1(),
    // Optional ratio, validated to lie within [0, 1].
    context_usage: number$1().min(0).max(1).optional(),
    prompt_tokens: number$1(),
    total_tokens: number$1()
});
|
|
@@ -117939,26 +117940,7 @@ function isEngineUsageChunk(value) {
|
|
|
117939
117940
|
}
|
|
117940
117941
|
return true;
|
|
117941
117942
|
}
|
|
117942
|
-
function
|
|
117943
|
-
const metricsMessage = [
|
|
117944
|
-
"LLM engine stream metrics",
|
|
117945
|
-
`path=${requestPath}`,
|
|
117946
|
-
`bytesTo=${requestBodyBytes}`,
|
|
117947
|
-
`bytesFrom=${responseBytes}`,
|
|
117948
|
-
`promptTokens=${usage?.promptTokens ?? "n/a"}`,
|
|
117949
|
-
`completionTokens=${usage?.completionTokens ?? "n/a"}`,
|
|
117950
|
-
`totalTokens=${usage?.totalTokens ?? "n/a"}`
|
|
117951
|
-
].join(" ");
|
|
117952
|
-
const attributes = {
|
|
117953
|
-
agentEngineType,
|
|
117954
|
-
requestUrl: requestPath
|
|
117955
|
-
};
|
|
117956
|
-
if (error) {
|
|
117957
|
-
attributes.error = error;
|
|
117958
|
-
}
|
|
117959
|
-
logger[level](metricsMessage, attributes);
|
|
117960
|
-
}
|
|
117961
|
-
function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
117943
|
+
function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
117962
117944
|
const startedAt = requestStartedAt ?? Date.now();
|
|
117963
117945
|
const passThrough = new PassThrough();
|
|
117964
117946
|
let responseBytes = 0;
|
|
@@ -117966,29 +117948,45 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
117966
117948
|
let usage = null;
|
|
117967
117949
|
let buffer = "";
|
|
117968
117950
|
let completed = false;
|
|
117969
|
-
function
|
|
117970
|
-
|
|
117971
|
-
|
|
117972
|
-
|
|
117973
|
-
|
|
117974
|
-
|
|
117975
|
-
|
|
117976
|
-
|
|
117977
|
-
|
|
117978
|
-
|
|
117979
|
-
|
|
117980
|
-
|
|
117981
|
-
|
|
117982
|
-
|
|
117983
|
-
if (completion && typeof completion.catch === "function") {
|
|
117984
|
-
completion.catch(error => {
|
|
117985
|
-
logger.error("Engine metrics completion failed", {
|
|
117986
|
-
error: asError(error),
|
|
117987
|
-
requestUrl: requestPath
|
|
117988
|
-
});
|
|
117989
|
-
});
|
|
117951
|
+
function modifyChunkWithUsage(chunk) {
|
|
117952
|
+
const text = chunk.toString("utf8");
|
|
117953
|
+
const lines = text.split("\n");
|
|
117954
|
+
const modifiedLines = [];
|
|
117955
|
+
for (const rawLine of lines) {
|
|
117956
|
+
const line = rawLine.trim();
|
|
117957
|
+
if (!line.startsWith("data:")) {
|
|
117958
|
+
modifiedLines.push(rawLine);
|
|
117959
|
+
continue;
|
|
117960
|
+
}
|
|
117961
|
+
const payload = line.slice(5).trim();
|
|
117962
|
+
if (!payload || payload === "[DONE]") {
|
|
117963
|
+
modifiedLines.push(rawLine);
|
|
117964
|
+
continue;
|
|
117990
117965
|
}
|
|
117966
|
+
try {
|
|
117967
|
+
const parsed = JSON.parse(payload);
|
|
117968
|
+
if (parsed.usage) {
|
|
117969
|
+
const usageChunk = parsed.usage;
|
|
117970
|
+
if (usageChunk.context_usage === undefined &&
|
|
117971
|
+
usageChunk.prompt_tokens !== undefined &&
|
|
117972
|
+
contextLength !== null &&
|
|
117973
|
+
contextLength > 0) {
|
|
117974
|
+
let totalContextSize = contextLength;
|
|
117975
|
+
if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
|
|
117976
|
+
totalContextSize = contextLength / parallelism;
|
|
117977
|
+
}
|
|
117978
|
+
usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
|
|
117979
|
+
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
117980
|
+
continue;
|
|
117981
|
+
}
|
|
117982
|
+
}
|
|
117983
|
+
}
|
|
117984
|
+
catch (_error) {
|
|
117985
|
+
// Ignore malformed chunks
|
|
117986
|
+
}
|
|
117987
|
+
modifiedLines.push(rawLine);
|
|
117991
117988
|
}
|
|
117989
|
+
return Buffer.from(modifiedLines.join("\n"), "utf8");
|
|
117992
117990
|
}
|
|
117993
117991
|
function parseUsageFromBuffer() {
|
|
117994
117992
|
const lines = buffer.split("\n");
|
|
@@ -118005,10 +118003,21 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118005
118003
|
try {
|
|
118006
118004
|
const parsed = JSON.parse(payload);
|
|
118007
118005
|
if (isEngineUsageChunk(parsed)) {
|
|
118006
|
+
const completionTokens = parsed.usage?.completion_tokens ?? null;
|
|
118007
|
+
const promptTokens = parsed.usage?.prompt_tokens ?? null;
|
|
118008
|
+
const totalTokens = parsed.usage?.total_tokens ?? null;
|
|
118009
|
+
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
118010
|
+
if (contextUsage === null &&
|
|
118011
|
+
promptTokens !== null &&
|
|
118012
|
+
contextLength !== null &&
|
|
118013
|
+
contextLength > 0) {
|
|
118014
|
+
contextUsage = promptTokens / contextLength;
|
|
118015
|
+
}
|
|
118008
118016
|
usage = {
|
|
118009
|
-
completionTokens
|
|
118010
|
-
|
|
118011
|
-
|
|
118017
|
+
completionTokens,
|
|
118018
|
+
contextUsage,
|
|
118019
|
+
promptTokens,
|
|
118020
|
+
totalTokens
|
|
118012
118021
|
};
|
|
118013
118022
|
}
|
|
118014
118023
|
}
|
|
@@ -118017,6 +118026,30 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118017
118026
|
}
|
|
118018
118027
|
}
|
|
118019
118028
|
}
|
|
118029
|
+
function finalize(error) {
|
|
118030
|
+
if (completed) {
|
|
118031
|
+
return;
|
|
118032
|
+
}
|
|
118033
|
+
completed = true;
|
|
118034
|
+
if (onComplete) {
|
|
118035
|
+
const completion = onComplete({
|
|
118036
|
+
durationMs: Math.max(0, Date.now() - startedAt),
|
|
118037
|
+
error,
|
|
118038
|
+
requestBodyBytes,
|
|
118039
|
+
responseBytes,
|
|
118040
|
+
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
118041
|
+
usage
|
|
118042
|
+
});
|
|
118043
|
+
if (completion && typeof completion.catch === "function") {
|
|
118044
|
+
completion.catch(error => {
|
|
118045
|
+
logger.error("Engine metrics completion failed", {
|
|
118046
|
+
error: asError(error),
|
|
118047
|
+
requestUrl: requestPath
|
|
118048
|
+
});
|
|
118049
|
+
});
|
|
118050
|
+
}
|
|
118051
|
+
}
|
|
118052
|
+
}
|
|
118020
118053
|
body.on("data", (chunk) => {
|
|
118021
118054
|
if (firstChunkAt === null) {
|
|
118022
118055
|
firstChunkAt = Date.now();
|
|
@@ -118024,7 +118057,7 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118024
118057
|
responseBytes += chunk.length;
|
|
118025
118058
|
buffer += chunk.toString("utf8");
|
|
118026
118059
|
parseUsageFromBuffer();
|
|
118027
|
-
passThrough.write(chunk);
|
|
118060
|
+
passThrough.write(modifyChunkWithUsage(chunk));
|
|
118028
118061
|
});
|
|
118029
118062
|
body.once("error", err => {
|
|
118030
118063
|
logEngineMetrics({
|
|
@@ -118081,6 +118114,26 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
|
|
|
118081
118114
|
stream: passThrough
|
|
118082
118115
|
};
|
|
118083
118116
|
}
|
|
118117
|
+
function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
|
|
118118
|
+
const metricsMessage = [
|
|
118119
|
+
"LLM engine stream metrics",
|
|
118120
|
+
`path=${requestPath}`,
|
|
118121
|
+
`bytesTo=${requestBodyBytes}`,
|
|
118122
|
+
`bytesFrom=${responseBytes}`,
|
|
118123
|
+
`promptTokens=${usage?.promptTokens ?? "n/a"}`,
|
|
118124
|
+
`completionTokens=${usage?.completionTokens ?? "n/a"}`,
|
|
118125
|
+
`totalTokens=${usage?.totalTokens ?? "n/a"}`,
|
|
118126
|
+
`contextUsage=${usage?.contextUsage ?? "n/a"}`
|
|
118127
|
+
].join(" ");
|
|
118128
|
+
const attributes = {
|
|
118129
|
+
agentEngineType,
|
|
118130
|
+
requestUrl: requestPath
|
|
118131
|
+
};
|
|
118132
|
+
if (error) {
|
|
118133
|
+
attributes.error = error;
|
|
118134
|
+
}
|
|
118135
|
+
logger[level](metricsMessage, attributes);
|
|
118136
|
+
}
|
|
118084
118137
|
|
|
118085
118138
|
function isPlainObject(value) {
|
|
118086
118139
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -118230,6 +118283,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118230
118283
|
const monitoredResponse = monitorEngineResponseStream({
|
|
118231
118284
|
agentEngineType: configuration.agentEngineType,
|
|
118232
118285
|
body: Readable.fromWeb(response.body),
|
|
118286
|
+
contextLength: modelManager.contextLength,
|
|
118287
|
+
engine: configuration.agentEngineType,
|
|
118233
118288
|
logger,
|
|
118234
118289
|
onComplete: ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
|
|
118235
118290
|
const completionTokens = normalizeTokenCount(usage?.completionTokens);
|
|
@@ -118258,6 +118313,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
118258
118313
|
totalTokens
|
|
118259
118314
|
});
|
|
118260
118315
|
},
|
|
118316
|
+
parallelism: modelManager.parallelism,
|
|
118261
118317
|
requestBodyBytes,
|
|
118262
118318
|
requestPath: path,
|
|
118263
118319
|
requestStartedAt
|
|
@@ -3,6 +3,7 @@ import { LLMEngine } from "@infersec/definitions";
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
/**
 * Token usage figures extracted from an engine response.
 * Every field is `null` when the corresponding value was not available.
 */
export interface EngineUsageMetrics {
    /** Number of tokens generated for the completion. */
    completionTokens: number | null;
    /** Ratio of prompt tokens to the available context length. */
    contextUsage: number | null;
    /** Number of tokens in the prompt. */
    promptTokens: number | null;
    /** Total token count reported by the engine. */
    totalTokens: number | null;
}
|
|
@@ -22,7 +23,10 @@ interface EngineMetricsCompletion {
|
|
|
22
23
|
}
|
|
23
24
|
/** Options accepted by `monitorEngineResponseStream`. */
interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
    /** Response stream received from the engine. */
    body: Readable;
    /** Context length used to derive context usage; `null` disables the calculation. */
    contextLength: number | null;
    /** Engine that produced the response. */
    engine: LLMEngine;
    /** Optional callback that receives the final metrics; may return a promise. */
    onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
    /** Parallelism setting of the engine, or `null` when not set. */
    parallelism: number | null;
    /** Epoch milliseconds at which the request started. */
    requestStartedAt?: number;
}
|
|
28
32
|
interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
|
|
@@ -34,6 +38,6 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
|
|
|
34
38
|
/** Value returned by `monitorEngineResponseStream`. */
interface MonitorEngineResponseResult {
    /** Stream that carries the monitored response data. */
    stream: Readable;
}
|
|
41
|
+
/**
 * Wraps an engine response stream so that metrics can be collected while the
 * data is forwarded.
 *
 * `contextLength`, `engine` and `parallelism` are used to derive a context
 * usage ratio for usage data found in the stream.
 *
 * @returns An object whose `stream` property is the readable stream to hand
 *          on in place of `body`.
 */
export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
37
42
|
/**
 * Logs a one-line summary of an engine exchange (request path, bytes sent and
 * received, token usage) through the logger method named by `level`.
 */
export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
|
|
38
|
-
export declare function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
|
|
39
43
|
export {};
|