@infersec/conduit 1.20.3 → 1.21.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-mkMX6VEU.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-Cqvc5hOj.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-mkMX6VEU.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-Cqvc5hOj.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -15056,6 +15056,7 @@ const ChatCompletionCreateParamsSchema = object({
15056
15056
  // ==================== USAGE AND CHOICE SCHEMAS ====================
15057
15057
  const ChatCompletionUsageSchema = object({
15058
15058
  completion_tokens: number$1(),
15059
+ context_usage: number$1().min(0).max(1).optional(),
15059
15060
  prompt_tokens: number$1(),
15060
15061
  total_tokens: number$1()
15061
15062
  });
@@ -117939,26 +117940,7 @@ function isEngineUsageChunk(value) {
117939
117940
  }
117940
117941
  return true;
117941
117942
  }
117942
- function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
117943
- const metricsMessage = [
117944
- "LLM engine stream metrics",
117945
- `path=${requestPath}`,
117946
- `bytesTo=${requestBodyBytes}`,
117947
- `bytesFrom=${responseBytes}`,
117948
- `promptTokens=${usage?.promptTokens ?? "n/a"}`,
117949
- `completionTokens=${usage?.completionTokens ?? "n/a"}`,
117950
- `totalTokens=${usage?.totalTokens ?? "n/a"}`
117951
- ].join(" ");
117952
- const attributes = {
117953
- agentEngineType,
117954
- requestUrl: requestPath
117955
- };
117956
- if (error) {
117957
- attributes.error = error;
117958
- }
117959
- logger[level](metricsMessage, attributes);
117960
- }
117961
- function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
117943
+ function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
117962
117944
  const startedAt = requestStartedAt ?? Date.now();
117963
117945
  const passThrough = new PassThrough();
117964
117946
  let responseBytes = 0;
@@ -117966,29 +117948,45 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
117966
117948
  let usage = null;
117967
117949
  let buffer = "";
117968
117950
  let completed = false;
117969
- function finalize(error) {
117970
- if (completed) {
117971
- return;
117972
- }
117973
- completed = true;
117974
- if (onComplete) {
117975
- const completion = onComplete({
117976
- durationMs: Math.max(0, Date.now() - startedAt),
117977
- error,
117978
- requestBodyBytes,
117979
- responseBytes,
117980
- timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
117981
- usage
117982
- });
117983
- if (completion && typeof completion.catch === "function") {
117984
- completion.catch(error => {
117985
- logger.error("Engine metrics completion failed", {
117986
- error: asError(error),
117987
- requestUrl: requestPath
117988
- });
117989
- });
117951
+ function modifyChunkWithUsage(chunk) {
117952
+ const text = chunk.toString("utf8");
117953
+ const lines = text.split("\n");
117954
+ const modifiedLines = [];
117955
+ for (const rawLine of lines) {
117956
+ const line = rawLine.trim();
117957
+ if (!line.startsWith("data:")) {
117958
+ modifiedLines.push(rawLine);
117959
+ continue;
117960
+ }
117961
+ const payload = line.slice(5).trim();
117962
+ if (!payload || payload === "[DONE]") {
117963
+ modifiedLines.push(rawLine);
117964
+ continue;
117990
117965
  }
117966
+ try {
117967
+ const parsed = JSON.parse(payload);
117968
+ if (parsed.usage) {
117969
+ const usageChunk = parsed.usage;
117970
+ if (usageChunk.context_usage === undefined &&
117971
+ usageChunk.prompt_tokens !== undefined &&
117972
+ contextLength !== null &&
117973
+ contextLength > 0) {
117974
+ let totalContextSize = contextLength;
117975
+ if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
117976
+ totalContextSize = contextLength / parallelism;
117977
+ }
117978
+ usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
117979
+ modifiedLines.push("data: " + JSON.stringify(parsed));
117980
+ continue;
117981
+ }
117982
+ }
117983
+ }
117984
+ catch (_error) {
117985
+ // Ignore malformed chunks
117986
+ }
117987
+ modifiedLines.push(rawLine);
117991
117988
  }
117989
+ return Buffer.from(modifiedLines.join("\n"), "utf8");
117992
117990
  }
117993
117991
  function parseUsageFromBuffer() {
117994
117992
  const lines = buffer.split("\n");
@@ -118005,10 +118003,21 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118005
118003
  try {
118006
118004
  const parsed = JSON.parse(payload);
118007
118005
  if (isEngineUsageChunk(parsed)) {
118006
+ const completionTokens = parsed.usage?.completion_tokens ?? null;
118007
+ const promptTokens = parsed.usage?.prompt_tokens ?? null;
118008
+ const totalTokens = parsed.usage?.total_tokens ?? null;
118009
+ let contextUsage = parsed.usage?.context_usage ?? null;
118010
+ if (contextUsage === null &&
118011
+ promptTokens !== null &&
118012
+ contextLength !== null &&
118013
+ contextLength > 0) {
118014
+ contextUsage = promptTokens / contextLength;
118015
+ }
118008
118016
  usage = {
118009
- completionTokens: parsed.usage?.completion_tokens ?? null,
118010
- promptTokens: parsed.usage?.prompt_tokens ?? null,
118011
- totalTokens: parsed.usage?.total_tokens ?? null
118017
+ completionTokens,
118018
+ contextUsage,
118019
+ promptTokens,
118020
+ totalTokens
118012
118021
  };
118013
118022
  }
118014
118023
  }
@@ -118017,6 +118026,30 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118017
118026
  }
118018
118027
  }
118019
118028
  }
118029
+ function finalize(error) {
118030
+ if (completed) {
118031
+ return;
118032
+ }
118033
+ completed = true;
118034
+ if (onComplete) {
118035
+ const completion = onComplete({
118036
+ durationMs: Math.max(0, Date.now() - startedAt),
118037
+ error,
118038
+ requestBodyBytes,
118039
+ responseBytes,
118040
+ timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
118041
+ usage
118042
+ });
118043
+ if (completion && typeof completion.catch === "function") {
118044
+ completion.catch(error => {
118045
+ logger.error("Engine metrics completion failed", {
118046
+ error: asError(error),
118047
+ requestUrl: requestPath
118048
+ });
118049
+ });
118050
+ }
118051
+ }
118052
+ }
118020
118053
  body.on("data", (chunk) => {
118021
118054
  if (firstChunkAt === null) {
118022
118055
  firstChunkAt = Date.now();
@@ -118024,7 +118057,7 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118024
118057
  responseBytes += chunk.length;
118025
118058
  buffer += chunk.toString("utf8");
118026
118059
  parseUsageFromBuffer();
118027
- passThrough.write(chunk);
118060
+ passThrough.write(modifyChunkWithUsage(chunk));
118028
118061
  });
118029
118062
  body.once("error", err => {
118030
118063
  logEngineMetrics({
@@ -118081,6 +118114,26 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118081
118114
  stream: passThrough
118082
118115
  };
118083
118116
  }
118117
+ function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
118118
+ const metricsMessage = [
118119
+ "LLM engine stream metrics",
118120
+ `path=${requestPath}`,
118121
+ `bytesTo=${requestBodyBytes}`,
118122
+ `bytesFrom=${responseBytes}`,
118123
+ `promptTokens=${usage?.promptTokens ?? "n/a"}`,
118124
+ `completionTokens=${usage?.completionTokens ?? "n/a"}`,
118125
+ `totalTokens=${usage?.totalTokens ?? "n/a"}`,
118126
+ `contextUsage=${usage?.contextUsage ?? "n/a"}`
118127
+ ].join(" ");
118128
+ const attributes = {
118129
+ agentEngineType,
118130
+ requestUrl: requestPath
118131
+ };
118132
+ if (error) {
118133
+ attributes.error = error;
118134
+ }
118135
+ logger[level](metricsMessage, attributes);
118136
+ }
118084
118137
 
118085
118138
  function isPlainObject(value) {
118086
118139
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -118230,6 +118283,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
118230
118283
  const monitoredResponse = monitorEngineResponseStream({
118231
118284
  agentEngineType: configuration.agentEngineType,
118232
118285
  body: Readable.fromWeb(response.body),
118286
+ contextLength: modelManager.contextLength,
118287
+ engine: configuration.agentEngineType,
118233
118288
  logger,
118234
118289
  onComplete: ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
118235
118290
  const completionTokens = normalizeTokenCount(usage?.completionTokens);
@@ -118258,6 +118313,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
118258
118313
  totalTokens
118259
118314
  });
118260
118315
  },
118316
+ parallelism: modelManager.parallelism,
118261
118317
  requestBodyBytes,
118262
118318
  requestPath: path,
118263
118319
  requestStartedAt
@@ -3,6 +3,7 @@ import { LLMEngine } from "@infersec/definitions";
3
3
  import { Logger } from "@infersec/logger";
4
4
  export interface EngineUsageMetrics {
5
5
  completionTokens: number | null;
6
+ contextUsage: number | null;
6
7
  promptTokens: number | null;
7
8
  totalTokens: number | null;
8
9
  }
@@ -22,7 +23,10 @@ interface EngineMetricsCompletion {
22
23
  }
23
24
  interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
24
25
  body: Readable;
26
+ contextLength: number | null;
27
+ engine: LLMEngine;
25
28
  onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
29
+ parallelism: number | null;
26
30
  requestStartedAt?: number;
27
31
  }
28
32
  interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
@@ -34,6 +38,6 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
34
38
  interface MonitorEngineResponseResult {
35
39
  stream: Readable;
36
40
  }
41
+ export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
37
42
  export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
38
- export declare function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
39
43
  export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.20.3",
4
+ "version": "1.21.1",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },