@infersec/conduit 1.20.2 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-CopKwPN6.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-Cqvc5hOj.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-CopKwPN6.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-Cqvc5hOj.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -15056,6 +15056,7 @@ const ChatCompletionCreateParamsSchema = object({
15056
15056
  // ==================== USAGE AND CHOICE SCHEMAS ====================
15057
15057
  const ChatCompletionUsageSchema = object({
15058
15058
  completion_tokens: number$1(),
15059
+ context_usage: number$1().min(0).max(1).optional(),
15059
15060
  prompt_tokens: number$1(),
15060
15061
  total_tokens: number$1()
15061
15062
  });
@@ -15775,6 +15776,9 @@ function joinURL(...parts) {
15775
15776
  .replace("&", "?");
15776
15777
  }
15777
15778
 
15779
+ function isTerminatedError(error) {
15780
+ return error instanceof Error && error.message === "terminated" && error.name === "TypeError";
15781
+ }
15778
15782
  async function connectSSE(url, options) {
15779
15783
  const response = await fetch(url, {
15780
15784
  headers: {
@@ -108349,18 +108353,23 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
108349
108353
  if (signal?.aborted) {
108350
108354
  return;
108351
108355
  }
108352
- logger.error("SSE connection failed", {
108353
- error: asError(error)
108354
- });
108355
- }
108356
- if (signal?.aborted) {
108357
- return;
108356
+ const isTerminated = isTerminatedError(error);
108357
+ if (!isTerminated) {
108358
+ logger.error("SSE connection failed", {
108359
+ error: asError(error)
108360
+ });
108361
+ }
108362
+ if (signal?.aborted) {
108363
+ return;
108364
+ }
108365
+ if (!isTerminated) {
108366
+ const connectionDurationMs = Date.now() - connectionStartedAt;
108367
+ reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
108368
+ const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, 1000 * 2 ** Math.min(6, reconnectAttempt)));
108369
+ logger.warn("SSE disconnected, retrying");
108370
+ await sleep(reconnectDelayMs);
108371
+ }
108358
108372
  }
108359
- const connectionDurationMs = Date.now() - connectionStartedAt;
108360
- reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
108361
- const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, 1000 * 2 ** Math.min(6, reconnectAttempt)));
108362
- logger.warn("SSE disconnected, retrying");
108363
- await sleep(reconnectDelayMs);
108364
108373
  }
108365
108374
  }
108366
108375
  async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request }) {
@@ -117931,26 +117940,7 @@ function isEngineUsageChunk(value) {
117931
117940
  }
117932
117941
  return true;
117933
117942
  }
117934
- function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
117935
- const metricsMessage = [
117936
- "LLM engine stream metrics",
117937
- `path=${requestPath}`,
117938
- `bytesTo=${requestBodyBytes}`,
117939
- `bytesFrom=${responseBytes}`,
117940
- `promptTokens=${usage?.promptTokens ?? "n/a"}`,
117941
- `completionTokens=${usage?.completionTokens ?? "n/a"}`,
117942
- `totalTokens=${usage?.totalTokens ?? "n/a"}`
117943
- ].join(" ");
117944
- const attributes = {
117945
- agentEngineType,
117946
- requestUrl: requestPath
117947
- };
117948
- if (error) {
117949
- attributes.error = error;
117950
- }
117951
- logger[level](metricsMessage, attributes);
117952
- }
117953
- function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }) {
117943
+ function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
117954
117944
  const startedAt = requestStartedAt ?? Date.now();
117955
117945
  const passThrough = new PassThrough();
117956
117946
  let responseBytes = 0;
@@ -117958,29 +117948,45 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
117958
117948
  let usage = null;
117959
117949
  let buffer = "";
117960
117950
  let completed = false;
117961
- function finalize(error) {
117962
- if (completed) {
117963
- return;
117964
- }
117965
- completed = true;
117966
- if (onComplete) {
117967
- const completion = onComplete({
117968
- durationMs: Math.max(0, Date.now() - startedAt),
117969
- error,
117970
- requestBodyBytes,
117971
- responseBytes,
117972
- timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
117973
- usage
117974
- });
117975
- if (completion && typeof completion.catch === "function") {
117976
- completion.catch(error => {
117977
- logger.error("Engine metrics completion failed", {
117978
- error: asError(error),
117979
- requestUrl: requestPath
117980
- });
117981
- });
117951
+ function modifyChunkWithUsage(chunk) {
117952
+ const text = chunk.toString("utf8");
117953
+ const lines = text.split("\n");
117954
+ const modifiedLines = [];
117955
+ for (const rawLine of lines) {
117956
+ const line = rawLine.trim();
117957
+ if (!line.startsWith("data:")) {
117958
+ modifiedLines.push(rawLine);
117959
+ continue;
117982
117960
  }
117961
+ const payload = line.slice(5).trim();
117962
+ if (!payload || payload === "[DONE]") {
117963
+ modifiedLines.push(rawLine);
117964
+ continue;
117965
+ }
117966
+ try {
117967
+ const parsed = JSON.parse(payload);
117968
+ if (parsed.usage) {
117969
+ const usageChunk = parsed.usage;
117970
+ if (usageChunk.context_usage === undefined &&
117971
+ usageChunk.prompt_tokens !== undefined &&
117972
+ contextLength !== null &&
117973
+ contextLength > 0) {
117974
+ let totalContextSize = contextLength;
117975
+ if (engine === "llama.cpp" && parallelism !== null && parallelism > 0) {
117976
+ totalContextSize = contextLength / parallelism;
117977
+ }
117978
+ usageChunk.context_usage = usageChunk.prompt_tokens / totalContextSize;
117979
+ modifiedLines.push("data: " + JSON.stringify(parsed));
117980
+ continue;
117981
+ }
117982
+ }
117983
+ }
117984
+ catch (_error) {
117985
+ // Ignore malformed chunks
117986
+ }
117987
+ modifiedLines.push(rawLine);
117983
117988
  }
117989
+ return Buffer.from(modifiedLines.join("\n"), "utf8");
117984
117990
  }
117985
117991
  function parseUsageFromBuffer() {
117986
117992
  const lines = buffer.split("\n");
@@ -117997,10 +118003,21 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
117997
118003
  try {
117998
118004
  const parsed = JSON.parse(payload);
117999
118005
  if (isEngineUsageChunk(parsed)) {
118006
+ const completionTokens = parsed.usage?.completion_tokens ?? null;
118007
+ const promptTokens = parsed.usage?.prompt_tokens ?? null;
118008
+ const totalTokens = parsed.usage?.total_tokens ?? null;
118009
+ let contextUsage = parsed.usage?.context_usage ?? null;
118010
+ if (contextUsage === null &&
118011
+ promptTokens !== null &&
118012
+ contextLength !== null &&
118013
+ contextLength > 0) {
118014
+ contextUsage = promptTokens / contextLength;
118015
+ }
118000
118016
  usage = {
118001
- completionTokens: parsed.usage?.completion_tokens ?? null,
118002
- promptTokens: parsed.usage?.prompt_tokens ?? null,
118003
- totalTokens: parsed.usage?.total_tokens ?? null
118017
+ completionTokens,
118018
+ contextUsage,
118019
+ promptTokens,
118020
+ totalTokens
118004
118021
  };
118005
118022
  }
118006
118023
  }
@@ -118009,6 +118026,30 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118009
118026
  }
118010
118027
  }
118011
118028
  }
118029
+ function finalize(error) {
118030
+ if (completed) {
118031
+ return;
118032
+ }
118033
+ completed = true;
118034
+ if (onComplete) {
118035
+ const completion = onComplete({
118036
+ durationMs: Math.max(0, Date.now() - startedAt),
118037
+ error,
118038
+ requestBodyBytes,
118039
+ responseBytes,
118040
+ timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
118041
+ usage
118042
+ });
118043
+ if (completion && typeof completion.catch === "function") {
118044
+ completion.catch(error => {
118045
+ logger.error("Engine metrics completion failed", {
118046
+ error: asError(error),
118047
+ requestUrl: requestPath
118048
+ });
118049
+ });
118050
+ }
118051
+ }
118052
+ }
118012
118053
  body.on("data", (chunk) => {
118013
118054
  if (firstChunkAt === null) {
118014
118055
  firstChunkAt = Date.now();
@@ -118016,7 +118057,7 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118016
118057
  responseBytes += chunk.length;
118017
118058
  buffer += chunk.toString("utf8");
118018
118059
  parseUsageFromBuffer();
118019
- passThrough.write(chunk);
118060
+ passThrough.write(modifyChunkWithUsage(chunk));
118020
118061
  });
118021
118062
  body.once("error", err => {
118022
118063
  logEngineMetrics({
@@ -118073,6 +118114,26 @@ function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete
118073
118114
  stream: passThrough
118074
118115
  };
118075
118116
  }
118117
+ function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
118118
+ const metricsMessage = [
118119
+ "LLM engine stream metrics",
118120
+ `path=${requestPath}`,
118121
+ `bytesTo=${requestBodyBytes}`,
118122
+ `bytesFrom=${responseBytes}`,
118123
+ `promptTokens=${usage?.promptTokens ?? "n/a"}`,
118124
+ `completionTokens=${usage?.completionTokens ?? "n/a"}`,
118125
+ `totalTokens=${usage?.totalTokens ?? "n/a"}`,
118126
+ `contextUsage=${usage?.contextUsage ?? "n/a"}`
118127
+ ].join(" ");
118128
+ const attributes = {
118129
+ agentEngineType,
118130
+ requestUrl: requestPath
118131
+ };
118132
+ if (error) {
118133
+ attributes.error = error;
118134
+ }
118135
+ logger[level](metricsMessage, attributes);
118136
+ }
118076
118137
 
118077
118138
  function isPlainObject(value) {
118078
118139
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -118222,6 +118283,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
118222
118283
  const monitoredResponse = monitorEngineResponseStream({
118223
118284
  agentEngineType: configuration.agentEngineType,
118224
118285
  body: Readable.fromWeb(response.body),
118286
+ contextLength: modelManager.contextLength,
118287
+ engine: configuration.agentEngineType,
118225
118288
  logger,
118226
118289
  onComplete: ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
118227
118290
  const completionTokens = normalizeTokenCount(usage?.completionTokens);
@@ -118250,6 +118313,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
118250
118313
  totalTokens
118251
118314
  });
118252
118315
  },
118316
+ parallelism: modelManager.parallelism,
118253
118317
  requestBodyBytes,
118254
118318
  requestPath: path,
118255
118319
  requestStartedAt
@@ -3,6 +3,7 @@ import { LLMEngine } from "@infersec/definitions";
3
3
  import { Logger } from "@infersec/logger";
4
4
  export interface EngineUsageMetrics {
5
5
  completionTokens: number | null;
6
+ contextUsage: number | null;
6
7
  promptTokens: number | null;
7
8
  totalTokens: number | null;
8
9
  }
@@ -22,7 +23,10 @@ interface EngineMetricsCompletion {
22
23
  }
23
24
  interface MonitorEngineResponseOptions extends EngineMetricsLoggerOptions {
24
25
  body: Readable;
26
+ contextLength: number | null;
27
+ engine: LLMEngine;
25
28
  onComplete?: (result: EngineMetricsCompletion) => void | Promise<void>;
29
+ parallelism: number | null;
26
30
  requestStartedAt?: number;
27
31
  }
28
32
  interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
@@ -34,6 +38,6 @@ interface EngineMetricsLogOptions extends EngineMetricsLoggerOptions {
34
38
  interface MonitorEngineResponseResult {
35
39
  stream: Readable;
36
40
  }
41
+ export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
37
42
  export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
38
- export declare function monitorEngineResponseStream({ agentEngineType, body, logger, onComplete, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
39
43
  export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.20.2",
4
+ "version": "1.21.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },