@infersec/conduit 1.40.0 → 1.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-Dw1tdbuz.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-CPrgh7rN.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-Dw1tdbuz.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-CPrgh7rN.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -6,7 +6,10 @@ export declare function handleSSERequests({ apiURL, configuration, logger, model
6
6
  configuration: Configuration;
7
7
  logger: Logger;
8
8
  modelID: ULID;
9
- onRequest: (request: ServerToClientAPIRequest) => Promise<APIResponse>;
9
+ onRequest: ({ request, signal }: {
10
+ request: ServerToClientAPIRequest;
11
+ signal?: AbortSignal;
12
+ }) => Promise<APIResponse>;
10
13
  onRequestEnd?: (request: ServerToClientAPIRequest) => Promise<void> | void;
11
14
  onRequestStart?: (request: ServerToClientAPIRequest) => Promise<void> | void;
12
15
  reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
@@ -3,7 +3,8 @@ import type { Configuration } from "../configuration.js";
3
3
  /**
4
4
  * Proxy server requests to the local inference HTTP server.
5
5
  */
6
- export declare function proxyRequest({ configuration, request }: {
6
+ export declare function proxyRequest({ configuration, request, signal }: {
7
7
  configuration: Configuration;
8
8
  request: ServerToClientAPIRequest;
9
+ signal?: AbortSignal;
9
10
  }): Promise<APIResponse>;
@@ -108660,7 +108660,9 @@ class ModelManager extends EventEmitter {
108660
108660
  const timeout = setTimeout(() => {
108661
108661
  controller.abort(new Error("Inference request timeout"));
108662
108662
  }, ENGINE_FETCH_TIMEOUT_MS);
108663
- const effectiveSignal = callerSignal ?? controller.signal;
108663
+ const effectiveSignal = callerSignal
108664
+ ? AbortSignal.any([callerSignal, controller.signal])
108665
+ : controller.signal;
108664
108666
  try {
108665
108667
  return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
108666
108668
  ...opts,
@@ -109240,11 +109242,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
109240
109242
  }
109241
109243
  return;
109242
109244
  }
109243
- const closeError = new Error("Engine response stream closed before completion");
109245
+ const aborted = body.destroyed && body.errored?.name === "AbortError";
109246
+ const closeError = aborted
109247
+ ? null
109248
+ : new Error("Engine response stream closed before completion");
109244
109249
  logEngineMetrics({
109245
109250
  agentEngineType,
109246
- error: closeError,
109247
- level: "error",
109251
+ error: closeError ?? undefined,
109252
+ level: aborted ? "info" : "error",
109248
109253
  logger,
109249
109254
  requestBodyBytes,
109250
109255
  requestPath,
@@ -109382,11 +109387,14 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
109382
109387
  }
109383
109388
  return;
109384
109389
  }
109385
- const closeError = new Error("Engine response stream closed before completion");
109390
+ const aborted = body.destroyed && body.errored?.name === "AbortError";
109391
+ const closeError = aborted
109392
+ ? null
109393
+ : new Error("Engine response stream closed before completion");
109386
109394
  logEngineMetrics({
109387
109395
  agentEngineType,
109388
- error: closeError,
109389
- level: "error",
109396
+ error: closeError ?? undefined,
109397
+ level: aborted ? "info" : "error",
109390
109398
  logger,
109391
109399
  requestBodyBytes,
109392
109400
  requestPath,
@@ -109457,7 +109465,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
109457
109465
  }
109458
109466
  return Math.round(tokensPerSecond);
109459
109467
  }
109460
- async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
109468
+ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics, signal }) {
109461
109469
  function normalizeTokenCount(value) {
109462
109470
  if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
109463
109471
  return value;
@@ -109509,7 +109517,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109509
109517
  headers: {
109510
109518
  "Content-Type": "application/json"
109511
109519
  },
109512
- method: "POST"
109520
+ method: "POST",
109521
+ signal
109513
109522
  })
109514
109523
  .catch(error => {
109515
109524
  logEngineMetrics({
@@ -109631,9 +109640,13 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109631
109640
  function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
109632
109641
  return {
109633
109642
  "/v1/chat/completions": {
109634
- POST: async ({ body }) => {
109643
+ POST: async ({ body, res }) => {
109635
109644
  const modelID = getModelID();
109636
109645
  const modelManager = getModelManager();
109646
+ const abortController = new AbortController();
109647
+ res.on("close", () => {
109648
+ abortController.abort();
109649
+ });
109637
109650
  return proxyOpenAIStreamingRoute({
109638
109651
  body,
109639
109652
  configuration,
@@ -109641,14 +109654,19 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
109641
109654
  modelID,
109642
109655
  modelManager,
109643
109656
  path: "/v1/chat/completions",
109644
- reportMetrics: apiClient.reportPromptMetrics
109657
+ reportMetrics: apiClient.reportPromptMetrics,
109658
+ signal: abortController.signal
109645
109659
  });
109646
109660
  }
109647
109661
  },
109648
109662
  "/v1/completions": {
109649
- POST: async ({ body }) => {
109663
+ POST: async ({ body, res }) => {
109650
109664
  const modelID = getModelID();
109651
109665
  const modelManager = getModelManager();
109666
+ const abortController = new AbortController();
109667
+ res.on("close", () => {
109668
+ abortController.abort();
109669
+ });
109652
109670
  return proxyOpenAIStreamingRoute({
109653
109671
  body,
109654
109672
  configuration,
@@ -109656,7 +109674,8 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
109656
109674
  modelID,
109657
109675
  modelManager,
109658
109676
  path: "/v1/completions",
109659
- reportMetrics: apiClient.reportPromptMetrics
109677
+ reportMetrics: apiClient.reportPromptMetrics,
109678
+ signal: abortController.signal
109660
109679
  });
109661
109680
  }
109662
109681
  },
@@ -109770,7 +109789,7 @@ function extractAnthropicNonStreamUsage(body) {
109770
109789
  return null;
109771
109790
  }
109772
109791
  }
109773
- async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }) {
109792
+ async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics, signal }) {
109774
109793
  function reportMetricsSafe(payload) {
109775
109794
  reportMetrics(payload).catch(error => {
109776
109795
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -109816,7 +109835,8 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
109816
109835
  headers: {
109817
109836
  "Content-Type": "application/json"
109818
109837
  },
109819
- method: "POST"
109838
+ method: "POST",
109839
+ signal
109820
109840
  })
109821
109841
  .catch(error => {
109822
109842
  logEngineMetrics({
@@ -110060,16 +110080,21 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
110060
110080
  function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
110061
110081
  return {
110062
110082
  "/v1/messages": {
110063
- POST: async ({ body }) => {
110083
+ POST: async ({ body, res }) => {
110064
110084
  const modelID = getModelID();
110065
110085
  const modelManager = getModelManager();
110086
+ const abortController = new AbortController();
110087
+ res.on("close", () => {
110088
+ abortController.abort();
110089
+ });
110066
110090
  return proxyAnthropicStreamingRoute({
110067
110091
  body,
110068
110092
  configuration,
110069
110093
  logger,
110070
110094
  modelID,
110071
110095
  modelManager,
110072
- reportMetrics: apiClient.reportPromptMetrics
110096
+ reportMetrics: apiClient.reportPromptMetrics,
110097
+ signal: abortController.signal
110073
110098
  });
110074
110099
  }
110075
110100
  }
@@ -110089,6 +110114,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110089
110114
  const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
110090
110115
  const maxReconnectDelayMs = 30000;
110091
110116
  let reconnectAttempt = 0;
110117
+ const activeRequests = new Map();
110092
110118
  while (!signal?.aborted) {
110093
110119
  const connectionStartedAt = Date.now();
110094
110120
  try {
@@ -110102,11 +110128,26 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110102
110128
  });
110103
110129
  },
110104
110130
  onMessage: (message) => {
110131
+ if (message.event === "cancel") {
110132
+ const { requestID } = JSON.parse(message.data);
110133
+ const controller = activeRequests.get(requestID);
110134
+ if (controller) {
110135
+ logger.info("Cancelling active request", {
110136
+ requestID
110137
+ });
110138
+ controller.abort();
110139
+ activeRequests.delete(requestID);
110140
+ }
110141
+ return;
110142
+ }
110105
110143
  if (message.event !== "request") {
110106
110144
  return;
110107
110145
  }
110108
110146
  const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
110147
+ const perRequestController = new AbortController();
110148
+ activeRequests.set(payload.requestID, perRequestController);
110109
110149
  handleRequest({
110150
+ activeRequests,
110110
110151
  apiURL,
110111
110152
  configuration,
110112
110153
  logger,
@@ -110116,7 +110157,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110116
110157
  onRequestStart,
110117
110158
  reportMetrics,
110118
110159
  request: payload,
110119
- signal
110160
+ signal: perRequestController.signal
110120
110161
  }).catch(error => {
110121
110162
  logger.error("SSE request handler failed", {
110122
110163
  error: asError(error),
@@ -110151,7 +110192,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110151
110192
  }
110152
110193
  }
110153
110194
  }
110154
- async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
110195
+ async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
110155
110196
  function reportMetricsSafe(payload) {
110156
110197
  reportMetrics(payload).catch(error => {
110157
110198
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -110164,7 +110205,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110164
110205
  const requestBytes = calculateRequestBytes(request.body ?? null);
110165
110206
  try {
110166
110207
  await onRequestStart?.(request);
110167
- const response = await onRequest(request);
110208
+ const response = await onRequest({ request, signal });
110168
110209
  const responseMetrics = await streamResponse({
110169
110210
  apiURL,
110170
110211
  configuration,
@@ -110200,37 +110241,42 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110200
110241
  });
110201
110242
  }
110202
110243
  catch (error) {
110203
- logger.error("SSE request failed", {
110204
- error: asError(error),
110205
- requestMethod: request.requestID
110206
- });
110244
+ const isCancelled = signal?.aborted;
110245
+ if (isCancelled) {
110246
+ logger.info("SSE request cancelled", {
110247
+ requestID: request.requestID
110248
+ });
110249
+ }
110250
+ else {
110251
+ logger.error("SSE request failed", {
110252
+ error: asError(error),
110253
+ requestMethod: request.requestID
110254
+ });
110255
+ }
110207
110256
  const failureMessage = "Bad gateway\n\nProxying failed";
110208
110257
  const failureBytes = Buffer.byteLength(failureMessage, "utf8");
110209
110258
  const latencyMs = Math.max(0, Date.now() - requestStartedAt);
110210
- const totalTokens = 0;
110211
- const tokensPerSecond = calculateTokensPerSecond({
110212
- durationMs: latencyMs,
110213
- totalTokens
110214
- });
110215
- const streamHandler = await sendChunkStream({
110216
- apiURL,
110217
- configuration,
110218
- requestID: request.requestID,
110219
- logger
110220
- });
110221
- await streamHandler.sendChunk({
110222
- data: encodeBinaryChunk(Buffer.from(failureMessage)),
110223
- sequence: 0,
110224
- status: 502
110225
- });
110226
- await streamHandler.sendChunk({
110227
- data: null,
110228
- sequence: 1,
110229
- status: 502
110230
- });
110231
- await streamHandler.end();
110259
+ if (!isCancelled) {
110260
+ const streamHandler = await sendChunkStream({
110261
+ apiURL,
110262
+ configuration,
110263
+ requestID: request.requestID,
110264
+ logger
110265
+ });
110266
+ await streamHandler.sendChunk({
110267
+ data: encodeBinaryChunk(Buffer.from(failureMessage)),
110268
+ sequence: 0,
110269
+ status: 502
110270
+ });
110271
+ await streamHandler.sendChunk({
110272
+ data: null,
110273
+ sequence: 1,
110274
+ status: 502
110275
+ });
110276
+ await streamHandler.end();
110277
+ }
110232
110278
  reportMetricsSafe({
110233
- bytes: requestBytes + failureBytes,
110279
+ bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
110234
110280
  completionTokens: 0,
110235
110281
  engine: configuration.agentEngineType,
110236
110282
  endpointId: null,
@@ -110241,14 +110287,15 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110241
110287
  requestId: request.requestID,
110242
110288
  requestMethod: request.method,
110243
110289
  requestPath: request.path,
110244
- responseBytes: failureBytes,
110290
+ responseBytes: isCancelled ? 0 : failureBytes,
110245
110291
  successful: false,
110246
- timeToFirstTokenMs: latencyMs,
110247
- tokensPerSecond,
110248
- totalTokens
110292
+ timeToFirstTokenMs: isCancelled ? null : latencyMs,
110293
+ tokensPerSecond: 0,
110294
+ totalTokens: 0
110249
110295
  });
110250
110296
  }
110251
110297
  finally {
110298
+ activeRequests.delete(request.requestID);
110252
110299
  await onRequestEnd?.(request);
110253
110300
  }
110254
110301
  }
@@ -110446,7 +110493,7 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
110446
110493
  /**
110447
110494
  * Proxy server requests to the local inference HTTP server.
110448
110495
  */
110449
- async function proxyRequest({ configuration, request }) {
110496
+ async function proxyRequest({ configuration, request, signal }) {
110450
110497
  let finalPath = request.path;
110451
110498
  if (request.parameters) {
110452
110499
  Object.entries(request.parameters).forEach(([key, value]) => {
@@ -110463,6 +110510,9 @@ async function proxyRequest({ configuration, request }) {
110463
110510
  method: request.method,
110464
110511
  headers: request.headers
110465
110512
  };
110513
+ if (signal) {
110514
+ fetchOptions.signal = signal;
110515
+ }
110466
110516
  if (request.body) {
110467
110517
  fetchOptions.body =
110468
110518
  typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
@@ -120361,10 +120411,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
120361
120411
  configuration,
120362
120412
  logger,
120363
120413
  modelID: conduitConfiguration.targetModel.id,
120364
- onRequest: async (request) => {
120414
+ onRequest: async ({ request, signal }) => {
120365
120415
  return proxyRequest({
120366
120416
  configuration,
120367
- request
120417
+ request,
120418
+ signal
120368
120419
  });
120369
120420
  },
120370
120421
  onRequestEnd: () => {
@@ -3,13 +3,14 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
- export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }: {
6
+ export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics, signal }: {
7
7
  body: unknown;
8
8
  configuration: Configuration;
9
9
  logger: Logger;
10
10
  modelID: ULID;
11
11
  modelManager: ModelManager;
12
12
  reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
13
+ signal?: AbortSignal;
13
14
  }): Promise<{
14
15
  body: Readable;
15
16
  headers: Record<string, string>;
@@ -3,7 +3,7 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
- export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }: {
6
+ export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics, signal }: {
7
7
  body: unknown;
8
8
  configuration: Configuration;
9
9
  logger: Logger;
@@ -11,6 +11,7 @@ export declare function proxyOpenAIStreamingRoute({ body, configuration, logger,
11
11
  modelManager: ModelManager;
12
12
  path: "/v1/chat/completions" | "/v1/completions";
13
13
  reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
14
+ signal?: AbortSignal;
14
15
  }): Promise<{
15
16
  body: Readable;
16
17
  headers: Record<string, string>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.40.0",
4
+ "version": "1.41.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },