@infersec/conduit 1.40.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-Dw1tdbuz.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-DGdf6ycx.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-Dw1tdbuz.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-DGdf6ycx.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -1,5 +1,5 @@
1
1
  import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
2
- import { implementAPIReference } from "@infersec/fetch";
2
+ import { implementAPIReference, type APIRequest } from "@infersec/fetch";
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { APIClient } from "../apiClient/index.js";
5
5
  import { Configuration } from "../configuration.js";
@@ -19,7 +19,7 @@ export declare function createPostMessagesHandler(options: {
19
19
  getModelManager: () => ModelManager;
20
20
  logger: Logger;
21
21
  }): (params: {
22
- req: import("@infersec/fetch").APIRequest;
22
+ req: APIRequest;
23
23
  res: import("@infersec/fetch").APIResponse;
24
24
  parameters: Record<string, never>;
25
25
  query: Record<string, never>;
@@ -1,5 +1,5 @@
1
1
  import { API_CLIENT_CONDUIT_OPENAI_REFERENCE } from "@infersec/definitions";
2
- import { implementAPIReference } from "@infersec/fetch";
2
+ import { implementAPIReference, type APIRequest } from "@infersec/fetch";
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { APIClient } from "../apiClient/index.js";
5
5
  import { Configuration } from "../configuration.js";
@@ -21,7 +21,7 @@ export declare function createGetModelsHandler(options: {
21
21
  logger: Logger;
22
22
  startup: number;
23
23
  }): (params: {
24
- req: import("@infersec/fetch").APIRequest;
24
+ req: APIRequest;
25
25
  res: import("@infersec/fetch").APIResponse;
26
26
  parameters: Record<string, never>;
27
27
  query: Record<string, never>;
@@ -66,7 +66,7 @@ export declare function createPostChatCompletionsHandler(options: {
66
66
  logger: Logger;
67
67
  startup: number;
68
68
  }): (params: {
69
- req: import("@infersec/fetch").APIRequest;
69
+ req: APIRequest;
70
70
  res: import("@infersec/fetch").APIResponse;
71
71
  parameters: Record<string, never>;
72
72
  query: Record<string, never>;
@@ -172,7 +172,7 @@ export declare function createPostCompletionsHandler(options: {
172
172
  logger: Logger;
173
173
  startup: number;
174
174
  }): (params: {
175
- req: import("@infersec/fetch").APIRequest;
175
+ req: APIRequest;
176
176
  res: import("@infersec/fetch").APIResponse;
177
177
  parameters: Record<string, never>;
178
178
  query: Record<string, never>;
@@ -6,7 +6,10 @@ export declare function handleSSERequests({ apiURL, configuration, logger, model
6
6
  configuration: Configuration;
7
7
  logger: Logger;
8
8
  modelID: ULID;
9
- onRequest: (request: ServerToClientAPIRequest) => Promise<APIResponse>;
9
+ onRequest: ({ request, signal }: {
10
+ request: ServerToClientAPIRequest;
11
+ signal?: AbortSignal;
12
+ }) => Promise<APIResponse>;
10
13
  onRequestEnd?: (request: ServerToClientAPIRequest) => Promise<void> | void;
11
14
  onRequestStart?: (request: ServerToClientAPIRequest) => Promise<void> | void;
12
15
  reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
@@ -3,7 +3,8 @@ import type { Configuration } from "../configuration.js";
3
3
  /**
4
4
  * Proxy server requests to the local inference HTTP server.
5
5
  */
6
- export declare function proxyRequest({ configuration, request }: {
6
+ export declare function proxyRequest({ configuration, request, signal }: {
7
7
  configuration: Configuration;
8
8
  request: ServerToClientAPIRequest;
9
+ signal?: AbortSignal;
9
10
  }): Promise<APIResponse>;
@@ -15622,21 +15622,30 @@ const ServerToClientAPIRequestSchema = APIRequestSchema.extend({
15622
15622
 
15623
15623
  _enum(["inf-end", "inf-src", "storage"]);
15624
15624
 
15625
- const PlanSlugSchema = _enum(["basic", "business", "free-trial", "open", "team"]);
15626
- object({
15627
- href: string$1(),
15628
- limits: object({
15629
- endpoints: number$1().int().nonnegative(),
15630
- sources: number$1().int().nonnegative(),
15631
- users: number$1().int().nonnegative()
15632
- }),
15633
- monthlyCostEUR: number$1().int().nonnegative(),
15634
- name: string$1(),
15635
- slug: PlanSlugSchema,
15636
- trialDays: number$1().int().positive().nullable(),
15637
- websiteFeatures: array(string$1()),
15638
- websiteIconPath: string$1()
15625
+ const ConnectedSourceMetaSchema = object({
15626
+ chargeName: literal("connected_source"),
15627
+ unitCost: number$1(),
15628
+ units: number$1()
15629
+ });
15630
+ const TokenInputMetaSchema = object({
15631
+ chargeName: literal("tokens_input"),
15632
+ modelId: string$1().nullable(),
15633
+ requestId: string$1().nullable(),
15634
+ unitCost: number$1(),
15635
+ units: number$1()
15639
15636
  });
15637
+ const TokenOutputMetaSchema = object({
15638
+ chargeName: literal("tokens_output"),
15639
+ modelId: string$1().nullable(),
15640
+ requestId: string$1().nullable(),
15641
+ unitCost: number$1(),
15642
+ units: number$1()
15643
+ });
15644
+ discriminatedUnion("chargeName", [
15645
+ ConnectedSourceMetaSchema,
15646
+ TokenInputMetaSchema,
15647
+ TokenOutputMetaSchema
15648
+ ]);
15640
15649
 
15641
15650
  function readEnvIntegerOptional({ defaultValue, name }) {
15642
15651
  const str = readEnvStringOptional(name, `${defaultValue}`).trim();
@@ -108660,7 +108669,9 @@ class ModelManager extends EventEmitter {
108660
108669
  const timeout = setTimeout(() => {
108661
108670
  controller.abort(new Error("Inference request timeout"));
108662
108671
  }, ENGINE_FETCH_TIMEOUT_MS);
108663
- const effectiveSignal = callerSignal ?? controller.signal;
108672
+ const effectiveSignal = callerSignal
108673
+ ? AbortSignal.any([callerSignal, controller.signal])
108674
+ : controller.signal;
108664
108675
  try {
108665
108676
  return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
108666
108677
  ...opts,
@@ -109240,11 +109251,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
109240
109251
  }
109241
109252
  return;
109242
109253
  }
109243
- const closeError = new Error("Engine response stream closed before completion");
109254
+ const aborted = body.destroyed && body.errored?.name === "AbortError";
109255
+ const closeError = aborted
109256
+ ? null
109257
+ : new Error("Engine response stream closed before completion");
109244
109258
  logEngineMetrics({
109245
109259
  agentEngineType,
109246
- error: closeError,
109247
- level: "error",
109260
+ error: closeError ?? undefined,
109261
+ level: aborted ? "info" : "error",
109248
109262
  logger,
109249
109263
  requestBodyBytes,
109250
109264
  requestPath,
@@ -109382,11 +109396,14 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
109382
109396
  }
109383
109397
  return;
109384
109398
  }
109385
- const closeError = new Error("Engine response stream closed before completion");
109399
+ const aborted = body.destroyed && body.errored?.name === "AbortError";
109400
+ const closeError = aborted
109401
+ ? null
109402
+ : new Error("Engine response stream closed before completion");
109386
109403
  logEngineMetrics({
109387
109404
  agentEngineType,
109388
- error: closeError,
109389
- level: "error",
109405
+ error: closeError ?? undefined,
109406
+ level: aborted ? "info" : "error",
109390
109407
  logger,
109391
109408
  requestBodyBytes,
109392
109409
  requestPath,
@@ -109457,7 +109474,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
109457
109474
  }
109458
109475
  return Math.round(tokensPerSecond);
109459
109476
  }
109460
- async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
109477
+ async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
109461
109478
  function normalizeTokenCount(value) {
109462
109479
  if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
109463
109480
  return value;
@@ -109485,7 +109502,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109485
109502
  bytes: requestBodyBytes + responseBytes,
109486
109503
  completionTokens,
109487
109504
  engine: configuration.agentEngineType,
109488
- endpointId: null,
109505
+ endpointId: endpointId ?? null,
109489
109506
  latencyMs,
109490
109507
  modelId: modelID,
109491
109508
  promptTokens,
@@ -109509,7 +109526,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109509
109526
  headers: {
109510
109527
  "Content-Type": "application/json"
109511
109528
  },
109512
- method: "POST"
109529
+ method: "POST",
109530
+ signal
109513
109531
  })
109514
109532
  .catch(error => {
109515
109533
  logEngineMetrics({
@@ -109527,7 +109545,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109527
109545
  bytes: requestBodyBytes,
109528
109546
  completionTokens: 0,
109529
109547
  engine: configuration.agentEngineType,
109530
- endpointId: null,
109548
+ endpointId: endpointId ?? null,
109531
109549
  latencyMs,
109532
109550
  modelId: modelID,
109533
109551
  promptTokens: 0,
@@ -109577,7 +109595,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109577
109595
  bytes: requestBodyBytes,
109578
109596
  completionTokens: 0,
109579
109597
  engine: configuration.agentEngineType,
109580
- endpointId: null,
109598
+ endpointId: endpointId ?? null,
109581
109599
  latencyMs,
109582
109600
  modelId: modelID,
109583
109601
  promptTokens: 0,
@@ -109628,35 +109646,52 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109628
109646
  };
109629
109647
  }
109630
109648
 
109649
+ function extractEndpointId$1(req) {
109650
+ const value = req.headers["x-endpoint-id"];
109651
+ const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
109652
+ return raw && isValid(raw) ? raw : null;
109653
+ }
109631
109654
  function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
109632
109655
  return {
109633
109656
  "/v1/chat/completions": {
109634
- POST: async ({ body }) => {
109657
+ POST: async ({ body, req, res }) => {
109635
109658
  const modelID = getModelID();
109636
109659
  const modelManager = getModelManager();
109660
+ const abortController = new AbortController();
109661
+ res.on("close", () => {
109662
+ abortController.abort();
109663
+ });
109637
109664
  return proxyOpenAIStreamingRoute({
109638
109665
  body,
109639
109666
  configuration,
109667
+ endpointId: extractEndpointId$1(req),
109640
109668
  logger,
109641
109669
  modelID,
109642
109670
  modelManager,
109643
109671
  path: "/v1/chat/completions",
109644
- reportMetrics: apiClient.reportPromptMetrics
109672
+ reportMetrics: apiClient.reportPromptMetrics,
109673
+ signal: abortController.signal
109645
109674
  });
109646
109675
  }
109647
109676
  },
109648
109677
  "/v1/completions": {
109649
- POST: async ({ body }) => {
109678
+ POST: async ({ body, req, res }) => {
109650
109679
  const modelID = getModelID();
109651
109680
  const modelManager = getModelManager();
109681
+ const abortController = new AbortController();
109682
+ res.on("close", () => {
109683
+ abortController.abort();
109684
+ });
109652
109685
  return proxyOpenAIStreamingRoute({
109653
109686
  body,
109654
109687
  configuration,
109688
+ endpointId: extractEndpointId$1(req),
109655
109689
  logger,
109656
109690
  modelID,
109657
109691
  modelManager,
109658
109692
  path: "/v1/completions",
109659
- reportMetrics: apiClient.reportPromptMetrics
109693
+ reportMetrics: apiClient.reportPromptMetrics,
109694
+ signal: abortController.signal
109660
109695
  });
109661
109696
  }
109662
109697
  },
@@ -109770,7 +109805,7 @@ function extractAnthropicNonStreamUsage(body) {
109770
109805
  return null;
109771
109806
  }
109772
109807
  }
109773
- async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }) {
109808
+ async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
109774
109809
  function reportMetricsSafe(payload) {
109775
109810
  reportMetrics(payload).catch(error => {
109776
109811
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -109792,7 +109827,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
109792
109827
  bytes: requestBodyBytes + responseBytes,
109793
109828
  completionTokens,
109794
109829
  engine: configuration.agentEngineType,
109795
- endpointId: null,
109830
+ endpointId: endpointId ?? null,
109796
109831
  latencyMs,
109797
109832
  modelId: modelID,
109798
109833
  promptTokens,
@@ -109816,7 +109851,8 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
109816
109851
  headers: {
109817
109852
  "Content-Type": "application/json"
109818
109853
  },
109819
- method: "POST"
109854
+ method: "POST",
109855
+ signal
109820
109856
  })
109821
109857
  .catch(error => {
109822
109858
  logEngineMetrics({
@@ -109834,7 +109870,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
109834
109870
  bytes: requestBodyBytes,
109835
109871
  completionTokens: 0,
109836
109872
  engine: configuration.agentEngineType,
109837
- endpointId: null,
109873
+ endpointId: endpointId ?? null,
109838
109874
  latencyMs,
109839
109875
  modelId: modelID,
109840
109876
  promptTokens: 0,
@@ -109879,7 +109915,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
109879
109915
  bytes: requestBodyBytes,
109880
109916
  completionTokens: 0,
109881
109917
  engine: configuration.agentEngineType,
109882
- endpointId: null,
109918
+ endpointId: endpointId ?? null,
109883
109919
  latencyMs,
109884
109920
  modelId: modelID,
109885
109921
  promptTokens: 0,
@@ -110057,19 +110093,30 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
110057
110093
  };
110058
110094
  }
110059
110095
 
110096
+ function extractEndpointId(req) {
110097
+ const value = req.headers["x-endpoint-id"];
110098
+ const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
110099
+ return raw && isValid(raw) ? raw : null;
110100
+ }
110060
110101
  function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
110061
110102
  return {
110062
110103
  "/v1/messages": {
110063
- POST: async ({ body }) => {
110104
+ POST: async ({ body, req, res }) => {
110064
110105
  const modelID = getModelID();
110065
110106
  const modelManager = getModelManager();
110107
+ const abortController = new AbortController();
110108
+ res.on("close", () => {
110109
+ abortController.abort();
110110
+ });
110066
110111
  return proxyAnthropicStreamingRoute({
110067
110112
  body,
110068
110113
  configuration,
110114
+ endpointId: extractEndpointId(req),
110069
110115
  logger,
110070
110116
  modelID,
110071
110117
  modelManager,
110072
- reportMetrics: apiClient.reportPromptMetrics
110118
+ reportMetrics: apiClient.reportPromptMetrics,
110119
+ signal: abortController.signal
110073
110120
  });
110074
110121
  }
110075
110122
  }
@@ -110089,6 +110136,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110089
110136
  const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
110090
110137
  const maxReconnectDelayMs = 30000;
110091
110138
  let reconnectAttempt = 0;
110139
+ const activeRequests = new Map();
110092
110140
  while (!signal?.aborted) {
110093
110141
  const connectionStartedAt = Date.now();
110094
110142
  try {
@@ -110102,11 +110150,29 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110102
110150
  });
110103
110151
  },
110104
110152
  onMessage: (message) => {
110153
+ if (message.event === "cancel") {
110154
+ const { requestID } = JSON.parse(message.data);
110155
+ const controller = activeRequests.get(requestID);
110156
+ if (controller) {
110157
+ logger.info("Cancelling active request", {
110158
+ requestID
110159
+ });
110160
+ controller.abort();
110161
+ activeRequests.delete(requestID);
110162
+ }
110163
+ return;
110164
+ }
110105
110165
  if (message.event !== "request") {
110106
110166
  return;
110107
110167
  }
110108
110168
  const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
110169
+ const perRequestController = new AbortController();
110170
+ activeRequests.set(payload.requestID, perRequestController);
110171
+ const effectiveSignal = signal
110172
+ ? AbortSignal.any([perRequestController.signal, signal])
110173
+ : perRequestController.signal;
110109
110174
  handleRequest({
110175
+ activeRequests,
110110
110176
  apiURL,
110111
110177
  configuration,
110112
110178
  logger,
@@ -110116,7 +110182,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110116
110182
  onRequestStart,
110117
110183
  reportMetrics,
110118
110184
  request: payload,
110119
- signal
110185
+ signal: effectiveSignal
110120
110186
  }).catch(error => {
110121
110187
  logger.error("SSE request handler failed", {
110122
110188
  error: asError(error),
@@ -110151,7 +110217,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
110151
110217
  }
110152
110218
  }
110153
110219
  }
110154
- async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
110220
+ async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
110155
110221
  function reportMetricsSafe(payload) {
110156
110222
  reportMetrics(payload).catch(error => {
110157
110223
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -110160,11 +110226,12 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110160
110226
  });
110161
110227
  });
110162
110228
  }
110229
+ const endpointId = request.parameters?.endpointID ?? null;
110163
110230
  const requestStartedAt = Date.now();
110164
110231
  const requestBytes = calculateRequestBytes(request.body ?? null);
110165
110232
  try {
110166
110233
  await onRequestStart?.(request);
110167
- const response = await onRequest(request);
110234
+ const response = await onRequest({ request, signal });
110168
110235
  const responseMetrics = await streamResponse({
110169
110236
  apiURL,
110170
110237
  configuration,
@@ -110184,7 +110251,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110184
110251
  bytes: requestBytes + responseMetrics.responseBytes,
110185
110252
  completionTokens: 0,
110186
110253
  engine: configuration.agentEngineType,
110187
- endpointId: null,
110254
+ endpointId,
110188
110255
  latencyMs,
110189
110256
  modelId: modelID,
110190
110257
  promptTokens: 0,
@@ -110200,40 +110267,45 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110200
110267
  });
110201
110268
  }
110202
110269
  catch (error) {
110203
- logger.error("SSE request failed", {
110204
- error: asError(error),
110205
- requestMethod: request.requestID
110206
- });
110270
+ const isCancelled = signal?.aborted;
110271
+ if (isCancelled) {
110272
+ logger.info("SSE request cancelled", {
110273
+ requestID: request.requestID
110274
+ });
110275
+ }
110276
+ else {
110277
+ logger.error("SSE request failed", {
110278
+ error: asError(error),
110279
+ requestMethod: request.requestID
110280
+ });
110281
+ }
110207
110282
  const failureMessage = "Bad gateway\n\nProxying failed";
110208
110283
  const failureBytes = Buffer.byteLength(failureMessage, "utf8");
110209
110284
  const latencyMs = Math.max(0, Date.now() - requestStartedAt);
110210
- const totalTokens = 0;
110211
- const tokensPerSecond = calculateTokensPerSecond({
110212
- durationMs: latencyMs,
110213
- totalTokens
110214
- });
110215
- const streamHandler = await sendChunkStream({
110216
- apiURL,
110217
- configuration,
110218
- requestID: request.requestID,
110219
- logger
110220
- });
110221
- await streamHandler.sendChunk({
110222
- data: encodeBinaryChunk(Buffer.from(failureMessage)),
110223
- sequence: 0,
110224
- status: 502
110225
- });
110226
- await streamHandler.sendChunk({
110227
- data: null,
110228
- sequence: 1,
110229
- status: 502
110230
- });
110231
- await streamHandler.end();
110285
+ if (!isCancelled) {
110286
+ const streamHandler = await sendChunkStream({
110287
+ apiURL,
110288
+ configuration,
110289
+ requestID: request.requestID,
110290
+ logger
110291
+ });
110292
+ await streamHandler.sendChunk({
110293
+ data: encodeBinaryChunk(Buffer.from(failureMessage)),
110294
+ sequence: 0,
110295
+ status: 502
110296
+ });
110297
+ await streamHandler.sendChunk({
110298
+ data: null,
110299
+ sequence: 1,
110300
+ status: 502
110301
+ });
110302
+ await streamHandler.end();
110303
+ }
110232
110304
  reportMetricsSafe({
110233
- bytes: requestBytes + failureBytes,
110305
+ bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
110234
110306
  completionTokens: 0,
110235
110307
  engine: configuration.agentEngineType,
110236
- endpointId: null,
110308
+ endpointId,
110237
110309
  latencyMs,
110238
110310
  modelId: modelID,
110239
110311
  promptTokens: 0,
@@ -110241,14 +110313,15 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
110241
110313
  requestId: request.requestID,
110242
110314
  requestMethod: request.method,
110243
110315
  requestPath: request.path,
110244
- responseBytes: failureBytes,
110316
+ responseBytes: isCancelled ? 0 : failureBytes,
110245
110317
  successful: false,
110246
- timeToFirstTokenMs: latencyMs,
110247
- tokensPerSecond,
110248
- totalTokens
110318
+ timeToFirstTokenMs: isCancelled ? null : latencyMs,
110319
+ tokensPerSecond: 0,
110320
+ totalTokens: 0
110249
110321
  });
110250
110322
  }
110251
110323
  finally {
110324
+ activeRequests.delete(request.requestID);
110252
110325
  await onRequestEnd?.(request);
110253
110326
  }
110254
110327
  }
@@ -110446,7 +110519,7 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
110446
110519
  /**
110447
110520
  * Proxy server requests to the local inference HTTP server.
110448
110521
  */
110449
- async function proxyRequest({ configuration, request }) {
110522
+ async function proxyRequest({ configuration, request, signal }) {
110450
110523
  let finalPath = request.path;
110451
110524
  if (request.parameters) {
110452
110525
  Object.entries(request.parameters).forEach(([key, value]) => {
@@ -110461,8 +110534,16 @@ async function proxyRequest({ configuration, request }) {
110461
110534
  }
110462
110535
  const fetchOptions = {
110463
110536
  method: request.method,
110464
- headers: request.headers
110537
+ headers: {
110538
+ ...request.headers,
110539
+ ...(request.parameters?.endpointID
110540
+ ? { "x-endpoint-id": request.parameters.endpointID }
110541
+ : undefined)
110542
+ }
110465
110543
  };
110544
+ if (signal) {
110545
+ fetchOptions.signal = signal;
110546
+ }
110466
110547
  if (request.body) {
110467
110548
  fetchOptions.body =
110468
110549
  typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
@@ -120361,10 +120442,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
120361
120442
  configuration,
120362
120443
  logger,
120363
120444
  modelID: conduitConfiguration.targetModel.id,
120364
- onRequest: async (request) => {
120445
+ onRequest: async ({ request, signal }) => {
120365
120446
  return proxyRequest({
120366
120447
  configuration,
120367
- request
120448
+ request,
120449
+ signal
120368
120450
  });
120369
120451
  },
120370
120452
  onRequestEnd: () => {
@@ -3,13 +3,15 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
- export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }: {
6
+ export declare function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
7
7
  body: unknown;
8
8
  configuration: Configuration;
9
+ endpointId?: ULID | null;
9
10
  logger: Logger;
10
11
  modelID: ULID;
11
12
  modelManager: ModelManager;
12
13
  reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
14
+ signal?: AbortSignal;
13
15
  }): Promise<{
14
16
  body: Readable;
15
17
  headers: Record<string, string>;
@@ -3,14 +3,16 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
3
3
  import { Logger } from "@infersec/logger";
4
4
  import { Configuration } from "../configuration.js";
5
5
  import { ModelManager } from "../modelManagement/ModelManager.js";
6
- export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }: {
6
+ export declare function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
7
7
  body: unknown;
8
8
  configuration: Configuration;
9
+ endpointId?: ULID | null;
9
10
  logger: Logger;
10
11
  modelID: ULID;
11
12
  modelManager: ModelManager;
12
13
  path: "/v1/chat/completions" | "/v1/completions";
13
14
  reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
15
+ signal?: AbortSignal;
14
16
  }): Promise<{
15
17
  body: Readable;
16
18
  headers: Record<string, string>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.40.0",
4
+ "version": "1.42.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },