@infersec/conduit 1.24.3 → 1.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-DiikacLf.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-DBk2G4SP.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-DiikacLf.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-DBk2G4SP.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -14914,6 +14914,23 @@ const API_SERVICE_CONDUIT_API_REFERENCE = {
14914
14914
  type: "rest"
14915
14915
  }
14916
14916
  }
14917
+ },
14918
+ "/conduit/api/v1/source/:sourceID/requests/:requestID/stream": {
14919
+ POST: {
14920
+ auth: {
14921
+ type: "api-key"
14922
+ },
14923
+ parameters: {
14924
+ requestID: ULIDSchema,
14925
+ sourceID: ULIDSchema
14926
+ },
14927
+ response: {
14928
+ schema: object({
14929
+ acknowledged: literal(true)
14930
+ }),
14931
+ type: "rest"
14932
+ }
14933
+ }
14917
14934
  }
14918
14935
  };
14919
14936
 
@@ -17601,6 +17618,9 @@ function createLogger({ attributes = {}, name }) {
17601
17618
  child: (attributes) => {
17602
17619
  return buildLogger(logger.child(processAttributes(attributes)));
17603
17620
  },
17621
+ debug: (message, attributes) => {
17622
+ logger.debug(processAttributes(attributes ?? {}), message);
17623
+ },
17604
17624
  error: (message, attributes) => {
17605
17625
  logger.error(processAttributes(attributes ?? {}), message);
17606
17626
  },
@@ -97840,7 +97860,6 @@ function implementSingleEndpoint({ endpoint, handler, method, mount, route }) {
97840
97860
  : []), (async (req, res) => {
97841
97861
  res.locals.requestID = ulid$2();
97842
97862
  try {
97843
- console.log("HANDLE REQ", method, route, req.params);
97844
97863
  // Extract and validate parameters with proper type assertion
97845
97864
  const parameters = endpoint.parameters
97846
97865
  ? validateAndExtract("params", req.params, endpoint.parameters)
@@ -97888,7 +97907,6 @@ function implementSingleEndpoint({ endpoint, handler, method, mount, route }) {
97888
97907
  res.status(output.status).send(output.statusText);
97889
97908
  return;
97890
97909
  }
97891
- console.log("GOT RESPONSE", method, route, output.status, typeof output.body);
97892
97910
  res.status(output.status);
97893
97911
  if (endpoint.response.type === "text-stream") {
97894
97912
  if (!res.getHeader("content-type")) {
@@ -108360,7 +108378,8 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
108360
108378
  onRequestEnd,
108361
108379
  onRequestStart,
108362
108380
  reportMetrics,
108363
- request: payload
108381
+ request: payload,
108382
+ signal
108364
108383
  }).catch(error => {
108365
108384
  logger.error("SSE request handler failed", {
108366
108385
  error: asError(error),
@@ -108394,7 +108413,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
108394
108413
  }
108395
108414
  }
108396
108415
  }
108397
- async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request }) {
108416
+ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
108398
108417
  function reportMetricsSafe(payload) {
108399
108418
  reportMetrics(payload).catch(error => {
108400
108419
  logger.warn("Failed to upload LLM prompt metrics", {
@@ -108414,7 +108433,8 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
108414
108433
  logger,
108415
108434
  requestID: request.requestID,
108416
108435
  requestStartedAt,
108417
- response
108436
+ response,
108437
+ signal
108418
108438
  });
108419
108439
  const latencyMs = Math.max(0, Date.now() - requestStartedAt);
108420
108440
  const totalTokens = 0;
@@ -108454,26 +108474,23 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
108454
108474
  durationMs: latencyMs,
108455
108475
  totalTokens
108456
108476
  });
108457
- await postChunk({
108477
+ const streamHandler = await sendChunkStream({
108458
108478
  apiURL,
108459
108479
  configuration,
108460
- payload: {
108461
- data: encodeBinaryChunk(Buffer.from(failureMessage)),
108462
- sequence: 0,
108463
- status: 502
108464
- },
108465
- requestID: request.requestID
108480
+ requestID: request.requestID,
108481
+ logger
108466
108482
  });
108467
- await postChunk({
108468
- apiURL,
108469
- configuration,
108470
- payload: {
108471
- data: null,
108472
- sequence: 1,
108473
- status: 502
108474
- },
108475
- requestID: request.requestID
108483
+ await streamHandler.sendChunk({
108484
+ data: encodeBinaryChunk(Buffer.from(failureMessage)),
108485
+ sequence: 0,
108486
+ status: 502
108487
+ });
108488
+ await streamHandler.sendChunk({
108489
+ data: null,
108490
+ sequence: 1,
108491
+ status: 502
108476
108492
  });
108493
+ await streamHandler.end();
108477
108494
  reportMetricsSafe({
108478
108495
  bytes: requestBytes + failureBytes,
108479
108496
  completionTokens: 0,
@@ -108497,12 +108514,22 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
108497
108514
  await onRequestEnd?.(request);
108498
108515
  }
108499
108516
  }
108500
- async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response }) {
108517
+ async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response, signal }) {
108501
108518
  let sequence = 0;
108502
108519
  let responseBytes = 0;
108503
108520
  let timeToFirstTokenMs = null;
108521
+ const streamHandler = await sendChunkStream({
108522
+ apiURL,
108523
+ configuration,
108524
+ requestID,
108525
+ logger
108526
+ });
108504
108527
  if (response.body instanceof Readable) {
108505
108528
  for await (const chunk of response.body) {
108529
+ if (signal?.aborted) {
108530
+ streamHandler.abort();
108531
+ throw new Error("Request cancelled");
108532
+ }
108506
108533
  const buffer = Buffer.isBuffer(chunk)
108507
108534
  ? chunk
108508
108535
  : Buffer.from(chunk);
@@ -108510,28 +108537,19 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
108510
108537
  timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
108511
108538
  }
108512
108539
  responseBytes += buffer.length;
108513
- await postChunk({
108514
- apiURL,
108515
- configuration,
108516
- payload: {
108517
- data: encodeBinaryChunk(buffer),
108518
- sequence,
108519
- status: response.status
108520
- },
108521
- requestID
108540
+ await streamHandler.sendChunk({
108541
+ data: encodeBinaryChunk(buffer),
108542
+ sequence,
108543
+ status: response.status
108522
108544
  });
108523
108545
  sequence += 1;
108524
108546
  }
108525
- await postChunk({
108526
- apiURL,
108527
- configuration,
108528
- payload: {
108529
- data: null,
108530
- sequence,
108531
- status: response.status
108532
- },
108533
- requestID
108547
+ await streamHandler.sendChunk({
108548
+ data: null,
108549
+ sequence,
108550
+ status: response.status
108534
108551
  });
108552
+ await streamHandler.end();
108535
108553
  return {
108536
108554
  responseBytes,
108537
108555
  status: response.status,
@@ -108547,27 +108565,18 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
108547
108565
  responseBytes = Buffer.byteLength(responsePayload, "utf8");
108548
108566
  timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
108549
108567
  }
108550
- await postChunk({
108551
- apiURL,
108552
- configuration,
108553
- payload: {
108554
- data: encodeBinaryChunk(Buffer.from(responsePayload)),
108555
- headers: response.headers,
108556
- sequence,
108557
- status: response.status
108558
- },
108559
- requestID
108568
+ await streamHandler.sendChunk({
108569
+ data: encodeBinaryChunk(Buffer.from(responsePayload)),
108570
+ headers: response.headers,
108571
+ sequence,
108572
+ status: response.status
108560
108573
  });
108561
- await postChunk({
108562
- apiURL,
108563
- configuration,
108564
- payload: {
108565
- data: null,
108566
- sequence: sequence + 1,
108567
- status: response.status
108568
- },
108569
- requestID
108574
+ await streamHandler.sendChunk({
108575
+ data: null,
108576
+ sequence: sequence + 1,
108577
+ status: response.status
108570
108578
  });
108579
+ await streamHandler.end();
108571
108580
  logger.info("SSE response queued", {
108572
108581
  requestMethod: requestID
108573
108582
  });
@@ -108577,28 +108586,101 @@ async function streamResponse({ apiURL, configuration, logger, requestID, reques
108577
108586
  timeToFirstTokenMs
108578
108587
  };
108579
108588
  }
108580
- async function postChunk({ apiURL, configuration, payload, requestID }) {
108581
- const response = ClientToServerAPIResponseSchema.parse({
108582
- data: payload.data,
108583
- headers: payload.headers,
108584
- requestID,
108585
- status: payload.status
108586
- });
108587
- await fetch(`${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/chunk`, {
108588
- body: JSON.stringify({
108589
- ...response,
108590
- sequence: payload.sequence
108591
- }),
108592
- headers: {
108593
- "content-type": "application/json",
108594
- "x-api-key": configuration.apiKey
108595
- },
108596
- method: "POST"
108597
- });
108598
- }
108599
108589
  function encodeBinaryChunk(chunk) {
108600
108590
  return chunk.toString("base64");
108601
108591
  }
108592
+ async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
108593
+ const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
108594
+ const maxFlushAttempts = 3;
108595
+ let isAborted = false;
108596
+ let isClosed = false;
108597
+ let activeAbortController = null;
108598
+ const chunks = [];
108599
+ const sendChunk = async (payload) => {
108600
+ if (isAborted || isClosed) {
108601
+ return;
108602
+ }
108603
+ const response = ClientToServerAPIResponseSchema.parse({
108604
+ data: payload.data,
108605
+ headers: payload.headers,
108606
+ requestID,
108607
+ status: payload.status
108608
+ });
108609
+ const chunk = JSON.stringify({
108610
+ ...response,
108611
+ sequence: payload.sequence
108612
+ });
108613
+ chunks.push(Buffer.from(chunk + "\n"));
108614
+ if (chunks.length >= 10) {
108615
+ await flushChunks();
108616
+ }
108617
+ };
108618
+ const flushChunks = async () => {
108619
+ if (chunks.length === 0 || isAborted) {
108620
+ return;
108621
+ }
108622
+ const batch = chunks.splice(0, chunks.length);
108623
+ const body = Buffer.concat(batch);
108624
+ for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
108625
+ try {
108626
+ activeAbortController = new AbortController();
108627
+ const response = await fetch(streamURL, {
108628
+ body: body.toString(),
108629
+ headers: {
108630
+ "content-type": "application/json",
108631
+ "x-api-key": configuration.apiKey
108632
+ },
108633
+ method: "POST",
108634
+ signal: activeAbortController.signal
108635
+ });
108636
+ if (!response.ok) {
108637
+ throw new Error(`Chunk stream flush failed with status ${response.status}`);
108638
+ }
108639
+ return;
108640
+ }
108641
+ catch (error) {
108642
+ if (isAborted) {
108643
+ return;
108644
+ }
108645
+ if (attempt >= maxFlushAttempts) {
108646
+ chunks.unshift(...batch);
108647
+ throw asError(error);
108648
+ }
108649
+ logger.warn("Failed to send chunk batch", {
108650
+ error: asError(error)
108651
+ });
108652
+ await sleep(100 * attempt);
108653
+ }
108654
+ finally {
108655
+ activeAbortController = null;
108656
+ }
108657
+ }
108658
+ };
108659
+ const end = async () => {
108660
+ if (isClosed || isAborted) {
108661
+ return;
108662
+ }
108663
+ await flushChunks();
108664
+ isClosed = true;
108665
+ };
108666
+ const abort = (error) => {
108667
+ isAborted = true;
108668
+ if (activeAbortController) {
108669
+ activeAbortController.abort();
108670
+ }
108671
+ chunks.length = 0;
108672
+ if (error) {
108673
+ logger.error("Chunk stream aborted", {
108674
+ error: asError(error)
108675
+ });
108676
+ }
108677
+ };
108678
+ return {
108679
+ sendChunk,
108680
+ end,
108681
+ abort
108682
+ };
108683
+ }
108602
108684
  function calculateRequestBytes(body) {
108603
108685
  if (body === null || body === undefined) {
108604
108686
  return 0;
@@ -118119,13 +118201,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
118119
118201
  }
118120
118202
  }
118121
118203
  body.on("data", (chunk) => {
118204
+ const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
118122
118205
  if (firstChunkAt === null) {
118123
118206
  firstChunkAt = Date.now();
118124
118207
  }
118125
- responseBytes += chunk.length;
118126
- buffer += chunk.toString("utf8");
118208
+ responseBytes += chunkBuffer.length;
118209
+ buffer += chunkBuffer.toString("utf8");
118127
118210
  parseUsageFromBuffer();
118128
- passThrough.write(modifyChunkWithUsage(chunk));
118211
+ passThrough.write(modifyChunkWithUsage(chunkBuffer));
118129
118212
  });
118130
118213
  body.once("error", err => {
118131
118214
  logEngineMetrics({
@@ -118182,6 +118265,148 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
118182
118265
  stream: passThrough
118183
118266
  };
118184
118267
  }
118268
+ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
118269
+ const maxUsageCaptureBytes = 1024 * 1024;
118270
+ const startedAt = requestStartedAt ?? Date.now();
118271
+ const passThrough = new PassThrough();
118272
+ let responseBytes = 0;
118273
+ let firstChunkAt = null;
118274
+ let usage = null;
118275
+ const usageChunks = [];
118276
+ let usageBytes = 0;
118277
+ let usageCaptureEnabled = true;
118278
+ let completed = false;
118279
+ function finalize(error) {
118280
+ if (completed) {
118281
+ return;
118282
+ }
118283
+ completed = true;
118284
+ if (onComplete) {
118285
+ const completion = onComplete({
118286
+ durationMs: Math.max(0, Date.now() - startedAt),
118287
+ error,
118288
+ requestBodyBytes,
118289
+ responseBytes,
118290
+ timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
118291
+ usage
118292
+ });
118293
+ if (completion && typeof completion.catch === "function") {
118294
+ completion.catch(error => {
118295
+ logger.error("Engine metrics completion failed", {
118296
+ error: asError(error),
118297
+ requestUrl: requestPath
118298
+ });
118299
+ });
118300
+ }
118301
+ }
118302
+ }
118303
+ body.on("data", (chunk) => {
118304
+ const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
118305
+ if (firstChunkAt === null) {
118306
+ firstChunkAt = Date.now();
118307
+ }
118308
+ responseBytes += chunkBuffer.length;
118309
+ if (usageCaptureEnabled) {
118310
+ const nextSize = usageBytes + chunkBuffer.length;
118311
+ if (nextSize <= maxUsageCaptureBytes) {
118312
+ usageChunks.push(chunkBuffer);
118313
+ usageBytes = nextSize;
118314
+ }
118315
+ else {
118316
+ usageCaptureEnabled = false;
118317
+ usageChunks.length = 0;
118318
+ }
118319
+ }
118320
+ passThrough.write(chunkBuffer);
118321
+ });
118322
+ body.once("error", err => {
118323
+ logEngineMetrics({
118324
+ agentEngineType,
118325
+ error: err,
118326
+ level: "error",
118327
+ logger,
118328
+ requestBodyBytes,
118329
+ requestPath,
118330
+ responseBytes,
118331
+ usage
118332
+ });
118333
+ finalize(err);
118334
+ passThrough.destroy(err);
118335
+ });
118336
+ body.once("end", () => {
118337
+ if (usageCaptureEnabled) {
118338
+ try {
118339
+ const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
118340
+ if (parsed.usage) {
118341
+ const usageChunk = parsed.usage;
118342
+ const completionTokens = usageChunk.completion_tokens ?? null;
118343
+ const promptTokens = usageChunk.prompt_tokens ?? null;
118344
+ const totalTokens = usageChunk.total_tokens ?? null;
118345
+ let contextUsage = usageChunk.context_usage ?? null;
118346
+ const effectiveContext = getEffectiveContextLength({
118347
+ contextLength,
118348
+ engine,
118349
+ parallelism
118350
+ });
118351
+ if (contextUsage === null &&
118352
+ promptTokens !== null &&
118353
+ effectiveContext !== null) {
118354
+ contextUsage = promptTokens / effectiveContext;
118355
+ }
118356
+ usage = {
118357
+ completionTokens,
118358
+ contextUsage,
118359
+ promptTokens,
118360
+ totalTokens
118361
+ };
118362
+ }
118363
+ }
118364
+ catch (error) {
118365
+ logger.error("Failed to parse engine response body", {
118366
+ error: asError(error),
118367
+ requestUrl: requestPath
118368
+ });
118369
+ }
118370
+ }
118371
+ logEngineMetrics({
118372
+ agentEngineType,
118373
+ level: "info",
118374
+ logger,
118375
+ requestBodyBytes,
118376
+ requestPath,
118377
+ responseBytes,
118378
+ usage
118379
+ });
118380
+ finalize(null);
118381
+ passThrough.end();
118382
+ });
118383
+ body.once("close", () => {
118384
+ if (completed) {
118385
+ if (!passThrough.writableEnded) {
118386
+ passThrough.end();
118387
+ }
118388
+ return;
118389
+ }
118390
+ const closeError = new Error("Engine response stream closed before completion");
118391
+ logEngineMetrics({
118392
+ agentEngineType,
118393
+ error: closeError,
118394
+ level: "error",
118395
+ logger,
118396
+ requestBodyBytes,
118397
+ requestPath,
118398
+ responseBytes,
118399
+ usage
118400
+ });
118401
+ finalize(closeError);
118402
+ if (!passThrough.writableEnded) {
118403
+ passThrough.end();
118404
+ }
118405
+ });
118406
+ return {
118407
+ stream: passThrough
118408
+ };
118409
+ }
118185
118410
  function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
118186
118411
  const metricsMessage = [
118187
118412
  "LLM engine stream metrics",
@@ -118254,6 +118479,35 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
118254
118479
  }
118255
118480
  const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
118256
118481
  const requestStartedAt = Date.now();
118482
+ const requestBody = JSON.parse(serializedBody);
118483
+ const streamRequested = requestBody.stream === true;
118484
+ const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
118485
+ const completionTokens = normalizeTokenCount(usage?.completionTokens);
118486
+ const promptTokens = normalizeTokenCount(usage?.promptTokens);
118487
+ const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
118488
+ const latencyMs = Math.max(0, durationMs);
118489
+ reportMetricsSafe({
118490
+ bytes: requestBodyBytes + responseBytes,
118491
+ completionTokens,
118492
+ engine: configuration.agentEngineType,
118493
+ endpointId: null,
118494
+ latencyMs,
118495
+ modelId: modelID,
118496
+ promptTokens,
118497
+ requestBytes: requestBodyBytes,
118498
+ requestId: null,
118499
+ requestMethod: "POST",
118500
+ requestPath: path,
118501
+ responseBytes,
118502
+ successful: !error,
118503
+ timeToFirstTokenMs,
118504
+ tokensPerSecond: calculateTokensPerSecond({
118505
+ durationMs: latencyMs,
118506
+ totalTokens
118507
+ }),
118508
+ totalTokens
118509
+ });
118510
+ };
118257
118511
  const response = await modelManager
118258
118512
  .fetchOpenAI(path, {
118259
118513
  body: serializedBody,
@@ -118348,44 +118602,31 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
118348
118602
  statusText: responseStatusText
118349
118603
  };
118350
118604
  }
118351
- const monitoredResponse = monitorEngineResponseStream({
118352
- agentEngineType: configuration.agentEngineType,
118353
- body: Readable.fromWeb(response.body),
118354
- contextLength: modelManager.contextLength,
118355
- engine: configuration.agentEngineType,
118356
- logger,
118357
- onComplete: ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
118358
- const completionTokens = normalizeTokenCount(usage?.completionTokens);
118359
- const promptTokens = normalizeTokenCount(usage?.promptTokens);
118360
- const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
118361
- const latencyMs = Math.max(0, durationMs);
118362
- reportMetricsSafe({
118363
- bytes: requestBodyBytes + responseBytes,
118364
- completionTokens,
118365
- engine: configuration.agentEngineType,
118366
- endpointId: null,
118367
- latencyMs,
118368
- modelId: modelID,
118369
- promptTokens,
118370
- requestBytes: requestBodyBytes,
118371
- requestId: null,
118372
- requestMethod: "POST",
118373
- requestPath: path,
118374
- responseBytes,
118375
- successful: !error,
118376
- timeToFirstTokenMs,
118377
- tokensPerSecond: calculateTokensPerSecond({
118378
- durationMs: latencyMs,
118379
- totalTokens
118380
- }),
118381
- totalTokens
118382
- });
118383
- },
118384
- parallelism: modelManager.parallelism,
118385
- requestBodyBytes,
118386
- requestPath: path,
118387
- requestStartedAt
118388
- });
118605
+ const monitoredResponse = streamRequested
118606
+ ? monitorEngineResponseStream({
118607
+ agentEngineType: configuration.agentEngineType,
118608
+ body: Readable.fromWeb(response.body),
118609
+ contextLength: modelManager.contextLength,
118610
+ engine: configuration.agentEngineType,
118611
+ logger,
118612
+ onComplete: onMonitoringComplete,
118613
+ parallelism: modelManager.parallelism,
118614
+ requestBodyBytes,
118615
+ requestPath: path,
118616
+ requestStartedAt
118617
+ })
118618
+ : monitorEngineResponseSingle({
118619
+ agentEngineType: configuration.agentEngineType,
118620
+ body: Readable.fromWeb(response.body),
118621
+ contextLength: modelManager.contextLength,
118622
+ engine: configuration.agentEngineType,
118623
+ logger,
118624
+ onComplete: onMonitoringComplete,
118625
+ parallelism: modelManager.parallelism,
118626
+ requestBodyBytes,
118627
+ requestPath: path,
118628
+ requestStartedAt
118629
+ });
118389
118630
  return {
118390
118631
  body: monitoredResponse.stream,
118391
118632
  headers: Object.fromEntries(response.headers.entries()),
@@ -39,5 +39,6 @@ interface MonitorEngineResponseResult {
39
39
  stream: Readable;
40
40
  }
41
41
  export declare function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
42
+ export declare function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }: MonitorEngineResponseOptions): MonitorEngineResponseResult;
42
43
  export declare function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }: EngineMetricsLogOptions): void;
43
44
  export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.24.3",
4
+ "version": "1.25.1",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },
@@ -27,7 +27,7 @@
27
27
  "test:format": "prettier --check .",
28
28
  "test:lint": "eslint source/**/*.ts",
29
29
  "test:types": "tsc -p tsconfig.json --noEmit",
30
- "test:unit": "vitest run"
30
+ "test:unit": "vitest -c vitest.config.ts run"
31
31
  },
32
32
  "prettier": "@infersec/prettier",
33
33
  "publishConfig": {
@@ -1 +0,0 @@
1
- export {};