@infersec/conduit 1.40.0 → 1.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-CPrgh7rN.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-CPrgh7rN.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
package/dist/sse/handler.d.ts
CHANGED
|
@@ -6,7 +6,10 @@ export declare function handleSSERequests({ apiURL, configuration, logger, model
|
|
|
6
6
|
configuration: Configuration;
|
|
7
7
|
logger: Logger;
|
|
8
8
|
modelID: ULID;
|
|
9
|
-
onRequest: (request: ServerToClientAPIRequest) => Promise<APIResponse>;
|
|
9
|
+
onRequest: ({ request, signal }: {
|
|
10
|
+
request: ServerToClientAPIRequest;
|
|
11
|
+
signal?: AbortSignal;
|
|
12
|
+
}) => Promise<APIResponse>;
|
|
10
13
|
onRequestEnd?: (request: ServerToClientAPIRequest) => Promise<void> | void;
|
|
11
14
|
onRequestStart?: (request: ServerToClientAPIRequest) => Promise<void> | void;
|
|
12
15
|
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
|
@@ -3,7 +3,8 @@ import type { Configuration } from "../configuration.js";
|
|
|
3
3
|
/**
|
|
4
4
|
* Proxy server requests to the local inference HTTP server.
|
|
5
5
|
*/
|
|
6
|
-
export declare function proxyRequest({ configuration, request }: {
|
|
6
|
+
export declare function proxyRequest({ configuration, request, signal }: {
|
|
7
7
|
configuration: Configuration;
|
|
8
8
|
request: ServerToClientAPIRequest;
|
|
9
|
+
signal?: AbortSignal;
|
|
9
10
|
}): Promise<APIResponse>;
|
|
@@ -108660,7 +108660,9 @@ class ModelManager extends EventEmitter {
|
|
|
108660
108660
|
const timeout = setTimeout(() => {
|
|
108661
108661
|
controller.abort(new Error("Inference request timeout"));
|
|
108662
108662
|
}, ENGINE_FETCH_TIMEOUT_MS);
|
|
108663
|
-
const effectiveSignal = callerSignal
|
|
108663
|
+
const effectiveSignal = callerSignal
|
|
108664
|
+
? AbortSignal.any([callerSignal, controller.signal])
|
|
108665
|
+
: controller.signal;
|
|
108664
108666
|
try {
|
|
108665
108667
|
return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
|
|
108666
108668
|
...opts,
|
|
@@ -109240,11 +109242,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
109240
109242
|
}
|
|
109241
109243
|
return;
|
|
109242
109244
|
}
|
|
109243
|
-
const closeError = new Error("Engine response stream closed before completion");
|
|
109245
|
+
const aborted = body.destroyed && body.errored?.name === "AbortError";
|
|
109246
|
+
const closeError = aborted
|
|
109247
|
+
? null
|
|
109248
|
+
: new Error("Engine response stream closed before completion");
|
|
109244
109249
|
logEngineMetrics({
|
|
109245
109250
|
agentEngineType,
|
|
109246
|
-
error: closeError,
|
|
109247
|
-
level: "error",
|
|
109251
|
+
error: closeError ?? undefined,
|
|
109252
|
+
level: aborted ? "info" : "error",
|
|
109248
109253
|
logger,
|
|
109249
109254
|
requestBodyBytes,
|
|
109250
109255
|
requestPath,
|
|
@@ -109382,11 +109387,14 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
|
|
|
109382
109387
|
}
|
|
109383
109388
|
return;
|
|
109384
109389
|
}
|
|
109385
|
-
const closeError = new Error("Engine response stream closed before completion");
|
|
109390
|
+
const aborted = body.destroyed && body.errored?.name === "AbortError";
|
|
109391
|
+
const closeError = aborted
|
|
109392
|
+
? null
|
|
109393
|
+
: new Error("Engine response stream closed before completion");
|
|
109386
109394
|
logEngineMetrics({
|
|
109387
109395
|
agentEngineType,
|
|
109388
|
-
error: closeError,
|
|
109389
|
-
level: "error",
|
|
109396
|
+
error: closeError ?? undefined,
|
|
109397
|
+
level: aborted ? "info" : "error",
|
|
109390
109398
|
logger,
|
|
109391
109399
|
requestBodyBytes,
|
|
109392
109400
|
requestPath,
|
|
@@ -109457,7 +109465,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
|
|
|
109457
109465
|
}
|
|
109458
109466
|
return Math.round(tokensPerSecond);
|
|
109459
109467
|
}
|
|
109460
|
-
async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
|
|
109468
|
+
async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics, signal }) {
|
|
109461
109469
|
function normalizeTokenCount(value) {
|
|
109462
109470
|
if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
|
|
109463
109471
|
return value;
|
|
@@ -109509,7 +109517,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109509
109517
|
headers: {
|
|
109510
109518
|
"Content-Type": "application/json"
|
|
109511
109519
|
},
|
|
109512
|
-
method: "POST"
|
|
109520
|
+
method: "POST",
|
|
109521
|
+
signal
|
|
109513
109522
|
})
|
|
109514
109523
|
.catch(error => {
|
|
109515
109524
|
logEngineMetrics({
|
|
@@ -109631,9 +109640,13 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109631
109640
|
function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
|
|
109632
109641
|
return {
|
|
109633
109642
|
"/v1/chat/completions": {
|
|
109634
|
-
POST: async ({ body }) => {
|
|
109643
|
+
POST: async ({ body, res }) => {
|
|
109635
109644
|
const modelID = getModelID();
|
|
109636
109645
|
const modelManager = getModelManager();
|
|
109646
|
+
const abortController = new AbortController();
|
|
109647
|
+
res.on("close", () => {
|
|
109648
|
+
abortController.abort();
|
|
109649
|
+
});
|
|
109637
109650
|
return proxyOpenAIStreamingRoute({
|
|
109638
109651
|
body,
|
|
109639
109652
|
configuration,
|
|
@@ -109641,14 +109654,19 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
109641
109654
|
modelID,
|
|
109642
109655
|
modelManager,
|
|
109643
109656
|
path: "/v1/chat/completions",
|
|
109644
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
109657
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
109658
|
+
signal: abortController.signal
|
|
109645
109659
|
});
|
|
109646
109660
|
}
|
|
109647
109661
|
},
|
|
109648
109662
|
"/v1/completions": {
|
|
109649
|
-
POST: async ({ body }) => {
|
|
109663
|
+
POST: async ({ body, res }) => {
|
|
109650
109664
|
const modelID = getModelID();
|
|
109651
109665
|
const modelManager = getModelManager();
|
|
109666
|
+
const abortController = new AbortController();
|
|
109667
|
+
res.on("close", () => {
|
|
109668
|
+
abortController.abort();
|
|
109669
|
+
});
|
|
109652
109670
|
return proxyOpenAIStreamingRoute({
|
|
109653
109671
|
body,
|
|
109654
109672
|
configuration,
|
|
@@ -109656,7 +109674,8 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, get
|
|
|
109656
109674
|
modelID,
|
|
109657
109675
|
modelManager,
|
|
109658
109676
|
path: "/v1/completions",
|
|
109659
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
109677
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
109678
|
+
signal: abortController.signal
|
|
109660
109679
|
});
|
|
109661
109680
|
}
|
|
109662
109681
|
},
|
|
@@ -109770,7 +109789,7 @@ function extractAnthropicNonStreamUsage(body) {
|
|
|
109770
109789
|
return null;
|
|
109771
109790
|
}
|
|
109772
109791
|
}
|
|
109773
|
-
async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }) {
|
|
109792
|
+
async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics, signal }) {
|
|
109774
109793
|
function reportMetricsSafe(payload) {
|
|
109775
109794
|
reportMetrics(payload).catch(error => {
|
|
109776
109795
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -109816,7 +109835,8 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
109816
109835
|
headers: {
|
|
109817
109836
|
"Content-Type": "application/json"
|
|
109818
109837
|
},
|
|
109819
|
-
method: "POST"
|
|
109838
|
+
method: "POST",
|
|
109839
|
+
signal
|
|
109820
109840
|
})
|
|
109821
109841
|
.catch(error => {
|
|
109822
109842
|
logEngineMetrics({
|
|
@@ -110060,16 +110080,21 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
110060
110080
|
function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
|
|
110061
110081
|
return {
|
|
110062
110082
|
"/v1/messages": {
|
|
110063
|
-
POST: async ({ body }) => {
|
|
110083
|
+
POST: async ({ body, res }) => {
|
|
110064
110084
|
const modelID = getModelID();
|
|
110065
110085
|
const modelManager = getModelManager();
|
|
110086
|
+
const abortController = new AbortController();
|
|
110087
|
+
res.on("close", () => {
|
|
110088
|
+
abortController.abort();
|
|
110089
|
+
});
|
|
110066
110090
|
return proxyAnthropicStreamingRoute({
|
|
110067
110091
|
body,
|
|
110068
110092
|
configuration,
|
|
110069
110093
|
logger,
|
|
110070
110094
|
modelID,
|
|
110071
110095
|
modelManager,
|
|
110072
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
110096
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
110097
|
+
signal: abortController.signal
|
|
110073
110098
|
});
|
|
110074
110099
|
}
|
|
110075
110100
|
}
|
|
@@ -110089,6 +110114,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110089
110114
|
const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
|
|
110090
110115
|
const maxReconnectDelayMs = 30000;
|
|
110091
110116
|
let reconnectAttempt = 0;
|
|
110117
|
+
const activeRequests = new Map();
|
|
110092
110118
|
while (!signal?.aborted) {
|
|
110093
110119
|
const connectionStartedAt = Date.now();
|
|
110094
110120
|
try {
|
|
@@ -110102,11 +110128,26 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110102
110128
|
});
|
|
110103
110129
|
},
|
|
110104
110130
|
onMessage: (message) => {
|
|
110131
|
+
if (message.event === "cancel") {
|
|
110132
|
+
const { requestID } = JSON.parse(message.data);
|
|
110133
|
+
const controller = activeRequests.get(requestID);
|
|
110134
|
+
if (controller) {
|
|
110135
|
+
logger.info("Cancelling active request", {
|
|
110136
|
+
requestID
|
|
110137
|
+
});
|
|
110138
|
+
controller.abort();
|
|
110139
|
+
activeRequests.delete(requestID);
|
|
110140
|
+
}
|
|
110141
|
+
return;
|
|
110142
|
+
}
|
|
110105
110143
|
if (message.event !== "request") {
|
|
110106
110144
|
return;
|
|
110107
110145
|
}
|
|
110108
110146
|
const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
|
|
110147
|
+
const perRequestController = new AbortController();
|
|
110148
|
+
activeRequests.set(payload.requestID, perRequestController);
|
|
110109
110149
|
handleRequest({
|
|
110150
|
+
activeRequests,
|
|
110110
110151
|
apiURL,
|
|
110111
110152
|
configuration,
|
|
110112
110153
|
logger,
|
|
@@ -110116,7 +110157,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110116
110157
|
onRequestStart,
|
|
110117
110158
|
reportMetrics,
|
|
110118
110159
|
request: payload,
|
|
110119
|
-
signal
|
|
110160
|
+
signal: perRequestController.signal
|
|
110120
110161
|
}).catch(error => {
|
|
110121
110162
|
logger.error("SSE request handler failed", {
|
|
110122
110163
|
error: asError(error),
|
|
@@ -110151,7 +110192,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110151
110192
|
}
|
|
110152
110193
|
}
|
|
110153
110194
|
}
|
|
110154
|
-
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
110195
|
+
async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
110155
110196
|
function reportMetricsSafe(payload) {
|
|
110156
110197
|
reportMetrics(payload).catch(error => {
|
|
110157
110198
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -110164,7 +110205,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110164
110205
|
const requestBytes = calculateRequestBytes(request.body ?? null);
|
|
110165
110206
|
try {
|
|
110166
110207
|
await onRequestStart?.(request);
|
|
110167
|
-
const response = await onRequest(request);
|
|
110208
|
+
const response = await onRequest({ request, signal });
|
|
110168
110209
|
const responseMetrics = await streamResponse({
|
|
110169
110210
|
apiURL,
|
|
110170
110211
|
configuration,
|
|
@@ -110200,37 +110241,42 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110200
110241
|
});
|
|
110201
110242
|
}
|
|
110202
110243
|
catch (error) {
|
|
110203
|
-
|
|
110204
|
-
|
|
110205
|
-
|
|
110206
|
-
|
|
110244
|
+
const isCancelled = signal?.aborted;
|
|
110245
|
+
if (isCancelled) {
|
|
110246
|
+
logger.info("SSE request cancelled", {
|
|
110247
|
+
requestID: request.requestID
|
|
110248
|
+
});
|
|
110249
|
+
}
|
|
110250
|
+
else {
|
|
110251
|
+
logger.error("SSE request failed", {
|
|
110252
|
+
error: asError(error),
|
|
110253
|
+
requestMethod: request.requestID
|
|
110254
|
+
});
|
|
110255
|
+
}
|
|
110207
110256
|
const failureMessage = "Bad gateway\n\nProxying failed";
|
|
110208
110257
|
const failureBytes = Buffer.byteLength(failureMessage, "utf8");
|
|
110209
110258
|
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
110210
|
-
|
|
110211
|
-
|
|
110212
|
-
|
|
110213
|
-
|
|
110214
|
-
|
|
110215
|
-
|
|
110216
|
-
|
|
110217
|
-
|
|
110218
|
-
|
|
110219
|
-
|
|
110220
|
-
|
|
110221
|
-
|
|
110222
|
-
|
|
110223
|
-
|
|
110224
|
-
|
|
110225
|
-
|
|
110226
|
-
|
|
110227
|
-
|
|
110228
|
-
|
|
110229
|
-
status: 502
|
|
110230
|
-
});
|
|
110231
|
-
await streamHandler.end();
|
|
110259
|
+
if (!isCancelled) {
|
|
110260
|
+
const streamHandler = await sendChunkStream({
|
|
110261
|
+
apiURL,
|
|
110262
|
+
configuration,
|
|
110263
|
+
requestID: request.requestID,
|
|
110264
|
+
logger
|
|
110265
|
+
});
|
|
110266
|
+
await streamHandler.sendChunk({
|
|
110267
|
+
data: encodeBinaryChunk(Buffer.from(failureMessage)),
|
|
110268
|
+
sequence: 0,
|
|
110269
|
+
status: 502
|
|
110270
|
+
});
|
|
110271
|
+
await streamHandler.sendChunk({
|
|
110272
|
+
data: null,
|
|
110273
|
+
sequence: 1,
|
|
110274
|
+
status: 502
|
|
110275
|
+
});
|
|
110276
|
+
await streamHandler.end();
|
|
110277
|
+
}
|
|
110232
110278
|
reportMetricsSafe({
|
|
110233
|
-
bytes: requestBytes + failureBytes,
|
|
110279
|
+
bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
|
|
110234
110280
|
completionTokens: 0,
|
|
110235
110281
|
engine: configuration.agentEngineType,
|
|
110236
110282
|
endpointId: null,
|
|
@@ -110241,14 +110287,15 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110241
110287
|
requestId: request.requestID,
|
|
110242
110288
|
requestMethod: request.method,
|
|
110243
110289
|
requestPath: request.path,
|
|
110244
|
-
responseBytes: failureBytes,
|
|
110290
|
+
responseBytes: isCancelled ? 0 : failureBytes,
|
|
110245
110291
|
successful: false,
|
|
110246
|
-
timeToFirstTokenMs: latencyMs,
|
|
110247
|
-
tokensPerSecond,
|
|
110248
|
-
totalTokens
|
|
110292
|
+
timeToFirstTokenMs: isCancelled ? null : latencyMs,
|
|
110293
|
+
tokensPerSecond: 0,
|
|
110294
|
+
totalTokens: 0
|
|
110249
110295
|
});
|
|
110250
110296
|
}
|
|
110251
110297
|
finally {
|
|
110298
|
+
activeRequests.delete(request.requestID);
|
|
110252
110299
|
await onRequestEnd?.(request);
|
|
110253
110300
|
}
|
|
110254
110301
|
}
|
|
@@ -110446,7 +110493,7 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
|
|
|
110446
110493
|
/**
|
|
110447
110494
|
* Proxy server requests to the local inference HTTP server.
|
|
110448
110495
|
*/
|
|
110449
|
-
async function proxyRequest({ configuration, request }) {
|
|
110496
|
+
async function proxyRequest({ configuration, request, signal }) {
|
|
110450
110497
|
let finalPath = request.path;
|
|
110451
110498
|
if (request.parameters) {
|
|
110452
110499
|
Object.entries(request.parameters).forEach(([key, value]) => {
|
|
@@ -110463,6 +110510,9 @@ async function proxyRequest({ configuration, request }) {
|
|
|
110463
110510
|
method: request.method,
|
|
110464
110511
|
headers: request.headers
|
|
110465
110512
|
};
|
|
110513
|
+
if (signal) {
|
|
110514
|
+
fetchOptions.signal = signal;
|
|
110515
|
+
}
|
|
110466
110516
|
if (request.body) {
|
|
110467
110517
|
fetchOptions.body =
|
|
110468
110518
|
typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
|
|
@@ -120361,10 +120411,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
120361
120411
|
configuration,
|
|
120362
120412
|
logger,
|
|
120363
120413
|
modelID: conduitConfiguration.targetModel.id,
|
|
120364
|
-
onRequest: async (request) => {
|
|
120414
|
+
onRequest: async ({ request, signal }) => {
|
|
120365
120415
|
return proxyRequest({
|
|
120366
120416
|
configuration,
|
|
120367
|
-
request
|
|
120417
|
+
request,
|
|
120418
|
+
signal
|
|
120368
120419
|
});
|
|
120369
120420
|
},
|
|
120370
120421
|
onRequestEnd: () => {
|
|
@@ -3,13 +3,14 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
-
export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }: {
|
|
6
|
+
export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics, signal }: {
|
|
7
7
|
body: unknown;
|
|
8
8
|
configuration: Configuration;
|
|
9
9
|
logger: Logger;
|
|
10
10
|
modelID: ULID;
|
|
11
11
|
modelManager: ModelManager;
|
|
12
12
|
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
|
13
|
+
signal?: AbortSignal;
|
|
13
14
|
}): Promise<{
|
|
14
15
|
body: Readable;
|
|
15
16
|
headers: Record<string, string>;
|
package/dist/utils/openai.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
-
export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }: {
|
|
6
|
+
export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics, signal }: {
|
|
7
7
|
body: unknown;
|
|
8
8
|
configuration: Configuration;
|
|
9
9
|
logger: Logger;
|
|
@@ -11,6 +11,7 @@ export declare function proxyOpenAIStreamingRoute({ body, configuration, logger,
|
|
|
11
11
|
modelManager: ModelManager;
|
|
12
12
|
path: "/v1/chat/completions" | "/v1/completions";
|
|
13
13
|
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
|
14
|
+
signal?: AbortSignal;
|
|
14
15
|
}): Promise<{
|
|
15
16
|
body: Readable;
|
|
16
17
|
headers: Record<string, string>;
|