@infersec/conduit 1.40.0 → 1.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/requestHandlers/createConduitAnthropicAPIReferenceHandlers.d.ts +2 -2
- package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts +4 -4
- package/dist/sse/handler.d.ts +4 -1
- package/dist/sse/requestProxy.d.ts +2 -1
- package/dist/{start-Dw1tdbuz.js → start-DGdf6ycx.js} +159 -77
- package/dist/utils/anthropic.d.ts +3 -1
- package/dist/utils/openai.d.ts +3 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-Dw1tdbuz.js';
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-DGdf6ycx.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-Dw1tdbuz.js';
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-DGdf6ycx.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
package/dist/requestHandlers/createConduitAnthropicAPIReferenceHandlers.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
|
|
2
|
-
import { implementAPIReference } from "@infersec/fetch";
|
|
2
|
+
import { implementAPIReference, type APIRequest } from "@infersec/fetch";
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { APIClient } from "../apiClient/index.js";
|
|
5
5
|
import { Configuration } from "../configuration.js";
|
|
@@ -19,7 +19,7 @@ export declare function createPostMessagesHandler(options: {
|
|
|
19
19
|
getModelManager: () => ModelManager;
|
|
20
20
|
logger: Logger;
|
|
21
21
|
}): (params: {
|
|
22
|
-
req:
|
|
22
|
+
req: APIRequest;
|
|
23
23
|
res: import("@infersec/fetch").APIResponse;
|
|
24
24
|
parameters: Record<string, never>;
|
|
25
25
|
query: Record<string, never>;
|
package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { API_CLIENT_CONDUIT_OPENAI_REFERENCE } from "@infersec/definitions";
|
|
2
|
-
import { implementAPIReference } from "@infersec/fetch";
|
|
2
|
+
import { implementAPIReference, type APIRequest } from "@infersec/fetch";
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { APIClient } from "../apiClient/index.js";
|
|
5
5
|
import { Configuration } from "../configuration.js";
|
|
@@ -21,7 +21,7 @@ export declare function createGetModelsHandler(options: {
|
|
|
21
21
|
logger: Logger;
|
|
22
22
|
startup: number;
|
|
23
23
|
}): (params: {
|
|
24
|
-
req:
|
|
24
|
+
req: APIRequest;
|
|
25
25
|
res: import("@infersec/fetch").APIResponse;
|
|
26
26
|
parameters: Record<string, never>;
|
|
27
27
|
query: Record<string, never>;
|
|
@@ -66,7 +66,7 @@ export declare function createPostChatCompletionsHandler(options: {
|
|
|
66
66
|
logger: Logger;
|
|
67
67
|
startup: number;
|
|
68
68
|
}): (params: {
|
|
69
|
-
req:
|
|
69
|
+
req: APIRequest;
|
|
70
70
|
res: import("@infersec/fetch").APIResponse;
|
|
71
71
|
parameters: Record<string, never>;
|
|
72
72
|
query: Record<string, never>;
|
|
@@ -172,7 +172,7 @@ export declare function createPostCompletionsHandler(options: {
|
|
|
172
172
|
logger: Logger;
|
|
173
173
|
startup: number;
|
|
174
174
|
}): (params: {
|
|
175
|
-
req:
|
|
175
|
+
req: APIRequest;
|
|
176
176
|
res: import("@infersec/fetch").APIResponse;
|
|
177
177
|
parameters: Record<string, never>;
|
|
178
178
|
query: Record<string, never>;
|
package/dist/sse/handler.d.ts
CHANGED
|
@@ -6,7 +6,10 @@ export declare function handleSSERequests({ apiURL, configuration, logger, model
|
|
|
6
6
|
configuration: Configuration;
|
|
7
7
|
logger: Logger;
|
|
8
8
|
modelID: ULID;
|
|
9
|
-
onRequest: (request
|
|
9
|
+
onRequest: ({ request, signal }: {
|
|
10
|
+
request: ServerToClientAPIRequest;
|
|
11
|
+
signal?: AbortSignal;
|
|
12
|
+
}) => Promise<APIResponse>;
|
|
10
13
|
onRequestEnd?: (request: ServerToClientAPIRequest) => Promise<void> | void;
|
|
11
14
|
onRequestStart?: (request: ServerToClientAPIRequest) => Promise<void> | void;
|
|
12
15
|
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
package/dist/sse/requestProxy.d.ts
CHANGED
|
@@ -3,7 +3,8 @@ import type { Configuration } from "../configuration.js";
|
|
|
3
3
|
/**
|
|
4
4
|
* Proxy server requests to the local inference HTTP server.
|
|
5
5
|
*/
|
|
6
|
-
export declare function proxyRequest({ configuration, request }: {
|
|
6
|
+
export declare function proxyRequest({ configuration, request, signal }: {
|
|
7
7
|
configuration: Configuration;
|
|
8
8
|
request: ServerToClientAPIRequest;
|
|
9
|
+
signal?: AbortSignal;
|
|
9
10
|
}): Promise<APIResponse>;
|
package/dist/{start-Dw1tdbuz.js → start-DGdf6ycx.js}
CHANGED
|
@@ -15622,21 +15622,30 @@ const ServerToClientAPIRequestSchema = APIRequestSchema.extend({
|
|
|
15622
15622
|
|
|
15623
15623
|
_enum(["inf-end", "inf-src", "storage"]);
|
|
15624
15624
|
|
|
15625
|
-
const
|
|
15626
|
-
|
|
15627
|
-
|
|
15628
|
-
|
|
15629
|
-
|
|
15630
|
-
|
|
15631
|
-
|
|
15632
|
-
|
|
15633
|
-
|
|
15634
|
-
|
|
15635
|
-
|
|
15636
|
-
trialDays: number$1().int().positive().nullable(),
|
|
15637
|
-
websiteFeatures: array(string$1()),
|
|
15638
|
-
websiteIconPath: string$1()
|
|
15625
|
+
const ConnectedSourceMetaSchema = object({
|
|
15626
|
+
chargeName: literal("connected_source"),
|
|
15627
|
+
unitCost: number$1(),
|
|
15628
|
+
units: number$1()
|
|
15629
|
+
});
|
|
15630
|
+
const TokenInputMetaSchema = object({
|
|
15631
|
+
chargeName: literal("tokens_input"),
|
|
15632
|
+
modelId: string$1().nullable(),
|
|
15633
|
+
requestId: string$1().nullable(),
|
|
15634
|
+
unitCost: number$1(),
|
|
15635
|
+
units: number$1()
|
|
15639
15636
|
});
|
|
15637
|
+
const TokenOutputMetaSchema = object({
|
|
15638
|
+
chargeName: literal("tokens_output"),
|
|
15639
|
+
modelId: string$1().nullable(),
|
|
15640
|
+
requestId: string$1().nullable(),
|
|
15641
|
+
unitCost: number$1(),
|
|
15642
|
+
units: number$1()
|
|
15643
|
+
});
|
|
15644
|
+
discriminatedUnion("chargeName", [
|
|
15645
|
+
ConnectedSourceMetaSchema,
|
|
15646
|
+
TokenInputMetaSchema,
|
|
15647
|
+
TokenOutputMetaSchema
|
|
15648
|
+
]);
|
|
15640
15649
|
|
|
15641
15650
|
function readEnvIntegerOptional({ defaultValue, name }) {
|
|
15642
15651
|
const str = readEnvStringOptional(name, `${defaultValue}`).trim();
|
|
@@ -108660,7 +108669,9 @@ class ModelManager extends EventEmitter {
|
|
|
108660
108669
|
const timeout = setTimeout(() => {
|
|
108661
108670
|
controller.abort(new Error("Inference request timeout"));
|
|
108662
108671
|
}, ENGINE_FETCH_TIMEOUT_MS);
|
|
108663
|
-
const effectiveSignal = callerSignal
|
|
108672
|
+
const effectiveSignal = callerSignal
|
|
108673
|
+
? AbortSignal.any([callerSignal, controller.signal])
|
|
108674
|
+
: controller.signal;
|
|
108664
108675
|
try {
|
|
108665
108676
|
return await undiciExports.fetch(joinURL(`http://localhost:${this.enginePort}`, path), {
|
|
108666
108677
|
...opts,
|
|
@@ -109240,11 +109251,14 @@ function monitorEngineResponseStream({ agentEngineType, body, contextLength, eng
|
|
|
109240
109251
|
}
|
|
109241
109252
|
return;
|
|
109242
109253
|
}
|
|
109243
|
-
const
|
|
109254
|
+
const aborted = body.destroyed && body.errored?.name === "AbortError";
|
|
109255
|
+
const closeError = aborted
|
|
109256
|
+
? null
|
|
109257
|
+
: new Error("Engine response stream closed before completion");
|
|
109244
109258
|
logEngineMetrics({
|
|
109245
109259
|
agentEngineType,
|
|
109246
|
-
error: closeError,
|
|
109247
|
-
level: "error",
|
|
109260
|
+
error: closeError ?? undefined,
|
|
109261
|
+
level: aborted ? "info" : "error",
|
|
109248
109262
|
logger,
|
|
109249
109263
|
requestBodyBytes,
|
|
109250
109264
|
requestPath,
|
|
@@ -109382,11 +109396,14 @@ function monitorEngineResponseSingle({ agentEngineType, body, contextLength, eng
|
|
|
109382
109396
|
}
|
|
109383
109397
|
return;
|
|
109384
109398
|
}
|
|
109385
|
-
const
|
|
109399
|
+
const aborted = body.destroyed && body.errored?.name === "AbortError";
|
|
109400
|
+
const closeError = aborted
|
|
109401
|
+
? null
|
|
109402
|
+
: new Error("Engine response stream closed before completion");
|
|
109386
109403
|
logEngineMetrics({
|
|
109387
109404
|
agentEngineType,
|
|
109388
|
-
error: closeError,
|
|
109389
|
-
level: "error",
|
|
109405
|
+
error: closeError ?? undefined,
|
|
109406
|
+
level: aborted ? "info" : "error",
|
|
109390
109407
|
logger,
|
|
109391
109408
|
requestBodyBytes,
|
|
109392
109409
|
requestPath,
|
|
@@ -109457,7 +109474,7 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
|
|
|
109457
109474
|
}
|
|
109458
109475
|
return Math.round(tokensPerSecond);
|
|
109459
109476
|
}
|
|
109460
|
-
async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
|
|
109477
|
+
async function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
|
|
109461
109478
|
function normalizeTokenCount(value) {
|
|
109462
109479
|
if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
|
|
109463
109480
|
return value;
|
|
@@ -109485,7 +109502,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109485
109502
|
bytes: requestBodyBytes + responseBytes,
|
|
109486
109503
|
completionTokens,
|
|
109487
109504
|
engine: configuration.agentEngineType,
|
|
109488
|
-
endpointId: null,
|
|
109505
|
+
endpointId: endpointId ?? null,
|
|
109489
109506
|
latencyMs,
|
|
109490
109507
|
modelId: modelID,
|
|
109491
109508
|
promptTokens,
|
|
@@ -109509,7 +109526,8 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109509
109526
|
headers: {
|
|
109510
109527
|
"Content-Type": "application/json"
|
|
109511
109528
|
},
|
|
109512
|
-
method: "POST"
|
|
109529
|
+
method: "POST",
|
|
109530
|
+
signal
|
|
109513
109531
|
})
|
|
109514
109532
|
.catch(error => {
|
|
109515
109533
|
logEngineMetrics({
|
|
@@ -109527,7 +109545,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109527
109545
|
bytes: requestBodyBytes,
|
|
109528
109546
|
completionTokens: 0,
|
|
109529
109547
|
engine: configuration.agentEngineType,
|
|
109530
|
-
endpointId: null,
|
|
109548
|
+
endpointId: endpointId ?? null,
|
|
109531
109549
|
latencyMs,
|
|
109532
109550
|
modelId: modelID,
|
|
109533
109551
|
promptTokens: 0,
|
|
@@ -109577,7 +109595,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109577
109595
|
bytes: requestBodyBytes,
|
|
109578
109596
|
completionTokens: 0,
|
|
109579
109597
|
engine: configuration.agentEngineType,
|
|
109580
|
-
endpointId: null,
|
|
109598
|
+
endpointId: endpointId ?? null,
|
|
109581
109599
|
latencyMs,
|
|
109582
109600
|
modelId: modelID,
|
|
109583
109601
|
promptTokens: 0,
|
|
@@ -109628,35 +109646,52 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109628
109646
|
};
|
|
109629
109647
|
}
|
|
109630
109648
|
|
|
109649
|
+
function extractEndpointId$1(req) {
|
|
109650
|
+
const value = req.headers["x-endpoint-id"];
|
|
109651
|
+
const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
|
|
109652
|
+
return raw && isValid(raw) ? raw : null;
|
|
109653
|
+
}
|
|
109631
109654
|
function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
|
|
109632
109655
|
return {
|
|
109633
109656
|
"/v1/chat/completions": {
|
|
109634
|
-
POST: async ({ body }) => {
|
|
109657
|
+
POST: async ({ body, req, res }) => {
|
|
109635
109658
|
const modelID = getModelID();
|
|
109636
109659
|
const modelManager = getModelManager();
|
|
109660
|
+
const abortController = new AbortController();
|
|
109661
|
+
res.on("close", () => {
|
|
109662
|
+
abortController.abort();
|
|
109663
|
+
});
|
|
109637
109664
|
return proxyOpenAIStreamingRoute({
|
|
109638
109665
|
body,
|
|
109639
109666
|
configuration,
|
|
109667
|
+
endpointId: extractEndpointId$1(req),
|
|
109640
109668
|
logger,
|
|
109641
109669
|
modelID,
|
|
109642
109670
|
modelManager,
|
|
109643
109671
|
path: "/v1/chat/completions",
|
|
109644
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
109672
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
109673
|
+
signal: abortController.signal
|
|
109645
109674
|
});
|
|
109646
109675
|
}
|
|
109647
109676
|
},
|
|
109648
109677
|
"/v1/completions": {
|
|
109649
|
-
POST: async ({ body }) => {
|
|
109678
|
+
POST: async ({ body, req, res }) => {
|
|
109650
109679
|
const modelID = getModelID();
|
|
109651
109680
|
const modelManager = getModelManager();
|
|
109681
|
+
const abortController = new AbortController();
|
|
109682
|
+
res.on("close", () => {
|
|
109683
|
+
abortController.abort();
|
|
109684
|
+
});
|
|
109652
109685
|
return proxyOpenAIStreamingRoute({
|
|
109653
109686
|
body,
|
|
109654
109687
|
configuration,
|
|
109688
|
+
endpointId: extractEndpointId$1(req),
|
|
109655
109689
|
logger,
|
|
109656
109690
|
modelID,
|
|
109657
109691
|
modelManager,
|
|
109658
109692
|
path: "/v1/completions",
|
|
109659
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
109693
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
109694
|
+
signal: abortController.signal
|
|
109660
109695
|
});
|
|
109661
109696
|
}
|
|
109662
109697
|
},
|
|
@@ -109770,7 +109805,7 @@ function extractAnthropicNonStreamUsage(body) {
|
|
|
109770
109805
|
return null;
|
|
109771
109806
|
}
|
|
109772
109807
|
}
|
|
109773
|
-
async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }) {
|
|
109808
|
+
async function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
|
|
109774
109809
|
function reportMetricsSafe(payload) {
|
|
109775
109810
|
reportMetrics(payload).catch(error => {
|
|
109776
109811
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -109792,7 +109827,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
109792
109827
|
bytes: requestBodyBytes + responseBytes,
|
|
109793
109828
|
completionTokens,
|
|
109794
109829
|
engine: configuration.agentEngineType,
|
|
109795
|
-
endpointId: null,
|
|
109830
|
+
endpointId: endpointId ?? null,
|
|
109796
109831
|
latencyMs,
|
|
109797
109832
|
modelId: modelID,
|
|
109798
109833
|
promptTokens,
|
|
@@ -109816,7 +109851,8 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
109816
109851
|
headers: {
|
|
109817
109852
|
"Content-Type": "application/json"
|
|
109818
109853
|
},
|
|
109819
|
-
method: "POST"
|
|
109854
|
+
method: "POST",
|
|
109855
|
+
signal
|
|
109820
109856
|
})
|
|
109821
109857
|
.catch(error => {
|
|
109822
109858
|
logEngineMetrics({
|
|
@@ -109834,7 +109870,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
109834
109870
|
bytes: requestBodyBytes,
|
|
109835
109871
|
completionTokens: 0,
|
|
109836
109872
|
engine: configuration.agentEngineType,
|
|
109837
|
-
endpointId: null,
|
|
109873
|
+
endpointId: endpointId ?? null,
|
|
109838
109874
|
latencyMs,
|
|
109839
109875
|
modelId: modelID,
|
|
109840
109876
|
promptTokens: 0,
|
|
@@ -109879,7 +109915,7 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
109879
109915
|
bytes: requestBodyBytes,
|
|
109880
109916
|
completionTokens: 0,
|
|
109881
109917
|
engine: configuration.agentEngineType,
|
|
109882
|
-
endpointId: null,
|
|
109918
|
+
endpointId: endpointId ?? null,
|
|
109883
109919
|
latencyMs,
|
|
109884
109920
|
modelId: modelID,
|
|
109885
109921
|
promptTokens: 0,
|
|
@@ -110057,19 +110093,30 @@ async function proxyAnthropicStreamingRoute({ body, configuration, logger, model
|
|
|
110057
110093
|
};
|
|
110058
110094
|
}
|
|
110059
110095
|
|
|
110096
|
+
function extractEndpointId(req) {
|
|
110097
|
+
const value = req.headers["x-endpoint-id"];
|
|
110098
|
+
const raw = typeof value === "string" ? value : Array.isArray(value) ? value[0] : null;
|
|
110099
|
+
return raw && isValid(raw) ? raw : null;
|
|
110100
|
+
}
|
|
110060
110101
|
function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
|
|
110061
110102
|
return {
|
|
110062
110103
|
"/v1/messages": {
|
|
110063
|
-
POST: async ({ body }) => {
|
|
110104
|
+
POST: async ({ body, req, res }) => {
|
|
110064
110105
|
const modelID = getModelID();
|
|
110065
110106
|
const modelManager = getModelManager();
|
|
110107
|
+
const abortController = new AbortController();
|
|
110108
|
+
res.on("close", () => {
|
|
110109
|
+
abortController.abort();
|
|
110110
|
+
});
|
|
110066
110111
|
return proxyAnthropicStreamingRoute({
|
|
110067
110112
|
body,
|
|
110068
110113
|
configuration,
|
|
110114
|
+
endpointId: extractEndpointId(req),
|
|
110069
110115
|
logger,
|
|
110070
110116
|
modelID,
|
|
110071
110117
|
modelManager,
|
|
110072
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
110118
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
110119
|
+
signal: abortController.signal
|
|
110073
110120
|
});
|
|
110074
110121
|
}
|
|
110075
110122
|
}
|
|
@@ -110089,6 +110136,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110089
110136
|
const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
|
|
110090
110137
|
const maxReconnectDelayMs = 30000;
|
|
110091
110138
|
let reconnectAttempt = 0;
|
|
110139
|
+
const activeRequests = new Map();
|
|
110092
110140
|
while (!signal?.aborted) {
|
|
110093
110141
|
const connectionStartedAt = Date.now();
|
|
110094
110142
|
try {
|
|
@@ -110102,11 +110150,29 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110102
110150
|
});
|
|
110103
110151
|
},
|
|
110104
110152
|
onMessage: (message) => {
|
|
110153
|
+
if (message.event === "cancel") {
|
|
110154
|
+
const { requestID } = JSON.parse(message.data);
|
|
110155
|
+
const controller = activeRequests.get(requestID);
|
|
110156
|
+
if (controller) {
|
|
110157
|
+
logger.info("Cancelling active request", {
|
|
110158
|
+
requestID
|
|
110159
|
+
});
|
|
110160
|
+
controller.abort();
|
|
110161
|
+
activeRequests.delete(requestID);
|
|
110162
|
+
}
|
|
110163
|
+
return;
|
|
110164
|
+
}
|
|
110105
110165
|
if (message.event !== "request") {
|
|
110106
110166
|
return;
|
|
110107
110167
|
}
|
|
110108
110168
|
const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
|
|
110169
|
+
const perRequestController = new AbortController();
|
|
110170
|
+
activeRequests.set(payload.requestID, perRequestController);
|
|
110171
|
+
const effectiveSignal = signal
|
|
110172
|
+
? AbortSignal.any([perRequestController.signal, signal])
|
|
110173
|
+
: perRequestController.signal;
|
|
110109
110174
|
handleRequest({
|
|
110175
|
+
activeRequests,
|
|
110110
110176
|
apiURL,
|
|
110111
110177
|
configuration,
|
|
110112
110178
|
logger,
|
|
@@ -110116,7 +110182,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110116
110182
|
onRequestStart,
|
|
110117
110183
|
reportMetrics,
|
|
110118
110184
|
request: payload,
|
|
110119
|
-
signal
|
|
110185
|
+
signal: effectiveSignal
|
|
110120
110186
|
}).catch(error => {
|
|
110121
110187
|
logger.error("SSE request handler failed", {
|
|
110122
110188
|
error: asError(error),
|
|
@@ -110151,7 +110217,7 @@ async function handleSSERequests({ apiURL, configuration, logger, modelID, onReq
|
|
|
110151
110217
|
}
|
|
110152
110218
|
}
|
|
110153
110219
|
}
|
|
110154
|
-
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
110220
|
+
async function handleRequest({ activeRequests, apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
|
|
110155
110221
|
function reportMetricsSafe(payload) {
|
|
110156
110222
|
reportMetrics(payload).catch(error => {
|
|
110157
110223
|
logger.warn("Failed to upload LLM prompt metrics", {
|
|
@@ -110160,11 +110226,12 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110160
110226
|
});
|
|
110161
110227
|
});
|
|
110162
110228
|
}
|
|
110229
|
+
const endpointId = request.parameters?.endpointID ?? null;
|
|
110163
110230
|
const requestStartedAt = Date.now();
|
|
110164
110231
|
const requestBytes = calculateRequestBytes(request.body ?? null);
|
|
110165
110232
|
try {
|
|
110166
110233
|
await onRequestStart?.(request);
|
|
110167
|
-
const response = await onRequest(request);
|
|
110234
|
+
const response = await onRequest({ request, signal });
|
|
110168
110235
|
const responseMetrics = await streamResponse({
|
|
110169
110236
|
apiURL,
|
|
110170
110237
|
configuration,
|
|
@@ -110184,7 +110251,7 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110184
110251
|
bytes: requestBytes + responseMetrics.responseBytes,
|
|
110185
110252
|
completionTokens: 0,
|
|
110186
110253
|
engine: configuration.agentEngineType,
|
|
110187
|
-
endpointId
|
|
110254
|
+
endpointId,
|
|
110188
110255
|
latencyMs,
|
|
110189
110256
|
modelId: modelID,
|
|
110190
110257
|
promptTokens: 0,
|
|
@@ -110200,40 +110267,45 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110200
110267
|
});
|
|
110201
110268
|
}
|
|
110202
110269
|
catch (error) {
|
|
110203
|
-
|
|
110204
|
-
|
|
110205
|
-
|
|
110206
|
-
|
|
110270
|
+
const isCancelled = signal?.aborted;
|
|
110271
|
+
if (isCancelled) {
|
|
110272
|
+
logger.info("SSE request cancelled", {
|
|
110273
|
+
requestID: request.requestID
|
|
110274
|
+
});
|
|
110275
|
+
}
|
|
110276
|
+
else {
|
|
110277
|
+
logger.error("SSE request failed", {
|
|
110278
|
+
error: asError(error),
|
|
110279
|
+
requestMethod: request.requestID
|
|
110280
|
+
});
|
|
110281
|
+
}
|
|
110207
110282
|
const failureMessage = "Bad gateway\n\nProxying failed";
|
|
110208
110283
|
const failureBytes = Buffer.byteLength(failureMessage, "utf8");
|
|
110209
110284
|
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
110210
|
-
|
|
110211
|
-
|
|
110212
|
-
|
|
110213
|
-
|
|
110214
|
-
|
|
110215
|
-
|
|
110216
|
-
|
|
110217
|
-
|
|
110218
|
-
|
|
110219
|
-
|
|
110220
|
-
|
|
110221
|
-
|
|
110222
|
-
|
|
110223
|
-
|
|
110224
|
-
|
|
110225
|
-
|
|
110226
|
-
|
|
110227
|
-
|
|
110228
|
-
|
|
110229
|
-
status: 502
|
|
110230
|
-
});
|
|
110231
|
-
await streamHandler.end();
|
|
110285
|
+
if (!isCancelled) {
|
|
110286
|
+
const streamHandler = await sendChunkStream({
|
|
110287
|
+
apiURL,
|
|
110288
|
+
configuration,
|
|
110289
|
+
requestID: request.requestID,
|
|
110290
|
+
logger
|
|
110291
|
+
});
|
|
110292
|
+
await streamHandler.sendChunk({
|
|
110293
|
+
data: encodeBinaryChunk(Buffer.from(failureMessage)),
|
|
110294
|
+
sequence: 0,
|
|
110295
|
+
status: 502
|
|
110296
|
+
});
|
|
110297
|
+
await streamHandler.sendChunk({
|
|
110298
|
+
data: null,
|
|
110299
|
+
sequence: 1,
|
|
110300
|
+
status: 502
|
|
110301
|
+
});
|
|
110302
|
+
await streamHandler.end();
|
|
110303
|
+
}
|
|
110232
110304
|
reportMetricsSafe({
|
|
110233
|
-
bytes: requestBytes + failureBytes,
|
|
110305
|
+
bytes: isCancelled ? requestBytes : requestBytes + failureBytes,
|
|
110234
110306
|
completionTokens: 0,
|
|
110235
110307
|
engine: configuration.agentEngineType,
|
|
110236
|
-
endpointId
|
|
110308
|
+
endpointId,
|
|
110237
110309
|
latencyMs,
|
|
110238
110310
|
modelId: modelID,
|
|
110239
110311
|
promptTokens: 0,
|
|
@@ -110241,14 +110313,15 @@ async function handleRequest({ apiURL, configuration, logger, modelID, onRequest
|
|
|
110241
110313
|
requestId: request.requestID,
|
|
110242
110314
|
requestMethod: request.method,
|
|
110243
110315
|
requestPath: request.path,
|
|
110244
|
-
responseBytes: failureBytes,
|
|
110316
|
+
responseBytes: isCancelled ? 0 : failureBytes,
|
|
110245
110317
|
successful: false,
|
|
110246
|
-
timeToFirstTokenMs: latencyMs,
|
|
110247
|
-
tokensPerSecond,
|
|
110248
|
-
totalTokens
|
|
110318
|
+
timeToFirstTokenMs: isCancelled ? null : latencyMs,
|
|
110319
|
+
tokensPerSecond: 0,
|
|
110320
|
+
totalTokens: 0
|
|
110249
110321
|
});
|
|
110250
110322
|
}
|
|
110251
110323
|
finally {
|
|
110324
|
+
activeRequests.delete(request.requestID);
|
|
110252
110325
|
await onRequestEnd?.(request);
|
|
110253
110326
|
}
|
|
110254
110327
|
}
|
|
@@ -110446,7 +110519,7 @@ function calculateTokensPerSecond({ durationMs, totalTokens }) {
|
|
|
110446
110519
|
/**
|
|
110447
110520
|
* Proxy server requests to the local inference HTTP server.
|
|
110448
110521
|
*/
|
|
110449
|
-
async function proxyRequest({ configuration, request }) {
|
|
110522
|
+
async function proxyRequest({ configuration, request, signal }) {
|
|
110450
110523
|
let finalPath = request.path;
|
|
110451
110524
|
if (request.parameters) {
|
|
110452
110525
|
Object.entries(request.parameters).forEach(([key, value]) => {
|
|
@@ -110461,8 +110534,16 @@ async function proxyRequest({ configuration, request }) {
|
|
|
110461
110534
|
}
|
|
110462
110535
|
const fetchOptions = {
|
|
110463
110536
|
method: request.method,
|
|
110464
|
-
headers:
|
|
110537
|
+
headers: {
|
|
110538
|
+
...request.headers,
|
|
110539
|
+
...(request.parameters?.endpointID
|
|
110540
|
+
? { "x-endpoint-id": request.parameters.endpointID }
|
|
110541
|
+
: undefined)
|
|
110542
|
+
}
|
|
110465
110543
|
};
|
|
110544
|
+
if (signal) {
|
|
110545
|
+
fetchOptions.signal = signal;
|
|
110546
|
+
}
|
|
110466
110547
|
if (request.body) {
|
|
110467
110548
|
fetchOptions.body =
|
|
110468
110549
|
typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
|
|
@@ -120361,10 +120442,11 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
120361
120442
|
configuration,
|
|
120362
120443
|
logger,
|
|
120363
120444
|
modelID: conduitConfiguration.targetModel.id,
|
|
120364
|
-
onRequest: async (request) => {
|
|
120445
|
+
onRequest: async ({ request, signal }) => {
|
|
120365
120446
|
return proxyRequest({
|
|
120366
120447
|
configuration,
|
|
120367
|
-
request
|
|
120448
|
+
request,
|
|
120449
|
+
signal
|
|
120368
120450
|
});
|
|
120369
120451
|
},
|
|
120370
120452
|
onRequestEnd: () => {
|
package/dist/utils/anthropic.d.ts
CHANGED
|
@@ -3,13 +3,15 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
-
export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }: {
|
|
6
|
+
export declare function proxyAnthropicStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
|
|
7
7
|
body: unknown;
|
|
8
8
|
configuration: Configuration;
|
|
9
|
+
endpointId?: ULID | null;
|
|
9
10
|
logger: Logger;
|
|
10
11
|
modelID: ULID;
|
|
11
12
|
modelManager: ModelManager;
|
|
12
13
|
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
|
14
|
+
signal?: AbortSignal;
|
|
13
15
|
}): Promise<{
|
|
14
16
|
body: Readable;
|
|
15
17
|
headers: Record<string, string>;
|
package/dist/utils/openai.d.ts
CHANGED
|
@@ -3,14 +3,16 @@ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definition
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
-
export declare function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }: {
|
|
6
|
+
export declare function proxyOpenAIStreamingRoute({ body, configuration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
|
|
7
7
|
body: unknown;
|
|
8
8
|
configuration: Configuration;
|
|
9
|
+
endpointId?: ULID | null;
|
|
9
10
|
logger: Logger;
|
|
10
11
|
modelID: ULID;
|
|
11
12
|
modelManager: ModelManager;
|
|
12
13
|
path: "/v1/chat/completions" | "/v1/completions";
|
|
13
14
|
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
|
15
|
+
signal?: AbortSignal;
|
|
14
16
|
}): Promise<{
|
|
15
17
|
body: Readable;
|
|
16
18
|
headers: Record<string, string>;
|