@infersec/conduit 1.34.0 → 1.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
|
|
|
6
6
|
|
|
7
7
|
import { parseArgs } from 'node:util';
|
|
8
8
|
import 'node:crypto';
|
|
9
|
-
import { a as asError, s as startInferenceAgent } from './start-
|
|
9
|
+
import { a as asError, s as startInferenceAgent } from './start-CdILFvRO.js';
|
|
10
10
|
import 'argon2';
|
|
11
11
|
import 'node:child_process';
|
|
12
12
|
import 'node:stream';
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
|
|
|
5
5
|
const __dirname = __pathDirname(__filename);
|
|
6
6
|
|
|
7
7
|
import 'node:crypto';
|
|
8
|
-
import { s as startInferenceAgent, a as asError } from './start-
|
|
8
|
+
import { s as startInferenceAgent, a as asError } from './start-CdILFvRO.js';
|
|
9
9
|
import 'argon2';
|
|
10
10
|
import 'node:child_process';
|
|
11
11
|
import 'node:stream';
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
|
|
2
|
+
import { implementAPIReference } from "@infersec/fetch";
|
|
3
|
+
import { Logger } from "@infersec/logger";
|
|
4
|
+
import { APIClient } from "../apiClient/index.js";
|
|
5
|
+
import { Configuration } from "../configuration.js";
|
|
6
|
+
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
7
|
+
/**
 * Handler map type: the `api` property of the first argument accepted by
 * `implementAPIReference` when it is instantiated with
 * `typeof API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE`.
 */
type ConduitAnthropicAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE>>[0]["api"];
|
|
8
|
+
/**
 * Builds the handler map for the Anthropic-style conduit API reference.
 *
 * The single options argument carries:
 * - `apiClient`: an `APIClient` instance
 * - `configuration`: the `Configuration` object
 * - `getModelID`: callback returning the model identifier as a string
 * - `getModelManager`: callback returning the `ModelManager`
 * - `logger`: a `Logger`
 *
 * @returns The handlers typed as `ConduitAnthropicAPIReferenceHandlers`.
 */
export declare function createConduitAnthropicAPIReferenceHandlers(
    { apiClient, configuration, getModelID, getModelManager, logger }: {
        apiClient: APIClient;
        configuration: Configuration;
        getModelID: () => string;
        getModelManager: () => ModelManager;
        logger: Logger;
    }
): ConduitAnthropicAPIReferenceHandlers;
|
|
15
|
+
/** Text content block. */
type PostMessagesTextBlock = {
    text: string;
    type: "text";
};
/** Image content block whose source is either inline base64 data or a URL. */
type PostMessagesImageBlock = {
    source: {
        data: string;
        media_type: "image/gif" | "image/jpeg" | "image/png" | "image/webp";
        type: "base64";
    } | {
        type: "url";
        url: string;
    };
    type: "image";
};
/** Tool result block supplied in a user message. */
type PostMessagesToolResultBlock = {
    tool_use_id: string;
    content: string | PostMessagesTextBlock[];
    type: "tool_result";
    is_error?: boolean | undefined;
};
/** Tool invocation block found in an assistant message. */
type PostMessagesToolUseBlock = {
    id: string;
    input: Record<string, unknown>;
    name: string;
    type: "tool_use";
};
/** Thinking block; additional keys are allowed by the index signature. */
type PostMessagesThinkingBlock = {
    [x: string]: unknown;
    thinking: string;
    type: "thinking";
};
/** Redacted thinking block; additional keys are allowed by the index signature. */
type PostMessagesRedactedThinkingBlock = {
    [x: string]: unknown;
    data: string;
    type: "redacted_thinking";
};
/** Message with `role: "user"`. */
type PostMessagesUserMessage = {
    content: string | (PostMessagesTextBlock | PostMessagesImageBlock | PostMessagesToolResultBlock)[];
    role: "user";
};
/** Message with `role: "assistant"`. */
type PostMessagesAssistantMessage = {
    content: string | (PostMessagesTextBlock | PostMessagesToolUseBlock | PostMessagesThinkingBlock | PostMessagesRedactedThinkingBlock)[];
    role: "assistant";
};
/** Accepted `tool_choice` values: a bare mode string or a typed object. */
type PostMessagesToolChoice = "none" | "any" | "auto" | {
    type: "auto";
    disable_parallel_tool_use?: boolean | undefined;
} | {
    type: "any";
    disable_parallel_tool_use?: boolean | undefined;
} | {
    type: "none";
    disable_parallel_tool_use?: boolean | undefined;
} | {
    name: string;
    type: "tool";
    disable_parallel_tool_use?: boolean | undefined;
};
/** Request body handed to the POST /v1/messages handler. */
type PostMessagesBody = {
    max_tokens: number;
    messages: (PostMessagesUserMessage | PostMessagesAssistantMessage)[];
    model: string;
    metadata?: {
        user_id?: string | undefined;
    } | undefined;
    stop_sequences?: string[] | undefined;
    stream?: boolean | undefined;
    system?: string | PostMessagesTextBlock[] | undefined;
    temperature?: number | undefined;
    thinking?: {
        budget_tokens: number;
        type: "enabled";
    } | undefined;
    tool_choice?: PostMessagesToolChoice | undefined;
    tools?: {
        input_schema: Record<string, unknown>;
        name: string;
        description?: string | undefined;
    }[] | undefined;
    top_k?: number | undefined;
    top_p?: number | undefined;
};
/** Handler result: either a readable body with a status, or a status plus status text. */
type PostMessagesResult = {
    body: import("stream").Readable;
    headers?: Record<string, string>;
    status: number;
} | {
    headers?: Record<string, string>;
    status: number;
    statusText: string;
};
/**
 * Creates the POST handler for the `/v1/messages` route.
 *
 * @param options Same dependency bundle as `createConduitAnthropicAPIReferenceHandlers`:
 *   `apiClient`, `configuration`, `getModelID`, `getModelManager` and `logger`.
 * @returns An async handler that receives the request/response pair, empty
 *   `parameters` and `query` records, the message-creation `body` and an
 *   `undefined` `responseSchema`, and resolves to a `PostMessagesResult`.
 */
export declare function createPostMessagesHandler(options: {
    apiClient: APIClient;
    configuration: Configuration;
    getModelID: () => string;
    getModelManager: () => ModelManager;
    logger: Logger;
}): (params: {
    req: import("@infersec/fetch").APIRequest;
    res: import("@infersec/fetch").APIResponse;
    parameters: Record<string, never>;
    query: Record<string, never>;
    body: PostMessagesBody;
    responseSchema: undefined;
}) => Promise<PostMessagesResult>;
|
|
119
|
+
export {};
|
|
@@ -460,7 +460,7 @@ const allowsEval = cached(() => {
|
|
|
460
460
|
return false;
|
|
461
461
|
}
|
|
462
462
|
});
|
|
463
|
-
function isPlainObject$
|
|
463
|
+
function isPlainObject$3(o) {
|
|
464
464
|
if (isObject$1(o) === false)
|
|
465
465
|
return false;
|
|
466
466
|
// modified constructor
|
|
@@ -480,7 +480,7 @@ function isPlainObject$2(o) {
|
|
|
480
480
|
return true;
|
|
481
481
|
}
|
|
482
482
|
function shallowClone(o) {
|
|
483
|
-
if (isPlainObject$
|
|
483
|
+
if (isPlainObject$3(o))
|
|
484
484
|
return { ...o };
|
|
485
485
|
if (Array.isArray(o))
|
|
486
486
|
return [...o];
|
|
@@ -665,7 +665,7 @@ function omit(schema, mask) {
|
|
|
665
665
|
return clone(schema, def);
|
|
666
666
|
}
|
|
667
667
|
function extend(schema, shape) {
|
|
668
|
-
if (!isPlainObject$
|
|
668
|
+
if (!isPlainObject$3(shape)) {
|
|
669
669
|
throw new Error("Invalid input to extend: expected a plain object");
|
|
670
670
|
}
|
|
671
671
|
const checks = schema._zod.def.checks;
|
|
@@ -684,7 +684,7 @@ function extend(schema, shape) {
|
|
|
684
684
|
return clone(schema, def);
|
|
685
685
|
}
|
|
686
686
|
function safeExtend(schema, shape) {
|
|
687
|
-
if (!isPlainObject$
|
|
687
|
+
if (!isPlainObject$3(shape)) {
|
|
688
688
|
throw new Error("Invalid input to safeExtend: expected a plain object");
|
|
689
689
|
}
|
|
690
690
|
const def = {
|
|
@@ -944,7 +944,7 @@ var util$6 = /*#__PURE__*/Object.freeze({
|
|
|
944
944
|
getSizableOrigin: getSizableOrigin,
|
|
945
945
|
hexToUint8Array: hexToUint8Array,
|
|
946
946
|
isObject: isObject$1,
|
|
947
|
-
isPlainObject: isPlainObject$
|
|
947
|
+
isPlainObject: isPlainObject$3,
|
|
948
948
|
issue: issue,
|
|
949
949
|
joinValues: joinValues,
|
|
950
950
|
jsonStringifyReplacer: jsonStringifyReplacer,
|
|
@@ -3154,7 +3154,7 @@ function mergeValues(a, b) {
|
|
|
3154
3154
|
if (a instanceof Date && b instanceof Date && +a === +b) {
|
|
3155
3155
|
return { valid: true, data: a };
|
|
3156
3156
|
}
|
|
3157
|
-
if (isPlainObject$
|
|
3157
|
+
if (isPlainObject$3(a) && isPlainObject$3(b)) {
|
|
3158
3158
|
const bKeys = Object.keys(b);
|
|
3159
3159
|
const sharedKeys = Object.keys(a).filter((key) => bKeys.indexOf(key) !== -1);
|
|
3160
3160
|
const newObj = { ...a, ...b };
|
|
@@ -3286,7 +3286,7 @@ const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
|
|
|
3286
3286
|
$ZodType.init(inst, def);
|
|
3287
3287
|
inst._zod.parse = (payload, ctx) => {
|
|
3288
3288
|
const input = payload.value;
|
|
3289
|
-
if (!isPlainObject$
|
|
3289
|
+
if (!isPlainObject$3(input)) {
|
|
3290
3290
|
payload.issues.push({
|
|
3291
3291
|
expected: "record",
|
|
3292
3292
|
code: "invalid_type",
|
|
@@ -15004,6 +15004,167 @@ const API_SERVICE_CONDUIT_API_REFERENCE = {
|
|
|
15004
15004
|
}
|
|
15005
15005
|
};
|
|
15006
15006
|
|
|
15007
|
+
// ==================== CONTENT BLOCKS ====================
/** Text content block: `{ text, type: "text" }`. */
const AnthropicTextContentBlockSchema = object({
    text: string$1(),
    type: literal("text")
});
/** Tool invocation block: an id, a tool name and a string-keyed input record. */
const AnthropicToolUseContentBlockSchema = object({
    id: string$1(),
    input: record(string$1(), unknown()),
    name: string$1(),
    type: literal("tool_use")
});
/** Tool result block: refers back to a tool_use id and carries the result content. */
const AnthropicToolResultContentBlockSchema = object({
    tool_use_id: string$1(),
    // Either a bare string or a list of text blocks. The shared text block
    // schema is reused instead of re-declaring the same shape inline.
    content: union([
        string$1(),
        array(AnthropicTextContentBlockSchema)
    ]),
    is_error: boolean$1().optional(),
    type: literal("tool_result")
});
/** Image block whose `source` is discriminated on `type`: inline base64 data or a URL. */
const AnthropicImageContentBlockSchema = object({
    source: discriminatedUnion("type", [
        object({
            data: string$1(),
            media_type: _enum(["image/gif", "image/jpeg", "image/png", "image/webp"]),
            type: literal("base64")
        }),
        object({
            type: literal("url"),
            url: string$1()
        })
    ]),
    type: literal("image")
});
/** Thinking block; passthrough() keeps keys beyond the two declared here. */
const AnthropicThinkingContentBlockSchema = object({
    thinking: string$1(),
    type: literal("thinking")
})
    .passthrough();
/** Redacted thinking block; passthrough() keeps keys beyond the two declared here. */
const AnthropicRedactedThinkingContentBlockSchema = object({
    data: string$1(),
    type: literal("redacted_thinking")
})
    .passthrough();
// ==================== INPUT CONTENT (for messages) ====================
/** Content blocks accepted inside a user message. */
const AnthropicInputContentSchema = union([
    AnthropicTextContentBlockSchema,
    AnthropicImageContentBlockSchema,
    AnthropicToolResultContentBlockSchema
]);
// ==================== MESSAGE PARAMS ====================
/** User message: string content or a list of input content blocks. */
const AnthropicUserMessageParamSchema = object({
    content: union([string$1(), array(AnthropicInputContentSchema)]),
    role: literal("user")
});
/** Assistant message: string content or a list of text/tool_use/thinking blocks. */
const AnthropicAssistantMessageParamSchema = object({
    content: union([
        string$1(),
        array(union([
            AnthropicTextContentBlockSchema,
            AnthropicToolUseContentBlockSchema,
            AnthropicThinkingContentBlockSchema,
            AnthropicRedactedThinkingContentBlockSchema
        ]))
    ]),
    role: literal("assistant")
});
/** Any message, discriminated on `role`. */
const AnthropicMessageParamSchema = discriminatedUnion("role", [
    AnthropicUserMessageParamSchema,
    AnthropicAssistantMessageParamSchema
]);
// ==================== TOOL DEFINITIONS ====================
/** Tool definition: a name, an optional description and a string-keyed input schema record. */
const AnthropicToolSchema = object({
    description: string$1().optional(),
    input_schema: record(string$1(), unknown()),
    name: string$1()
});
// ==================== MESSAGES CREATE PARAMS ====================
/**
 * Request body for creating a message.
 * Required: `max_tokens` (positive integer), `messages`, `model`.
 * Every other field is optional.
 */
const AnthropicMessagesCreateParamsSchema = object({
    max_tokens: number$1().int().positive(),
    messages: array(AnthropicMessageParamSchema),
    model: string$1(),
    metadata: object({
        user_id: string$1().optional()
    })
        .optional(),
    stop_sequences: array(string$1()).optional(),
    stream: boolean$1().optional(),
    // System prompt: a bare string or a list of text blocks (shared schema reused).
    system: union([string$1(), array(AnthropicTextContentBlockSchema)])
        .optional(),
    // Sampling temperature constrained to the inclusive range [0, 1].
    temperature: number$1().min(0).max(1).optional(),
    // Only the "enabled" form is accepted; the budget is an integer of at least 1024.
    thinking: object({
        budget_tokens: number$1().int().min(1024),
        type: literal("enabled")
    })
        .optional(),
    // Accepts the bare mode strings as well as the typed object forms.
    tool_choice: union([
        literal("auto"),
        literal("any"),
        literal("none"),
        object({
            disable_parallel_tool_use: boolean$1().optional(),
            type: literal("auto")
        }),
        object({
            disable_parallel_tool_use: boolean$1().optional(),
            type: literal("any")
        }),
        object({
            disable_parallel_tool_use: boolean$1().optional(),
            type: literal("none")
        }),
        object({
            disable_parallel_tool_use: boolean$1().optional(),
            name: string$1(),
            type: literal("tool")
        })
    ])
        .optional(),
    tools: array(AnthropicToolSchema).optional(),
    top_k: number$1().int().positive().optional(),
    // Nucleus sampling probability constrained to the inclusive range [0, 1].
    top_p: number$1().min(0).max(1).optional()
});
// ==================== RESPONSE SCHEMAS ====================
/** Token usage counters; the two cache counters are optional. */
const AnthropicUsageSchema = object({
    cache_creation_input_tokens: number$1().optional(),
    cache_read_input_tokens: number$1().optional(),
    input_tokens: number$1(),
    output_tokens: number$1()
});
// Response message schema. The result is not bound to a name here: the
// expression is evaluated and its value discarded.
object({
    content: array(union([
        AnthropicTextContentBlockSchema,
        AnthropicToolUseContentBlockSchema,
        AnthropicThinkingContentBlockSchema,
        AnthropicRedactedThinkingContentBlockSchema
    ])),
    id: string$1(),
    model: string$1(),
    role: literal("assistant"),
    stop_reason: _enum(["end_turn", "max_tokens", "pause_turn", "refusal", "stop_sequence", "tool_use"])
        .nullable(),
    stop_sequence: string$1().nullable(),
    type: literal("message"),
    usage: AnthropicUsageSchema
});
|
|
15153
|
+
|
|
15154
|
+
/**
 * API reference for the Anthropic-style conduit surface.
 * It declares a single route, POST /v1/messages, with:
 * - `auth.type` set to "shared-secret"
 * - the request body described by AnthropicMessagesCreateParamsSchema
 * - `response.type` set to "text-stream"
 */
const API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE = {
    "/v1/messages": {
        POST: {
            auth: { type: "shared-secret" },
            body: AnthropicMessagesCreateParamsSchema,
            response: { type: "text-stream" }
        }
    }
};
|
|
15167
|
+
|
|
15007
15168
|
/**
|
|
15008
15169
|
* Coerce non-string values to JSON strings. Some LLM backends (e.g. llama.cpp)
|
|
15009
15170
|
* return tool_calls arguments as parsed objects instead of JSON strings, which
|
|
@@ -15323,6 +15484,15 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
|
|
|
15323
15484
|
}
|
|
15324
15485
|
};
|
|
15325
15486
|
|
|
15487
|
+
({
|
|
15488
|
+
"/api/inferencing/:endpointID/anthropic/v1/messages": {
|
|
15489
|
+
POST: {
|
|
15490
|
+
parameters: {
|
|
15491
|
+
endpointID: ULIDSchema.describe("Endpoint identifier")
|
|
15492
|
+
}}
|
|
15493
|
+
}
|
|
15494
|
+
});
|
|
15495
|
+
|
|
15326
15496
|
({
|
|
15327
15497
|
"/api/inferencing/:endpointID/oai/v1/chat/completions": {
|
|
15328
15498
|
POST: {
|
|
@@ -109253,11 +109423,11 @@ function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBy
|
|
|
109253
109423
|
logger[level](metricsMessage, attributes);
|
|
109254
109424
|
}
|
|
109255
109425
|
|
|
109256
|
-
function isPlainObject$
|
|
109426
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 * Any other object kind (Date, Map, class instance, ...) also counts.
 *
 * @param value Candidate of any type.
 * @returns `true` for non-null, non-array objects, otherwise `false`.
 */
function isPlainObject$2(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
109259
|
-
function serializeRequestBody(body) {
|
|
109260
|
-
if (!isPlainObject$
|
|
109429
|
+
function serializeRequestBody$1(body) {
|
|
109430
|
+
if (!isPlainObject$2(body)) {
|
|
109261
109431
|
const payload = typeof body === "string" ? body : JSON.stringify(body);
|
|
109262
109432
|
return {
|
|
109263
109433
|
bytes: Buffer.byteLength(payload, "utf8"),
|
|
@@ -109266,7 +109436,7 @@ function serializeRequestBody(body) {
|
|
|
109266
109436
|
}
|
|
109267
109437
|
const requestPayload = { ...body };
|
|
109268
109438
|
const streamOptions = requestPayload.stream_options;
|
|
109269
|
-
const normalizedStreamOptions = isPlainObject$
|
|
109439
|
+
const normalizedStreamOptions = isPlainObject$2(streamOptions)
|
|
109270
109440
|
? { ...streamOptions }
|
|
109271
109441
|
: {};
|
|
109272
109442
|
normalizedStreamOptions.include_usage = true;
|
|
@@ -109277,7 +109447,7 @@ function serializeRequestBody(body) {
|
|
|
109277
109447
|
payload
|
|
109278
109448
|
};
|
|
109279
109449
|
}
|
|
109280
|
-
function calculateTokensPerSecond$
|
|
109450
|
+
function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
|
|
109281
109451
|
if (durationMs <= 0) {
|
|
109282
109452
|
return 0;
|
|
109283
109453
|
}
|
|
@@ -109302,7 +109472,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109302
109472
|
});
|
|
109303
109473
|
});
|
|
109304
109474
|
}
|
|
109305
|
-
const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
|
|
109475
|
+
const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody$1(body);
|
|
109306
109476
|
const requestStartedAt = Date.now();
|
|
109307
109477
|
const requestBody = JSON.parse(serializedBody);
|
|
109308
109478
|
const streamRequested = requestBody.stream === true;
|
|
@@ -109326,7 +109496,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109326
109496
|
responseBytes,
|
|
109327
109497
|
successful: !error,
|
|
109328
109498
|
timeToFirstTokenMs,
|
|
109329
|
-
tokensPerSecond: calculateTokensPerSecond$
|
|
109499
|
+
tokensPerSecond: calculateTokensPerSecond$2({
|
|
109330
109500
|
durationMs: latencyMs,
|
|
109331
109501
|
totalTokens
|
|
109332
109502
|
}),
|
|
@@ -109383,8 +109553,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
|
|
|
109383
109553
|
error: responseError,
|
|
109384
109554
|
requestUrl: path,
|
|
109385
109555
|
statusCode: response.status,
|
|
109386
|
-
statusText: responseStatusText
|
|
109387
|
-
responseBody: responseBody ?? undefined
|
|
109556
|
+
statusText: responseStatusText
|
|
109388
109557
|
});
|
|
109389
109558
|
if (!response.body) {
|
|
109390
109559
|
return {
|
|
@@ -109531,6 +109700,385 @@ function createPostCompletionsHandler(options) {
|
|
|
109531
109700
|
return createConduitOpenAIAPIReferenceHandlers(options)["/v1/completions"].POST;
|
|
109532
109701
|
}
|
|
109533
109702
|
|
|
109703
|
+
/**
 * Tells whether `value` is an object other than `null` or an array.
 * No prototype check is made, so Date, Map and class instances qualify too.
 *
 * @param value Candidate of any type.
 * @returns `true` when `value` is a non-null, non-array object.
 */
function isPlainObject$1(value) {
    const isNonNullObject = typeof value === "object" && value !== null;
    return isNonNullObject && !Array.isArray(value);
}
|
|
109706
|
+
/**
 * Turns a request body into the string that is sent upstream and measures it.
 *
 * Strings are forwarded unchanged; every other value goes through
 * `JSON.stringify`.
 *
 * @param body Request body: a string or a JSON-serializable value.
 * @returns `{ bytes, payload }`: the UTF-8 byte length of the payload and the
 *   payload string itself.
 * @throws {TypeError} When the body has no JSON representation
 *   (`JSON.stringify` yields `undefined` for `undefined`, functions and symbols).
 */
function serializeRequestBody(body) {
    const payload = typeof body === "string" ? body : JSON.stringify(body);
    if (typeof payload !== "string") {
        // Fail with a descriptive message instead of the opaque argument-type
        // error Buffer.byteLength would raise for a non-string payload.
        throw new TypeError("Request body cannot be serialized to JSON");
    }
    return {
        bytes: Buffer.byteLength(payload, "utf8"),
        payload
    };
}
|
|
109713
|
+
/**
 * Converts a token count and a duration into a rounded tokens-per-second rate.
 *
 * @param durationMs Elapsed time in milliseconds.
 * @param totalTokens Number of tokens processed in that time.
 * @returns The rate rounded to the nearest integer, or 0 when the duration is
 *   not positive or the computed rate is non-finite or not positive.
 */
function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
    if (durationMs <= 0) {
        return 0;
    }
    const rate = totalTokens / (durationMs / 1000);
    const isUsableRate = Number.isFinite(rate) && rate > 0;
    return isUsableRate ? Math.round(rate) : 0;
}
|
|
109721
|
+
/**
 * Coerces a reported token count into a usable number.
 *
 * @param value Raw count; may be null, undefined or a non-number.
 * @returns `value` itself when it is a finite number >= 0, otherwise 0.
 */
function normalizeTokenCount(value) {
    const isUsableCount = typeof value === "number" && Number.isFinite(value) && value >= 0;
    return isUsableCount ? value : 0;
}
|
|
109726
|
+
/**
 * Pulls token usage out of one server-sent-event line of a messages stream.
 *
 * Only `data:` lines holding a JSON object are considered. Usage is read from
 * `message.usage` on `message_start` events and from the top-level `usage` on
 * `message_delta` events. Both counters are read from either event, because
 * a backend may report `input_tokens` only in the final `message_delta`, and
 * `message_start` may already carry an initial `output_tokens`.
 *
 * @param line A single, already trimmed line of the event stream.
 * @returns `{ inputTokens, outputTokens }`, each a number or `null` when the
 *   event does not carry that counter; `null` when the line holds no usage
 *   (not a `data:` line, empty payload, invalid JSON, other event types).
 */
function extractAnthropicStreamUsage(line) {
    if (!line.startsWith("data:")) {
        return null;
    }
    // Drop the "data:" prefix (5 characters) and surrounding whitespace.
    const payload = line.slice(5).trim();
    if (!payload) {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(payload);
    }
    catch {
        // Non-JSON data lines carry no usage information.
        return null;
    }
    if (!isPlainObject$1(parsed)) {
        return null;
    }
    let usage = null;
    if (parsed.type === "message_start" && isPlainObject$1(parsed.message)) {
        usage = parsed.message.usage;
    }
    else if (parsed.type === "message_delta") {
        usage = parsed.usage;
    }
    if (!isPlainObject$1(usage)) {
        return null;
    }
    return {
        inputTokens: typeof usage.input_tokens === "number" ? usage.input_tokens : null,
        outputTokens: typeof usage.output_tokens === "number" ? usage.output_tokens : null
    };
}
|
|
109758
|
+
/**
 * Reads token usage from a complete (non-streamed) JSON response body.
 *
 * @param body Full response body as text.
 * @returns `{ inputTokens, outputTokens }` taken from the top-level `usage`
 *   object, each a number or `null` when absent or not numeric; `null` when
 *   the body is not JSON, is not an object, or has no `usage` object.
 */
function extractAnthropicNonStreamUsage(body) {
    let parsed;
    try {
        parsed = JSON.parse(body);
    }
    catch {
        return null;
    }
    if (!isPlainObject$1(parsed)) {
        return null;
    }
    const { usage } = parsed;
    if (!isPlainObject$1(usage)) {
        return null;
    }
    const numberOrNull = candidate => (typeof candidate === "number" ? candidate : null);
    return {
        inputTokens: numberOrNull(usage.input_tokens),
        outputTokens: numberOrNull(usage.output_tokens)
    };
}
|
|
109773
|
+
/**
 * Forwards a messages request to the engine at POST /v1/messages and relays
 * the response body while collecting byte counts and token usage.
 *
 * Flow:
 * 1. Serialize the body and send it through `modelManager.fetchOpenAI`.
 * 2. On a non-ok status, log the upstream error body (read from a clone).
 * 3. Without a response body, report a failed request and return
 *    `{ status, statusText }`.
 * 4. Otherwise pipe every chunk into a PassThrough that is returned to the
 *    caller. Usage is scanned line by line when `stream === true` was
 *    requested, or parsed from the buffered body otherwise.
 * 5. Metrics are reported exactly once, when the upstream body ends, errors or
 *    closes early.
 *
 * @param body Request body (string or JSON-serializable value).
 * @param configuration Supplies `agentEngineType` for logs and metrics.
 * @param logger Used for `warn` (metrics upload failure) and `error` (upstream failure).
 * @param modelID Model identifier recorded in the metrics payload.
 * @param modelManager Must expose `fetchOpenAI(path, init)` resolving to a fetch-style response.
 * @param reportMetrics Async sink for the metrics payload; rejections are logged, not thrown.
 * @returns `{ body, headers, status }` when the upstream sent a body, otherwise `{ status, statusText }`.
 * @throws Re-throws whatever the upstream fetch failed with, after logging and reporting it.
 */
async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }) {
    const requestPath = "/v1/messages";
    const fallbackStatusText = "Upstream request failed";
    const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
    const requestStartedAt = Date.now();
    const streamRequested = JSON.parse(serializedBody).stream === true;

    /** Milliseconds since the request started, never negative. */
    function elapsedMs() {
        return Math.max(0, Date.now() - requestStartedAt);
    }

    /** Uploads a metrics payload; an upload failure is only logged. */
    function reportMetricsSafe(payload) {
        reportMetrics(payload).catch(error => {
            logger.warn("Failed to upload LLM prompt metrics", {
                error: asError(error),
                requestUrl: requestPath
            });
        });
    }

    /** Writes one engine-metrics log entry; the `error` key is only present when an error is given. */
    function logMetrics({ error, level, responseBytes }) {
        logEngineMetrics({
            agentEngineType: configuration.agentEngineType,
            ...(error ? { error } : {}),
            level,
            logger,
            requestBodyBytes,
            requestPath,
            responseBytes,
            usage: null
        });
    }

    /** Builds and uploads the per-request metrics payload. */
    function reportRequestMetrics({ latencyMs, responseBytes, successful, usage }) {
        const promptTokens = normalizeTokenCount(usage?.inputTokens);
        const completionTokens = normalizeTokenCount(usage?.outputTokens);
        const totalTokens = promptTokens + completionTokens;
        reportMetricsSafe({
            bytes: requestBodyBytes + responseBytes,
            completionTokens,
            engine: configuration.agentEngineType,
            endpointId: null,
            latencyMs,
            modelId: modelID,
            promptTokens,
            requestBytes: requestBodyBytes,
            requestId: null,
            requestMethod: "POST",
            requestPath,
            responseBytes,
            successful,
            timeToFirstTokenMs: null,
            tokensPerSecond: calculateTokensPerSecond$1({
                durationMs: latencyMs,
                totalTokens
            }),
            totalTokens
        });
    }

    let response;
    try {
        response = await modelManager.fetchOpenAI(requestPath, {
            body: serializedBody,
            headers: {
                "Content-Type": "application/json"
            },
            method: "POST"
        });
    }
    catch (error) {
        logMetrics({ error: asError(error), level: "error", responseBytes: 0 });
        reportRequestMetrics({ latencyMs: elapsedMs(), responseBytes: 0, successful: false, usage: null });
        throw error;
    }

    // A fetch response exposes an empty string (not null/undefined) when the
    // server sent no reason phrase, so `??` alone never applies the fallback.
    let responseStatusText = response.statusText ?? fallbackStatusText;
    if (!response.ok && responseStatusText === "") {
        responseStatusText = fallbackStatusText;
    }

    if (!response.ok) {
        // Read the error body from a clone so the original body can still be relayed.
        const responseBody = await response.clone().text().catch(() => null);
        const responseError = new Error(responseBody
            ? `Upstream error response: ${responseBody}`
            : "Upstream error response: empty body");
        logger.error("LLM engine request failed", {
            error: responseError,
            requestUrl: requestPath,
            statusCode: response.status,
            statusText: responseStatusText
        });
    }

    if (!response.body) {
        logMetrics({ level: response.ok ? "info" : "error", responseBytes: 0 });
        reportRequestMetrics({ latencyMs: elapsedMs(), responseBytes: 0, successful: false, usage: null });
        return {
            status: response.status,
            statusText: responseStatusText
        };
    }

    const passThrough = new PassThrough();
    let responseBytes = 0;
    let completed = false;
    const usage = { inputTokens: null, outputTokens: null };
    const upstreamError = response.ok
        ? null
        : new Error(`Upstream error: ${response.status} ${responseStatusText}`);
    // Stream mode: text after the last newline seen so far.
    let pendingText = "";
    // Non-stream mode: every chunk, kept so the full body can be parsed at the end.
    const bufferedChunks = [];

    /** Merges any usage counters found on one event-stream line into `usage`. */
    function scanStreamLine(line) {
        const extracted = extractAnthropicStreamUsage(line.trim());
        if (extracted === null) {
            return;
        }
        // `!= null` skips both null and undefined counters.
        if (extracted.inputTokens != null) {
            usage.inputTokens = extracted.inputTokens;
        }
        if (extracted.outputTokens != null) {
            usage.outputTokens = extracted.outputTokens;
        }
    }

    /** Reports the final metrics once; later calls are ignored. */
    function finalize(error) {
        if (completed) {
            return;
        }
        completed = true;
        if (streamRequested && pendingText) {
            // The last line may arrive without a trailing newline; scan it
            // so its usage is not lost.
            scanStreamLine(pendingText);
            pendingText = "";
        }
        reportRequestMetrics({
            latencyMs: elapsedMs(),
            responseBytes,
            successful: !error,
            usage
        });
    }

    const rawBody = Readable.fromWeb(response.body);

    rawBody.on("data", (chunk) => {
        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
        responseBytes += chunkBuffer.length;
        if (streamRequested) {
            pendingText += chunkBuffer.toString("utf8");
            const lines = pendingText.split("\n");
            // The last element is an incomplete line (or ""), kept for the next chunk.
            pendingText = lines.pop() ?? "";
            for (const line of lines) {
                scanStreamLine(line);
            }
        }
        else {
            bufferedChunks.push(chunkBuffer);
        }
        // NOTE(review): the return value of write() is ignored, so upstream
        // data is relayed without backpressure.
        passThrough.write(chunkBuffer);
    });

    rawBody.once("error", err => {
        const normalizedError = asError(err);
        logMetrics({ error: normalizedError, level: "error", responseBytes });
        finalize(normalizedError);
        passThrough.destroy(normalizedError);
    });

    rawBody.once("end", () => {
        if (!streamRequested) {
            const fullBody = Buffer.concat(bufferedChunks).toString("utf8");
            const extractedUsage = extractAnthropicNonStreamUsage(fullBody);
            if (extractedUsage) {
                usage.inputTokens = extractedUsage.inputTokens;
                usage.outputTokens = extractedUsage.outputTokens;
            }
        }
        logMetrics({ level: upstreamError ? "error" : "info", responseBytes });
        finalize(upstreamError);
        passThrough.end();
    });

    rawBody.once("close", () => {
        if (!completed) {
            // Closed without "end" or "error": treat as a truncated response.
            const closeError = new Error("Engine response stream closed before completion");
            logMetrics({ error: closeError, level: "error", responseBytes });
            finalize(closeError);
        }
        if (!passThrough.writableEnded) {
            passThrough.end();
        }
    });

    return {
        body: passThrough,
        headers: Object.fromEntries(response.headers.entries()),
        status: response.status
    };
}
|
|
110059
|
+
|
|
110060
|
+
function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
|
|
110061
|
+
return {
|
|
110062
|
+
"/v1/messages": {
|
|
110063
|
+
POST: async ({ body }) => {
|
|
110064
|
+
const modelID = getModelID();
|
|
110065
|
+
const modelManager = getModelManager();
|
|
110066
|
+
return proxyAnthropicStreamingRoute({
|
|
110067
|
+
body,
|
|
110068
|
+
configuration,
|
|
110069
|
+
logger,
|
|
110070
|
+
modelID,
|
|
110071
|
+
modelManager,
|
|
110072
|
+
reportMetrics: apiClient.reportPromptMetrics
|
|
110073
|
+
});
|
|
110074
|
+
}
|
|
110075
|
+
}
|
|
110076
|
+
};
|
|
110077
|
+
}
|
|
110078
|
+
/**
 * Returns just the `POST /v1/messages` handler from the Anthropic handler map.
 *
 * @param options - Forwarded unchanged to `createConduitAnthropicAPIReferenceHandlers`.
 * @returns The `POST` handler registered under `"/v1/messages"`.
 */
function createPostMessagesHandler(options) {
    const { POST: postMessages } = createConduitAnthropicAPIReferenceHandlers(options)["/v1/messages"];
    return postMessages;
}
|
|
110081
|
+
|
|
109534
110082
|
function createHealthHandler() {
|
|
109535
110083
|
return (_req, res) => {
|
|
109536
110084
|
res.status(200).send("OK");
|
|
@@ -119544,6 +120092,22 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119544
120092
|
mount: publicRouter,
|
|
119545
120093
|
reference: API_CLIENT_CONDUIT_OPENAI_REFERENCE
|
|
119546
120094
|
});
|
|
120095
|
+
implementAPIReference({
|
|
120096
|
+
api: {
|
|
120097
|
+
"/v1/messages": {
|
|
120098
|
+
POST: createPostMessagesHandler({
|
|
120099
|
+
apiClient,
|
|
120100
|
+
configuration,
|
|
120101
|
+
getModelID: () => conduitConfiguration.targetModel.id,
|
|
120102
|
+
getModelManager: () => modelManager,
|
|
120103
|
+
logger
|
|
120104
|
+
})
|
|
120105
|
+
}
|
|
120106
|
+
},
|
|
120107
|
+
logger,
|
|
120108
|
+
mount: publicRouter,
|
|
120109
|
+
reference: API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE
|
|
120110
|
+
});
|
|
119547
120111
|
handleSSERequests({
|
|
119548
120112
|
apiURL: configuration.apiURL,
|
|
119549
120113
|
configuration,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Readable } from "node:stream";
|
|
2
|
+
import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
|
|
3
|
+
import { Logger } from "@infersec/logger";
|
|
4
|
+
import { Configuration } from "../configuration.js";
|
|
5
|
+
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
+
/**
 * Type declaration for the proxy that the `POST /v1/messages` handler delegates to.
 *
 * @param options.body - Request body; typed `unknown`, so nothing about its shape is promised here.
 * @param options.configuration - Agent configuration.
 * @param options.logger - Logger instance.
 * @param options.modelID - ULID of the model the request targets.
 * @param options.modelManager - Model manager instance.
 * @param options.reportMetrics - Async callback that receives an `InferenceAgentLLMMetricsPayload`.
 * @returns A promise for one of two shapes: a response carrying a `Readable` body with
 * `headers` and `status`, or a body-less `{ status, statusText }` result.
 * NOTE(review): the second shape is presumably used when no upstream body is available;
 * confirm against the implementation.
 */
export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }: {
|
|
7
|
+
body: unknown;
|
|
8
|
+
configuration: Configuration;
|
|
9
|
+
logger: Logger;
|
|
10
|
+
modelID: ULID;
|
|
11
|
+
modelManager: ModelManager;
|
|
12
|
+
reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
|
|
13
|
+
}): Promise<{
|
|
14
|
+
body: Readable;
|
|
15
|
+
headers: Record<string, string>;
|
|
16
|
+
status: number;
|
|
17
|
+
} | {
|
|
18
|
+
status: number;
|
|
19
|
+
statusText: string;
|
|
20
|
+
}>;
|