@hebo-ai/gateway 0.10.4 → 0.10.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/schema.d.ts +9 -5
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/messages/converters.js +13 -12
- package/dist/endpoints/messages/handler.js +2 -0
- package/dist/endpoints/responses/handler.js +2 -0
- package/dist/endpoints/responses/schema.d.ts +4 -0
- package/dist/endpoints/shared/schema.d.ts +2 -0
- package/dist/endpoints/shared/schema.js +9 -1
- package/dist/errors/ai-sdk.js +15 -7
- package/dist/errors/anthropic.d.ts +3 -2
- package/dist/errors/anthropic.js +10 -11
- package/dist/errors/gateway.d.ts +3 -2
- package/dist/errors/gateway.js +10 -4
- package/dist/errors/openai.d.ts +3 -2
- package/dist/errors/openai.js +8 -9
- package/dist/errors/utils.d.ts +4 -4
- package/dist/errors/utils.js +12 -12
- package/dist/lifecycle.js +9 -9
- package/dist/middleware/utils.js +3 -0
- package/dist/models/amazon/middleware.js +1 -0
- package/dist/models/anthropic/middleware.d.ts +1 -1
- package/dist/models/anthropic/middleware.js +26 -1
- package/dist/models/anthropic/presets.d.ts +71 -3
- package/dist/models/anthropic/presets.js +13 -2
- package/dist/models/google/middleware.js +2 -0
- package/dist/models/openai/middleware.js +2 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/canonical.js +1 -0
- package/dist/providers/bedrock/middleware.js +3 -2
- package/dist/telemetry/gen-ai.js +2 -2
- package/dist/types.d.ts +3 -1
- package/dist/utils/headers.d.ts +5 -0
- package/dist/utils/headers.js +54 -7
- package/dist/utils/response.d.ts +3 -3
- package/dist/utils/response.js +13 -9
- package/dist/utils/stream.d.ts +1 -1
- package/dist/utils/stream.js +3 -11
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -218,7 +218,7 @@ Out-of-the-box model presets:
|
|
|
218
218
|
Nova: `nova` (`v1`, `v2`, `v1.x`, `v2.x`, `latest`, `embeddings`, `all`)
|
|
219
219
|
|
|
220
220
|
- **Anthropic** — `@hebo-ai/gateway/models/anthropic`
|
|
221
|
-
Claude: `claude` (`v4.6`, `v4.5`, `v4.1`, `v4`, `v3.7`, `v3.5`, `v3`, `v4.x`, `v3.x`, `haiku`, `sonnet`, `opus`, `latest`, `all`)
|
|
221
|
+
Claude: `claude` (`v4.7`, `v4.6`, `v4.5`, `v4.1`, `v4`, `v3.7`, `v3.5`, `v3`, `v4.x`, `v3.x`, `haiku`, `sonnet`, `opus`, `latest`, `all`)
|
|
222
222
|
|
|
223
223
|
- **Cohere** — `@hebo-ai/gateway/models/cohere`
|
|
224
224
|
Command: `command` (`A`, `R`, `latest`, `all`)
|
|
@@ -730,7 +730,7 @@ Normalization rules:
|
|
|
730
730
|
|
|
731
731
|
- `enabled` -> fall-back to model default if none provided
|
|
732
732
|
- `max_tokens`: fall-back to model default if model supports
|
|
733
|
-
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
|
|
733
|
+
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`, `max`
|
|
734
734
|
- Generic `effort` -> budget = percentage of `max_tokens`
|
|
735
735
|
- `none`: 0%
|
|
736
736
|
- `minimal`: 10%
|
|
@@ -738,6 +738,7 @@ Normalization rules:
|
|
|
738
738
|
- `medium`: 50% (default)
|
|
739
739
|
- `high`: 80%
|
|
740
740
|
- `xhigh`: 95%
|
|
741
|
+
- `max`: 100%
|
|
741
742
|
|
|
742
743
|
Reasoning output is surfaced as extension to the `completion` object.
|
|
743
744
|
|
|
@@ -133,6 +133,8 @@ export const chatCompletions = (config) => {
|
|
|
133
133
|
});
|
|
134
134
|
logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
|
|
135
135
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
136
|
+
if (result.response.headers)
|
|
137
|
+
ctx.response = { headers: result.response.headers };
|
|
136
138
|
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
|
|
137
139
|
// Transform result.
|
|
138
140
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
@@ -737,6 +737,7 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
737
737
|
medium: "medium";
|
|
738
738
|
high: "high";
|
|
739
739
|
xhigh: "xhigh";
|
|
740
|
+
max: "max";
|
|
740
741
|
}>>;
|
|
741
742
|
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
742
743
|
default: "default";
|
|
@@ -768,6 +769,7 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
768
769
|
medium: "medium";
|
|
769
770
|
high: "high";
|
|
770
771
|
xhigh: "xhigh";
|
|
772
|
+
max: "max";
|
|
771
773
|
}>>;
|
|
772
774
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
773
775
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -997,6 +999,7 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
997
999
|
medium: "medium";
|
|
998
1000
|
high: "high";
|
|
999
1001
|
xhigh: "xhigh";
|
|
1002
|
+
max: "max";
|
|
1000
1003
|
}>>;
|
|
1001
1004
|
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
1002
1005
|
default: "default";
|
|
@@ -1028,6 +1031,7 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
1028
1031
|
medium: "medium";
|
|
1029
1032
|
high: "high";
|
|
1030
1033
|
xhigh: "xhigh";
|
|
1034
|
+
max: "max";
|
|
1031
1035
|
}>>;
|
|
1032
1036
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
1033
1037
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -1051,8 +1055,8 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
1051
1055
|
export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
|
|
1052
1056
|
export declare const ChatCompletionsFinishReasonSchema: z.ZodEnum<{
|
|
1053
1057
|
length: "length";
|
|
1054
|
-
stop: "stop";
|
|
1055
1058
|
tool_calls: "tool_calls";
|
|
1059
|
+
stop: "stop";
|
|
1056
1060
|
content_filter: "content_filter";
|
|
1057
1061
|
}>;
|
|
1058
1062
|
export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
|
|
@@ -1105,8 +1109,8 @@ export declare const ChatCompletionsChoiceSchema: z.ZodObject<{
|
|
|
1105
1109
|
}, z.core.$strip>;
|
|
1106
1110
|
finish_reason: z.ZodEnum<{
|
|
1107
1111
|
length: "length";
|
|
1108
|
-
stop: "stop";
|
|
1109
1112
|
tool_calls: "tool_calls";
|
|
1113
|
+
stop: "stop";
|
|
1110
1114
|
content_filter: "content_filter";
|
|
1111
1115
|
}>;
|
|
1112
1116
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1179,8 +1183,8 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
1179
1183
|
}, z.core.$strip>;
|
|
1180
1184
|
finish_reason: z.ZodEnum<{
|
|
1181
1185
|
length: "length";
|
|
1182
|
-
stop: "stop";
|
|
1183
1186
|
tool_calls: "tool_calls";
|
|
1187
|
+
stop: "stop";
|
|
1184
1188
|
content_filter: "content_filter";
|
|
1185
1189
|
}>;
|
|
1186
1190
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1315,8 +1319,8 @@ export declare const ChatCompletionsChoiceDeltaSchema: z.ZodObject<{
|
|
|
1315
1319
|
}, z.core.$strip>;
|
|
1316
1320
|
finish_reason: z.ZodNullable<z.ZodEnum<{
|
|
1317
1321
|
length: "length";
|
|
1318
|
-
stop: "stop";
|
|
1319
1322
|
tool_calls: "tool_calls";
|
|
1323
|
+
stop: "stop";
|
|
1320
1324
|
content_filter: "content_filter";
|
|
1321
1325
|
}>>;
|
|
1322
1326
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1377,8 +1381,8 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1377
1381
|
}, z.core.$strip>;
|
|
1378
1382
|
finish_reason: z.ZodNullable<z.ZodEnum<{
|
|
1379
1383
|
length: "length";
|
|
1380
|
-
stop: "stop";
|
|
1381
1384
|
tool_calls: "tool_calls";
|
|
1385
|
+
stop: "stop";
|
|
1382
1386
|
content_filter: "content_filter";
|
|
1383
1387
|
}>>;
|
|
1384
1388
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -81,6 +81,8 @@ export const embeddings = (config) => {
|
|
|
81
81
|
});
|
|
82
82
|
logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
|
|
83
83
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
84
|
+
if (result.responses?.[0]?.headers)
|
|
85
|
+
ctx.response = { headers: result.responses[0].headers };
|
|
84
86
|
// Transform result.
|
|
85
87
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
86
88
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] Embeddings");
|
|
@@ -482,18 +482,6 @@ export class MessagesTransformStream extends TransformStream {
|
|
|
482
482
|
break;
|
|
483
483
|
}
|
|
484
484
|
case "reasoning-delta": {
|
|
485
|
-
controller.enqueue({
|
|
486
|
-
event: "content_block_delta",
|
|
487
|
-
data: {
|
|
488
|
-
type: "content_block_delta",
|
|
489
|
-
index: blockIndex,
|
|
490
|
-
delta: { type: "thinking_delta", thinking: part.text },
|
|
491
|
-
},
|
|
492
|
-
});
|
|
493
|
-
break;
|
|
494
|
-
}
|
|
495
|
-
case "reasoning-end": {
|
|
496
|
-
// Emit signature delta if available from provider metadata
|
|
497
485
|
const { signature } = extractReasoningMetadata(part.providerMetadata);
|
|
498
486
|
if (signature) {
|
|
499
487
|
controller.enqueue({
|
|
@@ -505,6 +493,19 @@ export class MessagesTransformStream extends TransformStream {
|
|
|
505
493
|
},
|
|
506
494
|
});
|
|
507
495
|
}
|
|
496
|
+
else {
|
|
497
|
+
controller.enqueue({
|
|
498
|
+
event: "content_block_delta",
|
|
499
|
+
data: {
|
|
500
|
+
type: "content_block_delta",
|
|
501
|
+
index: blockIndex,
|
|
502
|
+
delta: { type: "thinking_delta", thinking: part.text },
|
|
503
|
+
},
|
|
504
|
+
});
|
|
505
|
+
}
|
|
506
|
+
break;
|
|
507
|
+
}
|
|
508
|
+
case "reasoning-end": {
|
|
508
509
|
controller.enqueue({
|
|
509
510
|
event: "content_block_stop",
|
|
510
511
|
data: { type: "content_block_stop", index: blockIndex },
|
|
@@ -124,6 +124,8 @@ export const messages = (config) => {
|
|
|
124
124
|
});
|
|
125
125
|
logger.trace({ requestId: ctx.requestId, result }, "[messages] AI SDK result");
|
|
126
126
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
127
|
+
if (result.response.headers)
|
|
128
|
+
ctx.response = { headers: result.response.headers };
|
|
127
129
|
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
|
|
128
130
|
ctx.result = toMessages(result, ctx.resolvedModelId);
|
|
129
131
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[messages] Messages");
|
|
@@ -123,6 +123,8 @@ export const responses = (config) => {
|
|
|
123
123
|
});
|
|
124
124
|
logger.trace({ requestId: ctx.requestId, result }, "[responses] AI SDK result");
|
|
125
125
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
126
|
+
if (result.response.headers)
|
|
127
|
+
ctx.response = { headers: result.response.headers };
|
|
126
128
|
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
|
|
127
129
|
ctx.result = toResponses(result, ctx.resolvedModelId, ctx.body.metadata);
|
|
128
130
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[responses] Responses");
|
|
@@ -1489,6 +1489,7 @@ declare const ResponsesInputsSchema: z.ZodObject<{
|
|
|
1489
1489
|
medium: "medium";
|
|
1490
1490
|
high: "high";
|
|
1491
1491
|
xhigh: "xhigh";
|
|
1492
|
+
max: "max";
|
|
1492
1493
|
}>>;
|
|
1493
1494
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
1494
1495
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -1524,6 +1525,7 @@ declare const ResponsesInputsSchema: z.ZodObject<{
|
|
|
1524
1525
|
medium: "medium";
|
|
1525
1526
|
high: "high";
|
|
1526
1527
|
xhigh: "xhigh";
|
|
1528
|
+
max: "max";
|
|
1527
1529
|
}>>;
|
|
1528
1530
|
extra_body: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
1529
1531
|
}, z.core.$strip>;
|
|
@@ -1984,6 +1986,7 @@ export declare const ResponsesBodySchema: z.ZodObject<{
|
|
|
1984
1986
|
medium: "medium";
|
|
1985
1987
|
high: "high";
|
|
1986
1988
|
xhigh: "xhigh";
|
|
1989
|
+
max: "max";
|
|
1987
1990
|
}>>;
|
|
1988
1991
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
1989
1992
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -2019,6 +2022,7 @@ export declare const ResponsesBodySchema: z.ZodObject<{
|
|
|
2019
2022
|
medium: "medium";
|
|
2020
2023
|
high: "high";
|
|
2021
2024
|
xhigh: "xhigh";
|
|
2025
|
+
max: "max";
|
|
2022
2026
|
}>>;
|
|
2023
2027
|
extra_body: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
2024
2028
|
model: z.ZodString;
|
|
@@ -24,6 +24,7 @@ export declare const ReasoningEffortSchema: z.ZodEnum<{
|
|
|
24
24
|
medium: "medium";
|
|
25
25
|
high: "high";
|
|
26
26
|
xhigh: "xhigh";
|
|
27
|
+
max: "max";
|
|
27
28
|
}>;
|
|
28
29
|
export type ReasoningEffort = z.infer<typeof ReasoningEffortSchema>;
|
|
29
30
|
export declare const ReasoningSummarySchema: z.ZodEnum<{
|
|
@@ -42,6 +43,7 @@ export declare const ReasoningConfigSchema: z.ZodObject<{
|
|
|
42
43
|
medium: "medium";
|
|
43
44
|
high: "high";
|
|
44
45
|
xhigh: "xhigh";
|
|
46
|
+
max: "max";
|
|
45
47
|
}>>;
|
|
46
48
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
47
49
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -11,7 +11,15 @@ export const CacheControlSchema = z.object({
|
|
|
11
11
|
ttl: z.enum(["5m", "1h", "24h"]).optional(),
|
|
12
12
|
});
|
|
13
13
|
export const ProviderMetadataSchema = z.record(z.string(), z.record(z.string(), z.any()));
|
|
14
|
-
export const ReasoningEffortSchema = z.enum([
|
|
14
|
+
export const ReasoningEffortSchema = z.enum([
|
|
15
|
+
"none",
|
|
16
|
+
"minimal",
|
|
17
|
+
"low",
|
|
18
|
+
"medium",
|
|
19
|
+
"high",
|
|
20
|
+
"xhigh",
|
|
21
|
+
"max",
|
|
22
|
+
]);
|
|
15
23
|
export const ReasoningSummarySchema = z.enum(["auto", "concise", "detailed", "none"]);
|
|
16
24
|
export const ReasoningConfigSchema = z.object({
|
|
17
25
|
enabled: z.optional(z.boolean()),
|
package/dist/errors/ai-sdk.js
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
import { AISDKError, APICallError, DownloadError, EmptyResponseBodyError, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidPromptError, InvalidResponseDataError, InvalidStreamPartError, InvalidToolApprovalError, InvalidToolInputError, JSONParseError, LoadAPIKeyError, LoadSettingError, MessageConversionError, MissingToolResultsError, NoContentGeneratedError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputGeneratedError, NoSpeechGeneratedError, NoSuchModelError, NoSuchProviderError, NoSuchToolError, NoTranscriptGeneratedError, NoVideoGeneratedError, RetryError, ToolCallNotFoundForApprovalError, ToolCallRepairError, TooManyEmbeddingValuesForCallError, TypeValidationError, UIMessageStreamError, UnsupportedModelVersionError, UnsupportedFunctionalityError, } from "ai";
|
|
2
2
|
import { GatewayError } from "./gateway";
|
|
3
|
-
import {
|
|
3
|
+
import { STATUS_TEXT } from "./utils";
|
|
4
|
+
const normalizeApiCallError = (error) => {
|
|
5
|
+
const status = error.statusCode ?? (error.isRetryable ? 502 : 422);
|
|
6
|
+
const statusText = `UPSTREAM_${STATUS_TEXT(status)}`;
|
|
7
|
+
return new GatewayError(error, status, statusText, undefined, error.responseHeaders ?? undefined);
|
|
8
|
+
};
|
|
4
9
|
export const normalizeAiSdkError = (error) => {
|
|
5
10
|
if (APICallError.isInstance(error)) {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
11
|
+
return normalizeApiCallError(error);
|
|
12
|
+
}
|
|
13
|
+
if (RetryError.isInstance(error)) {
|
|
14
|
+
if (APICallError.isInstance(error.lastError)) {
|
|
15
|
+
return normalizeApiCallError(error.lastError);
|
|
16
|
+
}
|
|
17
|
+
return new GatewayError(error, 502, `UPSTREAM_${STATUS_TEXT(502)}`);
|
|
9
18
|
}
|
|
10
19
|
if (JSONParseError.isInstance(error) ||
|
|
11
20
|
InvalidResponseDataError.isInstance(error) ||
|
|
@@ -15,7 +24,6 @@ export const normalizeAiSdkError = (error) => {
|
|
|
15
24
|
NoOutputGeneratedError.isInstance(error) ||
|
|
16
25
|
InvalidStreamPartError.isInstance(error) ||
|
|
17
26
|
UIMessageStreamError.isInstance(error) ||
|
|
18
|
-
RetryError.isInstance(error) ||
|
|
19
27
|
DownloadError.isInstance(error) ||
|
|
20
28
|
ToolCallRepairError.isInstance(error) ||
|
|
21
29
|
NoImageGeneratedError.isInstance(error) ||
|
|
@@ -23,7 +31,7 @@ export const normalizeAiSdkError = (error) => {
|
|
|
23
31
|
NoSpeechGeneratedError.isInstance(error) ||
|
|
24
32
|
NoTranscriptGeneratedError.isInstance(error) ||
|
|
25
33
|
NoVideoGeneratedError.isInstance(error)) {
|
|
26
|
-
return new GatewayError(error, 502, `UPSTREAM_${
|
|
34
|
+
return new GatewayError(error, 502, `UPSTREAM_${STATUS_TEXT(502)}`);
|
|
27
35
|
}
|
|
28
36
|
if (InvalidArgumentError.isInstance(error) ||
|
|
29
37
|
InvalidPromptError.isInstance(error) ||
|
|
@@ -40,7 +48,7 @@ export const normalizeAiSdkError = (error) => {
|
|
|
40
48
|
TooManyEmbeddingValuesForCallError.isInstance(error) ||
|
|
41
49
|
NoSuchModelError.isInstance(error) ||
|
|
42
50
|
NoSuchProviderError.isInstance(error)) {
|
|
43
|
-
return new GatewayError(error, 422, `UPSTREAM_${
|
|
51
|
+
return new GatewayError(error, 422, `UPSTREAM_${STATUS_TEXT(422)}`);
|
|
44
52
|
}
|
|
45
53
|
if (LoadSettingError.isInstance(error) || LoadAPIKeyError.isInstance(error)) {
|
|
46
54
|
return new GatewayError(error, 500);
|
|
@@ -9,7 +9,8 @@ export declare const AnthropicErrorSchema: z.ZodObject<{
|
|
|
9
9
|
export declare class AnthropicError {
|
|
10
10
|
readonly type: "error";
|
|
11
11
|
readonly error: z.infer<typeof AnthropicErrorSchema>["error"];
|
|
12
|
+
status: number;
|
|
12
13
|
constructor(message: string, type?: string);
|
|
13
14
|
}
|
|
14
|
-
export declare function toAnthropicError(error: unknown): AnthropicError;
|
|
15
|
-
export declare function toAnthropicErrorResponse(error: unknown,
|
|
15
|
+
export declare function toAnthropicError(error: unknown, requestId?: string): AnthropicError;
|
|
16
|
+
export declare function toAnthropicErrorResponse(error: unknown, init: ResponseInit): Response;
|
package/dist/errors/anthropic.js
CHANGED
|
@@ -14,6 +14,8 @@ export class AnthropicError {
|
|
|
14
14
|
error;
|
|
15
15
|
constructor(message, type = "api_error") {
|
|
16
16
|
this.error = { type, message };
|
|
17
|
+
// internal property to derive status from error handlers without breaking official format
|
|
18
|
+
Object.defineProperty(this, "status", { value: 500, writable: true });
|
|
17
19
|
}
|
|
18
20
|
}
|
|
19
21
|
const mapType = (status) => {
|
|
@@ -22,12 +24,12 @@ const mapType = (status) => {
|
|
|
22
24
|
return "invalid_request_error";
|
|
23
25
|
case 401:
|
|
24
26
|
return "authentication_error";
|
|
27
|
+
case 402:
|
|
28
|
+
return "billing_error";
|
|
25
29
|
case 403:
|
|
26
30
|
return "permission_error";
|
|
27
31
|
case 404:
|
|
28
32
|
return "not_found_error";
|
|
29
|
-
case 402:
|
|
30
|
-
return "billing_error";
|
|
31
33
|
case 413:
|
|
32
34
|
return "request_too_large";
|
|
33
35
|
case 429:
|
|
@@ -40,15 +42,12 @@ const mapType = (status) => {
|
|
|
40
42
|
return status >= 500 ? "api_error" : "invalid_request_error";
|
|
41
43
|
}
|
|
42
44
|
};
|
|
43
|
-
export function toAnthropicError(error) {
|
|
45
|
+
export function toAnthropicError(error, requestId) {
|
|
44
46
|
const meta = getErrorMeta(error);
|
|
45
|
-
|
|
47
|
+
const anthropicError = new AnthropicError(maybeMaskMessage(error instanceof Error ? error.message : String(error), meta.status, requestId), mapType(meta.status));
|
|
48
|
+
anthropicError.status = meta.status;
|
|
49
|
+
return anthropicError;
|
|
46
50
|
}
|
|
47
|
-
export function toAnthropicErrorResponse(error,
|
|
48
|
-
|
|
49
|
-
return toResponse(new AnthropicError(maybeMaskMessage(meta, resolveRequestId(responseInit)), mapType(meta.status)), {
|
|
50
|
-
status: meta.status,
|
|
51
|
-
statusText: meta.code,
|
|
52
|
-
headers: responseInit?.headers,
|
|
53
|
-
});
|
|
51
|
+
export function toAnthropicErrorResponse(error, init) {
|
|
52
|
+
return toResponse(new AnthropicError(maybeMaskMessage(error instanceof Error ? error.message : String(error), init.status ?? 500, resolveRequestId(init)), mapType(init.status ?? 500)), init);
|
|
54
53
|
}
|
package/dist/errors/gateway.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export declare class GatewayError extends Error {
|
|
2
2
|
readonly status: number;
|
|
3
|
-
readonly
|
|
4
|
-
|
|
3
|
+
readonly statusText: string;
|
|
4
|
+
readonly headers: Record<string, string> | undefined;
|
|
5
|
+
constructor(error: unknown, status: number, statusText?: string, cause?: unknown, headers?: Record<string, string>);
|
|
5
6
|
}
|
package/dist/errors/gateway.js
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { X_SHOULD_RETRY_HEADER } from "../utils/headers";
|
|
2
|
+
import { STATUS_TEXT } from "./utils";
|
|
2
3
|
export class GatewayError extends Error {
|
|
3
4
|
status;
|
|
4
|
-
|
|
5
|
-
|
|
5
|
+
statusText;
|
|
6
|
+
headers;
|
|
7
|
+
constructor(error, status, statusText, cause, headers) {
|
|
6
8
|
const isError = error instanceof Error;
|
|
7
9
|
super(isError ? error.message : String(error));
|
|
8
10
|
this.name = "GatewayError";
|
|
9
11
|
this.cause = cause ?? (isError ? error : undefined);
|
|
10
12
|
this.status = status;
|
|
11
|
-
this.
|
|
13
|
+
this.statusText = statusText ?? STATUS_TEXT(status);
|
|
14
|
+
this.headers = headers;
|
|
15
|
+
if (!this.statusText.startsWith("UPSTREAM_")) {
|
|
16
|
+
(this.headers ??= {})[X_SHOULD_RETRY_HEADER] = "false";
|
|
17
|
+
}
|
|
12
18
|
}
|
|
13
19
|
}
|
package/dist/errors/openai.d.ts
CHANGED
|
@@ -9,7 +9,8 @@ export declare const OpenAIErrorSchema: z.ZodObject<{
|
|
|
9
9
|
}, z.core.$strip>;
|
|
10
10
|
export declare class OpenAIError {
|
|
11
11
|
readonly error: z.infer<typeof OpenAIErrorSchema>["error"];
|
|
12
|
+
status: number;
|
|
12
13
|
constructor(message: string, type?: string, code?: string, param?: string);
|
|
13
14
|
}
|
|
14
|
-
export declare function toOpenAIError(error: unknown): OpenAIError;
|
|
15
|
-
export declare function toOpenAIErrorResponse(error: unknown,
|
|
15
|
+
export declare function toOpenAIError(error: unknown, requestId?: string): OpenAIError;
|
|
16
|
+
export declare function toOpenAIErrorResponse(error: unknown, init: ResponseInit): Response;
|
package/dist/errors/openai.js
CHANGED
|
@@ -14,18 +14,17 @@ export class OpenAIError {
|
|
|
14
14
|
error;
|
|
15
15
|
constructor(message, type = "server_error", code, param = "") {
|
|
16
16
|
this.error = { message, type, code: code?.toLowerCase(), param };
|
|
17
|
+
// internal property to derive status from error handlers without breaking official format
|
|
18
|
+
Object.defineProperty(this, "status", { value: 500, writable: true });
|
|
17
19
|
}
|
|
18
20
|
}
|
|
19
21
|
const mapType = (status) => (status < 500 ? "invalid_request_error" : "server_error");
|
|
20
|
-
export function toOpenAIError(error) {
|
|
22
|
+
export function toOpenAIError(error, requestId) {
|
|
21
23
|
const meta = getErrorMeta(error);
|
|
22
|
-
|
|
24
|
+
const openAIError = new OpenAIError(maybeMaskMessage(error instanceof Error ? error.message : String(error), meta.status, requestId), mapType(meta.status), meta.statusText);
|
|
25
|
+
openAIError.status = meta.status;
|
|
26
|
+
return openAIError;
|
|
23
27
|
}
|
|
24
|
-
export function toOpenAIErrorResponse(error,
|
|
25
|
-
|
|
26
|
-
return toResponse(new OpenAIError(maybeMaskMessage(meta, resolveRequestId(responseInit)), mapType(meta.status), meta.code), {
|
|
27
|
-
...responseInit,
|
|
28
|
-
status: meta.status,
|
|
29
|
-
statusText: meta.code,
|
|
30
|
-
});
|
|
28
|
+
export function toOpenAIErrorResponse(error, init) {
|
|
29
|
+
return toResponse(new OpenAIError(maybeMaskMessage(error instanceof Error ? error.message : String(error), init.status ?? 500, resolveRequestId(init)), mapType(init.status ?? 500), init.statusText ?? "INTERNAL_SERVER_ERROR"), init);
|
|
31
30
|
}
|
package/dist/errors/utils.d.ts
CHANGED
|
@@ -16,11 +16,11 @@ export declare const STATUS_CODES: {
|
|
|
16
16
|
readonly 503: "SERVICE_UNAVAILABLE";
|
|
17
17
|
readonly 504: "GATEWAY_TIMEOUT";
|
|
18
18
|
};
|
|
19
|
-
export declare const
|
|
19
|
+
export declare const STATUS_TEXT: (status: number) => "BAD_REQUEST" | "UNAUTHORIZED" | "PAYMENT_REQUIRED" | "FORBIDDEN" | "NOT_FOUND" | "METHOD_NOT_ALLOWED" | "CONFLICT" | "PAYLOAD_TOO_LARGE" | "UNSUPPORTED_MEDIA_TYPE" | "UNPROCESSABLE_ENTITY" | "TOO_MANY_REQUESTS" | "CLIENT_CLOSED_REQUEST" | "INTERNAL_SERVER_ERROR" | "BAD_GATEWAY" | "SERVICE_UNAVAILABLE" | "GATEWAY_TIMEOUT";
|
|
20
20
|
export type ErrorMeta = {
|
|
21
21
|
status: number;
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
statusText: string;
|
|
23
|
+
headers: Record<string, string>;
|
|
24
24
|
};
|
|
25
25
|
export declare function getErrorMeta(error: unknown): ErrorMeta;
|
|
26
|
-
export declare function maybeMaskMessage(
|
|
26
|
+
export declare function maybeMaskMessage(message: string, status: number, requestId?: string): string;
|
package/dist/errors/utils.js
CHANGED
|
@@ -19,37 +19,37 @@ export const STATUS_CODES = {
|
|
|
19
19
|
503: "SERVICE_UNAVAILABLE",
|
|
20
20
|
504: "GATEWAY_TIMEOUT",
|
|
21
21
|
};
|
|
22
|
-
export const
|
|
22
|
+
export const STATUS_TEXT = (status) => {
|
|
23
23
|
const label = STATUS_CODES[status];
|
|
24
24
|
if (label)
|
|
25
25
|
return label;
|
|
26
26
|
return status >= 400 && status < 500 ? STATUS_CODES[400] : STATUS_CODES[500];
|
|
27
27
|
};
|
|
28
|
-
// FUTURE: always return a wrapped GatewayError?
|
|
29
28
|
export function getErrorMeta(error) {
|
|
30
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
31
29
|
let status;
|
|
32
|
-
let
|
|
30
|
+
let statusText;
|
|
31
|
+
let headers;
|
|
33
32
|
if (error instanceof GatewayError) {
|
|
34
|
-
({ status,
|
|
33
|
+
({ status, statusText, headers } = error);
|
|
35
34
|
}
|
|
36
35
|
else {
|
|
37
36
|
const normalized = normalizeAiSdkError(error);
|
|
38
37
|
if (normalized) {
|
|
39
|
-
({ status,
|
|
38
|
+
({ status, statusText, headers } = normalized);
|
|
40
39
|
}
|
|
41
40
|
else {
|
|
42
41
|
status = 500;
|
|
43
|
-
|
|
42
|
+
statusText = STATUS_TEXT(status);
|
|
43
|
+
headers = {};
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
|
-
return { status,
|
|
46
|
+
return { status, statusText, headers: headers ?? {} };
|
|
47
47
|
}
|
|
48
|
-
export function maybeMaskMessage(
|
|
48
|
+
export function maybeMaskMessage(message, status, requestId) {
|
|
49
49
|
// FUTURE: consider masking all upstream errors, also 4xx
|
|
50
|
-
if (!(isProduction() &&
|
|
51
|
-
return
|
|
50
|
+
if (!(isProduction() && status >= 500)) {
|
|
51
|
+
return message;
|
|
52
52
|
}
|
|
53
53
|
// FUTURE: always attach requestId to errors (masked and unmasked)
|
|
54
|
-
return `${
|
|
54
|
+
return `${STATUS_TEXT(status)} (${requestId ?? "see requestId in response headers"})`;
|
|
55
55
|
}
|
package/dist/lifecycle.js
CHANGED
|
@@ -2,6 +2,7 @@ import { parseConfig } from "./config";
|
|
|
2
2
|
import { toAnthropicError, toAnthropicErrorResponse } from "./errors/anthropic";
|
|
3
3
|
import { GatewayError } from "./errors/gateway";
|
|
4
4
|
import { toOpenAIError, toOpenAIErrorResponse } from "./errors/openai";
|
|
5
|
+
import { getErrorMeta } from "./errors/utils";
|
|
5
6
|
import { logger } from "./logger";
|
|
6
7
|
import { getBaggageAttributes } from "./telemetry/baggage";
|
|
7
8
|
import { instrumentFetch } from "./telemetry/fetch";
|
|
@@ -53,7 +54,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
53
54
|
requestId: ctx.requestId,
|
|
54
55
|
err: reason ?? ctx.request.signal.reason,
|
|
55
56
|
});
|
|
56
|
-
const isUpstreamError = reason instanceof GatewayError && reason.
|
|
57
|
+
const isUpstreamError = reason instanceof GatewayError && reason.statusText.startsWith("UPSTREAM_");
|
|
57
58
|
span.recordError(reason, realStatus >= 500 || isUpstreamError);
|
|
58
59
|
}
|
|
59
60
|
span.setAttributes({ "http.response.status_code_effective": realStatus });
|
|
@@ -76,10 +77,10 @@ export const winterCgHandler = (run, config) => {
|
|
|
76
77
|
}
|
|
77
78
|
if (!ctx.response) {
|
|
78
79
|
ctx.result = (await run(ctx, parsedConfig));
|
|
79
|
-
const
|
|
80
|
-
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId), {
|
|
80
|
+
const toError = ctx.operation === "messages" ? toAnthropicError : toOpenAIError;
|
|
81
|
+
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId, ctx.response), {
|
|
81
82
|
onDone: finalize,
|
|
82
|
-
|
|
83
|
+
toError: (error) => toError(error, ctx.requestId),
|
|
83
84
|
});
|
|
84
85
|
}
|
|
85
86
|
if (parsedConfig.hooks?.onResponse) {
|
|
@@ -111,11 +112,10 @@ export const winterCgHandler = (run, config) => {
|
|
|
111
112
|
const errorPayload = ctx.request.signal.aborted
|
|
112
113
|
? new GatewayError(error ?? ctx.request.signal.reason, 499)
|
|
113
114
|
: error;
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
ctx.
|
|
117
|
-
|
|
118
|
-
: toOpenAIErrorResponse(errorPayload, errorResponseInit);
|
|
115
|
+
if (!(ctx.response instanceof Response)) {
|
|
116
|
+
const toErrorResponse = ctx.operation === "messages" ? toAnthropicErrorResponse : toOpenAIErrorResponse;
|
|
117
|
+
ctx.response = toErrorResponse(errorPayload, prepareResponseInit(ctx.requestId, getErrorMeta(errorPayload)));
|
|
118
|
+
}
|
|
119
119
|
finalize(ctx.response.status, error);
|
|
120
120
|
}
|
|
121
121
|
});
|
package/dist/middleware/utils.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { LanguageModelMiddleware } from "ai";
|
|
2
2
|
import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
|
|
3
|
-
export declare function mapClaudeReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "medium" | "high" | "max" | undefined;
|
|
3
|
+
export declare function mapClaudeReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "medium" | "high" | "xhigh" | "max" | undefined;
|
|
4
4
|
export declare const claudeReasoningMiddleware: LanguageModelMiddleware;
|
|
5
5
|
export declare const claudePromptCachingMiddleware: LanguageModelMiddleware;
|
|
@@ -6,11 +6,28 @@ const isClaude = (family, version) => {
|
|
|
6
6
|
modelId.includes(`claude-${family}-${dashed}`);
|
|
7
7
|
};
|
|
8
8
|
const isClaude4 = (modelId) => modelId.includes("claude-") && modelId.includes("-4");
|
|
9
|
+
const isOpus47 = isClaude("opus", "4.7");
|
|
9
10
|
const isOpus46 = isClaude("opus", "4.6");
|
|
10
11
|
const isOpus45 = isClaude("opus", "4.5");
|
|
11
12
|
const isOpus4 = isClaude("opus", "4");
|
|
12
13
|
const isSonnet46 = isClaude("sonnet", "4.6");
|
|
13
14
|
export function mapClaudeReasoningEffort(effort, modelId) {
|
|
15
|
+
if (isOpus47(modelId)) {
|
|
16
|
+
switch (effort) {
|
|
17
|
+
case "none":
|
|
18
|
+
case "minimal":
|
|
19
|
+
case "low":
|
|
20
|
+
return "low";
|
|
21
|
+
case "medium":
|
|
22
|
+
return "medium";
|
|
23
|
+
case "high":
|
|
24
|
+
return "high";
|
|
25
|
+
case "xhigh":
|
|
26
|
+
return "xhigh";
|
|
27
|
+
case "max":
|
|
28
|
+
return "max";
|
|
29
|
+
}
|
|
30
|
+
}
|
|
14
31
|
if (isOpus46(modelId)) {
|
|
15
32
|
switch (effort) {
|
|
16
33
|
case "none":
|
|
@@ -22,6 +39,7 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
22
39
|
case "high":
|
|
23
40
|
return "high";
|
|
24
41
|
case "xhigh":
|
|
42
|
+
case "max":
|
|
25
43
|
return "max";
|
|
26
44
|
}
|
|
27
45
|
}
|
|
@@ -34,11 +52,14 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
34
52
|
return "medium";
|
|
35
53
|
case "high":
|
|
36
54
|
case "xhigh":
|
|
55
|
+
case "max":
|
|
37
56
|
return "high";
|
|
38
57
|
}
|
|
39
58
|
return undefined;
|
|
40
59
|
}
|
|
41
60
|
function getMaxOutputTokens(modelId) {
|
|
61
|
+
if (isOpus47(modelId))
|
|
62
|
+
return 128_000;
|
|
42
63
|
if (isOpus46(modelId))
|
|
43
64
|
return 128_000;
|
|
44
65
|
if (isOpus45(modelId))
|
|
@@ -69,9 +90,13 @@ export const claudeReasoningMiddleware = {
|
|
|
69
90
|
}
|
|
70
91
|
else if (reasoning.effort) {
|
|
71
92
|
if (isClaude4(modelId)) {
|
|
93
|
+
// @ts-expect-error AI SDK type missing "xhigh" effort level (native on Opus 4.7+)
|
|
72
94
|
target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
73
95
|
}
|
|
74
|
-
if (
|
|
96
|
+
if (isOpus47(modelId)) {
|
|
97
|
+
target.thinking = { type: "adaptive" };
|
|
98
|
+
}
|
|
99
|
+
else if (isOpus46(modelId)) {
|
|
75
100
|
target.thinking = clampedMaxTokens
|
|
76
101
|
? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
|
|
77
102
|
{ type: "adaptive", budgetTokens: clampedMaxTokens }
|
|
@@ -107,6 +107,18 @@ export declare const claudeOpus45: import("../../utils/preset").Preset<"anthropi
|
|
|
107
107
|
context: number;
|
|
108
108
|
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
109
109
|
}>;
|
|
110
|
+
export declare const claudeOpus47: import("../../utils/preset").Preset<"anthropic/claude-opus-4.7", CatalogModel, {
|
|
111
|
+
name: string;
|
|
112
|
+
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
113
|
+
context: number;
|
|
114
|
+
created: string;
|
|
115
|
+
knowledge: string;
|
|
116
|
+
modalities: {
|
|
117
|
+
input: readonly ["text", "image", "pdf", "file"];
|
|
118
|
+
output: readonly ["text"];
|
|
119
|
+
};
|
|
120
|
+
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
121
|
+
}>;
|
|
110
122
|
export declare const claudeOpus46: import("../../utils/preset").Preset<"anthropic/claude-opus-4.6", CatalogModel, {
|
|
111
123
|
name: string;
|
|
112
124
|
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
@@ -144,7 +156,18 @@ export declare const claudeOpus4: import("../../utils/preset").Preset<"anthropic
|
|
|
144
156
|
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
145
157
|
}>;
|
|
146
158
|
export declare const claude: {
|
|
147
|
-
readonly latest: readonly [import("../../utils/preset").Preset<"anthropic/claude-
|
|
159
|
+
readonly latest: readonly [import("../../utils/preset").Preset<"anthropic/claude-opus-4.7", CatalogModel, {
|
|
160
|
+
name: string;
|
|
161
|
+
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
162
|
+
context: number;
|
|
163
|
+
created: string;
|
|
164
|
+
knowledge: string;
|
|
165
|
+
modalities: {
|
|
166
|
+
input: readonly ["text", "image", "pdf", "file"];
|
|
167
|
+
output: readonly ["text"];
|
|
168
|
+
};
|
|
169
|
+
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
170
|
+
}>, import("../../utils/preset").Preset<"anthropic/claude-sonnet-4.6", CatalogModel, {
|
|
148
171
|
name: string;
|
|
149
172
|
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
150
173
|
created: string;
|
|
@@ -266,6 +289,17 @@ export declare const claude: {
|
|
|
266
289
|
};
|
|
267
290
|
context: number;
|
|
268
291
|
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
292
|
+
}> | import("../../utils/preset").Preset<"anthropic/claude-opus-4.7", CatalogModel, {
|
|
293
|
+
name: string;
|
|
294
|
+
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
295
|
+
context: number;
|
|
296
|
+
created: string;
|
|
297
|
+
knowledge: string;
|
|
298
|
+
modalities: {
|
|
299
|
+
input: readonly ["text", "image", "pdf", "file"];
|
|
300
|
+
output: readonly ["text"];
|
|
301
|
+
};
|
|
302
|
+
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
269
303
|
}> | import("../../utils/preset").Preset<"anthropic/claude-opus-4.6", CatalogModel, {
|
|
270
304
|
name: string;
|
|
271
305
|
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
@@ -300,7 +334,18 @@ export declare const claude: {
|
|
|
300
334
|
context: number;
|
|
301
335
|
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
302
336
|
}>)[];
|
|
303
|
-
readonly "v4.x": readonly [import("../../utils/preset").Preset<"anthropic/claude-
|
|
337
|
+
readonly "v4.x": readonly [import("../../utils/preset").Preset<"anthropic/claude-opus-4.7", CatalogModel, {
|
|
338
|
+
name: string;
|
|
339
|
+
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
340
|
+
context: number;
|
|
341
|
+
created: string;
|
|
342
|
+
knowledge: string;
|
|
343
|
+
modalities: {
|
|
344
|
+
input: readonly ["text", "image", "pdf", "file"];
|
|
345
|
+
output: readonly ["text"];
|
|
346
|
+
};
|
|
347
|
+
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
348
|
+
}>, import("../../utils/preset").Preset<"anthropic/claude-sonnet-4.6", CatalogModel, {
|
|
304
349
|
name: string;
|
|
305
350
|
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
306
351
|
created: string;
|
|
@@ -434,6 +479,18 @@ export declare const claude: {
|
|
|
434
479
|
context: number;
|
|
435
480
|
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
436
481
|
}>];
|
|
482
|
+
readonly "v4.7": readonly [import("../../utils/preset").Preset<"anthropic/claude-opus-4.7", CatalogModel, {
|
|
483
|
+
name: string;
|
|
484
|
+
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
485
|
+
context: number;
|
|
486
|
+
created: string;
|
|
487
|
+
knowledge: string;
|
|
488
|
+
modalities: {
|
|
489
|
+
input: readonly ["text", "image", "pdf", "file"];
|
|
490
|
+
output: readonly ["text"];
|
|
491
|
+
};
|
|
492
|
+
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
493
|
+
}>];
|
|
437
494
|
readonly "v4.6": readonly [import("../../utils/preset").Preset<"anthropic/claude-sonnet-4.6", CatalogModel, {
|
|
438
495
|
name: string;
|
|
439
496
|
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
@@ -663,7 +720,18 @@ export declare const claude: {
|
|
|
663
720
|
context: number;
|
|
664
721
|
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
665
722
|
}>];
|
|
666
|
-
readonly opus: readonly [import("../../utils/preset").Preset<"anthropic/claude-opus-4.
|
|
723
|
+
readonly opus: readonly [import("../../utils/preset").Preset<"anthropic/claude-opus-4.7", CatalogModel, {
|
|
724
|
+
name: string;
|
|
725
|
+
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
726
|
+
context: number;
|
|
727
|
+
created: string;
|
|
728
|
+
knowledge: string;
|
|
729
|
+
modalities: {
|
|
730
|
+
input: readonly ["text", "image", "pdf", "file"];
|
|
731
|
+
output: readonly ["text"];
|
|
732
|
+
};
|
|
733
|
+
providers: readonly ["anthropic", "bedrock", "vertex", "azure"];
|
|
734
|
+
}>, import("../../utils/preset").Preset<"anthropic/claude-opus-4.6", CatalogModel, {
|
|
667
735
|
name: string;
|
|
668
736
|
capabilities: ("reasoning" | "temperature" | "attachments" | "tool_call" | "structured_output")[];
|
|
669
737
|
created: string;
|
|
@@ -87,6 +87,15 @@ export const claudeOpus45 = presetFor()("anthropic/claude-opus-4.5", {
|
|
|
87
87
|
created: "2025-11-01",
|
|
88
88
|
knowledge: "2025-05",
|
|
89
89
|
});
|
|
90
|
+
export const claudeOpus47 = presetFor()("anthropic/claude-opus-4.7", {
|
|
91
|
+
...CLAUDE_BASE,
|
|
92
|
+
...CLAUDE_PDF_MODALITIES,
|
|
93
|
+
name: "Claude Opus 4.7",
|
|
94
|
+
capabilities: [...CLAUDE_BASE.capabilities, "reasoning"],
|
|
95
|
+
context: 1_000_000,
|
|
96
|
+
created: "2026-04-16",
|
|
97
|
+
knowledge: "2026-01",
|
|
98
|
+
});
|
|
90
99
|
export const claudeOpus46 = presetFor()("anthropic/claude-opus-4.6", {
|
|
91
100
|
...CLAUDE_BASE,
|
|
92
101
|
...CLAUDE_PDF_MODALITIES,
|
|
@@ -112,6 +121,7 @@ export const claudeOpus4 = presetFor()("anthropic/claude-opus-4", {
|
|
|
112
121
|
knowledge: "2025-03",
|
|
113
122
|
});
|
|
114
123
|
const claudeAtomic = {
|
|
124
|
+
"v4.7": [claudeOpus47],
|
|
115
125
|
"v4.6": [claudeSonnet46, claudeOpus46],
|
|
116
126
|
"v4.5": [claudeHaiku45, claudeSonnet45, claudeOpus45],
|
|
117
127
|
"v4.1": [claudeOpus41],
|
|
@@ -121,10 +131,11 @@ const claudeAtomic = {
|
|
|
121
131
|
v3: [claudeHaiku3],
|
|
122
132
|
haiku: [claudeHaiku45, claudeHaiku35, claudeHaiku3],
|
|
123
133
|
sonnet: [claudeSonnet46, claudeSonnet45, claudeSonnet4, claudeSonnet37, claudeSonnet35],
|
|
124
|
-
opus: [claudeOpus46, claudeOpus45, claudeOpus41, claudeOpus4],
|
|
134
|
+
opus: [claudeOpus47, claudeOpus46, claudeOpus45, claudeOpus41, claudeOpus4],
|
|
125
135
|
};
|
|
126
136
|
const claudeGroups = {
|
|
127
137
|
"v4.x": [
|
|
138
|
+
...claudeAtomic["v4.7"],
|
|
128
139
|
...claudeAtomic["v4.6"],
|
|
129
140
|
...claudeAtomic["v4.5"],
|
|
130
141
|
...claudeAtomic["v4.1"],
|
|
@@ -135,6 +146,6 @@ const claudeGroups = {
|
|
|
135
146
|
export const claude = {
|
|
136
147
|
...claudeAtomic,
|
|
137
148
|
...claudeGroups,
|
|
138
|
-
latest: [...claudeAtomic["v4.6"]],
|
|
149
|
+
latest: [...claudeAtomic["v4.7"], ...claudeAtomic["v4.6"]],
|
|
139
150
|
all: Object.values(claudeAtomic).flat(),
|
|
140
151
|
};
|
|
@@ -29,6 +29,7 @@ export function mapGeminiReasoningEffort(effort, modelId) {
|
|
|
29
29
|
return "medium";
|
|
30
30
|
case "high":
|
|
31
31
|
case "xhigh":
|
|
32
|
+
case "max":
|
|
32
33
|
return "high";
|
|
33
34
|
}
|
|
34
35
|
}
|
|
@@ -43,6 +44,7 @@ export function mapGeminiReasoningEffort(effort, modelId) {
|
|
|
43
44
|
return "medium";
|
|
44
45
|
case "high":
|
|
45
46
|
case "xhigh":
|
|
47
|
+
case "max":
|
|
46
48
|
return "high";
|
|
47
49
|
}
|
|
48
50
|
return undefined;
|
|
@@ -28,6 +28,7 @@ function mapGptOssReasoningEffort(effort) {
|
|
|
28
28
|
return "medium";
|
|
29
29
|
case "high":
|
|
30
30
|
case "xhigh":
|
|
31
|
+
case "max":
|
|
31
32
|
return "high";
|
|
32
33
|
}
|
|
33
34
|
return undefined;
|
|
@@ -52,7 +53,7 @@ export const openAIReasoningMiddleware = {
|
|
|
52
53
|
target.reasoningEffort = "none";
|
|
53
54
|
}
|
|
54
55
|
else if (reasoning.effort) {
|
|
55
|
-
target.reasoningEffort = reasoning.effort;
|
|
56
|
+
target.reasoningEffort = reasoning.effort === "max" ? "xhigh" : reasoning.effort;
|
|
56
57
|
}
|
|
57
58
|
// FUTURE: warn that reasoning.max_tokens (not supported) was ignored
|
|
58
59
|
delete unknown["reasoning"];
|
package/dist/models/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ProviderId } from "../providers/types";
|
|
2
|
-
export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5.3-codex-spark", "openai/gpt-5.3-chat", "openai/gpt-5.4", "openai/gpt-5.4-mini", "openai/gpt-5.4-nano", "openai/gpt-5.4-pro", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-codex-mini", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2-preview", "google/embedding-001", "google/gemma-3-1b", "google/gemma-3-4b", "google/gemma-3-12b", "google/gemma-3-27b", "google/gemma-4-e2b", "google/gemma-4-e4b", "google/gemma-4-26b-a4b", "google/gemma-4-31b", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large"];
|
|
2
|
+
export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5.3-codex-spark", "openai/gpt-5.3-chat", "openai/gpt-5.4", "openai/gpt-5.4-mini", "openai/gpt-5.4-nano", "openai/gpt-5.4-pro", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-codex-mini", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2-preview", "google/embedding-001", "google/gemma-3-1b", "google/gemma-3-4b", "google/gemma-3-12b", "google/gemma-3-27b", "google/gemma-4-e2b", "google/gemma-4-e4b", "google/gemma-4-26b-a4b", "google/gemma-4-31b", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large"];
|
|
3
3
|
export type CanonicalModelId = (typeof CANONICAL_MODEL_IDS)[number];
|
|
4
4
|
export type ModelId = CanonicalModelId | (string & {});
|
|
5
5
|
export type CatalogModel = {
|
package/dist/models/types.js
CHANGED
|
@@ -9,6 +9,7 @@ import { withCanonicalIds } from "../registry";
|
|
|
9
9
|
const MAPPING = {
|
|
10
10
|
// Require Inference Profiles and can't be resolved from standard name mapping
|
|
11
11
|
"anthropic/claude-haiku-4.5": "{ip}anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
12
|
+
"anthropic/claude-opus-4.7": "{ip}anthropic.claude-opus-4-7",
|
|
12
13
|
"anthropic/claude-sonnet-4.6": "{ip}anthropic.claude-sonnet-4-6",
|
|
13
14
|
"anthropic/claude-sonnet-4.5": "{ip}anthropic.claude-sonnet-4-5-20250929-v1:0",
|
|
14
15
|
"anthropic/claude-opus-4.6": "{ip}anthropic.claude-opus-4-6-v1",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
2
2
|
import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
|
|
3
|
-
const
|
|
3
|
+
const BEDROCK_EFFORT_CAPABLE = ["-4-6", "-4-7"];
|
|
4
|
+
const isBedrockEffortCapable = (modelId) => BEDROCK_EFFORT_CAPABLE.some((tag) => modelId.includes(tag));
|
|
4
5
|
// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
|
|
5
6
|
export const bedrockServiceTierMiddleware = {
|
|
6
7
|
specificationVersion: "v3",
|
|
@@ -83,7 +84,7 @@ export const bedrockClaudeReasoningMiddleware = {
|
|
|
83
84
|
}
|
|
84
85
|
}
|
|
85
86
|
// FUTURE: bedrock currently does not support "effort" for other 4.x models
|
|
86
|
-
if (effort !== undefined &&
|
|
87
|
+
if (effort !== undefined && isBedrockEffortCapable(model.modelId)) {
|
|
87
88
|
target.maxReasoningEffort = effort;
|
|
88
89
|
}
|
|
89
90
|
delete bedrock.thinking;
|
package/dist/telemetry/gen-ai.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { metrics } from "@opentelemetry/api";
|
|
2
|
-
import {
|
|
2
|
+
import { STATUS_TEXT } from "../errors/utils";
|
|
3
3
|
const getMeter = () => metrics.getMeter("@hebo/gateway");
|
|
4
4
|
let requestDurationHistogram;
|
|
5
5
|
let timePerOutputTokenHistogram;
|
|
@@ -75,7 +75,7 @@ export const recordRequestDuration = (duration, status, ctx, signalLevel) => {
|
|
|
75
75
|
return;
|
|
76
76
|
const attrs = getGenAiGeneralAttributes(ctx, signalLevel);
|
|
77
77
|
if (status !== 200) {
|
|
78
|
-
attrs["error.type"] = `${status} ${
|
|
78
|
+
attrs["error.type"] = `${status} ${STATUS_TEXT(status).toLowerCase()}`;
|
|
79
79
|
}
|
|
80
80
|
getRequestDurationHistogram().record(duration / 1000, attrs);
|
|
81
81
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -69,8 +69,10 @@ export type GatewayContext = {
|
|
|
69
69
|
result?: ChatCompletions | ChatCompletionsStream | Embeddings | Messages | MessagesStream | Model | ModelList | Responses | ResponsesStream;
|
|
70
70
|
/**
|
|
71
71
|
* Response object returned by the handler.
|
|
72
|
+
* Handlers may set this to a `ResponseInit` containing upstream response
|
|
73
|
+
* headers; the lifecycle merges allowlisted headers into the final `Response`.
|
|
72
74
|
*/
|
|
73
|
-
response?: Response;
|
|
75
|
+
response?: Response | ResponseInit;
|
|
74
76
|
/**
|
|
75
77
|
* Per-request telemetry signal level override.
|
|
76
78
|
* When set (via body parameter or hook), overrides `cfg.telemetry.signals.gen_ai`
|
package/dist/utils/headers.d.ts
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
export declare const REQUEST_ID_HEADER = "x-request-id";
|
|
2
|
+
export declare const RETRY_AFTER_HEADER = "retry-after";
|
|
3
|
+
export declare const RETRY_AFTER_MS_HEADER = "retry-after-ms";
|
|
4
|
+
export declare const X_SHOULD_RETRY_HEADER = "x-should-retry";
|
|
2
5
|
type HeaderSource = Request | ResponseInit | undefined;
|
|
3
6
|
export declare const resolveRequestId: (source: HeaderSource) => string | undefined;
|
|
7
|
+
export declare const filterResponseHeaders: (upstream?: HeadersInit) => Record<string, string>;
|
|
8
|
+
export declare const buildRetryHeaders: (status: number, upstream?: Record<string, string>) => Record<string, string>;
|
|
4
9
|
export {};
|
package/dist/utils/headers.js
CHANGED
|
@@ -1,22 +1,69 @@
|
|
|
1
1
|
export const REQUEST_ID_HEADER = "x-request-id";
|
|
2
|
+
export const RETRY_AFTER_HEADER = "retry-after";
|
|
3
|
+
export const RETRY_AFTER_MS_HEADER = "retry-after-ms";
|
|
4
|
+
export const X_SHOULD_RETRY_HEADER = "x-should-retry";
|
|
5
|
+
const RESPONSE_HEADER_ALLOWLIST = [
|
|
6
|
+
RETRY_AFTER_HEADER,
|
|
7
|
+
RETRY_AFTER_MS_HEADER,
|
|
8
|
+
X_SHOULD_RETRY_HEADER,
|
|
9
|
+
];
|
|
10
|
+
const RETRYABLE_STATUS_CODES = new Set([408, 409, 429, 500, 502, 503, 504]);
|
|
11
|
+
const DEFAULT_RETRY_AFTER_MS = 1000;
|
|
2
12
|
export const resolveRequestId = (source) => {
|
|
3
13
|
if (!source)
|
|
4
14
|
return undefined;
|
|
5
15
|
if (source instanceof Request) {
|
|
6
16
|
return source.headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
7
17
|
}
|
|
8
|
-
|
|
9
|
-
if (!headers)
|
|
18
|
+
if (!source.headers)
|
|
10
19
|
return undefined;
|
|
20
|
+
return getHeader(source.headers, REQUEST_ID_HEADER);
|
|
21
|
+
};
|
|
22
|
+
function getHeader(headers, key) {
|
|
11
23
|
if (headers instanceof Headers) {
|
|
12
|
-
return headers.get(
|
|
24
|
+
return headers.get(key) ?? undefined;
|
|
13
25
|
}
|
|
14
26
|
if (Array.isArray(headers)) {
|
|
15
|
-
for (const [
|
|
16
|
-
if (
|
|
17
|
-
return
|
|
27
|
+
for (const [k, v] of headers) {
|
|
28
|
+
if (k.toLowerCase() === key.toLowerCase()) {
|
|
29
|
+
return v;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return undefined;
|
|
33
|
+
}
|
|
34
|
+
return headers[key] ?? headers[key.toLowerCase()];
|
|
35
|
+
}
|
|
36
|
+
export const filterResponseHeaders = (upstream) => {
|
|
37
|
+
if (!upstream)
|
|
38
|
+
return {};
|
|
39
|
+
const filtered = {};
|
|
40
|
+
for (const key of RESPONSE_HEADER_ALLOWLIST) {
|
|
41
|
+
const value = getHeader(upstream, key);
|
|
42
|
+
if (value !== undefined) {
|
|
43
|
+
filtered[key] = value;
|
|
18
44
|
}
|
|
45
|
+
}
|
|
46
|
+
return filtered;
|
|
47
|
+
};
|
|
48
|
+
function deriveRetryAfterMs(retryAfter) {
|
|
49
|
+
if (retryAfter === undefined)
|
|
50
|
+
return undefined;
|
|
51
|
+
const num = Number(retryAfter);
|
|
52
|
+
if (Number.isFinite(num) && num > 0)
|
|
53
|
+
return num * 1000;
|
|
54
|
+
const dateMs = Date.parse(retryAfter);
|
|
55
|
+
if (!Number.isFinite(dateMs))
|
|
19
56
|
return undefined;
|
|
57
|
+
const deltaMs = dateMs - Date.now();
|
|
58
|
+
return deltaMs > 0 ? deltaMs : undefined;
|
|
59
|
+
}
|
|
60
|
+
export const buildRetryHeaders = (status, upstream = {}) => {
|
|
61
|
+
if (!RETRYABLE_STATUS_CODES.has(status)) {
|
|
62
|
+
upstream[X_SHOULD_RETRY_HEADER] = "false";
|
|
63
|
+
return upstream;
|
|
20
64
|
}
|
|
21
|
-
|
|
65
|
+
upstream[RETRY_AFTER_MS_HEADER] ??= String(deriveRetryAfterMs(upstream[RETRY_AFTER_HEADER]) ?? DEFAULT_RETRY_AFTER_MS);
|
|
66
|
+
upstream[RETRY_AFTER_HEADER] = String(Math.ceil((Number(upstream[RETRY_AFTER_MS_HEADER]) || DEFAULT_RETRY_AFTER_MS) / 1000));
|
|
67
|
+
upstream[X_SHOULD_RETRY_HEADER] ??= "true";
|
|
68
|
+
return upstream;
|
|
22
69
|
};
|
package/dist/utils/response.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { SseFrame } from "./stream";
|
|
2
|
-
export declare const prepareResponseInit: (requestId: string) => ResponseInit;
|
|
3
|
-
export declare const mergeResponseInit: (
|
|
2
|
+
export declare const prepareResponseInit: (requestId: string, upstream?: ResponseInit) => ResponseInit;
|
|
3
|
+
export declare const mergeResponseInit: (headers: Record<string, string>, responseInit?: ResponseInit) => ResponseInit;
|
|
4
4
|
export declare const toResponse: (result: ReadableStream<SseFrame> | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit, streamOptions?: {
|
|
5
5
|
onDone?: (status: number, reason?: unknown) => void;
|
|
6
|
-
|
|
6
|
+
toError?: (error: unknown) => unknown;
|
|
7
7
|
}) => Response;
|
package/dist/utils/response.js
CHANGED
|
@@ -1,19 +1,23 @@
|
|
|
1
|
-
import { REQUEST_ID_HEADER } from "./headers";
|
|
1
|
+
import { buildRetryHeaders, filterResponseHeaders, REQUEST_ID_HEADER } from "./headers";
|
|
2
2
|
import { toSseStream } from "./stream";
|
|
3
3
|
const TEXT_ENCODER = new TextEncoder();
|
|
4
|
-
export const prepareResponseInit = (requestId) =>
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
4
|
+
export const prepareResponseInit = (requestId, upstream) => {
|
|
5
|
+
const init = upstream ?? {};
|
|
6
|
+
init.headers = filterResponseHeaders(upstream?.headers);
|
|
7
|
+
if (init.status && init.status >= 400)
|
|
8
|
+
init.headers = buildRetryHeaders(init.status, init.headers);
|
|
9
|
+
init.headers[REQUEST_ID_HEADER] = requestId;
|
|
10
|
+
return init;
|
|
11
|
+
};
|
|
12
|
+
export const mergeResponseInit = (headers, responseInit) => {
|
|
13
|
+
if (!responseInit)
|
|
14
|
+
return { headers };
|
|
9
15
|
const override = responseInit?.headers;
|
|
10
16
|
if (override) {
|
|
11
17
|
new Headers(override).forEach((value, key) => {
|
|
12
|
-
headers
|
|
18
|
+
headers[key] = value;
|
|
13
19
|
});
|
|
14
20
|
}
|
|
15
|
-
if (!responseInit)
|
|
16
|
-
return { headers };
|
|
17
21
|
return {
|
|
18
22
|
status: responseInit.status,
|
|
19
23
|
statusText: responseInit.statusText,
|
package/dist/utils/stream.d.ts
CHANGED
|
@@ -5,6 +5,6 @@ export type SseFrame<T = unknown, E extends string | undefined = string | undefi
|
|
|
5
5
|
export type SseErrorFrame = SseFrame<Error, "error" | undefined>;
|
|
6
6
|
export declare function toSseStream(src: ReadableStream<SseFrame>, options?: {
|
|
7
7
|
onDone?: (status: number, reason?: unknown) => void;
|
|
8
|
+
toError?: (error: unknown) => unknown;
|
|
8
9
|
keepAliveMs?: number;
|
|
9
|
-
formatError?: (error: unknown) => unknown;
|
|
10
10
|
}): ReadableStream<Uint8Array>;
|
package/dist/utils/stream.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { toOpenAIError } from "../errors/openai";
|
|
2
1
|
const TEXT_ENCODER = new TextEncoder();
|
|
3
2
|
const SSE_DONE_CHUNK = TEXT_ENCODER.encode("data: [DONE]\n\n");
|
|
4
3
|
const SSE_KEEP_ALIVE_CHUNK = TEXT_ENCODER.encode(": keep-alive\n\n");
|
|
@@ -59,13 +58,9 @@ export function toSseStream(src, options = {}) {
|
|
|
59
58
|
}
|
|
60
59
|
const value = result.value;
|
|
61
60
|
if (value.event === "error" || value.data instanceof Error) {
|
|
62
|
-
const error = options.
|
|
63
|
-
? options.formatError(value.data)
|
|
64
|
-
: toOpenAIError(value.data);
|
|
61
|
+
const error = options.toError?.(value.data) ?? value.data;
|
|
65
62
|
controller.enqueue(TEXT_ENCODER.encode(serializeSseFrame({ event: value.event, data: error })));
|
|
66
|
-
|
|
67
|
-
const errorStatus = openAiError?.error.type === "invalid_request_error" ? 422 : 502;
|
|
68
|
-
done(controller, errorStatus, value.data);
|
|
63
|
+
done(controller, error["status"] ?? 502, value.data);
|
|
69
64
|
reader.cancel(value.data).catch(() => { });
|
|
70
65
|
return;
|
|
71
66
|
}
|
|
@@ -74,12 +69,9 @@ export function toSseStream(src, options = {}) {
|
|
|
74
69
|
}
|
|
75
70
|
catch (error) {
|
|
76
71
|
try {
|
|
77
|
-
const errorPayload = options.formatError
|
|
78
|
-
? options.formatError(error)
|
|
79
|
-
: toOpenAIError(error);
|
|
80
72
|
controller.enqueue(TEXT_ENCODER.encode(serializeSseFrame({
|
|
81
73
|
event: "error",
|
|
82
|
-
data:
|
|
74
|
+
data: options.toError?.(error) ?? error,
|
|
83
75
|
})));
|
|
84
76
|
}
|
|
85
77
|
catch { }
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.10.
|
|
3
|
+
"version": "0.10.6",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI /chat/completions, OpenResponses /responses & Anthropic /messages.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|