@hebo-ai/gateway 0.6.2-rc1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -8
- package/dist/config.js +28 -1
- package/dist/endpoints/chat-completions/converters.d.ts +5 -5
- package/dist/endpoints/chat-completions/converters.js +86 -49
- package/dist/endpoints/chat-completions/handler.js +4 -4
- package/dist/endpoints/chat-completions/otel.d.ts +1 -1
- package/dist/endpoints/chat-completions/otel.js +20 -18
- package/dist/endpoints/chat-completions/schema.d.ts +47 -23
- package/dist/endpoints/chat-completions/schema.js +24 -17
- package/dist/endpoints/embeddings/handler.js +2 -2
- package/dist/endpoints/embeddings/otel.d.ts +2 -2
- package/dist/endpoints/embeddings/otel.js +7 -2
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/handler.js +2 -2
- package/dist/errors/openai.d.ts +1 -6
- package/dist/lifecycle.d.ts +3 -2
- package/dist/lifecycle.js +4 -6
- package/dist/middleware/utils.js +0 -1
- package/dist/models/amazon/middleware.js +6 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/google/presets.d.ts +28 -0
- package/dist/models/google/presets.js +7 -1
- package/dist/models/openai/middleware.js +7 -7
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.d.ts +1 -0
- package/dist/providers/bedrock/middleware.js +54 -23
- package/dist/providers/groq/index.d.ts +1 -0
- package/dist/providers/groq/index.js +1 -0
- package/dist/providers/groq/middleware.d.ts +2 -0
- package/dist/providers/groq/middleware.js +31 -0
- package/dist/providers/vertex/index.d.ts +1 -0
- package/dist/providers/vertex/index.js +1 -0
- package/dist/providers/vertex/middleware.d.ts +2 -0
- package/dist/providers/vertex/middleware.js +47 -0
- package/dist/types.d.ts +25 -4
- package/dist/types.js +1 -0
- package/dist/utils/response.d.ts +4 -1
- package/dist/utils/response.js +5 -20
- package/dist/utils/stream.d.ts +9 -0
- package/dist/utils/stream.js +100 -0
- package/package.json +5 -1
- package/dist/telemetry/stream.d.ts +0 -3
- package/dist/telemetry/stream.js +0 -58
|
@@ -11,14 +11,15 @@ export const geminiDimensionsMiddleware = {
|
|
|
11
11
|
const dimensions = unknown["dimensions"];
|
|
12
12
|
if (!dimensions)
|
|
13
13
|
return params;
|
|
14
|
-
(params.providerOptions["google"] ??= {})
|
|
14
|
+
const target = (params.providerOptions["google"] ??= {});
|
|
15
|
+
target.outputDimensionality = dimensions;
|
|
15
16
|
delete unknown["dimensions"];
|
|
16
17
|
return params;
|
|
17
18
|
},
|
|
18
19
|
};
|
|
19
20
|
// https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
|
|
20
21
|
export function mapGeminiReasoningEffort(effort, modelId) {
|
|
21
|
-
if (modelId.includes("
|
|
22
|
+
if (modelId.includes("pro")) {
|
|
22
23
|
switch (effort) {
|
|
23
24
|
case "none":
|
|
24
25
|
case "minimal":
|
|
@@ -28,26 +29,22 @@ export function mapGeminiReasoningEffort(effort, modelId) {
|
|
|
28
29
|
return "medium";
|
|
29
30
|
case "high":
|
|
30
31
|
case "xhigh":
|
|
31
|
-
case "max":
|
|
32
32
|
return "high";
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
return "high";
|
|
48
|
-
}
|
|
35
|
+
// Flash
|
|
36
|
+
switch (effort) {
|
|
37
|
+
case "none":
|
|
38
|
+
case "minimal":
|
|
39
|
+
return "minimal";
|
|
40
|
+
case "low":
|
|
41
|
+
return "low";
|
|
42
|
+
case "medium":
|
|
43
|
+
return "medium";
|
|
44
|
+
case "high":
|
|
45
|
+
case "xhigh":
|
|
46
|
+
return "high";
|
|
49
47
|
}
|
|
50
|
-
return effort;
|
|
51
48
|
}
|
|
52
49
|
export const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
|
|
53
50
|
export const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
|
|
@@ -58,6 +55,9 @@ export const geminiReasoningMiddleware = {
|
|
|
58
55
|
const unknown = params.providerOptions?.["unknown"];
|
|
59
56
|
if (!unknown)
|
|
60
57
|
return params;
|
|
58
|
+
// If thinking options exist, just pass through
|
|
59
|
+
if (unknown["thinking_config"])
|
|
60
|
+
return params;
|
|
61
61
|
const reasoning = unknown["reasoning"];
|
|
62
62
|
if (!reasoning)
|
|
63
63
|
return params;
|
|
@@ -65,19 +65,19 @@ export const geminiReasoningMiddleware = {
|
|
|
65
65
|
const modelId = model.modelId;
|
|
66
66
|
if (modelId.includes("gemini-2")) {
|
|
67
67
|
const is25Pro = modelId.includes("gemini-2.5-pro");
|
|
68
|
-
target
|
|
68
|
+
target.thinkingConfig = {
|
|
69
69
|
thinkingBudget: reasoning.max_tokens ??
|
|
70
70
|
calculateReasoningBudgetFromEffort(reasoning.effort ?? "none", params.maxOutputTokens ?? GEMINI_DEFAULT_MAX_OUTPUT_TOKENS, is25Pro ? GEMINI_2_5_PRO_MIN_THINKING_BUDGET : 0),
|
|
71
71
|
};
|
|
72
72
|
}
|
|
73
73
|
else if (modelId.includes("gemini-3") && reasoning.effort) {
|
|
74
|
-
target
|
|
74
|
+
target.thinkingConfig = {
|
|
75
75
|
thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
|
|
76
76
|
};
|
|
77
77
|
// FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
|
|
78
78
|
}
|
|
79
|
-
(target
|
|
80
|
-
|
|
79
|
+
const thinkingConfig = (target.thinkingConfig ??= {});
|
|
80
|
+
thinkingConfig.includeThoughts = reasoning.enabled ? !reasoning.exclude : false;
|
|
81
81
|
delete unknown["reasoning"];
|
|
82
82
|
return params;
|
|
83
83
|
},
|
|
@@ -91,9 +91,13 @@ export const geminiPromptCachingMiddleware = {
|
|
|
91
91
|
const unknown = params.providerOptions?.["unknown"];
|
|
92
92
|
if (!unknown)
|
|
93
93
|
return params;
|
|
94
|
-
|
|
95
|
-
if (
|
|
96
|
-
|
|
94
|
+
// If cached_content options exist, just pass through
|
|
95
|
+
if (unknown["cached_content"])
|
|
96
|
+
return params;
|
|
97
|
+
const promptCacheKey = unknown["prompt_cache_key"];
|
|
98
|
+
if (promptCacheKey) {
|
|
99
|
+
(params.providerOptions["google"] ??= {}).cachedContent =
|
|
100
|
+
promptCacheKey;
|
|
97
101
|
}
|
|
98
102
|
delete unknown["cached_content"];
|
|
99
103
|
return params;
|
|
@@ -9,6 +9,16 @@ export declare const geminiEmbedding001: import("../../utils/preset").Preset<"go
|
|
|
9
9
|
};
|
|
10
10
|
providers: readonly ["vertex"];
|
|
11
11
|
}>;
|
|
12
|
+
export declare const geminiEmbedding2Preview: import("../../utils/preset").Preset<"google/gemini-embedding-2-preview", CatalogModel, {
|
|
13
|
+
name: string;
|
|
14
|
+
created: string;
|
|
15
|
+
context: number;
|
|
16
|
+
modalities: {
|
|
17
|
+
input: readonly ["text"];
|
|
18
|
+
output: readonly ["embedding"];
|
|
19
|
+
};
|
|
20
|
+
providers: readonly ["vertex"];
|
|
21
|
+
}>;
|
|
12
22
|
export declare const gemini3FlashPreview: import("../../utils/preset").Preset<"google/gemini-3-flash-preview", CatalogModel, {
|
|
13
23
|
name: string;
|
|
14
24
|
created: string;
|
|
@@ -159,6 +169,15 @@ export declare const gemini: {
|
|
|
159
169
|
output: readonly ["embedding"];
|
|
160
170
|
};
|
|
161
171
|
providers: readonly ["vertex"];
|
|
172
|
+
}>, import("../../utils/preset").Preset<"google/gemini-embedding-2-preview", CatalogModel, {
|
|
173
|
+
name: string;
|
|
174
|
+
created: string;
|
|
175
|
+
context: number;
|
|
176
|
+
modalities: {
|
|
177
|
+
input: readonly ["text"];
|
|
178
|
+
output: readonly ["embedding"];
|
|
179
|
+
};
|
|
180
|
+
providers: readonly ["vertex"];
|
|
162
181
|
}>];
|
|
163
182
|
readonly all: (import("../../utils/preset").Preset<"google/embedding-001", CatalogModel, {
|
|
164
183
|
name: string;
|
|
@@ -169,6 +188,15 @@ export declare const gemini: {
|
|
|
169
188
|
output: readonly ["embedding"];
|
|
170
189
|
};
|
|
171
190
|
providers: readonly ["vertex"];
|
|
191
|
+
}> | import("../../utils/preset").Preset<"google/gemini-embedding-2-preview", CatalogModel, {
|
|
192
|
+
name: string;
|
|
193
|
+
created: string;
|
|
194
|
+
context: number;
|
|
195
|
+
modalities: {
|
|
196
|
+
input: readonly ["text"];
|
|
197
|
+
output: readonly ["embedding"];
|
|
198
|
+
};
|
|
199
|
+
providers: readonly ["vertex"];
|
|
172
200
|
}> | import("../../utils/preset").Preset<"google/gemini-3-flash-preview", CatalogModel, {
|
|
173
201
|
name: string;
|
|
174
202
|
created: string;
|
|
@@ -27,6 +27,12 @@ export const geminiEmbedding001 = presetFor()("google/embedding-001", {
|
|
|
27
27
|
created: "2025-05-20",
|
|
28
28
|
context: 8192,
|
|
29
29
|
});
|
|
30
|
+
export const geminiEmbedding2Preview = presetFor()("google/gemini-embedding-2-preview", {
|
|
31
|
+
...GEMINI_EMBEDDINGS_BASE,
|
|
32
|
+
name: "Gemini Embedding 2 (Preview)",
|
|
33
|
+
created: "2026-03-10",
|
|
34
|
+
context: 8192,
|
|
35
|
+
});
|
|
30
36
|
export const gemini3FlashPreview = presetFor()("google/gemini-3-flash-preview", {
|
|
31
37
|
...GEMINI_BASE,
|
|
32
38
|
name: "Gemini 3 Flash (Preview)",
|
|
@@ -66,7 +72,7 @@ export const gemini25Pro = presetFor()("google/gemini-2.5-pro", {
|
|
|
66
72
|
const geminiAtomic = {
|
|
67
73
|
"v2.5": [gemini25FlashLite, gemini25Flash, gemini25Pro],
|
|
68
74
|
"v3-preview": [gemini3FlashPreview, gemini31FlashLitePreview, gemini31ProPreview],
|
|
69
|
-
embeddings: [geminiEmbedding001],
|
|
75
|
+
embeddings: [geminiEmbedding001, geminiEmbedding2Preview],
|
|
70
76
|
};
|
|
71
77
|
const geminiGroups = {
|
|
72
78
|
"v2.x": [...geminiAtomic["v2.5"]],
|
|
@@ -10,7 +10,8 @@ export const openAIDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["openai"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["openai"] ??= {});
|
|
14
|
+
target.dimensions = dimensions;
|
|
14
15
|
delete unknown["dimensions"];
|
|
15
16
|
return params;
|
|
16
17
|
},
|
|
@@ -27,7 +28,6 @@ function mapGptOssReasoningEffort(effort) {
|
|
|
27
28
|
return "medium";
|
|
28
29
|
case "high":
|
|
29
30
|
case "xhigh":
|
|
30
|
-
case "max":
|
|
31
31
|
return "high";
|
|
32
32
|
}
|
|
33
33
|
}
|
|
@@ -45,13 +45,13 @@ export const openAIReasoningMiddleware = {
|
|
|
45
45
|
const isGptOss = model.modelId.includes("gpt-oss");
|
|
46
46
|
if (isGptOss) {
|
|
47
47
|
// FUTURE: warn that unable to disable reasoning for gpt-oss models
|
|
48
|
-
target
|
|
48
|
+
target.reasoningEffort = mapGptOssReasoningEffort(reasoning.effort);
|
|
49
49
|
}
|
|
50
50
|
else if (reasoning.enabled === false) {
|
|
51
|
-
target
|
|
51
|
+
target.reasoningEffort = "none";
|
|
52
52
|
}
|
|
53
53
|
else if (reasoning.effort) {
|
|
54
|
-
target
|
|
54
|
+
target.reasoningEffort = reasoning.effort;
|
|
55
55
|
}
|
|
56
56
|
// FUTURE: warn that reasoning.max_tokens (not supported) was ignored
|
|
57
57
|
delete unknown["reasoning"];
|
|
@@ -71,9 +71,9 @@ export const openAIPromptCachingMiddleware = {
|
|
|
71
71
|
if (key || retention) {
|
|
72
72
|
const target = (params.providerOptions["openai"] ??= {});
|
|
73
73
|
if (key)
|
|
74
|
-
target
|
|
74
|
+
target.promptCacheKey = key;
|
|
75
75
|
if (retention)
|
|
76
|
-
target
|
|
76
|
+
target.promptCacheRetention = retention;
|
|
77
77
|
}
|
|
78
78
|
delete unknown["prompt_cache_key"];
|
|
79
79
|
delete unknown["prompt_cache_retention"];
|
package/dist/models/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ProviderId } from "../providers/types";
|
|
2
|
-
export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/embedding-001", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large"];
|
|
2
|
+
export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2-preview", "google/embedding-001", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large"];
|
|
3
3
|
export type CanonicalModelId = (typeof CANONICAL_MODEL_IDS)[number];
|
|
4
4
|
export type ModelId = CanonicalModelId | (string & {});
|
|
5
5
|
export type CatalogModel = {
|
package/dist/models/types.js
CHANGED
|
@@ -10,7 +10,8 @@ export const voyageDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["voyage"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["voyage"] ??= {});
|
|
14
|
+
target.outputDimension = dimensions;
|
|
14
15
|
delete unknown["dimensions"];
|
|
15
16
|
return params;
|
|
16
17
|
},
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { LanguageModelMiddleware } from "ai";
|
|
2
|
+
export declare const bedrockServiceTierMiddleware: LanguageModelMiddleware;
|
|
2
3
|
export declare const bedrockGptReasoningMiddleware: LanguageModelMiddleware;
|
|
3
4
|
export declare const bedrockClaudeReasoningMiddleware: LanguageModelMiddleware;
|
|
4
5
|
export declare const bedrockPromptCachingMiddleware: LanguageModelMiddleware;
|
|
@@ -1,5 +1,37 @@
|
|
|
1
1
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
2
2
|
const isClaude46 = (modelId) => modelId.includes("-4-6");
|
|
3
|
+
// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
|
|
4
|
+
export const bedrockServiceTierMiddleware = {
|
|
5
|
+
specificationVersion: "v3",
|
|
6
|
+
// eslint-disable-next-line require-await
|
|
7
|
+
transformParams: async ({ params }) => {
|
|
8
|
+
const bedrock = params.providerOptions?.["bedrock"];
|
|
9
|
+
if (!bedrock || typeof bedrock !== "object")
|
|
10
|
+
return params;
|
|
11
|
+
// UPSTREAM: https://github.com/vercel/ai/issues/13241
|
|
12
|
+
// @ts-expect-error AI SDK missing serviceTier, need to open PR
|
|
13
|
+
const tier = bedrock["serviceTier"];
|
|
14
|
+
switch (tier) {
|
|
15
|
+
case undefined:
|
|
16
|
+
return params;
|
|
17
|
+
case "auto":
|
|
18
|
+
// Bedrock uses its default tier when omitted.
|
|
19
|
+
// @ts-expect-error AI SDK missing serviceTier, need to open PR
|
|
20
|
+
delete bedrock.serviceTier;
|
|
21
|
+
return params;
|
|
22
|
+
case "scale":
|
|
23
|
+
// @ts-expect-error AI SDK missing serviceTier, need to open PR
|
|
24
|
+
bedrock.serviceTier = { type: "reserved" };
|
|
25
|
+
return params;
|
|
26
|
+
case "default":
|
|
27
|
+
case "flex":
|
|
28
|
+
case "priority":
|
|
29
|
+
// @ts-expect-error AI SDK missing serviceTier, need to open PR
|
|
30
|
+
bedrock.serviceTier = { type: tier };
|
|
31
|
+
return params;
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
};
|
|
3
35
|
export const bedrockGptReasoningMiddleware = {
|
|
4
36
|
specificationVersion: "v3",
|
|
5
37
|
// oxlint-disable-next-line require-await
|
|
@@ -7,14 +39,15 @@ export const bedrockGptReasoningMiddleware = {
|
|
|
7
39
|
if (!model.modelId.includes("gpt"))
|
|
8
40
|
return params;
|
|
9
41
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
10
|
-
if (!bedrock
|
|
42
|
+
if (!bedrock)
|
|
11
43
|
return params;
|
|
12
|
-
const effort = bedrock
|
|
44
|
+
const effort = bedrock.reasoningEffort;
|
|
13
45
|
if (effort === undefined)
|
|
14
46
|
return params;
|
|
15
|
-
const target = (bedrock
|
|
16
|
-
|
|
17
|
-
|
|
47
|
+
const target = (bedrock.reasoningConfig ??= {});
|
|
48
|
+
// @ts-expect-error AI SDK does accept this
|
|
49
|
+
target.maxReasoningEffort = effort;
|
|
50
|
+
delete bedrock.reasoningEffort;
|
|
18
51
|
return params;
|
|
19
52
|
},
|
|
20
53
|
};
|
|
@@ -25,28 +58,25 @@ export const bedrockClaudeReasoningMiddleware = {
|
|
|
25
58
|
if (!model.modelId.includes("claude"))
|
|
26
59
|
return params;
|
|
27
60
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
28
|
-
if (!bedrock
|
|
61
|
+
if (!bedrock)
|
|
29
62
|
return params;
|
|
30
|
-
const thinking = bedrock
|
|
31
|
-
const effort = bedrock
|
|
63
|
+
const thinking = bedrock.thinking;
|
|
64
|
+
const effort = bedrock.effort;
|
|
32
65
|
if (!thinking && effort === undefined)
|
|
33
66
|
return params;
|
|
34
|
-
const target = (bedrock
|
|
67
|
+
const target = (bedrock.reasoningConfig ??= {});
|
|
35
68
|
if (thinking && typeof thinking === "object") {
|
|
36
|
-
|
|
37
|
-
if (
|
|
38
|
-
target
|
|
39
|
-
}
|
|
40
|
-
if (thinkingOptions["budgetTokens"] !== undefined) {
|
|
41
|
-
target["budgetTokens"] = thinkingOptions["budgetTokens"];
|
|
69
|
+
target.type = thinking.type;
|
|
70
|
+
if ("budgetTokens" in thinking && thinking.budgetTokens !== undefined) {
|
|
71
|
+
target.budgetTokens = thinking.budgetTokens;
|
|
42
72
|
}
|
|
43
73
|
}
|
|
44
74
|
// FUTURE: bedrock currently does not support "effort" for other 4.x models
|
|
45
75
|
if (effort !== undefined && isClaude46(model.modelId)) {
|
|
46
|
-
target
|
|
76
|
+
target.maxReasoningEffort = effort;
|
|
47
77
|
}
|
|
48
|
-
delete bedrock
|
|
49
|
-
delete bedrock
|
|
78
|
+
delete bedrock.thinking;
|
|
79
|
+
delete bedrock.effort;
|
|
50
80
|
return params;
|
|
51
81
|
},
|
|
52
82
|
};
|
|
@@ -79,18 +109,18 @@ export const bedrockPromptCachingMiddleware = {
|
|
|
79
109
|
delete entryBedrock["cacheControl"];
|
|
80
110
|
};
|
|
81
111
|
for (const message of params.prompt) {
|
|
82
|
-
processCacheControl(message
|
|
83
|
-
if (!Array.isArray(message
|
|
112
|
+
processCacheControl(message.providerOptions);
|
|
113
|
+
if (!Array.isArray(message.content))
|
|
84
114
|
continue;
|
|
85
|
-
for (const part of message
|
|
86
|
-
processCacheControl(part
|
|
115
|
+
for (const part of message.content) {
|
|
116
|
+
processCacheControl(part.providerOptions);
|
|
87
117
|
}
|
|
88
118
|
lastCacheableBlock = message;
|
|
89
119
|
}
|
|
90
120
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
91
121
|
const cacheControl = bedrock?.["cacheControl"];
|
|
92
122
|
if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
|
|
93
|
-
((lastCacheableBlock
|
|
123
|
+
((lastCacheableBlock.providerOptions ??= {})["bedrock"] ??= {})["cachePoint"] =
|
|
94
124
|
toBedrockCachePoint(model.modelId, cacheControl);
|
|
95
125
|
}
|
|
96
126
|
delete bedrock?.["cacheControl"];
|
|
@@ -99,6 +129,7 @@ export const bedrockPromptCachingMiddleware = {
|
|
|
99
129
|
};
|
|
100
130
|
modelMiddlewareMatcher.useForProvider("amazon-bedrock", {
|
|
101
131
|
language: [
|
|
132
|
+
bedrockServiceTierMiddleware,
|
|
102
133
|
bedrockGptReasoningMiddleware,
|
|
103
134
|
bedrockClaudeReasoningMiddleware,
|
|
104
135
|
bedrockPromptCachingMiddleware,
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
2
|
+
// https://console.groq.com/docs/service-tiers
|
|
3
|
+
export const groqServiceTierMiddleware = {
|
|
4
|
+
specificationVersion: "v3",
|
|
5
|
+
// eslint-disable-next-line require-await
|
|
6
|
+
transformParams: async ({ params }) => {
|
|
7
|
+
const groq = params.providerOptions?.["groq"];
|
|
8
|
+
if (!groq || typeof groq !== "object")
|
|
9
|
+
return params;
|
|
10
|
+
const tier = groq.serviceTier;
|
|
11
|
+
switch (tier) {
|
|
12
|
+
case undefined:
|
|
13
|
+
return params;
|
|
14
|
+
case "auto":
|
|
15
|
+
case "flex":
|
|
16
|
+
return params;
|
|
17
|
+
case "default":
|
|
18
|
+
groq.serviceTier = "on_demand";
|
|
19
|
+
return params;
|
|
20
|
+
case "scale":
|
|
21
|
+
case "priority":
|
|
22
|
+
// UPSTREAM: https://github.com/vercel/ai/issues/13235
|
|
23
|
+
// @ts-expect-error AI SDK missing "performance", need to open PR
|
|
24
|
+
groq.serviceTier = "performance";
|
|
25
|
+
return params;
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
};
|
|
29
|
+
modelMiddlewareMatcher.useForProvider("groq.*", {
|
|
30
|
+
language: [groqServiceTierMiddleware],
|
|
31
|
+
});
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
2
|
+
const VERTEX_REQUEST_TYPE_HEADER = "x-vertex-ai-llm-request-type";
|
|
3
|
+
const VERTEX_SHARED_REQUEST_TYPE_HEADER = "x-vertex-ai-llm-shared-request-type";
|
|
4
|
+
function setHeaderIfMissing(headers, key, value) {
|
|
5
|
+
headers[key] ??= value;
|
|
6
|
+
}
|
|
7
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/standard-paygo
|
|
8
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/priority-paygo
|
|
9
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/flex-paygo
|
|
10
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/provisioned-throughput/use-provisioned-throughput
|
|
11
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/reference/rest/v1/GenerateContentResponse#TrafficType
|
|
12
|
+
export const vertexServiceTierMiddleware = {
|
|
13
|
+
specificationVersion: "v3",
|
|
14
|
+
// eslint-disable-next-line require-await
|
|
15
|
+
transformParams: async ({ params }) => {
|
|
16
|
+
const vertex = params.providerOptions?.["vertex"];
|
|
17
|
+
if (!vertex || typeof vertex !== "object")
|
|
18
|
+
return params;
|
|
19
|
+
const tier = vertex["serviceTier"];
|
|
20
|
+
const headers = (params.headers ??= {});
|
|
21
|
+
switch (tier) {
|
|
22
|
+
case undefined:
|
|
23
|
+
return params;
|
|
24
|
+
case "flex":
|
|
25
|
+
setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "shared");
|
|
26
|
+
setHeaderIfMissing(headers, VERTEX_SHARED_REQUEST_TYPE_HEADER, "flex");
|
|
27
|
+
break;
|
|
28
|
+
case "priority":
|
|
29
|
+
setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "shared");
|
|
30
|
+
setHeaderIfMissing(headers, VERTEX_SHARED_REQUEST_TYPE_HEADER, "priority");
|
|
31
|
+
break;
|
|
32
|
+
case "scale":
|
|
33
|
+
setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "dedicated");
|
|
34
|
+
break;
|
|
35
|
+
case "default":
|
|
36
|
+
setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "shared");
|
|
37
|
+
break;
|
|
38
|
+
case "auto":
|
|
39
|
+
break;
|
|
40
|
+
}
|
|
41
|
+
delete vertex["serviceTier"];
|
|
42
|
+
return params;
|
|
43
|
+
},
|
|
44
|
+
};
|
|
45
|
+
modelMiddlewareMatcher.useForProvider(["google.vertex.*"], {
|
|
46
|
+
language: [vertexServiceTierMiddleware],
|
|
47
|
+
});
|
package/dist/types.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ProviderV3 } from "@ai-sdk/provider";
|
|
2
2
|
import type { Tracer } from "@opentelemetry/api";
|
|
3
|
-
import type { ChatCompletions, ChatCompletionsBody,
|
|
3
|
+
import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsStream } from "./endpoints/chat-completions/schema";
|
|
4
4
|
import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
|
|
5
5
|
import type { Model, ModelList } from "./endpoints/models";
|
|
6
6
|
import type { Logger, LoggerConfig } from "./logger";
|
|
@@ -57,7 +57,7 @@ export type GatewayContext = {
|
|
|
57
57
|
/**
|
|
58
58
|
* Result returned by the handler (pre-response).
|
|
59
59
|
*/
|
|
60
|
-
result?: ChatCompletions |
|
|
60
|
+
result?: ChatCompletions | ChatCompletionsStream | Embeddings | Model | ModelList;
|
|
61
61
|
/**
|
|
62
62
|
* Response object returned by the handler.
|
|
63
63
|
*/
|
|
@@ -104,7 +104,7 @@ export type GatewayHooks = {
|
|
|
104
104
|
* Runs after the endpoint handler.
|
|
105
105
|
* @returns Result to replace, or undefined to keep original.
|
|
106
106
|
*/
|
|
107
|
-
after?: (ctx: AfterHookContext) => void | ChatCompletions |
|
|
107
|
+
after?: (ctx: AfterHookContext) => void | ChatCompletions | ChatCompletionsStream | Embeddings | Promise<void | ChatCompletions | ChatCompletionsStream | Embeddings>;
|
|
108
108
|
/**
|
|
109
109
|
* Runs after the lifecycle has produced the final Response.
|
|
110
110
|
* @returns Replacement Response, or undefined to keep original.
|
|
@@ -112,6 +112,18 @@ export type GatewayHooks = {
|
|
|
112
112
|
onResponse?: (ctx: OnResponseHookContext) => void | Response | Promise<void | Response>;
|
|
113
113
|
};
|
|
114
114
|
export type TelemetrySignalLevel = "off" | "required" | "recommended" | "full";
|
|
115
|
+
export declare const DEFAULT_CHAT_TIMEOUT_MS: number;
|
|
116
|
+
export type GatewayTimeout = number | null | {
|
|
117
|
+
/**
|
|
118
|
+
* Default timeout used.
|
|
119
|
+
*/
|
|
120
|
+
normal?: number | null;
|
|
121
|
+
/**
|
|
122
|
+
* Timeout used when `service_tier=flex`.
|
|
123
|
+
* Defaults to 3x `normal` when omitted.
|
|
124
|
+
*/
|
|
125
|
+
flex?: number | null;
|
|
126
|
+
};
|
|
115
127
|
/**
|
|
116
128
|
* Main configuration object for the gateway.
|
|
117
129
|
*/
|
|
@@ -162,9 +174,18 @@ export type GatewayConfig = {
|
|
|
162
174
|
hebo?: TelemetrySignalLevel;
|
|
163
175
|
};
|
|
164
176
|
};
|
|
177
|
+
/**
|
|
178
|
+
* Optional timeout for server responses.
|
|
179
|
+
* Supports a number in milliseconds, or tiered config.
|
|
180
|
+
*/
|
|
181
|
+
timeouts?: GatewayTimeout;
|
|
165
182
|
};
|
|
166
183
|
export declare const kParsed: unique symbol;
|
|
167
|
-
export type GatewayConfigParsed = GatewayConfig & {
|
|
184
|
+
export type GatewayConfigParsed = Omit<GatewayConfig, "timeouts"> & {
|
|
185
|
+
timeouts: {
|
|
186
|
+
normal?: number;
|
|
187
|
+
flex?: number;
|
|
188
|
+
};
|
|
168
189
|
[kParsed]: true;
|
|
169
190
|
};
|
|
170
191
|
export interface Endpoint {
|
package/dist/types.js
CHANGED
package/dist/utils/response.d.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import type { SseFrame } from "./stream";
|
|
1
2
|
export declare const prepareResponseInit: (requestId: string) => ResponseInit;
|
|
2
3
|
export declare const mergeResponseInit: (defaultHeaders: HeadersInit, responseInit?: ResponseInit) => ResponseInit;
|
|
3
|
-
export declare const toResponse: (result: ReadableStream | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit
|
|
4
|
+
export declare const toResponse: (result: ReadableStream<SseFrame> | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit, streamOptions?: {
|
|
5
|
+
onDone?: (status: number, reason?: unknown) => void;
|
|
6
|
+
}) => Response;
|
package/dist/utils/response.js
CHANGED
|
@@ -1,17 +1,6 @@
|
|
|
1
1
|
import { REQUEST_ID_HEADER } from "./headers";
|
|
2
|
+
import { toSseStream } from "./stream";
|
|
2
3
|
const TEXT_ENCODER = new TextEncoder();
|
|
3
|
-
class JsonToSseTransformStream extends TransformStream {
|
|
4
|
-
constructor() {
|
|
5
|
-
super({
|
|
6
|
-
transform(part, controller) {
|
|
7
|
-
controller.enqueue(`data: ${JSON.stringify(part)}\n\n`);
|
|
8
|
-
},
|
|
9
|
-
flush(controller) {
|
|
10
|
-
controller.enqueue("data: [DONE]\n\n");
|
|
11
|
-
},
|
|
12
|
-
});
|
|
13
|
-
}
|
|
14
|
-
}
|
|
15
4
|
export const prepareResponseInit = (requestId) => ({
|
|
16
5
|
headers: { [REQUEST_ID_HEADER]: requestId },
|
|
17
6
|
});
|
|
@@ -31,11 +20,11 @@ export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
|
31
20
|
headers,
|
|
32
21
|
};
|
|
33
22
|
};
|
|
34
|
-
export const toResponse = (result, responseInit) => {
|
|
23
|
+
export const toResponse = (result, responseInit, streamOptions) => {
|
|
35
24
|
let body;
|
|
36
25
|
const isStream = result instanceof ReadableStream;
|
|
37
26
|
if (isStream) {
|
|
38
|
-
body = result
|
|
27
|
+
body = toSseStream(result, streamOptions);
|
|
39
28
|
}
|
|
40
29
|
else if (result instanceof Uint8Array) {
|
|
41
30
|
body = result;
|
|
@@ -43,16 +32,12 @@ export const toResponse = (result, responseInit) => {
|
|
|
43
32
|
else if (typeof result === "string") {
|
|
44
33
|
body = TEXT_ENCODER.encode(result);
|
|
45
34
|
}
|
|
46
|
-
else if (result instanceof Error) {
|
|
47
|
-
body = TEXT_ENCODER.encode(JSON.stringify({ message: result.message }));
|
|
48
|
-
}
|
|
49
35
|
else {
|
|
50
36
|
body = TEXT_ENCODER.encode(JSON.stringify(result));
|
|
51
37
|
}
|
|
52
38
|
if (!responseInit?.statusText) {
|
|
53
|
-
const
|
|
54
|
-
const
|
|
55
|
-
const statusText = isError ? "REQUEST_FAILED" : "OK";
|
|
39
|
+
const status = responseInit?.status ?? 200;
|
|
40
|
+
const statusText = "OK";
|
|
56
41
|
const headers = responseInit?.headers;
|
|
57
42
|
responseInit = headers ? { status, statusText, headers } : { status, statusText };
|
|
58
43
|
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type SseFrame<T = unknown, E extends string | undefined = string | undefined> = {
|
|
2
|
+
data: T;
|
|
3
|
+
event?: E;
|
|
4
|
+
};
|
|
5
|
+
export type SseErrorFrame = SseFrame<Error, "error" | undefined>;
|
|
6
|
+
export declare function toSseStream(src: ReadableStream<SseFrame>, options?: {
|
|
7
|
+
onDone?: (status: number, reason?: unknown) => void;
|
|
8
|
+
keepAliveMs?: number;
|
|
9
|
+
}): ReadableStream<Uint8Array>;
|