@hebo-ai/gateway 0.6.2-rc1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/endpoints/chat-completions/converters.js +21 -20
- package/dist/endpoints/chat-completions/schema.d.ts +4 -18
- package/dist/endpoints/chat-completions/schema.js +14 -17
- package/dist/endpoints/embeddings/otel.js +5 -0
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/middleware/utils.js +0 -1
- package/dist/models/amazon/middleware.js +6 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/openai/middleware.js +7 -7
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.js +21 -23
- package/package.json +5 -1
package/README.md
CHANGED
|
@@ -536,14 +536,14 @@ Normalization rules:
|
|
|
536
536
|
|
|
537
537
|
- `enabled` -> fall-back to model default if none provided
|
|
538
538
|
- `max_tokens`: fall-back to model default if model supports
|
|
539
|
-
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh
|
|
539
|
+
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
|
|
540
540
|
- Generic `effort` -> budget = percentage of `max_tokens`
|
|
541
541
|
- `none`: 0%
|
|
542
542
|
- `minimal`: 10%
|
|
543
543
|
- `low`: 20%
|
|
544
544
|
- `medium`: 50% (default)
|
|
545
545
|
- `high`: 80%
|
|
546
|
-
- `xhigh
|
|
546
|
+
- `xhigh`: 95%
|
|
547
547
|
|
|
548
548
|
Reasoning output is surfaced as extension to the `completion` object.
|
|
549
549
|
|
|
@@ -665,7 +665,7 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
|
|
|
665
665
|
|
|
666
666
|
> [!TIP]
|
|
667
667
|
> To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
|
|
668
|
-
> For `/chat/completions`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
|
|
668
|
+
> For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
|
|
669
669
|
|
|
670
670
|
For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
|
|
671
671
|
|
|
@@ -8,7 +8,12 @@ import { parseDataUrl } from "../../utils/url";
|
|
|
8
8
|
export function convertToTextCallOptions(params) {
|
|
9
9
|
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
10
10
|
Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
|
|
11
|
-
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention,
|
|
11
|
+
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control));
|
|
12
|
+
if (extra_body) {
|
|
13
|
+
for (const v of Object.values(extra_body)) {
|
|
14
|
+
Object.assign(rest, v);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
12
17
|
const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
|
|
13
18
|
return {
|
|
14
19
|
messages: convertToModelMessages(messages),
|
|
@@ -337,29 +342,25 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
|
|
|
337
342
|
}
|
|
338
343
|
return out;
|
|
339
344
|
}
|
|
340
|
-
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention,
|
|
345
|
+
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
|
|
341
346
|
const out = {};
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
if (!syncedCacheControl && syncedCacheRetention) {
|
|
350
|
-
syncedCacheControl = {
|
|
347
|
+
let retention = prompt_cache_retention;
|
|
348
|
+
if (!retention && cache_control?.ttl) {
|
|
349
|
+
retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
|
|
350
|
+
}
|
|
351
|
+
let control = cache_control;
|
|
352
|
+
if (!control && retention) {
|
|
353
|
+
control = {
|
|
351
354
|
type: "ephemeral",
|
|
352
|
-
ttl:
|
|
355
|
+
ttl: retention === "24h" ? "24h" : "5m",
|
|
353
356
|
};
|
|
354
357
|
}
|
|
355
|
-
if (
|
|
356
|
-
out["prompt_cache_key"] =
|
|
357
|
-
if (
|
|
358
|
-
out["prompt_cache_retention"] =
|
|
359
|
-
if (
|
|
360
|
-
out["
|
|
361
|
-
if (syncedCacheControl)
|
|
362
|
-
out["cache_control"] = syncedCacheControl;
|
|
358
|
+
if (prompt_cache_key)
|
|
359
|
+
out["prompt_cache_key"] = prompt_cache_key;
|
|
360
|
+
if (retention)
|
|
361
|
+
out["prompt_cache_retention"] = retention;
|
|
362
|
+
if (control)
|
|
363
|
+
out["cache_control"] = control;
|
|
363
364
|
return out;
|
|
364
365
|
}
|
|
365
366
|
// --- Response Flow ---
|
|
@@ -427,7 +427,6 @@ export declare const ChatCompletionsReasoningEffortSchema: z.ZodEnum<{
|
|
|
427
427
|
minimal: "minimal";
|
|
428
428
|
medium: "medium";
|
|
429
429
|
xhigh: "xhigh";
|
|
430
|
-
max: "max";
|
|
431
430
|
}>;
|
|
432
431
|
export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
|
|
433
432
|
export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
|
|
@@ -439,7 +438,6 @@ export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
|
|
|
439
438
|
minimal: "minimal";
|
|
440
439
|
medium: "medium";
|
|
441
440
|
xhigh: "xhigh";
|
|
442
|
-
max: "max";
|
|
443
441
|
}>>;
|
|
444
442
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
445
443
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -651,18 +649,12 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
651
649
|
minimal: "minimal";
|
|
652
650
|
medium: "medium";
|
|
653
651
|
xhigh: "xhigh";
|
|
654
|
-
max: "max";
|
|
655
652
|
}>>;
|
|
656
653
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
657
654
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
658
655
|
in_memory: "in_memory";
|
|
659
656
|
"24h": "24h";
|
|
660
657
|
}>>;
|
|
661
|
-
extra_body: z.ZodOptional<z.ZodObject<{
|
|
662
|
-
google: z.ZodOptional<z.ZodObject<{
|
|
663
|
-
cached_content: z.ZodOptional<z.ZodString>;
|
|
664
|
-
}, z.core.$strip>>;
|
|
665
|
-
}, z.core.$strip>>;
|
|
666
658
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
667
659
|
type: z.ZodLiteral<"ephemeral">;
|
|
668
660
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -676,11 +668,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
676
668
|
minimal: "minimal";
|
|
677
669
|
medium: "medium";
|
|
678
670
|
xhigh: "xhigh";
|
|
679
|
-
max: "max";
|
|
680
671
|
}>>;
|
|
681
672
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
682
673
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
683
674
|
}, z.core.$strip>>;
|
|
675
|
+
extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
684
676
|
}, z.core.$strip>;
|
|
685
677
|
export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
|
|
686
678
|
export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
@@ -863,18 +855,12 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
863
855
|
minimal: "minimal";
|
|
864
856
|
medium: "medium";
|
|
865
857
|
xhigh: "xhigh";
|
|
866
|
-
max: "max";
|
|
867
858
|
}>>;
|
|
868
859
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
869
860
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
870
861
|
in_memory: "in_memory";
|
|
871
862
|
"24h": "24h";
|
|
872
863
|
}>>;
|
|
873
|
-
extra_body: z.ZodOptional<z.ZodObject<{
|
|
874
|
-
google: z.ZodOptional<z.ZodObject<{
|
|
875
|
-
cached_content: z.ZodOptional<z.ZodString>;
|
|
876
|
-
}, z.core.$strip>>;
|
|
877
|
-
}, z.core.$strip>>;
|
|
878
864
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
879
865
|
type: z.ZodLiteral<"ephemeral">;
|
|
880
866
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -888,11 +874,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
888
874
|
minimal: "minimal";
|
|
889
875
|
medium: "medium";
|
|
890
876
|
xhigh: "xhigh";
|
|
891
|
-
max: "max";
|
|
892
877
|
}>>;
|
|
893
878
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
894
879
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
895
880
|
}, z.core.$strip>>;
|
|
881
|
+
extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
896
882
|
model: z.ZodString;
|
|
897
883
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
898
884
|
}, z.core.$loose>;
|
|
@@ -1029,7 +1015,7 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
1029
1015
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1030
1016
|
}, z.core.$strip>>;
|
|
1031
1017
|
}, z.core.$strip>>;
|
|
1032
|
-
provider_metadata: z.ZodOptional<z.ZodUnknown
|
|
1018
|
+
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1033
1019
|
}, z.core.$strip>;
|
|
1034
1020
|
export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
|
|
1035
1021
|
export declare const ChatCompletionsToolCallDeltaSchema: z.ZodObject<{
|
|
@@ -1196,7 +1182,7 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1196
1182
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1197
1183
|
}, z.core.$strip>>;
|
|
1198
1184
|
}, z.core.$strip>>;
|
|
1199
|
-
provider_metadata: z.ZodOptional<z.ZodUnknown
|
|
1185
|
+
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1200
1186
|
}, z.core.$strip>;
|
|
1201
1187
|
export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
|
|
1202
1188
|
export {};
|
|
@@ -161,14 +161,11 @@ export const ChatCompletionsToolChoiceSchema = z.union([
|
|
|
161
161
|
]);
|
|
162
162
|
export const ChatCompletionsReasoningEffortSchema = z.enum([
|
|
163
163
|
"none",
|
|
164
|
-
// Extension origin: Gemini
|
|
165
164
|
"minimal",
|
|
166
165
|
"low",
|
|
167
166
|
"medium",
|
|
168
167
|
"high",
|
|
169
168
|
"xhigh",
|
|
170
|
-
// Extension origin: Anthropic
|
|
171
|
-
"max",
|
|
172
169
|
]);
|
|
173
170
|
export const ChatCompletionsReasoningConfigSchema = z.object({
|
|
174
171
|
enabled: z.optional(z.boolean()),
|
|
@@ -212,22 +209,16 @@ const ChatCompletionsInputsSchema = z.object({
|
|
|
212
209
|
reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
|
|
213
210
|
prompt_cache_key: z.string().optional(),
|
|
214
211
|
prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
|
|
215
|
-
// Extension origin: Gemini explicit cache handle
|
|
216
|
-
// FUTURE: generalize extra_body handling
|
|
217
|
-
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
218
|
-
extra_body: z
|
|
219
|
-
.object({
|
|
220
|
-
google: z
|
|
221
|
-
.object({
|
|
222
|
-
cached_content: z.string().optional().meta({ extension: true }),
|
|
223
|
-
})
|
|
224
|
-
.optional(),
|
|
225
|
-
})
|
|
226
|
-
.optional(),
|
|
227
212
|
// Extension origin: OpenRouter/Vercel/Anthropic
|
|
228
213
|
cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
|
|
229
214
|
// Extension origin: OpenRouter
|
|
230
215
|
reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
|
|
216
|
+
// Extension origin: Gemini extra_body
|
|
217
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
|
|
218
|
+
extra_body: z
|
|
219
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
220
|
+
.optional()
|
|
221
|
+
.meta({ extension: true }),
|
|
231
222
|
});
|
|
232
223
|
export const ChatCompletionsBodySchema = z.looseObject({
|
|
233
224
|
model: z.string(),
|
|
@@ -274,7 +265,10 @@ export const ChatCompletionsSchema = z.object({
|
|
|
274
265
|
choices: z.array(ChatCompletionsChoiceSchema),
|
|
275
266
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
276
267
|
// Extension origin: Vercel AI Gateway
|
|
277
|
-
provider_metadata: z
|
|
268
|
+
provider_metadata: z
|
|
269
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
270
|
+
.optional()
|
|
271
|
+
.meta({ extension: true }),
|
|
278
272
|
});
|
|
279
273
|
export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
|
|
280
274
|
index: z.int().nonnegative(),
|
|
@@ -297,5 +291,8 @@ export const ChatCompletionsChunkSchema = z.object({
|
|
|
297
291
|
choices: z.array(ChatCompletionsChoiceDeltaSchema),
|
|
298
292
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
299
293
|
// Extension origin: Vercel AI Gateway
|
|
300
|
-
provider_metadata: z
|
|
294
|
+
provider_metadata: z
|
|
295
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
296
|
+
.optional()
|
|
297
|
+
.meta({ extension: true }),
|
|
301
298
|
});
|
|
@@ -7,6 +7,11 @@ export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
|
|
|
7
7
|
Object.assign(attrs, {
|
|
8
8
|
"gen_ai.embeddings.dimension.count": inputs.dimensions,
|
|
9
9
|
});
|
|
10
|
+
if (inputs.metadata) {
|
|
11
|
+
for (const key in inputs.metadata) {
|
|
12
|
+
attrs[`gen_ai.request.metadata.${key}`] = inputs.metadata[key];
|
|
13
|
+
}
|
|
14
|
+
}
|
|
10
15
|
}
|
|
11
16
|
return attrs;
|
|
12
17
|
};
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
export declare const EmbeddingsDimensionsSchema: z.ZodInt;
|
|
3
|
+
export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
|
|
4
|
+
export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
|
|
5
|
+
export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
|
|
2
6
|
export declare const EmbeddingsInputsSchema: z.ZodObject<{
|
|
3
7
|
input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
|
|
4
8
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
9
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
5
10
|
}, z.core.$strip>;
|
|
6
11
|
export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
|
|
7
12
|
export declare const EmbeddingsBodySchema: z.ZodObject<{
|
|
8
13
|
input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
|
|
9
14
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
15
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
10
16
|
model: z.ZodString;
|
|
11
17
|
}, z.core.$loose>;
|
|
12
18
|
export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
|
|
3
|
+
export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
|
|
2
4
|
export const EmbeddingsInputsSchema = z.object({
|
|
3
5
|
input: z.union([z.string(), z.array(z.string())]),
|
|
4
|
-
dimensions:
|
|
6
|
+
dimensions: EmbeddingsDimensionsSchema.optional(),
|
|
7
|
+
metadata: EmbeddingsMetadataSchema.optional(),
|
|
5
8
|
});
|
|
6
9
|
export const EmbeddingsBodySchema = z.looseObject({
|
|
7
10
|
model: z.string(),
|
package/dist/middleware/utils.js
CHANGED
|
@@ -10,7 +10,9 @@ export const novaDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["nova"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["nova"] ??= {});
|
|
14
|
+
// @ts-expect-error AI SDK does the value checking for us
|
|
15
|
+
target.embeddingDimension = dimensions;
|
|
14
16
|
delete unknown["dimensions"];
|
|
15
17
|
return params;
|
|
16
18
|
},
|
|
@@ -26,7 +28,6 @@ function mapNovaEffort(effort) {
|
|
|
26
28
|
return "medium";
|
|
27
29
|
case "high":
|
|
28
30
|
case "xhigh":
|
|
29
|
-
case "max":
|
|
30
31
|
return "high";
|
|
31
32
|
}
|
|
32
33
|
}
|
|
@@ -42,18 +43,18 @@ export const novaReasoningMiddleware = {
|
|
|
42
43
|
return params;
|
|
43
44
|
const target = (params.providerOptions["amazon"] ??= {});
|
|
44
45
|
if (!reasoning.enabled) {
|
|
45
|
-
target
|
|
46
|
+
target.reasoningConfig = { type: "disabled" };
|
|
46
47
|
}
|
|
47
48
|
else if (reasoning.effort) {
|
|
48
49
|
// FUTURE: warn if mapNovaEffort modified the effort
|
|
49
|
-
target
|
|
50
|
+
target.reasoningConfig = {
|
|
50
51
|
type: "enabled",
|
|
51
52
|
maxReasoningEffort: mapNovaEffort(reasoning.effort),
|
|
52
53
|
};
|
|
53
54
|
}
|
|
54
55
|
else {
|
|
55
56
|
// FUTURE: warn if reasoning.max_tokens (unsupported) was ignored
|
|
56
|
-
target
|
|
57
|
+
target.reasoningConfig = { type: "enabled" };
|
|
57
58
|
}
|
|
58
59
|
delete unknown["reasoning"];
|
|
59
60
|
return params;
|
|
@@ -22,7 +22,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
22
22
|
case "high":
|
|
23
23
|
return "high";
|
|
24
24
|
case "xhigh":
|
|
25
|
-
case "max":
|
|
26
25
|
return "max";
|
|
27
26
|
}
|
|
28
27
|
}
|
|
@@ -35,7 +34,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
35
34
|
return "medium";
|
|
36
35
|
case "high":
|
|
37
36
|
case "xhigh":
|
|
38
|
-
case "max":
|
|
39
37
|
return "high";
|
|
40
38
|
}
|
|
41
39
|
}
|
|
@@ -66,41 +64,42 @@ export const claudeReasoningMiddleware = {
|
|
|
66
64
|
const modelId = model.modelId;
|
|
67
65
|
const clampedMaxTokens = reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
|
|
68
66
|
if (!reasoning.enabled) {
|
|
69
|
-
target
|
|
67
|
+
target.thinking = { type: "disabled" };
|
|
70
68
|
}
|
|
71
69
|
else if (reasoning.effort) {
|
|
72
70
|
if (isClaude4(modelId)) {
|
|
73
|
-
target
|
|
71
|
+
target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
74
72
|
}
|
|
75
73
|
if (isOpus46(modelId)) {
|
|
76
|
-
target
|
|
77
|
-
?
|
|
74
|
+
target.thinking = clampedMaxTokens
|
|
75
|
+
? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
|
|
76
|
+
{ type: "adaptive", budgetTokens: clampedMaxTokens }
|
|
78
77
|
: { type: "adaptive" };
|
|
79
78
|
}
|
|
80
79
|
else if (isSonnet46(modelId)) {
|
|
81
|
-
target
|
|
80
|
+
target.thinking = clampedMaxTokens
|
|
82
81
|
? { type: "enabled", budgetTokens: clampedMaxTokens }
|
|
83
82
|
: { type: "adaptive" };
|
|
84
83
|
}
|
|
85
84
|
else {
|
|
86
|
-
target
|
|
85
|
+
target.thinking = { type: "enabled" };
|
|
87
86
|
if (clampedMaxTokens) {
|
|
88
|
-
target
|
|
87
|
+
target.thinking.budgetTokens = clampedMaxTokens;
|
|
89
88
|
}
|
|
90
89
|
else {
|
|
91
90
|
// FUTURE: warn that reasoning.max_tokens was computed
|
|
92
|
-
target
|
|
91
|
+
target.thinking.budgetTokens = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
|
|
93
92
|
}
|
|
94
93
|
}
|
|
95
94
|
}
|
|
96
95
|
else if (clampedMaxTokens) {
|
|
97
|
-
target
|
|
96
|
+
target.thinking = {
|
|
98
97
|
type: "enabled",
|
|
99
98
|
budgetTokens: clampedMaxTokens,
|
|
100
99
|
};
|
|
101
100
|
}
|
|
102
101
|
else {
|
|
103
|
-
target
|
|
102
|
+
target.thinking = { type: "enabled" };
|
|
104
103
|
}
|
|
105
104
|
delete unknown["reasoning"];
|
|
106
105
|
return params;
|
|
@@ -116,7 +115,8 @@ export const claudePromptCachingMiddleware = {
|
|
|
116
115
|
return params;
|
|
117
116
|
const cacheControl = unknown["cache_control"];
|
|
118
117
|
if (cacheControl) {
|
|
119
|
-
(params.providerOptions["anthropic"] ??= {})
|
|
118
|
+
(params.providerOptions["anthropic"] ??= {}).cacheControl =
|
|
119
|
+
cacheControl;
|
|
120
120
|
}
|
|
121
121
|
delete unknown["cache_control"];
|
|
122
122
|
return params;
|
|
@@ -17,7 +17,9 @@ export const cohereDimensionsMiddleware = {
|
|
|
17
17
|
const dimensions = unknown["dimensions"];
|
|
18
18
|
if (!dimensions)
|
|
19
19
|
return params;
|
|
20
|
-
(params.providerOptions["cohere"] ??= {})
|
|
20
|
+
const target = (params.providerOptions["cohere"] ??= {});
|
|
21
|
+
// @ts-expect-error AI SDK does the value checking for us
|
|
22
|
+
target.outputDimension = dimensions;
|
|
21
23
|
delete unknown["dimensions"];
|
|
22
24
|
return params;
|
|
23
25
|
},
|
|
@@ -35,20 +37,20 @@ export const cohereReasoningMiddleware = {
|
|
|
35
37
|
return params;
|
|
36
38
|
const target = (params.providerOptions["cohere"] ??= {});
|
|
37
39
|
if (!reasoning.enabled) {
|
|
38
|
-
target
|
|
40
|
+
target.thinking = { type: "disabled" };
|
|
39
41
|
}
|
|
40
42
|
else if (reasoning.max_tokens) {
|
|
41
|
-
target
|
|
43
|
+
target.thinking = { type: "enabled", tokenBudget: reasoning.max_tokens };
|
|
42
44
|
}
|
|
43
45
|
else if (reasoning.effort) {
|
|
44
46
|
// FUTURE: warn that reasoning.max_tokens was computed
|
|
45
|
-
target
|
|
47
|
+
target.thinking = {
|
|
46
48
|
type: "enabled",
|
|
47
49
|
tokenBudget: calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? COHERE_MAX_OUTPUT_TOKENS, 1024),
|
|
48
50
|
};
|
|
49
51
|
}
|
|
50
52
|
else {
|
|
51
|
-
target
|
|
53
|
+
target.thinking = { type: "enabled" };
|
|
52
54
|
}
|
|
53
55
|
delete unknown["reasoning"];
|
|
54
56
|
return params;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
|
|
2
2
|
import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
|
|
3
3
|
export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
|
|
4
|
-
export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string):
|
|
4
|
+
export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
|
|
5
5
|
export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
|
|
6
6
|
export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
|
|
7
7
|
export declare const geminiReasoningMiddleware: LanguageModelMiddleware;
|
|
@@ -11,14 +11,15 @@ export const geminiDimensionsMiddleware = {
|
|
|
11
11
|
const dimensions = unknown["dimensions"];
|
|
12
12
|
if (!dimensions)
|
|
13
13
|
return params;
|
|
14
|
-
(params.providerOptions["google"] ??= {})
|
|
14
|
+
const target = (params.providerOptions["google"] ??= {});
|
|
15
|
+
target.outputDimensionality = dimensions;
|
|
15
16
|
delete unknown["dimensions"];
|
|
16
17
|
return params;
|
|
17
18
|
},
|
|
18
19
|
};
|
|
19
20
|
// https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
|
|
20
21
|
export function mapGeminiReasoningEffort(effort, modelId) {
|
|
21
|
-
if (modelId.includes("
|
|
22
|
+
if (modelId.includes("pro")) {
|
|
22
23
|
switch (effort) {
|
|
23
24
|
case "none":
|
|
24
25
|
case "minimal":
|
|
@@ -28,26 +29,22 @@ export function mapGeminiReasoningEffort(effort, modelId) {
|
|
|
28
29
|
return "medium";
|
|
29
30
|
case "high":
|
|
30
31
|
case "xhigh":
|
|
31
|
-
case "max":
|
|
32
32
|
return "high";
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
return "high";
|
|
48
|
-
}
|
|
35
|
+
// Flash
|
|
36
|
+
switch (effort) {
|
|
37
|
+
case "none":
|
|
38
|
+
case "minimal":
|
|
39
|
+
return "minimal";
|
|
40
|
+
case "low":
|
|
41
|
+
return "low";
|
|
42
|
+
case "medium":
|
|
43
|
+
return "medium";
|
|
44
|
+
case "high":
|
|
45
|
+
case "xhigh":
|
|
46
|
+
return "high";
|
|
49
47
|
}
|
|
50
|
-
return effort;
|
|
51
48
|
}
|
|
52
49
|
export const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
|
|
53
50
|
export const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
|
|
@@ -58,6 +55,9 @@ export const geminiReasoningMiddleware = {
|
|
|
58
55
|
const unknown = params.providerOptions?.["unknown"];
|
|
59
56
|
if (!unknown)
|
|
60
57
|
return params;
|
|
58
|
+
// If thinking options exist, just pass through
|
|
59
|
+
if (unknown["thinking_config"])
|
|
60
|
+
return params;
|
|
61
61
|
const reasoning = unknown["reasoning"];
|
|
62
62
|
if (!reasoning)
|
|
63
63
|
return params;
|
|
@@ -65,19 +65,19 @@ export const geminiReasoningMiddleware = {
|
|
|
65
65
|
const modelId = model.modelId;
|
|
66
66
|
if (modelId.includes("gemini-2")) {
|
|
67
67
|
const is25Pro = modelId.includes("gemini-2.5-pro");
|
|
68
|
-
target
|
|
68
|
+
target.thinkingConfig = {
|
|
69
69
|
thinkingBudget: reasoning.max_tokens ??
|
|
70
70
|
calculateReasoningBudgetFromEffort(reasoning.effort ?? "none", params.maxOutputTokens ?? GEMINI_DEFAULT_MAX_OUTPUT_TOKENS, is25Pro ? GEMINI_2_5_PRO_MIN_THINKING_BUDGET : 0),
|
|
71
71
|
};
|
|
72
72
|
}
|
|
73
73
|
else if (modelId.includes("gemini-3") && reasoning.effort) {
|
|
74
|
-
target
|
|
74
|
+
target.thinkingConfig = {
|
|
75
75
|
thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
|
|
76
76
|
};
|
|
77
77
|
// FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
|
|
78
78
|
}
|
|
79
|
-
(target
|
|
80
|
-
|
|
79
|
+
const thinkingConfig = (target.thinkingConfig ??= {});
|
|
80
|
+
thinkingConfig.includeThoughts = reasoning.enabled ? !reasoning.exclude : false;
|
|
81
81
|
delete unknown["reasoning"];
|
|
82
82
|
return params;
|
|
83
83
|
},
|
|
@@ -91,9 +91,13 @@ export const geminiPromptCachingMiddleware = {
|
|
|
91
91
|
const unknown = params.providerOptions?.["unknown"];
|
|
92
92
|
if (!unknown)
|
|
93
93
|
return params;
|
|
94
|
-
|
|
95
|
-
if (
|
|
96
|
-
|
|
94
|
+
// If cached_content options exist, just pass through
|
|
95
|
+
if (unknown["cached_content"])
|
|
96
|
+
return params;
|
|
97
|
+
const promptCacheKey = unknown["prompt_cache_key"];
|
|
98
|
+
if (promptCacheKey) {
|
|
99
|
+
(params.providerOptions["google"] ??= {}).cachedContent =
|
|
100
|
+
promptCacheKey;
|
|
97
101
|
}
|
|
98
102
|
delete unknown["cached_content"];
|
|
99
103
|
return params;
|
|
@@ -10,7 +10,8 @@ export const openAIDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["openai"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["openai"] ??= {});
|
|
14
|
+
target.dimensions = dimensions;
|
|
14
15
|
delete unknown["dimensions"];
|
|
15
16
|
return params;
|
|
16
17
|
},
|
|
@@ -27,7 +28,6 @@ function mapGptOssReasoningEffort(effort) {
|
|
|
27
28
|
return "medium";
|
|
28
29
|
case "high":
|
|
29
30
|
case "xhigh":
|
|
30
|
-
case "max":
|
|
31
31
|
return "high";
|
|
32
32
|
}
|
|
33
33
|
}
|
|
@@ -45,13 +45,13 @@ export const openAIReasoningMiddleware = {
|
|
|
45
45
|
const isGptOss = model.modelId.includes("gpt-oss");
|
|
46
46
|
if (isGptOss) {
|
|
47
47
|
// FUTURE: warn that unable to disable reasoning for gpt-oss models
|
|
48
|
-
target
|
|
48
|
+
target.reasoningEffort = mapGptOssReasoningEffort(reasoning.effort);
|
|
49
49
|
}
|
|
50
50
|
else if (reasoning.enabled === false) {
|
|
51
|
-
target
|
|
51
|
+
target.reasoningEffort = "none";
|
|
52
52
|
}
|
|
53
53
|
else if (reasoning.effort) {
|
|
54
|
-
target
|
|
54
|
+
target.reasoningEffort = reasoning.effort;
|
|
55
55
|
}
|
|
56
56
|
// FUTURE: warn that reasoning.max_tokens (not supported) was ignored
|
|
57
57
|
delete unknown["reasoning"];
|
|
@@ -71,9 +71,9 @@ export const openAIPromptCachingMiddleware = {
|
|
|
71
71
|
if (key || retention) {
|
|
72
72
|
const target = (params.providerOptions["openai"] ??= {});
|
|
73
73
|
if (key)
|
|
74
|
-
target
|
|
74
|
+
target.promptCacheKey = key;
|
|
75
75
|
if (retention)
|
|
76
|
-
target
|
|
76
|
+
target.promptCacheRetention = retention;
|
|
77
77
|
}
|
|
78
78
|
delete unknown["prompt_cache_key"];
|
|
79
79
|
delete unknown["prompt_cache_retention"];
|
|
@@ -10,7 +10,8 @@ export const voyageDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["voyage"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["voyage"] ??= {});
|
|
14
|
+
target.outputDimension = dimensions;
|
|
14
15
|
delete unknown["dimensions"];
|
|
15
16
|
return params;
|
|
16
17
|
},
|
|
@@ -7,14 +7,15 @@ export const bedrockGptReasoningMiddleware = {
|
|
|
7
7
|
if (!model.modelId.includes("gpt"))
|
|
8
8
|
return params;
|
|
9
9
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
10
|
-
if (!bedrock
|
|
10
|
+
if (!bedrock)
|
|
11
11
|
return params;
|
|
12
|
-
const effort = bedrock
|
|
12
|
+
const effort = bedrock.reasoningEffort;
|
|
13
13
|
if (effort === undefined)
|
|
14
14
|
return params;
|
|
15
|
-
const target = (bedrock
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
const target = (bedrock.reasoningConfig ??= {});
|
|
16
|
+
// @ts-expect-error AI SDK does accept this
|
|
17
|
+
target.maxReasoningEffort = effort;
|
|
18
|
+
delete bedrock.reasoningEffort;
|
|
18
19
|
return params;
|
|
19
20
|
},
|
|
20
21
|
};
|
|
@@ -25,28 +26,25 @@ export const bedrockClaudeReasoningMiddleware = {
|
|
|
25
26
|
if (!model.modelId.includes("claude"))
|
|
26
27
|
return params;
|
|
27
28
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
28
|
-
if (!bedrock
|
|
29
|
+
if (!bedrock)
|
|
29
30
|
return params;
|
|
30
|
-
const thinking = bedrock
|
|
31
|
-
const effort = bedrock
|
|
31
|
+
const thinking = bedrock.thinking;
|
|
32
|
+
const effort = bedrock.effort;
|
|
32
33
|
if (!thinking && effort === undefined)
|
|
33
34
|
return params;
|
|
34
|
-
const target = (bedrock
|
|
35
|
+
const target = (bedrock.reasoningConfig ??= {});
|
|
35
36
|
if (thinking && typeof thinking === "object") {
|
|
36
|
-
|
|
37
|
-
if (
|
|
38
|
-
target
|
|
39
|
-
}
|
|
40
|
-
if (thinkingOptions["budgetTokens"] !== undefined) {
|
|
41
|
-
target["budgetTokens"] = thinkingOptions["budgetTokens"];
|
|
37
|
+
target.type = thinking.type;
|
|
38
|
+
if ("budgetTokens" in thinking && thinking.budgetTokens !== undefined) {
|
|
39
|
+
target.budgetTokens = thinking.budgetTokens;
|
|
42
40
|
}
|
|
43
41
|
}
|
|
44
42
|
// FUTURE: bedrock currently does not support "effort" for other 4.x models
|
|
45
43
|
if (effort !== undefined && isClaude46(model.modelId)) {
|
|
46
|
-
target
|
|
44
|
+
target.maxReasoningEffort = effort;
|
|
47
45
|
}
|
|
48
|
-
delete bedrock
|
|
49
|
-
delete bedrock
|
|
46
|
+
delete bedrock.thinking;
|
|
47
|
+
delete bedrock.effort;
|
|
50
48
|
return params;
|
|
51
49
|
},
|
|
52
50
|
};
|
|
@@ -79,18 +77,18 @@ export const bedrockPromptCachingMiddleware = {
|
|
|
79
77
|
delete entryBedrock["cacheControl"];
|
|
80
78
|
};
|
|
81
79
|
for (const message of params.prompt) {
|
|
82
|
-
processCacheControl(message
|
|
83
|
-
if (!Array.isArray(message
|
|
80
|
+
processCacheControl(message.providerOptions);
|
|
81
|
+
if (!Array.isArray(message.content))
|
|
84
82
|
continue;
|
|
85
|
-
for (const part of message
|
|
86
|
-
processCacheControl(part
|
|
83
|
+
for (const part of message.content) {
|
|
84
|
+
processCacheControl(part.providerOptions);
|
|
87
85
|
}
|
|
88
86
|
lastCacheableBlock = message;
|
|
89
87
|
}
|
|
90
88
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
91
89
|
const cacheControl = bedrock?.["cacheControl"];
|
|
92
90
|
if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
|
|
93
|
-
((lastCacheableBlock
|
|
91
|
+
((lastCacheableBlock.providerOptions ??= {})["bedrock"] ??= {})["cachePoint"] =
|
|
94
92
|
toBedrockCachePoint(model.modelId, cacheControl);
|
|
95
93
|
}
|
|
96
94
|
delete bedrock?.["cacheControl"];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.6.2
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -179,6 +179,7 @@
|
|
|
179
179
|
"@ai-sdk/amazon-bedrock": "^4.0.77",
|
|
180
180
|
"@ai-sdk/anthropic": "^3.0.58",
|
|
181
181
|
"@ai-sdk/cohere": "^3.0.25",
|
|
182
|
+
"@ai-sdk/google": "^3.0.43",
|
|
182
183
|
"@ai-sdk/google-vertex": "^4.0.80",
|
|
183
184
|
"@ai-sdk/groq": "^3.0.29",
|
|
184
185
|
"@ai-sdk/openai": "^3.0.41",
|
|
@@ -196,6 +197,9 @@
|
|
|
196
197
|
"@ai-sdk/cohere": {
|
|
197
198
|
"optional": true
|
|
198
199
|
},
|
|
200
|
+
"@ai-sdk/google": {
|
|
201
|
+
"optional": true
|
|
202
|
+
},
|
|
199
203
|
"@ai-sdk/google-vertex": {
|
|
200
204
|
"optional": true
|
|
201
205
|
},
|