@hebo-ai/gateway 0.6.2-rc1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -8
- package/dist/config.js +28 -1
- package/dist/endpoints/chat-completions/converters.d.ts +5 -5
- package/dist/endpoints/chat-completions/converters.js +86 -49
- package/dist/endpoints/chat-completions/handler.js +4 -4
- package/dist/endpoints/chat-completions/otel.d.ts +1 -1
- package/dist/endpoints/chat-completions/otel.js +20 -18
- package/dist/endpoints/chat-completions/schema.d.ts +47 -23
- package/dist/endpoints/chat-completions/schema.js +24 -17
- package/dist/endpoints/embeddings/handler.js +2 -2
- package/dist/endpoints/embeddings/otel.d.ts +2 -2
- package/dist/endpoints/embeddings/otel.js +7 -2
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/handler.js +2 -2
- package/dist/errors/openai.d.ts +1 -6
- package/dist/lifecycle.d.ts +3 -2
- package/dist/lifecycle.js +4 -6
- package/dist/middleware/utils.js +0 -1
- package/dist/models/amazon/middleware.js +6 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/google/presets.d.ts +28 -0
- package/dist/models/google/presets.js +7 -1
- package/dist/models/openai/middleware.js +7 -7
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.d.ts +1 -0
- package/dist/providers/bedrock/middleware.js +54 -23
- package/dist/providers/groq/index.d.ts +1 -0
- package/dist/providers/groq/index.js +1 -0
- package/dist/providers/groq/middleware.d.ts +2 -0
- package/dist/providers/groq/middleware.js +31 -0
- package/dist/providers/vertex/index.d.ts +1 -0
- package/dist/providers/vertex/index.js +1 -0
- package/dist/providers/vertex/middleware.d.ts +2 -0
- package/dist/providers/vertex/middleware.js +47 -0
- package/dist/types.d.ts +25 -4
- package/dist/types.js +1 -0
- package/dist/utils/response.d.ts +4 -1
- package/dist/utils/response.js +5 -20
- package/dist/utils/stream.d.ts +9 -0
- package/dist/utils/stream.js +100 -0
- package/package.json +5 -1
- package/dist/telemetry/stream.d.ts +0 -3
- package/dist/telemetry/stream.js +0 -58
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
import type { SseErrorFrame, SseFrame } from "../../utils/stream";
|
|
2
3
|
export declare const ChatCompletionsCacheControlSchema: z.ZodObject<{
|
|
3
4
|
type: z.ZodLiteral<"ephemeral">;
|
|
4
5
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -427,7 +428,6 @@ export declare const ChatCompletionsReasoningEffortSchema: z.ZodEnum<{
|
|
|
427
428
|
minimal: "minimal";
|
|
428
429
|
medium: "medium";
|
|
429
430
|
xhigh: "xhigh";
|
|
430
|
-
max: "max";
|
|
431
431
|
}>;
|
|
432
432
|
export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
|
|
433
433
|
export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
|
|
@@ -439,7 +439,6 @@ export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
|
|
|
439
439
|
minimal: "minimal";
|
|
440
440
|
medium: "medium";
|
|
441
441
|
xhigh: "xhigh";
|
|
442
|
-
max: "max";
|
|
443
442
|
}>>;
|
|
444
443
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
445
444
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -471,6 +470,14 @@ export declare const ChatCompletionsResponseFormatSchema: z.ZodDiscriminatedUnio
|
|
|
471
470
|
export type ChatCompletionsResponseFormat = z.infer<typeof ChatCompletionsResponseFormatSchema>;
|
|
472
471
|
export declare const ChatCompletionsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
|
|
473
472
|
export type ChatCompletionsMetadata = z.infer<typeof ChatCompletionsMetadataSchema>;
|
|
473
|
+
export declare const ChatCompletionsServiceTierSchema: z.ZodEnum<{
|
|
474
|
+
auto: "auto";
|
|
475
|
+
default: "default";
|
|
476
|
+
flex: "flex";
|
|
477
|
+
scale: "scale";
|
|
478
|
+
priority: "priority";
|
|
479
|
+
}>;
|
|
480
|
+
export type ChatCompletionsServiceTier = z.infer<typeof ChatCompletionsServiceTierSchema>;
|
|
474
481
|
declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
475
482
|
messages: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
476
483
|
role: z.ZodLiteral<"system">;
|
|
@@ -651,18 +658,19 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
651
658
|
minimal: "minimal";
|
|
652
659
|
medium: "medium";
|
|
653
660
|
xhigh: "xhigh";
|
|
654
|
-
|
|
661
|
+
}>>;
|
|
662
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
663
|
+
auto: "auto";
|
|
664
|
+
default: "default";
|
|
665
|
+
flex: "flex";
|
|
666
|
+
scale: "scale";
|
|
667
|
+
priority: "priority";
|
|
655
668
|
}>>;
|
|
656
669
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
657
670
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
658
671
|
in_memory: "in_memory";
|
|
659
672
|
"24h": "24h";
|
|
660
673
|
}>>;
|
|
661
|
-
extra_body: z.ZodOptional<z.ZodObject<{
|
|
662
|
-
google: z.ZodOptional<z.ZodObject<{
|
|
663
|
-
cached_content: z.ZodOptional<z.ZodString>;
|
|
664
|
-
}, z.core.$strip>>;
|
|
665
|
-
}, z.core.$strip>>;
|
|
666
674
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
667
675
|
type: z.ZodLiteral<"ephemeral">;
|
|
668
676
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -676,11 +684,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
676
684
|
minimal: "minimal";
|
|
677
685
|
medium: "medium";
|
|
678
686
|
xhigh: "xhigh";
|
|
679
|
-
max: "max";
|
|
680
687
|
}>>;
|
|
681
688
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
682
689
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
683
690
|
}, z.core.$strip>>;
|
|
691
|
+
extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
684
692
|
}, z.core.$strip>;
|
|
685
693
|
export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
|
|
686
694
|
export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
@@ -863,18 +871,19 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
863
871
|
minimal: "minimal";
|
|
864
872
|
medium: "medium";
|
|
865
873
|
xhigh: "xhigh";
|
|
866
|
-
|
|
874
|
+
}>>;
|
|
875
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
876
|
+
auto: "auto";
|
|
877
|
+
default: "default";
|
|
878
|
+
flex: "flex";
|
|
879
|
+
scale: "scale";
|
|
880
|
+
priority: "priority";
|
|
867
881
|
}>>;
|
|
868
882
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
869
883
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
870
884
|
in_memory: "in_memory";
|
|
871
885
|
"24h": "24h";
|
|
872
886
|
}>>;
|
|
873
|
-
extra_body: z.ZodOptional<z.ZodObject<{
|
|
874
|
-
google: z.ZodOptional<z.ZodObject<{
|
|
875
|
-
cached_content: z.ZodOptional<z.ZodString>;
|
|
876
|
-
}, z.core.$strip>>;
|
|
877
|
-
}, z.core.$strip>>;
|
|
878
887
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
879
888
|
type: z.ZodLiteral<"ephemeral">;
|
|
880
889
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -888,19 +897,19 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
888
897
|
minimal: "minimal";
|
|
889
898
|
medium: "medium";
|
|
890
899
|
xhigh: "xhigh";
|
|
891
|
-
max: "max";
|
|
892
900
|
}>>;
|
|
893
901
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
894
902
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
895
903
|
}, z.core.$strip>>;
|
|
904
|
+
extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
896
905
|
model: z.ZodString;
|
|
897
906
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
898
907
|
}, z.core.$loose>;
|
|
899
908
|
export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
|
|
900
909
|
export declare const ChatCompletionsFinishReasonSchema: z.ZodEnum<{
|
|
901
|
-
tool_calls: "tool_calls";
|
|
902
910
|
stop: "stop";
|
|
903
911
|
length: "length";
|
|
912
|
+
tool_calls: "tool_calls";
|
|
904
913
|
content_filter: "content_filter";
|
|
905
914
|
}>;
|
|
906
915
|
export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
|
|
@@ -944,9 +953,9 @@ export declare const ChatCompletionsChoiceSchema: z.ZodObject<{
|
|
|
944
953
|
}, z.core.$strip>>;
|
|
945
954
|
}, z.core.$strip>;
|
|
946
955
|
finish_reason: z.ZodEnum<{
|
|
947
|
-
tool_calls: "tool_calls";
|
|
948
956
|
stop: "stop";
|
|
949
957
|
length: "length";
|
|
958
|
+
tool_calls: "tool_calls";
|
|
950
959
|
content_filter: "content_filter";
|
|
951
960
|
}>;
|
|
952
961
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1010,9 +1019,9 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
1010
1019
|
}, z.core.$strip>>;
|
|
1011
1020
|
}, z.core.$strip>;
|
|
1012
1021
|
finish_reason: z.ZodEnum<{
|
|
1013
|
-
tool_calls: "tool_calls";
|
|
1014
1022
|
stop: "stop";
|
|
1015
1023
|
length: "length";
|
|
1024
|
+
tool_calls: "tool_calls";
|
|
1016
1025
|
content_filter: "content_filter";
|
|
1017
1026
|
}>;
|
|
1018
1027
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1029,7 +1038,14 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
1029
1038
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1030
1039
|
}, z.core.$strip>>;
|
|
1031
1040
|
}, z.core.$strip>>;
|
|
1032
|
-
|
|
1041
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
1042
|
+
auto: "auto";
|
|
1043
|
+
default: "default";
|
|
1044
|
+
flex: "flex";
|
|
1045
|
+
scale: "scale";
|
|
1046
|
+
priority: "priority";
|
|
1047
|
+
}>>;
|
|
1048
|
+
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1033
1049
|
}, z.core.$strip>;
|
|
1034
1050
|
export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
|
|
1035
1051
|
export declare const ChatCompletionsToolCallDeltaSchema: z.ZodObject<{
|
|
@@ -1123,9 +1139,9 @@ export declare const ChatCompletionsChoiceDeltaSchema: z.ZodObject<{
|
|
|
1123
1139
|
}, z.core.$strip>>>;
|
|
1124
1140
|
}, z.core.$strip>;
|
|
1125
1141
|
finish_reason: z.ZodNullable<z.ZodEnum<{
|
|
1126
|
-
tool_calls: "tool_calls";
|
|
1127
1142
|
stop: "stop";
|
|
1128
1143
|
length: "length";
|
|
1144
|
+
tool_calls: "tool_calls";
|
|
1129
1145
|
content_filter: "content_filter";
|
|
1130
1146
|
}>>;
|
|
1131
1147
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1177,9 +1193,9 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1177
1193
|
}, z.core.$strip>>>;
|
|
1178
1194
|
}, z.core.$strip>;
|
|
1179
1195
|
finish_reason: z.ZodNullable<z.ZodEnum<{
|
|
1180
|
-
tool_calls: "tool_calls";
|
|
1181
1196
|
stop: "stop";
|
|
1182
1197
|
length: "length";
|
|
1198
|
+
tool_calls: "tool_calls";
|
|
1183
1199
|
content_filter: "content_filter";
|
|
1184
1200
|
}>>;
|
|
1185
1201
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1196,7 +1212,15 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1196
1212
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1197
1213
|
}, z.core.$strip>>;
|
|
1198
1214
|
}, z.core.$strip>>;
|
|
1199
|
-
|
|
1215
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
1216
|
+
auto: "auto";
|
|
1217
|
+
default: "default";
|
|
1218
|
+
flex: "flex";
|
|
1219
|
+
scale: "scale";
|
|
1220
|
+
priority: "priority";
|
|
1221
|
+
}>>;
|
|
1222
|
+
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1200
1223
|
}, z.core.$strip>;
|
|
1201
1224
|
export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
|
|
1225
|
+
export type ChatCompletionsStream = ReadableStream<SseFrame<ChatCompletionsChunk> | SseErrorFrame>;
|
|
1202
1226
|
export {};
|
|
@@ -161,14 +161,11 @@ export const ChatCompletionsToolChoiceSchema = z.union([
|
|
|
161
161
|
]);
|
|
162
162
|
export const ChatCompletionsReasoningEffortSchema = z.enum([
|
|
163
163
|
"none",
|
|
164
|
-
// Extension origin: Gemini
|
|
165
164
|
"minimal",
|
|
166
165
|
"low",
|
|
167
166
|
"medium",
|
|
168
167
|
"high",
|
|
169
168
|
"xhigh",
|
|
170
|
-
// Extension origin: Anthropic
|
|
171
|
-
"max",
|
|
172
169
|
]);
|
|
173
170
|
export const ChatCompletionsReasoningConfigSchema = z.object({
|
|
174
171
|
enabled: z.optional(z.boolean()),
|
|
@@ -195,6 +192,13 @@ export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type",
|
|
|
195
192
|
ChatCompletionsResponseFormatTextSchema,
|
|
196
193
|
]);
|
|
197
194
|
export const ChatCompletionsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
|
|
195
|
+
export const ChatCompletionsServiceTierSchema = z.enum([
|
|
196
|
+
"auto",
|
|
197
|
+
"default",
|
|
198
|
+
"flex",
|
|
199
|
+
"scale",
|
|
200
|
+
"priority",
|
|
201
|
+
]);
|
|
198
202
|
const ChatCompletionsInputsSchema = z.object({
|
|
199
203
|
messages: z.array(ChatCompletionsMessageSchema),
|
|
200
204
|
tools: z.array(ChatCompletionsToolSchema).optional(),
|
|
@@ -210,24 +214,19 @@ const ChatCompletionsInputsSchema = z.object({
|
|
|
210
214
|
metadata: ChatCompletionsMetadataSchema.optional(),
|
|
211
215
|
response_format: ChatCompletionsResponseFormatSchema.optional(),
|
|
212
216
|
reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
|
|
217
|
+
service_tier: ChatCompletionsServiceTierSchema.optional(),
|
|
213
218
|
prompt_cache_key: z.string().optional(),
|
|
214
219
|
prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
|
|
215
|
-
// Extension origin: Gemini explicit cache handle
|
|
216
|
-
// FUTURE: generalize extra_body handling
|
|
217
|
-
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
218
|
-
extra_body: z
|
|
219
|
-
.object({
|
|
220
|
-
google: z
|
|
221
|
-
.object({
|
|
222
|
-
cached_content: z.string().optional().meta({ extension: true }),
|
|
223
|
-
})
|
|
224
|
-
.optional(),
|
|
225
|
-
})
|
|
226
|
-
.optional(),
|
|
227
220
|
// Extension origin: OpenRouter/Vercel/Anthropic
|
|
228
221
|
cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
|
|
229
222
|
// Extension origin: OpenRouter
|
|
230
223
|
reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
|
|
224
|
+
// Extension origin: Gemini extra_body
|
|
225
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
|
|
226
|
+
extra_body: z
|
|
227
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
228
|
+
.optional()
|
|
229
|
+
.meta({ extension: true }),
|
|
231
230
|
});
|
|
232
231
|
export const ChatCompletionsBodySchema = z.looseObject({
|
|
233
232
|
model: z.string(),
|
|
@@ -273,8 +272,12 @@ export const ChatCompletionsSchema = z.object({
|
|
|
273
272
|
model: z.string(),
|
|
274
273
|
choices: z.array(ChatCompletionsChoiceSchema),
|
|
275
274
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
275
|
+
service_tier: ChatCompletionsServiceTierSchema.optional(),
|
|
276
276
|
// Extension origin: Vercel AI Gateway
|
|
277
|
-
provider_metadata: z
|
|
277
|
+
provider_metadata: z
|
|
278
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
279
|
+
.optional()
|
|
280
|
+
.meta({ extension: true }),
|
|
278
281
|
});
|
|
279
282
|
export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
|
|
280
283
|
index: z.int().nonnegative(),
|
|
@@ -296,6 +299,10 @@ export const ChatCompletionsChunkSchema = z.object({
|
|
|
296
299
|
model: z.string(),
|
|
297
300
|
choices: z.array(ChatCompletionsChoiceDeltaSchema),
|
|
298
301
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
302
|
+
service_tier: ChatCompletionsServiceTierSchema.optional(),
|
|
299
303
|
// Extension origin: Vercel AI Gateway
|
|
300
|
-
provider_metadata: z
|
|
304
|
+
provider_metadata: z
|
|
305
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
306
|
+
.optional()
|
|
307
|
+
.meta({ extension: true }),
|
|
301
308
|
});
|
|
@@ -13,7 +13,7 @@ import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from
|
|
|
13
13
|
import { EmbeddingsBodySchema } from "./schema";
|
|
14
14
|
export const embeddings = (config) => {
|
|
15
15
|
const hooks = config.hooks;
|
|
16
|
-
const handler = async (ctx) => {
|
|
16
|
+
const handler = async (ctx, cfg) => {
|
|
17
17
|
const start = performance.now();
|
|
18
18
|
ctx.operation = "embeddings";
|
|
19
19
|
addSpanEvent("hebo.handler.started");
|
|
@@ -62,7 +62,7 @@ export const embeddings = (config) => {
|
|
|
62
62
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
63
63
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
64
64
|
addSpanEvent("hebo.provider.resolved");
|
|
65
|
-
const genAiSignalLevel =
|
|
65
|
+
const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
|
|
66
66
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
67
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
68
|
// Convert inputs to AI SDK call options.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
-
import type { Embeddings,
|
|
2
|
+
import type { Embeddings, EmbeddingsBody } from "./schema";
|
|
3
3
|
import { type TelemetrySignalLevel } from "../../types";
|
|
4
|
-
export declare const getEmbeddingsRequestAttributes: (
|
|
4
|
+
export declare const getEmbeddingsRequestAttributes: (body: EmbeddingsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
5
|
export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
import {} from "../../types";
|
|
2
|
-
export const getEmbeddingsRequestAttributes = (
|
|
2
|
+
export const getEmbeddingsRequestAttributes = (body, signalLevel) => {
|
|
3
3
|
if (!signalLevel || signalLevel === "off")
|
|
4
4
|
return {};
|
|
5
5
|
const attrs = {};
|
|
6
6
|
if (signalLevel !== "required") {
|
|
7
7
|
Object.assign(attrs, {
|
|
8
|
-
"gen_ai.embeddings.dimension.count":
|
|
8
|
+
"gen_ai.embeddings.dimension.count": body.dimensions,
|
|
9
9
|
});
|
|
10
|
+
if (body.metadata) {
|
|
11
|
+
for (const key in body.metadata) {
|
|
12
|
+
attrs[`gen_ai.request.metadata.${key}`] = body.metadata[key];
|
|
13
|
+
}
|
|
14
|
+
}
|
|
10
15
|
}
|
|
11
16
|
return attrs;
|
|
12
17
|
};
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
export declare const EmbeddingsDimensionsSchema: z.ZodInt;
|
|
3
|
+
export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
|
|
4
|
+
export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
|
|
5
|
+
export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
|
|
2
6
|
export declare const EmbeddingsInputsSchema: z.ZodObject<{
|
|
3
7
|
input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
|
|
4
8
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
9
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
5
10
|
}, z.core.$strip>;
|
|
6
11
|
export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
|
|
7
12
|
export declare const EmbeddingsBodySchema: z.ZodObject<{
|
|
8
13
|
input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
|
|
9
14
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
15
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
10
16
|
model: z.ZodString;
|
|
11
17
|
}, z.core.$loose>;
|
|
12
18
|
export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
|
|
3
|
+
export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
|
|
2
4
|
export const EmbeddingsInputsSchema = z.object({
|
|
3
5
|
input: z.union([z.string(), z.array(z.string())]),
|
|
4
|
-
dimensions:
|
|
6
|
+
dimensions: EmbeddingsDimensionsSchema.optional(),
|
|
7
|
+
metadata: EmbeddingsMetadataSchema.optional(),
|
|
5
8
|
});
|
|
6
9
|
export const EmbeddingsBodySchema = z.looseObject({
|
|
7
10
|
model: z.string(),
|
|
@@ -2,8 +2,8 @@ import { GatewayError } from "../../errors/gateway";
|
|
|
2
2
|
import { winterCgHandler } from "../../lifecycle";
|
|
3
3
|
import { toModels, toModel } from "./converters";
|
|
4
4
|
export const models = (config) => {
|
|
5
|
-
//
|
|
6
|
-
const handler = async (ctx) => {
|
|
5
|
+
// eslint-disable-next-line require-await
|
|
6
|
+
const handler = async (ctx, _cfg) => {
|
|
7
7
|
ctx.operation = "models";
|
|
8
8
|
if (!ctx.request || ctx.request.method !== "GET") {
|
|
9
9
|
throw new GatewayError("Method Not Allowed", 405);
|
package/dist/errors/openai.d.ts
CHANGED
|
@@ -8,12 +8,7 @@ export declare const OpenAIErrorSchema: z.ZodObject<{
|
|
|
8
8
|
}, z.core.$strip>;
|
|
9
9
|
}, z.core.$strip>;
|
|
10
10
|
export declare class OpenAIError {
|
|
11
|
-
readonly error:
|
|
12
|
-
message: string;
|
|
13
|
-
type: string;
|
|
14
|
-
code: string | undefined;
|
|
15
|
-
param: string;
|
|
16
|
-
};
|
|
11
|
+
readonly error: z.infer<typeof OpenAIErrorSchema>["error"];
|
|
17
12
|
constructor(message: string, type?: string, code?: string, param?: string);
|
|
18
13
|
}
|
|
19
14
|
export declare function toOpenAIError(error: unknown): OpenAIError;
|
package/dist/lifecycle.d.ts
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
import type { GatewayConfig, GatewayContext } from "./types";
|
|
2
|
-
|
|
1
|
+
import type { GatewayConfig, GatewayConfigParsed, GatewayContext } from "./types";
|
|
2
|
+
import type { SseFrame } from "./utils/stream";
|
|
3
|
+
export declare const winterCgHandler: (run: (ctx: GatewayContext, cfg: GatewayConfigParsed) => Promise<object | ReadableStream<SseFrame>>, config: GatewayConfig) => (request: Request, state?: Record<string, unknown>) => Promise<Response>;
|
package/dist/lifecycle.js
CHANGED
|
@@ -8,7 +8,6 @@ import { recordRequestDuration } from "./telemetry/gen-ai";
|
|
|
8
8
|
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
9
9
|
import { observeV8jsMemoryMetrics } from "./telemetry/memory";
|
|
10
10
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
11
|
-
import { wrapStream } from "./telemetry/stream";
|
|
12
11
|
import { resolveOrCreateRequestId } from "./utils/request";
|
|
13
12
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
14
13
|
export const winterCgHandler = (run, config) => {
|
|
@@ -69,11 +68,10 @@ export const winterCgHandler = (run, config) => {
|
|
|
69
68
|
}
|
|
70
69
|
}
|
|
71
70
|
if (!ctx.response) {
|
|
72
|
-
ctx.result = (await span.runWithContext(() => run(ctx)));
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
}
|
|
76
|
-
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId));
|
|
71
|
+
ctx.result = (await span.runWithContext(() => run(ctx, parsedConfig)));
|
|
72
|
+
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId), {
|
|
73
|
+
onDone: finalize,
|
|
74
|
+
});
|
|
77
75
|
}
|
|
78
76
|
if (parsedConfig.hooks?.onResponse) {
|
|
79
77
|
const onResponse = await parsedConfig.hooks.onResponse(ctx);
|
package/dist/middleware/utils.js
CHANGED
|
@@ -10,7 +10,9 @@ export const novaDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["nova"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["nova"] ??= {});
|
|
14
|
+
// @ts-expect-error AI SDK does the value checking for us
|
|
15
|
+
target.embeddingDimension = dimensions;
|
|
14
16
|
delete unknown["dimensions"];
|
|
15
17
|
return params;
|
|
16
18
|
},
|
|
@@ -26,7 +28,6 @@ function mapNovaEffort(effort) {
|
|
|
26
28
|
return "medium";
|
|
27
29
|
case "high":
|
|
28
30
|
case "xhigh":
|
|
29
|
-
case "max":
|
|
30
31
|
return "high";
|
|
31
32
|
}
|
|
32
33
|
}
|
|
@@ -42,18 +43,18 @@ export const novaReasoningMiddleware = {
|
|
|
42
43
|
return params;
|
|
43
44
|
const target = (params.providerOptions["amazon"] ??= {});
|
|
44
45
|
if (!reasoning.enabled) {
|
|
45
|
-
target
|
|
46
|
+
target.reasoningConfig = { type: "disabled" };
|
|
46
47
|
}
|
|
47
48
|
else if (reasoning.effort) {
|
|
48
49
|
// FUTURE: warn if mapNovaEffort modified the effort
|
|
49
|
-
target
|
|
50
|
+
target.reasoningConfig = {
|
|
50
51
|
type: "enabled",
|
|
51
52
|
maxReasoningEffort: mapNovaEffort(reasoning.effort),
|
|
52
53
|
};
|
|
53
54
|
}
|
|
54
55
|
else {
|
|
55
56
|
// FUTURE: warn if reasoning.max_tokens (unsupported) was ignored
|
|
56
|
-
target
|
|
57
|
+
target.reasoningConfig = { type: "enabled" };
|
|
57
58
|
}
|
|
58
59
|
delete unknown["reasoning"];
|
|
59
60
|
return params;
|
|
@@ -22,7 +22,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
22
22
|
case "high":
|
|
23
23
|
return "high";
|
|
24
24
|
case "xhigh":
|
|
25
|
-
case "max":
|
|
26
25
|
return "max";
|
|
27
26
|
}
|
|
28
27
|
}
|
|
@@ -35,7 +34,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
35
34
|
return "medium";
|
|
36
35
|
case "high":
|
|
37
36
|
case "xhigh":
|
|
38
|
-
case "max":
|
|
39
37
|
return "high";
|
|
40
38
|
}
|
|
41
39
|
}
|
|
@@ -66,41 +64,42 @@ export const claudeReasoningMiddleware = {
|
|
|
66
64
|
const modelId = model.modelId;
|
|
67
65
|
const clampedMaxTokens = reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
|
|
68
66
|
if (!reasoning.enabled) {
|
|
69
|
-
target
|
|
67
|
+
target.thinking = { type: "disabled" };
|
|
70
68
|
}
|
|
71
69
|
else if (reasoning.effort) {
|
|
72
70
|
if (isClaude4(modelId)) {
|
|
73
|
-
target
|
|
71
|
+
target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
74
72
|
}
|
|
75
73
|
if (isOpus46(modelId)) {
|
|
76
|
-
target
|
|
77
|
-
?
|
|
74
|
+
target.thinking = clampedMaxTokens
|
|
75
|
+
? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
|
|
76
|
+
{ type: "adaptive", budgetTokens: clampedMaxTokens }
|
|
78
77
|
: { type: "adaptive" };
|
|
79
78
|
}
|
|
80
79
|
else if (isSonnet46(modelId)) {
|
|
81
|
-
target
|
|
80
|
+
target.thinking = clampedMaxTokens
|
|
82
81
|
? { type: "enabled", budgetTokens: clampedMaxTokens }
|
|
83
82
|
: { type: "adaptive" };
|
|
84
83
|
}
|
|
85
84
|
else {
|
|
86
|
-
target
|
|
85
|
+
target.thinking = { type: "enabled" };
|
|
87
86
|
if (clampedMaxTokens) {
|
|
88
|
-
target
|
|
87
|
+
target.thinking.budgetTokens = clampedMaxTokens;
|
|
89
88
|
}
|
|
90
89
|
else {
|
|
91
90
|
// FUTURE: warn that reasoning.max_tokens was computed
|
|
92
|
-
target
|
|
91
|
+
target.thinking.budgetTokens = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
|
|
93
92
|
}
|
|
94
93
|
}
|
|
95
94
|
}
|
|
96
95
|
else if (clampedMaxTokens) {
|
|
97
|
-
target
|
|
96
|
+
target.thinking = {
|
|
98
97
|
type: "enabled",
|
|
99
98
|
budgetTokens: clampedMaxTokens,
|
|
100
99
|
};
|
|
101
100
|
}
|
|
102
101
|
else {
|
|
103
|
-
target
|
|
102
|
+
target.thinking = { type: "enabled" };
|
|
104
103
|
}
|
|
105
104
|
delete unknown["reasoning"];
|
|
106
105
|
return params;
|
|
@@ -116,7 +115,8 @@ export const claudePromptCachingMiddleware = {
|
|
|
116
115
|
return params;
|
|
117
116
|
const cacheControl = unknown["cache_control"];
|
|
118
117
|
if (cacheControl) {
|
|
119
|
-
(params.providerOptions["anthropic"] ??= {})
|
|
118
|
+
(params.providerOptions["anthropic"] ??= {}).cacheControl =
|
|
119
|
+
cacheControl;
|
|
120
120
|
}
|
|
121
121
|
delete unknown["cache_control"];
|
|
122
122
|
return params;
|
|
@@ -17,7 +17,9 @@ export const cohereDimensionsMiddleware = {
|
|
|
17
17
|
const dimensions = unknown["dimensions"];
|
|
18
18
|
if (!dimensions)
|
|
19
19
|
return params;
|
|
20
|
-
(params.providerOptions["cohere"] ??= {})
|
|
20
|
+
const target = (params.providerOptions["cohere"] ??= {});
|
|
21
|
+
// @ts-expect-error AI SDK does the value checking for us
|
|
22
|
+
target.outputDimension = dimensions;
|
|
21
23
|
delete unknown["dimensions"];
|
|
22
24
|
return params;
|
|
23
25
|
},
|
|
@@ -35,20 +37,20 @@ export const cohereReasoningMiddleware = {
|
|
|
35
37
|
return params;
|
|
36
38
|
const target = (params.providerOptions["cohere"] ??= {});
|
|
37
39
|
if (!reasoning.enabled) {
|
|
38
|
-
target
|
|
40
|
+
target.thinking = { type: "disabled" };
|
|
39
41
|
}
|
|
40
42
|
else if (reasoning.max_tokens) {
|
|
41
|
-
target
|
|
43
|
+
target.thinking = { type: "enabled", tokenBudget: reasoning.max_tokens };
|
|
42
44
|
}
|
|
43
45
|
else if (reasoning.effort) {
|
|
44
46
|
// FUTURE: warn that reasoning.max_tokens was computed
|
|
45
|
-
target
|
|
47
|
+
target.thinking = {
|
|
46
48
|
type: "enabled",
|
|
47
49
|
tokenBudget: calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? COHERE_MAX_OUTPUT_TOKENS, 1024),
|
|
48
50
|
};
|
|
49
51
|
}
|
|
50
52
|
else {
|
|
51
|
-
target
|
|
53
|
+
target.thinking = { type: "enabled" };
|
|
52
54
|
}
|
|
53
55
|
delete unknown["reasoning"];
|
|
54
56
|
return params;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
|
|
2
2
|
import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
|
|
3
3
|
export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
|
|
4
|
-
export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string):
|
|
4
|
+
export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
|
|
5
5
|
export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
|
|
6
6
|
export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
|
|
7
7
|
export declare const geminiReasoningMiddleware: LanguageModelMiddleware;
|