@hebo-ai/gateway 0.6.2-rc0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/endpoints/chat-completions/converters.js +26 -21
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/otel.js +1 -1
- package/dist/endpoints/chat-completions/schema.d.ts +4 -18
- package/dist/endpoints/chat-completions/schema.js +14 -17
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/embeddings/otel.js +5 -0
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/converters.js +3 -3
- package/dist/lifecycle.js +2 -2
- package/dist/logger/default.js +3 -3
- package/dist/logger/index.d.ts +2 -5
- package/dist/middleware/common.js +1 -0
- package/dist/middleware/utils.js +0 -3
- package/dist/models/amazon/middleware.js +8 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/catalog.js +5 -1
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/openai/middleware.js +13 -9
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.js +21 -23
- package/dist/providers/registry.js +3 -0
- package/dist/telemetry/fetch.js +7 -2
- package/dist/telemetry/gen-ai.js +15 -12
- package/dist/telemetry/memory.d.ts +1 -1
- package/dist/telemetry/memory.js +30 -14
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.js +30 -23
- package/dist/utils/env.js +4 -2
- package/dist/utils/preset.js +1 -0
- package/dist/utils/response.js +3 -1
- package/package.json +36 -50
- package/src/config.ts +0 -98
- package/src/endpoints/chat-completions/converters.test.ts +0 -631
- package/src/endpoints/chat-completions/converters.ts +0 -899
- package/src/endpoints/chat-completions/handler.test.ts +0 -391
- package/src/endpoints/chat-completions/handler.ts +0 -201
- package/src/endpoints/chat-completions/index.ts +0 -4
- package/src/endpoints/chat-completions/otel.test.ts +0 -315
- package/src/endpoints/chat-completions/otel.ts +0 -214
- package/src/endpoints/chat-completions/schema.ts +0 -364
- package/src/endpoints/embeddings/converters.ts +0 -51
- package/src/endpoints/embeddings/handler.test.ts +0 -133
- package/src/endpoints/embeddings/handler.ts +0 -137
- package/src/endpoints/embeddings/index.ts +0 -4
- package/src/endpoints/embeddings/otel.ts +0 -40
- package/src/endpoints/embeddings/schema.ts +0 -36
- package/src/endpoints/models/converters.ts +0 -56
- package/src/endpoints/models/handler.test.ts +0 -122
- package/src/endpoints/models/handler.ts +0 -37
- package/src/endpoints/models/index.ts +0 -3
- package/src/endpoints/models/schema.ts +0 -37
- package/src/errors/ai-sdk.ts +0 -99
- package/src/errors/gateway.ts +0 -17
- package/src/errors/openai.ts +0 -57
- package/src/errors/utils.ts +0 -47
- package/src/gateway.ts +0 -50
- package/src/index.ts +0 -19
- package/src/lifecycle.ts +0 -135
- package/src/logger/default.ts +0 -105
- package/src/logger/index.ts +0 -42
- package/src/middleware/common.test.ts +0 -215
- package/src/middleware/common.ts +0 -163
- package/src/middleware/debug.ts +0 -37
- package/src/middleware/matcher.ts +0 -161
- package/src/middleware/utils.ts +0 -34
- package/src/models/amazon/index.ts +0 -2
- package/src/models/amazon/middleware.test.ts +0 -133
- package/src/models/amazon/middleware.ts +0 -79
- package/src/models/amazon/presets.ts +0 -104
- package/src/models/anthropic/index.ts +0 -2
- package/src/models/anthropic/middleware.test.ts +0 -643
- package/src/models/anthropic/middleware.ts +0 -148
- package/src/models/anthropic/presets.ts +0 -191
- package/src/models/catalog.ts +0 -13
- package/src/models/cohere/index.ts +0 -2
- package/src/models/cohere/middleware.test.ts +0 -138
- package/src/models/cohere/middleware.ts +0 -76
- package/src/models/cohere/presets.ts +0 -186
- package/src/models/google/index.ts +0 -2
- package/src/models/google/middleware.test.ts +0 -298
- package/src/models/google/middleware.ts +0 -137
- package/src/models/google/presets.ts +0 -118
- package/src/models/meta/index.ts +0 -1
- package/src/models/meta/presets.ts +0 -143
- package/src/models/openai/index.ts +0 -2
- package/src/models/openai/middleware.test.ts +0 -189
- package/src/models/openai/middleware.ts +0 -103
- package/src/models/openai/presets.ts +0 -280
- package/src/models/types.ts +0 -114
- package/src/models/voyage/index.ts +0 -2
- package/src/models/voyage/middleware.test.ts +0 -28
- package/src/models/voyage/middleware.ts +0 -23
- package/src/models/voyage/presets.ts +0 -126
- package/src/providers/anthropic/canonical.ts +0 -17
- package/src/providers/anthropic/index.ts +0 -1
- package/src/providers/bedrock/canonical.ts +0 -87
- package/src/providers/bedrock/index.ts +0 -2
- package/src/providers/bedrock/middleware.test.ts +0 -303
- package/src/providers/bedrock/middleware.ts +0 -128
- package/src/providers/cohere/canonical.ts +0 -26
- package/src/providers/cohere/index.ts +0 -1
- package/src/providers/groq/canonical.ts +0 -21
- package/src/providers/groq/index.ts +0 -1
- package/src/providers/openai/canonical.ts +0 -16
- package/src/providers/openai/index.ts +0 -1
- package/src/providers/registry.test.ts +0 -44
- package/src/providers/registry.ts +0 -165
- package/src/providers/types.ts +0 -20
- package/src/providers/vertex/canonical.ts +0 -17
- package/src/providers/vertex/index.ts +0 -1
- package/src/providers/voyage/canonical.ts +0 -16
- package/src/providers/voyage/index.ts +0 -1
- package/src/telemetry/ai-sdk.ts +0 -46
- package/src/telemetry/baggage.ts +0 -27
- package/src/telemetry/fetch.ts +0 -62
- package/src/telemetry/gen-ai.ts +0 -113
- package/src/telemetry/http.ts +0 -62
- package/src/telemetry/index.ts +0 -1
- package/src/telemetry/memory.ts +0 -36
- package/src/telemetry/span.ts +0 -85
- package/src/telemetry/stream.ts +0 -64
- package/src/types.ts +0 -223
- package/src/utils/env.ts +0 -7
- package/src/utils/headers.ts +0 -27
- package/src/utils/preset.ts +0 -65
- package/src/utils/request.test.ts +0 -75
- package/src/utils/request.ts +0 -52
- package/src/utils/response.ts +0 -84
- package/src/utils/url.ts +0 -26
|
@@ -1,364 +0,0 @@
|
|
|
1
|
-
import * as z from "zod";
|
|
2
|
-
|
|
3
|
-
/**
 * Cache-control marker accepted on messages, content parts and the request body.
 *
 * `type` must be the literal `"ephemeral"`; `ttl` is an optional string.
 */
export const ChatCompletionsCacheControlSchema = z.object({
  type: z.literal("ephemeral"),
  ttl: z.optional(z.string()),
});
/** Static type inferred from {@link ChatCompletionsCacheControlSchema}. */
export type ChatCompletionsCacheControl = z.infer<typeof ChatCompletionsCacheControlSchema>;
|
|
8
|
-
|
|
9
|
-
/**
 * Text content part, discriminated by `type: "text"`.
 */
export const ChatCompletionsContentPartTextSchema = z.object({
  type: z.literal("text"),
  text: z.string(),
  // Extension origin: Anthropic/OpenRouter/Vercel
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsContentPartTextSchema}. */
export type ChatCompletionsContentPartText = z.infer<typeof ChatCompletionsContentPartTextSchema>;
|
|
16
|
-
|
|
17
|
-
/**
 * Image content part, discriminated by `type: "image_url"`.
 *
 * `image_url.url` is any string; `image_url.detail` is optionally one of
 * `"low"`, `"high"` or `"auto"`.
 */
export const ChatCompletionsContentPartImageSchema = z.object({
  type: z.literal("image_url"),
  image_url: z.object({
    url: z.string(),
    detail: z.optional(z.enum(["low", "high", "auto"])),
  }),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
|
|
26
|
-
|
|
27
|
-
/**
 * File content part, discriminated by `type: "file"`.
 *
 * Carries the file payload (`data`), its `media_type` and an optional `filename`,
 * all as strings.
 */
export const ChatCompletionsContentPartFileSchema = z.object({
  type: z.literal("file"),
  file: z.object({
    data: z.string(),
    media_type: z.string(),
    filename: z.optional(z.string()),
  }),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
|
|
37
|
-
|
|
38
|
-
/**
 * Audio content part, discriminated by `type: "input_audio"`.
 *
 * `input_audio.data` is a string and `input_audio.format` must be one of the
 * enumerated format names below.
 */
export const ChatCompletionsContentPartAudioSchema = z.object({
  type: z.literal("input_audio"),
  input_audio: z.object({
    data: z.string(),
    // only wav and mp3 are official by OpenAI, rest is taken from Gemini support:
    // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding
    format: z.enum([
      "x-aac",
      "flac",
      "mp3",
      "m4a",
      "mpeg",
      "mpga",
      "mp4",
      "ogg",
      "pcm",
      "wav",
      "webm",
    ]),
  }),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
|
|
61
|
-
|
|
62
|
-
/**
 * Any supported content part (text, image, file or audio), selected by the
 * value of its `type` field.
 */
export const ChatCompletionsContentPartSchema = z.discriminatedUnion("type", [
  ChatCompletionsContentPartTextSchema,
  ChatCompletionsContentPartImageSchema,
  ChatCompletionsContentPartFileSchema,
  ChatCompletionsContentPartAudioSchema,
]);
/** Static type inferred from {@link ChatCompletionsContentPartSchema}. */
export type ChatCompletionsContentPart = z.infer<typeof ChatCompletionsContentPartSchema>;
|
|
69
|
-
|
|
70
|
-
/**
 * A function tool call: an `id` plus the function `name` and its `arguments`
 * (kept as a string, not parsed by this schema).
 */
export const ChatCompletionsToolCallSchema = z.object({
  type: z.literal("function"),
  id: z.string(),
  function: z.object({
    arguments: z.string(),
    name: z.string(),
  }),
  // Extension origin: Gemini
  extra_content: z
    .optional(z.record(z.string(), z.record(z.string(), z.unknown())))
    .meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsToolCallSchema}. */
export type ChatCompletionsToolCall = z.infer<typeof ChatCompletionsToolCallSchema>;
|
|
84
|
-
|
|
85
|
-
/**
 * Message with `role: "system"`. Content is a plain string; `name` is optional.
 */
export const ChatCompletionsSystemMessageSchema = z.object({
  role: z.literal("system"),
  content: z.string(),
  name: z.optional(z.string()),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsSystemMessageSchema}. */
export type ChatCompletionsSystemMessage = z.infer<typeof ChatCompletionsSystemMessageSchema>;
|
|
93
|
-
|
|
94
|
-
/**
 * Message with `role: "user"`. Content is either a plain string or an array of
 * content parts; `name` is optional.
 */
export const ChatCompletionsUserMessageSchema = z.object({
  role: z.literal("user"),
  content: z.union([z.string(), z.array(ChatCompletionsContentPartSchema)]),
  name: z.optional(z.string()),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsUserMessageSchema}. */
export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
|
|
102
|
-
|
|
103
|
-
/**
 * One entry of an assistant message's `reasoning_details` array.
 *
 * Only `index` (non-negative integer) and `type` are required; every other
 * field is an optional string.
 */
export const ChatCompletionsReasoningDetailSchema = z.object({
  id: z.optional(z.string()),
  index: z.int().nonnegative(),
  type: z.string(),
  text: z.optional(z.string()),
  signature: z.optional(z.string()),
  data: z.optional(z.string()),
  summary: z.optional(z.string()),
  format: z.optional(z.string()),
});
/** Static type inferred from {@link ChatCompletionsReasoningDetailSchema}. */
export type ChatCompletionsReasoningDetail = z.infer<typeof ChatCompletionsReasoningDetailSchema>;
|
|
114
|
-
|
|
115
|
-
/**
 * Message with `role: "assistant"`.
 *
 * `content` may be omitted, `null`, a string, or an array of text parts.
 * Tool calls and the reasoning/extra-content/cache-control extensions are all optional.
 */
export const ChatCompletionsAssistantMessageSchema = z.object({
  role: z.literal("assistant"),
  content: z.optional(
    z.union([z.string(), z.null(), z.array(ChatCompletionsContentPartTextSchema)]),
  ),
  name: z.optional(z.string()),
  // FUTURE: This should also support Custom Tool Calls
  tool_calls: z.optional(z.array(ChatCompletionsToolCallSchema)),
  // Extension origin: OpenRouter/Vercel
  reasoning: z.optional(z.string()).meta({ extension: true }),
  // Extension origin: OpenRouter/Vercel
  reasoning_details: z
    .optional(z.array(ChatCompletionsReasoningDetailSchema))
    .meta({ extension: true }),
  // Extension origin: Gemini
  extra_content: z
    .optional(z.record(z.string(), z.record(z.string(), z.unknown())))
    .meta({ extension: true }),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsAssistantMessageSchema}. */
export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
|
|
139
|
-
|
|
140
|
-
/**
 * Message with `role: "tool"`: a tool result tied to a call via `tool_call_id`.
 * Content is a string or an array of text parts.
 */
export const ChatCompletionsToolMessageSchema = z.object({
  role: z.literal("tool"),
  content: z.union([
    z.string(),
    z.array(ChatCompletionsContentPartTextSchema),
  ]),
  tool_call_id: z.string(),
});
/** Static type inferred from {@link ChatCompletionsToolMessageSchema}. */
export type ChatCompletionsToolMessage = z.infer<typeof ChatCompletionsToolMessageSchema>;
|
|
146
|
-
|
|
147
|
-
/**
 * Any chat message (system, user, assistant or tool), selected by the value
 * of its `role` field.
 */
export const ChatCompletionsMessageSchema = z.discriminatedUnion("role", [
  ChatCompletionsSystemMessageSchema,
  ChatCompletionsUserMessageSchema,
  ChatCompletionsAssistantMessageSchema,
  ChatCompletionsToolMessageSchema,
]);
/** Static type inferred from {@link ChatCompletionsMessageSchema}. */
export type ChatCompletionsMessage = z.infer<typeof ChatCompletionsMessageSchema>;
|
|
154
|
-
|
|
155
|
-
/**
 * Function tool definition offered to the model.
 *
 * `function.parameters` is required and is an arbitrary string-keyed record;
 * `description` and `strict` are optional.
 */
export const ChatCompletionsToolSchema = z.object({
  type: z.literal("function"),
  function: z.object({
    name: z.string(),
    description: z.optional(z.string()),
    parameters: z.record(z.string(), z.unknown()),
    strict: z.optional(z.boolean()),
  }),
  // FUTURE: cache_control support on tools
});
/** Static type inferred from {@link ChatCompletionsToolSchema}. */
export type ChatCompletionsTool = z.infer<typeof ChatCompletionsToolSchema>;
|
|
166
|
-
|
|
167
|
-
/** Tool choice that names one specific function (`type: "function"`). */
const ChatCompletionsNamedFunctionToolChoiceSchema = z.object({
  type: z.literal("function"),
  function: z.object({ name: z.string() }),
});
|
|
173
|
-
|
|
174
|
-
/**
 * Tool choice restricted to a non-empty list of named functions
 * (`type: "allowed_tools"`), with `mode` set to `"auto"` or `"required"`.
 */
const ChatCompletionsAllowedFunctionToolChoiceSchema = z.object({
  type: z.literal("allowed_tools"),
  allowed_tools: z.object({
    mode: z.enum(["auto", "required"]),
    // At least one tool must be listed.
    tools: z.array(ChatCompletionsNamedFunctionToolChoiceSchema).nonempty(),
  }),
});
|
|
181
|
-
|
|
182
|
-
/**
 * `tool_choice` value: either one of the string modes or an object form
 * (named function / allowed tools) discriminated by `type`.
 */
export const ChatCompletionsToolChoiceSchema = z.union([
  z.enum(["none", "auto", "required", "validated"]),
  z.discriminatedUnion("type", [
    ChatCompletionsNamedFunctionToolChoiceSchema,
    ChatCompletionsAllowedFunctionToolChoiceSchema,
  ]),
  // FUTURE: Missing CustomTool
]);
/** Static type inferred from {@link ChatCompletionsToolChoiceSchema}. */
export type ChatCompletionsToolChoice = z.infer<typeof ChatCompletionsToolChoiceSchema>;
|
|
191
|
-
|
|
192
|
-
/**
 * Accepted reasoning-effort levels. Some values are extensions; their origin
 * is noted inline.
 */
export const ChatCompletionsReasoningEffortSchema = z.enum([
  "none",
  // Extension origin: Gemini
  "minimal",
  "low",
  "medium",
  "high",
  "xhigh",
  // Extension origin: Anthropic
  "max",
]);
/** Static type inferred from {@link ChatCompletionsReasoningEffortSchema}. */
export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
|
|
204
|
-
|
|
205
|
-
/**
 * Reasoning configuration object. Every field is optional:
 * `enabled`/`exclude` are booleans, `effort` is a reasoning-effort level and
 * `max_tokens` is a number.
 */
export const ChatCompletionsReasoningConfigSchema = z.object({
  enabled: z.boolean().optional(),
  effort: ChatCompletionsReasoningEffortSchema.optional(),
  max_tokens: z.number().optional(),
  exclude: z.boolean().optional(),
});
/** Static type inferred from {@link ChatCompletionsReasoningConfigSchema}. */
export type ChatCompletionsReasoningConfig = z.infer<typeof ChatCompletionsReasoningConfigSchema>;
|
|
212
|
-
|
|
213
|
-
/**
 * `response_format` variant requesting output that follows a JSON schema.
 *
 * `json_schema.name` and `json_schema.schema` are required; `description`
 * and `strict` are optional.
 */
export const ChatCompletionsResponseFormatJsonSchema = z.object({
  // FUTURE: consider support for legacy json_object (if demand)
  type: z.literal("json_schema"),
  json_schema: z.object({
    name: z.string(),
    description: z.optional(z.string()),
    schema: z.record(z.string(), z.unknown()),
    // FUTURE: consider support for non-strict mode (for providers that support it)
    strict: z.optional(z.boolean()),
  }),
});
|
|
224
|
-
/** `response_format` variant for plain text output (`type: "text"`). */
export const ChatCompletionsResponseFormatTextSchema = z.object({ type: z.literal("text") });
|
|
227
|
-
/**
 * Any supported `response_format` (JSON schema or text), selected by `type`.
 */
export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type", [
  ChatCompletionsResponseFormatJsonSchema,
  ChatCompletionsResponseFormatTextSchema,
]);
/** Static type inferred from {@link ChatCompletionsResponseFormatSchema}. */
export type ChatCompletionsResponseFormat = z.infer<typeof ChatCompletionsResponseFormatSchema>;
|
|
232
|
-
|
|
233
|
-
/**
 * Request metadata: a string-to-string record whose keys have length 1–64
 * and whose values have length at most 512.
 */
export const ChatCompletionsMetadataSchema = z.record(
  // key
  z.string().min(1).max(64),
  // value
  z.string().max(512),
);
/** Static type inferred from {@link ChatCompletionsMetadataSchema}. */
export type ChatCompletionsMetadata = z.infer<typeof ChatCompletionsMetadataSchema>;
|
|
238
|
-
|
|
239
|
-
/**
 * Chat-completions request parameters other than `model` and `stream`.
 *
 * Only `messages` is required. Numeric sampling parameters are range-checked
 * here (e.g. `temperature` in [0, 2], `top_p` in [0, 1], penalties in [-2, 2]).
 */
const ChatCompletionsInputsSchema = z.object({
  messages: z.array(ChatCompletionsMessageSchema),
  tools: z.optional(z.array(ChatCompletionsToolSchema)),
  tool_choice: z.optional(ChatCompletionsToolChoiceSchema),
  temperature: z.optional(z.number().min(0).max(2)),
  max_tokens: z.optional(z.int().nonnegative()),
  max_completion_tokens: z.optional(z.int().nonnegative()),
  frequency_penalty: z.optional(z.number().min(-2.0).max(2.0)),
  presence_penalty: z.optional(z.number().min(-2.0).max(2.0)),
  seed: z.optional(z.int()),
  stop: z.optional(z.union([z.string(), z.array(z.string())])),
  top_p: z.optional(z.number().min(0).max(1.0)),
  metadata: z.optional(ChatCompletionsMetadataSchema),
  response_format: z.optional(ChatCompletionsResponseFormatSchema),
  reasoning_effort: z.optional(ChatCompletionsReasoningEffortSchema),
  prompt_cache_key: z.optional(z.string()),
  prompt_cache_retention: z.optional(z.enum(["in_memory", "24h"])),
  // Extension origin: Gemini explicit cache handle
  // FUTURE: generalize extra_body handling
  // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
  extra_body: z.optional(
    z.object({
      google: z.optional(
        z.object({
          cached_content: z.optional(z.string()).meta({ extension: true }),
        }),
      ),
    }),
  ),
  // Extension origin: OpenRouter/Vercel/Anthropic
  cache_control: z.optional(ChatCompletionsCacheControlSchema).meta({ extension: true }),
  // Extension origin: OpenRouter
  reasoning: z.optional(ChatCompletionsReasoningConfigSchema).meta({ extension: true }),
});
/** Static type inferred from the inputs schema above. */
export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
|
|
274
|
-
|
|
275
|
-
/**
 * Full chat-completions request body: `model`, optional `stream`, and every
 * field of the inputs schema. Built with `z.looseObject`, so keys not declared
 * here are not rejected.
 */
export const ChatCompletionsBodySchema = z.looseObject({
  model: z.string(),
  stream: z.optional(z.boolean()),
  // Re-use the input field definitions rather than repeating them.
  ...ChatCompletionsInputsSchema.shape,
});
/** Static type inferred from {@link ChatCompletionsBodySchema}. */
export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
|
|
281
|
-
|
|
282
|
-
/** Allowed values of a choice's `finish_reason`. */
export const ChatCompletionsFinishReasonSchema = z.enum([
  "stop",
  "length",
  "content_filter",
  "tool_calls",
]);
/** Static type inferred from {@link ChatCompletionsFinishReasonSchema}. */
export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
|
|
289
|
-
|
|
290
|
-
/**
 * One choice of a non-streaming completion: its `index`, the assistant
 * `message` and a required `finish_reason`. `logprobs` is passed through untyped.
 */
export const ChatCompletionsChoiceSchema = z.object({
  index: z.int().nonnegative(),
  message: ChatCompletionsAssistantMessageSchema,
  finish_reason: ChatCompletionsFinishReasonSchema,
  // FUTURE: model this out
  logprobs: z.optional(z.unknown()),
});
/** Static type inferred from {@link ChatCompletionsChoiceSchema}. */
export type ChatCompletionsChoice = z.infer<typeof ChatCompletionsChoiceSchema>;
|
|
298
|
-
|
|
299
|
-
/**
 * Token usage block. Every counter is an optional non-negative integer; the
 * two `*_details` objects are themselves optional.
 */
export const ChatCompletionsUsageSchema = z.object({
  prompt_tokens: z.optional(z.int().nonnegative()),
  completion_tokens: z.optional(z.int().nonnegative()),
  total_tokens: z.optional(z.int().nonnegative()),
  completion_tokens_details: z.optional(
    z.object({
      // FUTURE: add missing properties
      reasoning_tokens: z.optional(z.int().nonnegative()),
    }),
  ),
  prompt_tokens_details: z.optional(
    z.object({
      // FUTURE: add missing properties
      cached_tokens: z.optional(z.int().nonnegative()),
      // Extension origin: OpenRouter
      cache_write_tokens: z.optional(z.int().nonnegative()).meta({ extension: true }),
    }),
  ),
});
/** Static type inferred from {@link ChatCompletionsUsageSchema}. */
export type ChatCompletionsUsage = z.infer<typeof ChatCompletionsUsageSchema>;
|
|
319
|
-
|
|
320
|
-
/**
 * Non-streaming response (`object: "chat.completion"`).
 *
 * `usage` must be present but may be `null`; `provider_metadata` is an
 * optional, untyped extension field.
 */
export const ChatCompletionsSchema = z.object({
  id: z.string(),
  object: z.literal("chat.completion"),
  created: z.int().nonnegative(),
  model: z.string(),
  choices: z.array(ChatCompletionsChoiceSchema),
  usage: z.nullable(ChatCompletionsUsageSchema),
  // Extension origin: Vercel AI Gateway
  provider_metadata: z.optional(z.unknown()).meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsSchema}. */
export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
|
|
331
|
-
|
|
332
|
-
/**
 * Streaming fragment of a tool call: every tool-call field becomes optional,
 * and a required non-negative `index` is added.
 */
export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema
  .partial()
  .extend({ index: z.int().nonnegative() });
/** Static type inferred from {@link ChatCompletionsToolCallDeltaSchema}. */
export type ChatCompletionsToolCallDelta = z.infer<typeof ChatCompletionsToolCallDeltaSchema>;
|
|
336
|
-
|
|
337
|
-
/**
 * Streaming fragment of an assistant message: all assistant-message fields
 * become optional, and `tool_calls` is replaced by an array of tool-call deltas.
 */
export const ChatCompletionsAssistantMessageDeltaSchema = ChatCompletionsAssistantMessageSchema
  .partial()
  .extend({
    tool_calls: z.optional(z.array(ChatCompletionsToolCallDeltaSchema)),
  });
/** Static type inferred from {@link ChatCompletionsAssistantMessageDeltaSchema}. */
export type ChatCompletionsAssistantMessageDelta = z.infer<
  typeof ChatCompletionsAssistantMessageDeltaSchema
>;
|
|
344
|
-
|
|
345
|
-
/**
 * One choice inside a streaming chunk: its `index`, the message `delta`, and a
 * `finish_reason` that must be present but may be `null`.
 */
export const ChatCompletionsChoiceDeltaSchema = z.object({
  index: z.int().nonnegative(),
  delta: ChatCompletionsAssistantMessageDeltaSchema,
  finish_reason: z.nullable(ChatCompletionsFinishReasonSchema),
  // FUTURE: model this out
  logprobs: z.optional(z.unknown()),
});
/** Static type inferred from {@link ChatCompletionsChoiceDeltaSchema}. */
export type ChatCompletionsChoiceDelta = z.infer<typeof ChatCompletionsChoiceDeltaSchema>;
|
|
353
|
-
|
|
354
|
-
/**
 * Streaming response chunk (`object: "chat.completion.chunk"`).
 *
 * Mirrors the non-streaming response shape but carries choice deltas.
 * `usage` must be present but may be `null`.
 */
export const ChatCompletionsChunkSchema = z.object({
  id: z.string(),
  object: z.literal("chat.completion.chunk"),
  created: z.int().nonnegative(),
  model: z.string(),
  choices: z.array(ChatCompletionsChoiceDeltaSchema),
  usage: z.nullable(ChatCompletionsUsageSchema),
  // Extension origin: Vercel AI Gateway
  provider_metadata: z.optional(z.unknown()).meta({ extension: true }),
});
/** Static type inferred from {@link ChatCompletionsChunkSchema}. */
export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import type { JSONObject, SharedV3ProviderOptions } from "@ai-sdk/provider";
|
|
2
|
-
import type { EmbedManyResult } from "ai";
|
|
3
|
-
|
|
4
|
-
import type { EmbeddingsInputs, EmbeddingsData, EmbeddingsUsage, Embeddings } from "./schema";
|
|
5
|
-
|
|
6
|
-
import { toResponse } from "../../utils/response";
|
|
7
|
-
|
|
8
|
-
/** Arguments produced by `convertToEmbedCallOptions` for an embedding call. */
export type EmbedCallOptions = {
  /** Input strings to embed; a single input is wrapped into a one-element array. */
  values: Array<string>;
  /** Provider options forwarded alongside the values. */
  providerOptions: SharedV3ProviderOptions;
};
|
|
12
|
-
|
|
13
|
-
/**
 * Converts parsed embeddings inputs into embed-call options.
 *
 * @param params - Request inputs; `input` may be a single value or an array.
 * @returns `values` as an array (a non-array `input` is wrapped), with every
 *   remaining field of `params` placed under `providerOptions.unknown`.
 */
export function convertToEmbedCallOptions(params: EmbeddingsInputs): EmbedCallOptions {
  const { input, ...passthrough } = params;
  const values = Array.isArray(input) ? input : [input];
  const providerOptions = { unknown: passthrough as JSONObject };

  return { values, providerOptions };
}
|
|
23
|
-
|
|
24
|
-
/**
 * Maps an `embedMany` result onto the embeddings response body.
 *
 * @param embedManyResult - Result holding `embeddings`, `usage.tokens` and `providerMetadata`.
 * @param modelId - Value reported in the response's `model` field.
 * @returns A `"list"` object with one `"embedding"` entry per vector (indexed
 *   in input order). `prompt_tokens` and `total_tokens` both carry `usage.tokens`.
 */
export function toEmbeddings(embedManyResult: EmbedManyResult, modelId: string): Embeddings {
  const data: EmbeddingsData[] = embedManyResult.embeddings.map((embedding, index) => {
    return { object: "embedding", embedding, index };
  });

  // The result exposes a single token count, reported as both prompt and total.
  const tokens = embedManyResult.usage.tokens;
  const usage: EmbeddingsUsage = { prompt_tokens: tokens, total_tokens: tokens };

  return {
    object: "list",
    data,
    model: modelId,
    usage,
    provider_metadata: embedManyResult.providerMetadata,
  };
}
|
|
44
|
-
|
|
45
|
-
/**
 * Builds a `Response` for an `embedMany` result.
 *
 * @param embedManyResult - Result to convert with `toEmbeddings`.
 * @param modelId - Model identifier reported in the body.
 * @param responseInit - Optional init passed through to `toResponse`.
 * @returns Whatever `toResponse` produces for the converted body.
 */
export function createEmbeddingsResponse(
  embedManyResult: EmbedManyResult,
  modelId: string,
  responseInit?: ResponseInit,
): Response {
  const body = toEmbeddings(embedManyResult, modelId);
  return toResponse(body, responseInit);
}
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
import { MockEmbeddingModelV3, MockProviderV3 } from "ai/test";
|
|
2
|
-
import { describe, expect, test } from "bun:test";
|
|
3
|
-
|
|
4
|
-
import { parseResponse, postJson } from "../../../test/helpers/http";
|
|
5
|
-
import { embeddings } from "./handler";
|
|
6
|
-
|
|
7
|
-
const baseUrl = "http://localhost/embeddings";

/**
 * Expected embeddings response body for `count` inputs: one fixed
 * three-element vector per input and 10 tokens counted per input.
 */
const expectedEmbeddingResponse = (count: number) => {
  const tokens = count * 10;
  const data = Array.from({ length: count }, (_, index) => ({
    object: "embedding",
    embedding: [0.1, 0.2, 0.3],
    index,
  }));

  return {
    object: "list",
    data,
    model: "text-embedding-3-small",
    usage: { prompt_tokens: tokens, total_tokens: tokens },
    provider_metadata: { provider: { key: "value" } },
  };
};
|
|
27
|
-
|
|
28
|
-
// Exercises the embeddings endpoint against a mocked OpenAI provider.
// The catalog has one embedding model and one text-only model so that the
// "unsupported operation" path can be tested.
describe("Embeddings Handler", () => {
  const endpoint = embeddings({
    providers: {
      openai: new MockProviderV3({
        embeddingModels: {
          "text-embedding-3-small": new MockEmbeddingModelV3({
            // oxlint-disable-next-line require-await
            doEmbed: async (options) => ({
              embeddings: options.values.map(() => [0.1, 0.2, 0.3]),
              usage: { tokens: 10 },
              providerMetadata: { provider: { key: "value" } },
              warnings: [],
            }),
          }),
        },
      }),
    },
    models: {
      "text-embedding-3-small": {
        name: "OpenAI Embedding Model",
        modalities: { input: ["text"], output: ["embedding"] },
        providers: ["openai"],
      },
      "gpt-oss-20b": {
        name: "GPT-OSS 20B",
        modalities: { input: ["text"], output: ["text"] },
        providers: ["openai"],
      },
    },
  });

  test("should return 422 if model does not support embeddings", async () => {
    const request = postJson(baseUrl, {
      model: "gpt-oss-20b",
      input: "hello world",
    });

    const res = await endpoint.handler(request);
    const data = await parseResponse(res);

    expect(data).toMatchObject({
      error: {
        code: "model_unsupported_operation",
        message: "Model 'gpt-oss-20b' does not support 'embedding' output",
        type: "invalid_request_error",
      },
    });
  });

  test("should generate embeddings for a single string", async () => {
    const request = postJson(baseUrl, {
      model: "text-embedding-3-small",
      input: "hello world",
    });

    const res = await endpoint.handler(request);
    const data = await parseResponse(res);

    expect(data).toEqual(expectedEmbeddingResponse(1));
  });

  test("should generate embeddings for an array of strings", async () => {
    const request = postJson(baseUrl, {
      model: "text-embedding-3-small",
      input: ["hello", "world"],
    });

    const res = await endpoint.handler(request);
    const data = await parseResponse(res);

    expect(data).toEqual(expectedEmbeddingResponse(2));
  });

  // Schema validation failures are raised by the handler as a 400 GatewayError,
  // which is why the expected error code below is "bad_request".
  test("should return 400 if input is missing", async () => {
    const request = postJson(baseUrl, {
      model: "text-embedding-3-small",
    });

    const res = await endpoint.handler(request);
    const data = await parseResponse(res);

    expect(data).toMatchObject({
      error: {
        code: "bad_request",
        message: "✖ Invalid input\n → at input",
        type: "invalid_request_error",
        param: "",
      },
    });
  });

  test("should return 'Method Not Allowed' for GET request", async () => {
    const request = new Request(baseUrl, { method: "GET" });

    const res = await endpoint.handler(request);
    const data = await parseResponse(res);

    expect(data).toMatchObject({
      error: {
        code: "method_not_allowed",
        message: "Method Not Allowed",
        type: "invalid_request_error",
      },
    });
  });
});
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
import { embedMany, wrapEmbeddingModel } from "ai";
|
|
2
|
-
import * as z from "zod/mini";
|
|
3
|
-
|
|
4
|
-
import type {
|
|
5
|
-
AfterHookContext,
|
|
6
|
-
BeforeHookContext,
|
|
7
|
-
GatewayConfig,
|
|
8
|
-
Endpoint,
|
|
9
|
-
GatewayContext,
|
|
10
|
-
ResolveProviderHookContext,
|
|
11
|
-
ResolveModelHookContext,
|
|
12
|
-
} from "../../types";
|
|
13
|
-
|
|
14
|
-
import { GatewayError } from "../../errors/gateway";
|
|
15
|
-
import { winterCgHandler } from "../../lifecycle";
|
|
16
|
-
import { logger } from "../../logger";
|
|
17
|
-
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
18
|
-
import { resolveProvider } from "../../providers/registry";
|
|
19
|
-
import {
|
|
20
|
-
getGenAiGeneralAttributes,
|
|
21
|
-
recordTimePerOutputToken,
|
|
22
|
-
recordTokenUsage,
|
|
23
|
-
} from "../../telemetry/gen-ai";
|
|
24
|
-
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
25
|
-
import { prepareForwardHeaders } from "../../utils/request";
|
|
26
|
-
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
27
|
-
import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from "./otel";
|
|
28
|
-
import { EmbeddingsBodySchema, type EmbeddingsBody } from "./schema";
|
|
29
|
-
|
|
30
|
-
/**
 * Builds the embeddings endpoint for the gateway.
 *
 * The returned endpoint wraps a request handler (via `winterCgHandler`) that:
 *  1. rejects any request that is not a POST with a 405 `GatewayError`;
 *  2. reads the JSON body and validates it against `EmbeddingsBodySchema`
 *     (400 `GatewayError` on malformed JSON or schema failure);
 *  3. runs the optional `before`, `resolveModelId` and `resolveProvider` hooks;
 *  4. calls `embedMany` with the middleware-wrapped embedding model;
 *  5. converts the result with `toEmbeddings`, records telemetry attributes,
 *     and runs the optional `after` hook.
 *
 * @param config - Gateway configuration. This function reads `config.hooks`
 *   and `config.telemetry?.signals?.gen_ai`, and forwards `config` to
 *   `winterCgHandler`.
 * @returns An `Endpoint` whose `handler` is the wrapped embeddings handler.
 */
export const embeddings = (config: GatewayConfig): Endpoint => {
  const hooks = config.hooks;

  const handler = async (ctx: GatewayContext) => {
    // Captured up front so the time-per-output-token metric covers the whole handler.
    const start = performance.now();
    ctx.operation = "embeddings";
    addSpanEvent("hebo.handler.started");

    // Guard: enforce HTTP method early.
    if (!ctx.request || ctx.request.method !== "POST") {
      throw new GatewayError("Method Not Allowed", 405);
    }

    // Parse + validate input.
    try {
      ctx.body = await ctx.request.json();
    } catch {
      throw new GatewayError("Invalid JSON", 400);
    }
    // Tagged "[embeddings]" like every other log line in this handler, so that
    // tag-based log filtering does not attribute these entries to chat.
    logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[embeddings] EmbeddingsBody");
    addSpanEvent("hebo.request.deserialized");

    const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
    if (!parsed.success) {
      // FUTURE: consider adding body shape to metadata
      throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
    }
    ctx.body = parsed.data;
    addSpanEvent("hebo.request.parsed");

    // The `before` hook may return a replacement body; a nullish return keeps the parsed one.
    if (hooks?.before) {
      ctx.body = ((await hooks.before(ctx as BeforeHookContext)) as EmbeddingsBody) ?? ctx.body;
      addSpanEvent("hebo.hooks.before.completed");
    }

    // Resolve model + provider (hooks may override defaults).
    // `model` is split off into ctx.modelId; the remaining fields become the embed inputs.
    let inputs;
    ({ model: ctx.modelId, ...inputs } = ctx.body);

    ctx.resolvedModelId =
      (await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
    logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
    addSpanEvent("hebo.model.resolved");

    // A provider returned by the hook wins; otherwise fall back to the registry lookup.
    const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
    ctx.provider =
      override ??
      resolveProvider({
        providers: ctx.providers,
        models: ctx.models,
        modelId: ctx.resolvedModelId,
        operation: ctx.operation,
      });

    const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
    ctx.resolvedProviderId = embeddingModel.provider;
    logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
    addSpanEvent("hebo.provider.resolved");

    const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
    const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
    setSpanAttributes(genAiGeneralAttrs);

    // Convert inputs to AI SDK call options.
    const embedOptions = convertToEmbedCallOptions(inputs);
    logger.trace(
      { requestId: ctx.requestId, options: embedOptions },
      "[embeddings] AI SDK options",
    );
    addSpanEvent("hebo.options.prepared");
    setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body, genAiSignalLevel));

    // Build middleware chain (model -> forward params -> provider).
    const embeddingModelWithMiddleware = wrapEmbeddingModel({
      model: embeddingModel,
      middleware: modelMiddlewareMatcher.forEmbedding(ctx.resolvedModelId, embeddingModel.provider),
    });

    // Execute request.
    addSpanEvent("hebo.ai-sdk.started");
    const result = await embedMany({
      model: embeddingModelWithMiddleware,
      headers: prepareForwardHeaders(ctx.request),
      abortSignal: ctx.request.signal,
      ...embedOptions,
    });
    logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
    addSpanEvent("hebo.ai-sdk.completed");

    // Transform result. The client-requested model id (not the resolved one) is passed through.
    ctx.result = toEmbeddings(result, ctx.modelId);
    logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[embeddings] Embeddings");
    addSpanEvent("hebo.result.transformed");
    const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
    recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
    setSpanAttributes(genAiResponseAttrs);

    // The `after` hook may return a replacement result; a nullish return keeps the current one.
    if (hooks?.after) {
      ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
      addSpanEvent("hebo.hooks.after.completed");
    }

    recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
    return ctx.result;
  };

  return { handler: winterCgHandler(handler, config) };
};
|