@hebo-ai/gateway 0.6.2-rc0 → 0.6.2

This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (134)
  1. package/README.md +3 -3
  2. package/dist/endpoints/chat-completions/converters.js +26 -21
  3. package/dist/endpoints/chat-completions/handler.js +2 -0
  4. package/dist/endpoints/chat-completions/otel.js +1 -1
  5. package/dist/endpoints/chat-completions/schema.d.ts +4 -18
  6. package/dist/endpoints/chat-completions/schema.js +14 -17
  7. package/dist/endpoints/embeddings/handler.js +2 -0
  8. package/dist/endpoints/embeddings/otel.js +5 -0
  9. package/dist/endpoints/embeddings/schema.d.ts +6 -0
  10. package/dist/endpoints/embeddings/schema.js +4 -1
  11. package/dist/endpoints/models/converters.js +3 -3
  12. package/dist/lifecycle.js +2 -2
  13. package/dist/logger/default.js +3 -3
  14. package/dist/logger/index.d.ts +2 -5
  15. package/dist/middleware/common.js +1 -0
  16. package/dist/middleware/utils.js +0 -3
  17. package/dist/models/amazon/middleware.js +8 -5
  18. package/dist/models/anthropic/middleware.js +13 -13
  19. package/dist/models/catalog.js +5 -1
  20. package/dist/models/cohere/middleware.js +7 -5
  21. package/dist/models/google/middleware.d.ts +1 -1
  22. package/dist/models/google/middleware.js +29 -25
  23. package/dist/models/openai/middleware.js +13 -9
  24. package/dist/models/voyage/middleware.js +2 -1
  25. package/dist/providers/bedrock/middleware.js +21 -23
  26. package/dist/providers/registry.js +3 -0
  27. package/dist/telemetry/fetch.js +7 -2
  28. package/dist/telemetry/gen-ai.js +15 -12
  29. package/dist/telemetry/memory.d.ts +1 -1
  30. package/dist/telemetry/memory.js +30 -14
  31. package/dist/telemetry/span.js +1 -1
  32. package/dist/telemetry/stream.js +30 -23
  33. package/dist/utils/env.js +4 -2
  34. package/dist/utils/preset.js +1 -0
  35. package/dist/utils/response.js +3 -1
  36. package/package.json +36 -50
  37. package/src/config.ts +0 -98
  38. package/src/endpoints/chat-completions/converters.test.ts +0 -631
  39. package/src/endpoints/chat-completions/converters.ts +0 -899
  40. package/src/endpoints/chat-completions/handler.test.ts +0 -391
  41. package/src/endpoints/chat-completions/handler.ts +0 -201
  42. package/src/endpoints/chat-completions/index.ts +0 -4
  43. package/src/endpoints/chat-completions/otel.test.ts +0 -315
  44. package/src/endpoints/chat-completions/otel.ts +0 -214
  45. package/src/endpoints/chat-completions/schema.ts +0 -364
  46. package/src/endpoints/embeddings/converters.ts +0 -51
  47. package/src/endpoints/embeddings/handler.test.ts +0 -133
  48. package/src/endpoints/embeddings/handler.ts +0 -137
  49. package/src/endpoints/embeddings/index.ts +0 -4
  50. package/src/endpoints/embeddings/otel.ts +0 -40
  51. package/src/endpoints/embeddings/schema.ts +0 -36
  52. package/src/endpoints/models/converters.ts +0 -56
  53. package/src/endpoints/models/handler.test.ts +0 -122
  54. package/src/endpoints/models/handler.ts +0 -37
  55. package/src/endpoints/models/index.ts +0 -3
  56. package/src/endpoints/models/schema.ts +0 -37
  57. package/src/errors/ai-sdk.ts +0 -99
  58. package/src/errors/gateway.ts +0 -17
  59. package/src/errors/openai.ts +0 -57
  60. package/src/errors/utils.ts +0 -47
  61. package/src/gateway.ts +0 -50
  62. package/src/index.ts +0 -19
  63. package/src/lifecycle.ts +0 -135
  64. package/src/logger/default.ts +0 -105
  65. package/src/logger/index.ts +0 -42
  66. package/src/middleware/common.test.ts +0 -215
  67. package/src/middleware/common.ts +0 -163
  68. package/src/middleware/debug.ts +0 -37
  69. package/src/middleware/matcher.ts +0 -161
  70. package/src/middleware/utils.ts +0 -34
  71. package/src/models/amazon/index.ts +0 -2
  72. package/src/models/amazon/middleware.test.ts +0 -133
  73. package/src/models/amazon/middleware.ts +0 -79
  74. package/src/models/amazon/presets.ts +0 -104
  75. package/src/models/anthropic/index.ts +0 -2
  76. package/src/models/anthropic/middleware.test.ts +0 -643
  77. package/src/models/anthropic/middleware.ts +0 -148
  78. package/src/models/anthropic/presets.ts +0 -191
  79. package/src/models/catalog.ts +0 -13
  80. package/src/models/cohere/index.ts +0 -2
  81. package/src/models/cohere/middleware.test.ts +0 -138
  82. package/src/models/cohere/middleware.ts +0 -76
  83. package/src/models/cohere/presets.ts +0 -186
  84. package/src/models/google/index.ts +0 -2
  85. package/src/models/google/middleware.test.ts +0 -298
  86. package/src/models/google/middleware.ts +0 -137
  87. package/src/models/google/presets.ts +0 -118
  88. package/src/models/meta/index.ts +0 -1
  89. package/src/models/meta/presets.ts +0 -143
  90. package/src/models/openai/index.ts +0 -2
  91. package/src/models/openai/middleware.test.ts +0 -189
  92. package/src/models/openai/middleware.ts +0 -103
  93. package/src/models/openai/presets.ts +0 -280
  94. package/src/models/types.ts +0 -114
  95. package/src/models/voyage/index.ts +0 -2
  96. package/src/models/voyage/middleware.test.ts +0 -28
  97. package/src/models/voyage/middleware.ts +0 -23
  98. package/src/models/voyage/presets.ts +0 -126
  99. package/src/providers/anthropic/canonical.ts +0 -17
  100. package/src/providers/anthropic/index.ts +0 -1
  101. package/src/providers/bedrock/canonical.ts +0 -87
  102. package/src/providers/bedrock/index.ts +0 -2
  103. package/src/providers/bedrock/middleware.test.ts +0 -303
  104. package/src/providers/bedrock/middleware.ts +0 -128
  105. package/src/providers/cohere/canonical.ts +0 -26
  106. package/src/providers/cohere/index.ts +0 -1
  107. package/src/providers/groq/canonical.ts +0 -21
  108. package/src/providers/groq/index.ts +0 -1
  109. package/src/providers/openai/canonical.ts +0 -16
  110. package/src/providers/openai/index.ts +0 -1
  111. package/src/providers/registry.test.ts +0 -44
  112. package/src/providers/registry.ts +0 -165
  113. package/src/providers/types.ts +0 -20
  114. package/src/providers/vertex/canonical.ts +0 -17
  115. package/src/providers/vertex/index.ts +0 -1
  116. package/src/providers/voyage/canonical.ts +0 -16
  117. package/src/providers/voyage/index.ts +0 -1
  118. package/src/telemetry/ai-sdk.ts +0 -46
  119. package/src/telemetry/baggage.ts +0 -27
  120. package/src/telemetry/fetch.ts +0 -62
  121. package/src/telemetry/gen-ai.ts +0 -113
  122. package/src/telemetry/http.ts +0 -62
  123. package/src/telemetry/index.ts +0 -1
  124. package/src/telemetry/memory.ts +0 -36
  125. package/src/telemetry/span.ts +0 -85
  126. package/src/telemetry/stream.ts +0 -64
  127. package/src/types.ts +0 -223
  128. package/src/utils/env.ts +0 -7
  129. package/src/utils/headers.ts +0 -27
  130. package/src/utils/preset.ts +0 -65
  131. package/src/utils/request.test.ts +0 -75
  132. package/src/utils/request.ts +0 -52
  133. package/src/utils/response.ts +0 -84
  134. package/src/utils/url.ts +0 -26
@@ -1,364 +0,0 @@
1
- import * as z from "zod";
2
-
3
- export const ChatCompletionsCacheControlSchema = z.object({
4
- type: z.literal("ephemeral"),
5
- ttl: z.string().optional(),
6
- });
7
- export type ChatCompletionsCacheControl = z.infer<typeof ChatCompletionsCacheControlSchema>;
8
-
9
- export const ChatCompletionsContentPartTextSchema = z.object({
10
- type: z.literal("text"),
11
- text: z.string(),
12
- // Extension origin: Anthropic/OpenRouter/Vercel
13
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
14
- });
15
- export type ChatCompletionsContentPartText = z.infer<typeof ChatCompletionsContentPartTextSchema>;
16
-
17
- export const ChatCompletionsContentPartImageSchema = z.object({
18
- type: z.literal("image_url"),
19
- image_url: z.object({
20
- url: z.string(),
21
- detail: z.enum(["low", "high", "auto"]).optional(),
22
- }),
23
- // Extension origin: OpenRouter/Vercel/Anthropic
24
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
25
- });
26
-
27
- export const ChatCompletionsContentPartFileSchema = z.object({
28
- type: z.literal("file"),
29
- file: z.object({
30
- data: z.string(),
31
- media_type: z.string(),
32
- filename: z.string().optional(),
33
- }),
34
- // Extension origin: OpenRouter/Vercel/Anthropic
35
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
36
- });
37
-
38
- export const ChatCompletionsContentPartAudioSchema = z.object({
39
- type: z.literal("input_audio"),
40
- input_audio: z.object({
41
- data: z.string(),
42
- // only wav and mp3 are official by OpenAI, rest is taken from Gemini support:
43
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding
44
- format: z.enum([
45
- "x-aac",
46
- "flac",
47
- "mp3",
48
- "m4a",
49
- "mpeg",
50
- "mpga",
51
- "mp4",
52
- "ogg",
53
- "pcm",
54
- "wav",
55
- "webm",
56
- ]),
57
- }),
58
- // Extension origin: OpenRouter/Vercel/Anthropic
59
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
60
- });
61
-
62
- export const ChatCompletionsContentPartSchema = z.discriminatedUnion("type", [
63
- ChatCompletionsContentPartTextSchema,
64
- ChatCompletionsContentPartImageSchema,
65
- ChatCompletionsContentPartFileSchema,
66
- ChatCompletionsContentPartAudioSchema,
67
- ]);
68
- export type ChatCompletionsContentPart = z.infer<typeof ChatCompletionsContentPartSchema>;
69
-
70
- export const ChatCompletionsToolCallSchema = z.object({
71
- type: z.literal("function"),
72
- id: z.string(),
73
- function: z.object({
74
- arguments: z.string(),
75
- name: z.string(),
76
- }),
77
- // Extension origin: Gemini
78
- extra_content: z
79
- .record(z.string(), z.record(z.string(), z.unknown()))
80
- .optional()
81
- .meta({ extension: true }),
82
- });
83
- export type ChatCompletionsToolCall = z.infer<typeof ChatCompletionsToolCallSchema>;
84
-
85
- export const ChatCompletionsSystemMessageSchema = z.object({
86
- role: z.literal("system"),
87
- content: z.string(),
88
- name: z.string().optional(),
89
- // Extension origin: OpenRouter/Vercel/Anthropic
90
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
91
- });
92
- export type ChatCompletionsSystemMessage = z.infer<typeof ChatCompletionsSystemMessageSchema>;
93
-
94
- export const ChatCompletionsUserMessageSchema = z.object({
95
- role: z.literal("user"),
96
- content: z.union([z.string(), z.array(ChatCompletionsContentPartSchema)]),
97
- name: z.string().optional(),
98
- // Extension origin: OpenRouter/Vercel/Anthropic
99
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
100
- });
101
- export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
102
-
103
- export const ChatCompletionsReasoningDetailSchema = z.object({
104
- id: z.string().optional(),
105
- index: z.int().nonnegative(),
106
- type: z.string(),
107
- text: z.string().optional(),
108
- signature: z.string().optional(),
109
- data: z.string().optional(),
110
- summary: z.string().optional(),
111
- format: z.string().optional(),
112
- });
113
- export type ChatCompletionsReasoningDetail = z.infer<typeof ChatCompletionsReasoningDetailSchema>;
114
-
115
- export const ChatCompletionsAssistantMessageSchema = z.object({
116
- role: z.literal("assistant"),
117
- content: z
118
- .union([z.string(), z.null(), z.array(ChatCompletionsContentPartTextSchema)])
119
- .optional(),
120
- name: z.string().optional(),
121
- // FUTURE: This should also support Custom Tool Calls
122
- tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
123
- // Extension origin: OpenRouter/Vercel
124
- reasoning: z.string().optional().meta({ extension: true }),
125
- // Extension origin: OpenRouter/Vercel
126
- reasoning_details: z
127
- .array(ChatCompletionsReasoningDetailSchema)
128
- .optional()
129
- .meta({ extension: true }),
130
- // Extension origin: Gemini
131
- extra_content: z
132
- .record(z.string(), z.record(z.string(), z.unknown()))
133
- .optional()
134
- .meta({ extension: true }),
135
- // Extension origin: OpenRouter/Vercel/Anthropic
136
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
137
- });
138
- export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
139
-
140
- export const ChatCompletionsToolMessageSchema = z.object({
141
- role: z.literal("tool"),
142
- content: z.union([z.string(), z.array(ChatCompletionsContentPartTextSchema)]),
143
- tool_call_id: z.string(),
144
- });
145
- export type ChatCompletionsToolMessage = z.infer<typeof ChatCompletionsToolMessageSchema>;
146
-
147
- export const ChatCompletionsMessageSchema = z.discriminatedUnion("role", [
148
- ChatCompletionsSystemMessageSchema,
149
- ChatCompletionsUserMessageSchema,
150
- ChatCompletionsAssistantMessageSchema,
151
- ChatCompletionsToolMessageSchema,
152
- ]);
153
- export type ChatCompletionsMessage = z.infer<typeof ChatCompletionsMessageSchema>;
154
-
155
- export const ChatCompletionsToolSchema = z.object({
156
- type: z.literal("function"),
157
- function: z.object({
158
- name: z.string(),
159
- description: z.string().optional(),
160
- parameters: z.record(z.string(), z.unknown()),
161
- strict: z.boolean().optional(),
162
- }),
163
- // FUTURE: cache_control support on tools
164
- });
165
- export type ChatCompletionsTool = z.infer<typeof ChatCompletionsToolSchema>;
166
-
167
- const ChatCompletionsNamedFunctionToolChoiceSchema = z.object({
168
- type: z.literal("function"),
169
- function: z.object({
170
- name: z.string(),
171
- }),
172
- });
173
-
174
- const ChatCompletionsAllowedFunctionToolChoiceSchema = z.object({
175
- type: z.literal("allowed_tools"),
176
- allowed_tools: z.object({
177
- mode: z.enum(["auto", "required"]),
178
- tools: z.array(ChatCompletionsNamedFunctionToolChoiceSchema).nonempty(),
179
- }),
180
- });
181
-
182
- export const ChatCompletionsToolChoiceSchema = z.union([
183
- z.enum(["none", "auto", "required", "validated"]),
184
- z.discriminatedUnion("type", [
185
- ChatCompletionsNamedFunctionToolChoiceSchema,
186
- ChatCompletionsAllowedFunctionToolChoiceSchema,
187
- ]),
188
- // FUTURE: Missing CustomTool
189
- ]);
190
- export type ChatCompletionsToolChoice = z.infer<typeof ChatCompletionsToolChoiceSchema>;
191
-
192
- export const ChatCompletionsReasoningEffortSchema = z.enum([
193
- "none",
194
- // Extension origin: Gemini
195
- "minimal",
196
- "low",
197
- "medium",
198
- "high",
199
- "xhigh",
200
- // Extension origin: Anthropic
201
- "max",
202
- ]);
203
- export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
204
-
205
- export const ChatCompletionsReasoningConfigSchema = z.object({
206
- enabled: z.optional(z.boolean()),
207
- effort: z.optional(ChatCompletionsReasoningEffortSchema),
208
- max_tokens: z.optional(z.number()),
209
- exclude: z.optional(z.boolean()),
210
- });
211
- export type ChatCompletionsReasoningConfig = z.infer<typeof ChatCompletionsReasoningConfigSchema>;
212
-
213
- export const ChatCompletionsResponseFormatJsonSchema = z.object({
214
- // FUTURE: consider support for legacy json_object (if demand)
215
- type: z.literal("json_schema"),
216
- json_schema: z.object({
217
- name: z.string(),
218
- description: z.string().optional(),
219
- schema: z.record(z.string(), z.unknown()),
220
- // FUTURE: consider support for non-strict mode (for providers that support it)
221
- strict: z.boolean().optional(),
222
- }),
223
- });
224
- export const ChatCompletionsResponseFormatTextSchema = z.object({
225
- type: z.literal("text"),
226
- });
227
- export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type", [
228
- ChatCompletionsResponseFormatJsonSchema,
229
- ChatCompletionsResponseFormatTextSchema,
230
- ]);
231
- export type ChatCompletionsResponseFormat = z.infer<typeof ChatCompletionsResponseFormatSchema>;
232
-
233
- export const ChatCompletionsMetadataSchema = z.record(
234
- z.string().min(1).max(64),
235
- z.string().max(512),
236
- );
237
- export type ChatCompletionsMetadata = z.infer<typeof ChatCompletionsMetadataSchema>;
238
-
239
- const ChatCompletionsInputsSchema = z.object({
240
- messages: z.array(ChatCompletionsMessageSchema),
241
- tools: z.array(ChatCompletionsToolSchema).optional(),
242
- tool_choice: ChatCompletionsToolChoiceSchema.optional(),
243
- temperature: z.number().min(0).max(2).optional(),
244
- max_tokens: z.int().nonnegative().optional(),
245
- max_completion_tokens: z.int().nonnegative().optional(),
246
- frequency_penalty: z.number().min(-2.0).max(2.0).optional(),
247
- presence_penalty: z.number().min(-2.0).max(2.0).optional(),
248
- seed: z.int().optional(),
249
- stop: z.union([z.string(), z.array(z.string())]).optional(),
250
- top_p: z.number().min(0).max(1.0).optional(),
251
- metadata: ChatCompletionsMetadataSchema.optional(),
252
- response_format: ChatCompletionsResponseFormatSchema.optional(),
253
- reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
254
- prompt_cache_key: z.string().optional(),
255
- prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
256
- // Extension origin: Gemini explicit cache handle
257
- // FUTURE: generalize extra_body handling
258
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
259
- extra_body: z
260
- .object({
261
- google: z
262
- .object({
263
- cached_content: z.string().optional().meta({ extension: true }),
264
- })
265
- .optional(),
266
- })
267
- .optional(),
268
- // Extension origin: OpenRouter/Vercel/Anthropic
269
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
270
- // Extension origin: OpenRouter
271
- reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
272
- });
273
- export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
274
-
275
- export const ChatCompletionsBodySchema = z.looseObject({
276
- model: z.string(),
277
- stream: z.boolean().optional(),
278
- ...ChatCompletionsInputsSchema.shape,
279
- });
280
- export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
281
-
282
- export const ChatCompletionsFinishReasonSchema = z.enum([
283
- "stop",
284
- "length",
285
- "content_filter",
286
- "tool_calls",
287
- ]);
288
- export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
289
-
290
- export const ChatCompletionsChoiceSchema = z.object({
291
- index: z.int().nonnegative(),
292
- message: ChatCompletionsAssistantMessageSchema,
293
- finish_reason: ChatCompletionsFinishReasonSchema,
294
- // FUTURE: model this out
295
- logprobs: z.unknown().optional(),
296
- });
297
- export type ChatCompletionsChoice = z.infer<typeof ChatCompletionsChoiceSchema>;
298
-
299
- export const ChatCompletionsUsageSchema = z.object({
300
- prompt_tokens: z.int().nonnegative().optional(),
301
- completion_tokens: z.int().nonnegative().optional(),
302
- total_tokens: z.int().nonnegative().optional(),
303
- completion_tokens_details: z
304
- .object({
305
- // FUTURE: add missing properties
306
- reasoning_tokens: z.int().nonnegative().optional(),
307
- })
308
- .optional(),
309
- prompt_tokens_details: z
310
- .object({
311
- // FUTURE: add missing properties
312
- cached_tokens: z.int().nonnegative().optional(),
313
- // Extension origin: OpenRouter
314
- cache_write_tokens: z.int().nonnegative().optional().meta({ extension: true }),
315
- })
316
- .optional(),
317
- });
318
- export type ChatCompletionsUsage = z.infer<typeof ChatCompletionsUsageSchema>;
319
-
320
- export const ChatCompletionsSchema = z.object({
321
- id: z.string(),
322
- object: z.literal("chat.completion"),
323
- created: z.int().nonnegative(),
324
- model: z.string(),
325
- choices: z.array(ChatCompletionsChoiceSchema),
326
- usage: ChatCompletionsUsageSchema.nullable(),
327
- // Extension origin: Vercel AI Gateway
328
- provider_metadata: z.unknown().optional().meta({ extension: true }),
329
- });
330
- export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
331
-
332
- export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
333
- index: z.int().nonnegative(),
334
- });
335
- export type ChatCompletionsToolCallDelta = z.infer<typeof ChatCompletionsToolCallDeltaSchema>;
336
-
337
- export const ChatCompletionsAssistantMessageDeltaSchema =
338
- ChatCompletionsAssistantMessageSchema.partial().extend({
339
- tool_calls: z.array(ChatCompletionsToolCallDeltaSchema).optional(),
340
- });
341
- export type ChatCompletionsAssistantMessageDelta = z.infer<
342
- typeof ChatCompletionsAssistantMessageDeltaSchema
343
- >;
344
-
345
- export const ChatCompletionsChoiceDeltaSchema = z.object({
346
- index: z.int().nonnegative(),
347
- delta: ChatCompletionsAssistantMessageDeltaSchema,
348
- finish_reason: ChatCompletionsFinishReasonSchema.nullable(),
349
- // FUTURE: model this out
350
- logprobs: z.unknown().optional(),
351
- });
352
- export type ChatCompletionsChoiceDelta = z.infer<typeof ChatCompletionsChoiceDeltaSchema>;
353
-
354
- export const ChatCompletionsChunkSchema = z.object({
355
- id: z.string(),
356
- object: z.literal("chat.completion.chunk"),
357
- created: z.int().nonnegative(),
358
- model: z.string(),
359
- choices: z.array(ChatCompletionsChoiceDeltaSchema),
360
- usage: ChatCompletionsUsageSchema.nullable(),
361
- // Extension origin: Vercel AI Gateway
362
- provider_metadata: z.unknown().optional().meta({ extension: true }),
363
- });
364
- export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
@@ -1,51 +0,0 @@
1
- import type { JSONObject, SharedV3ProviderOptions } from "@ai-sdk/provider";
2
- import type { EmbedManyResult } from "ai";
3
-
4
- import type { EmbeddingsInputs, EmbeddingsData, EmbeddingsUsage, Embeddings } from "./schema";
5
-
6
- import { toResponse } from "../../utils/response";
7
-
8
- export type EmbedCallOptions = {
9
- values: string[];
10
- providerOptions: SharedV3ProviderOptions;
11
- };
12
-
13
- export function convertToEmbedCallOptions(params: EmbeddingsInputs): EmbedCallOptions {
14
- const { input, ...rest } = params;
15
-
16
- return {
17
- values: Array.isArray(input) ? input : [input],
18
- providerOptions: {
19
- unknown: rest as JSONObject,
20
- },
21
- };
22
- }
23
-
24
- export function toEmbeddings(embedManyResult: EmbedManyResult, modelId: string): Embeddings {
25
- const data: EmbeddingsData[] = embedManyResult.embeddings.map((embedding, index) => ({
26
- object: "embedding",
27
- embedding,
28
- index,
29
- }));
30
-
31
- const usage: EmbeddingsUsage = {
32
- prompt_tokens: embedManyResult.usage.tokens,
33
- total_tokens: embedManyResult.usage.tokens,
34
- };
35
-
36
- return {
37
- object: "list",
38
- data,
39
- model: modelId,
40
- usage,
41
- provider_metadata: embedManyResult.providerMetadata,
42
- };
43
- }
44
-
45
- export function createEmbeddingsResponse(
46
- embedManyResult: EmbedManyResult,
47
- modelId: string,
48
- responseInit?: ResponseInit,
49
- ): Response {
50
- return toResponse(toEmbeddings(embedManyResult, modelId), responseInit);
51
- }
@@ -1,133 +0,0 @@
1
- import { MockEmbeddingModelV3, MockProviderV3 } from "ai/test";
2
- import { describe, expect, test } from "bun:test";
3
-
4
- import { parseResponse, postJson } from "../../../test/helpers/http";
5
- import { embeddings } from "./handler";
6
-
7
- const baseUrl = "http://localhost/embeddings";
8
-
9
- const expectedEmbeddingResponse = (count: number) => ({
10
- object: "list",
11
- data: Array.from({ length: count }, (_, index) => ({
12
- object: "embedding",
13
- embedding: [0.1, 0.2, 0.3],
14
- index,
15
- })),
16
- model: "text-embedding-3-small",
17
- usage: {
18
- prompt_tokens: count * 10,
19
- total_tokens: count * 10,
20
- },
21
- provider_metadata: {
22
- provider: {
23
- key: "value",
24
- },
25
- },
26
- });
27
-
28
- describe("Embeddings Handler", () => {
29
- const endpoint = embeddings({
30
- providers: {
31
- openai: new MockProviderV3({
32
- embeddingModels: {
33
- "text-embedding-3-small": new MockEmbeddingModelV3({
34
- // oxlint-disable-next-line require-await
35
- doEmbed: async (options) => ({
36
- embeddings: options.values.map(() => [0.1, 0.2, 0.3]),
37
- usage: { tokens: 10 },
38
- providerMetadata: { provider: { key: "value" } },
39
- warnings: [],
40
- }),
41
- }),
42
- },
43
- }),
44
- },
45
- models: {
46
- "text-embedding-3-small": {
47
- name: "OpenAI Embedding Model",
48
- modalities: { input: ["text"], output: ["embedding"] },
49
- providers: ["openai"],
50
- },
51
- "gpt-oss-20b": {
52
- name: "GPT-OSS 20B",
53
- modalities: { input: ["text"], output: ["text"] },
54
- providers: ["openai"],
55
- },
56
- },
57
- });
58
-
59
- test("should return 422 if model does not support embeddings", async () => {
60
- const request = postJson(baseUrl, {
61
- model: "gpt-oss-20b",
62
- input: "hello world",
63
- });
64
-
65
- const res = await endpoint.handler(request);
66
- const data = await parseResponse(res);
67
-
68
- expect(data).toMatchObject({
69
- error: {
70
- code: "model_unsupported_operation",
71
- message: "Model 'gpt-oss-20b' does not support 'embedding' output",
72
- type: "invalid_request_error",
73
- },
74
- });
75
- });
76
-
77
- test("should generate embeddings for a single string", async () => {
78
- const request = postJson(baseUrl, {
79
- model: "text-embedding-3-small",
80
- input: "hello world",
81
- });
82
-
83
- const res = await endpoint.handler(request);
84
- const data = await parseResponse(res);
85
-
86
- expect(data).toEqual(expectedEmbeddingResponse(1));
87
- });
88
-
89
- test("should generate embeddings for an array of strings", async () => {
90
- const request = postJson(baseUrl, {
91
- model: "text-embedding-3-small",
92
- input: ["hello", "world"],
93
- });
94
-
95
- const res = await endpoint.handler(request);
96
- const data = await parseResponse(res);
97
-
98
- expect(data).toEqual(expectedEmbeddingResponse(2));
99
- });
100
-
101
- test("should return 422 if input is missing", async () => {
102
- const request = postJson(baseUrl, {
103
- model: "text-embedding-3-small",
104
- });
105
-
106
- const res = await endpoint.handler(request);
107
- const data = await parseResponse(res);
108
-
109
- expect(data).toMatchObject({
110
- error: {
111
- code: "bad_request",
112
- message: "✖ Invalid input\n → at input",
113
- type: "invalid_request_error",
114
- param: "",
115
- },
116
- });
117
- });
118
-
119
- test("should return 'Method Not Allowed' for GET request", async () => {
120
- const request = new Request(baseUrl, { method: "GET" });
121
-
122
- const res = await endpoint.handler(request);
123
- const data = await parseResponse(res);
124
-
125
- expect(data).toMatchObject({
126
- error: {
127
- code: "method_not_allowed",
128
- message: "Method Not Allowed",
129
- type: "invalid_request_error",
130
- },
131
- });
132
- });
133
- });
@@ -1,137 +0,0 @@
1
- import { embedMany, wrapEmbeddingModel } from "ai";
2
- import * as z from "zod/mini";
3
-
4
- import type {
5
- AfterHookContext,
6
- BeforeHookContext,
7
- GatewayConfig,
8
- Endpoint,
9
- GatewayContext,
10
- ResolveProviderHookContext,
11
- ResolveModelHookContext,
12
- } from "../../types";
13
-
14
- import { GatewayError } from "../../errors/gateway";
15
- import { winterCgHandler } from "../../lifecycle";
16
- import { logger } from "../../logger";
17
- import { modelMiddlewareMatcher } from "../../middleware/matcher";
18
- import { resolveProvider } from "../../providers/registry";
19
- import {
20
- getGenAiGeneralAttributes,
21
- recordTimePerOutputToken,
22
- recordTokenUsage,
23
- } from "../../telemetry/gen-ai";
24
- import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
25
- import { prepareForwardHeaders } from "../../utils/request";
26
- import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
27
- import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from "./otel";
28
- import { EmbeddingsBodySchema, type EmbeddingsBody } from "./schema";
29
-
30
- export const embeddings = (config: GatewayConfig): Endpoint => {
31
- const hooks = config.hooks;
32
-
33
- const handler = async (ctx: GatewayContext) => {
34
- const start = performance.now();
35
- ctx.operation = "embeddings";
36
- addSpanEvent("hebo.handler.started");
37
-
38
- // Guard: enforce HTTP method early.
39
- if (!ctx.request || ctx.request.method !== "POST") {
40
- throw new GatewayError("Method Not Allowed", 405);
41
- }
42
-
43
- // Parse + validate input.
44
- try {
45
- ctx.body = await ctx.request.json();
46
- } catch {
47
- throw new GatewayError("Invalid JSON", 400);
48
- }
49
- logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[chat] EmbeddingsBody");
50
- addSpanEvent("hebo.request.deserialized");
51
-
52
- const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
53
- if (!parsed.success) {
54
- // FUTURE: consider adding body shape to metadata
55
- throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
56
- }
57
- ctx.body = parsed.data;
58
- addSpanEvent("hebo.request.parsed");
59
-
60
- if (hooks?.before) {
61
- ctx.body = ((await hooks.before(ctx as BeforeHookContext)) as EmbeddingsBody) ?? ctx.body;
62
- addSpanEvent("hebo.hooks.before.completed");
63
- }
64
-
65
- // Resolve model + provider (hooks may override defaults).
66
- let inputs;
67
- ({ model: ctx.modelId, ...inputs } = ctx.body);
68
-
69
- ctx.resolvedModelId =
70
- (await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
71
- logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
72
- addSpanEvent("hebo.model.resolved");
73
-
74
- const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
75
- ctx.provider =
76
- override ??
77
- resolveProvider({
78
- providers: ctx.providers,
79
- models: ctx.models,
80
- modelId: ctx.resolvedModelId,
81
- operation: ctx.operation,
82
- });
83
-
84
- const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
85
- ctx.resolvedProviderId = embeddingModel.provider;
86
- logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
87
- addSpanEvent("hebo.provider.resolved");
88
-
89
- const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
90
- const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
91
- setSpanAttributes(genAiGeneralAttrs);
92
-
93
- // Convert inputs to AI SDK call options.
94
- const embedOptions = convertToEmbedCallOptions(inputs);
95
- logger.trace(
96
- { requestId: ctx.requestId, options: embedOptions },
97
- "[embeddings] AI SDK options",
98
- );
99
- addSpanEvent("hebo.options.prepared");
100
- setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body, genAiSignalLevel));
101
-
102
- // Build middleware chain (model -> forward params -> provider).
103
- const embeddingModelWithMiddleware = wrapEmbeddingModel({
104
- model: embeddingModel,
105
- middleware: modelMiddlewareMatcher.forEmbedding(ctx.resolvedModelId, embeddingModel.provider),
106
- });
107
-
108
- // Execute request.
109
- addSpanEvent("hebo.ai-sdk.started");
110
- const result = await embedMany({
111
- model: embeddingModelWithMiddleware,
112
- headers: prepareForwardHeaders(ctx.request),
113
- abortSignal: ctx.request.signal,
114
- ...embedOptions,
115
- });
116
- logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
117
- addSpanEvent("hebo.ai-sdk.completed");
118
-
119
- // Transform result.
120
- ctx.result = toEmbeddings(result, ctx.modelId);
121
- logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] Embeddings");
122
- addSpanEvent("hebo.result.transformed");
123
- const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
124
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
125
- setSpanAttributes(genAiResponseAttrs);
126
-
127
- if (hooks?.after) {
128
- ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
129
- addSpanEvent("hebo.hooks.after.completed");
130
- }
131
-
132
- recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
133
- return ctx.result;
134
- };
135
-
136
- return { handler: winterCgHandler(handler, config) };
137
- };
@@ -1,4 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
4
- export * from "./otel";