@hebo-ai/gateway 0.6.2 → 0.8.0-rc0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/README.md +134 -7
  2. package/package.json +46 -1
  3. package/dist/config.d.ts +0 -2
  4. package/dist/config.js +0 -81
  5. package/dist/endpoints/chat-completions/converters.d.ts +0 -43
  6. package/dist/endpoints/chat-completions/converters.js +0 -625
  7. package/dist/endpoints/chat-completions/handler.d.ts +0 -2
  8. package/dist/endpoints/chat-completions/handler.js +0 -149
  9. package/dist/endpoints/chat-completions/index.d.ts +0 -4
  10. package/dist/endpoints/chat-completions/index.js +0 -4
  11. package/dist/endpoints/chat-completions/otel.d.ts +0 -5
  12. package/dist/endpoints/chat-completions/otel.js +0 -171
  13. package/dist/endpoints/chat-completions/schema.d.ts +0 -1188
  14. package/dist/endpoints/chat-completions/schema.js +0 -298
  15. package/dist/endpoints/embeddings/converters.d.ts +0 -10
  16. package/dist/endpoints/embeddings/converters.js +0 -31
  17. package/dist/endpoints/embeddings/handler.d.ts +0 -2
  18. package/dist/endpoints/embeddings/handler.js +0 -104
  19. package/dist/endpoints/embeddings/index.d.ts +0 -4
  20. package/dist/endpoints/embeddings/index.js +0 -4
  21. package/dist/endpoints/embeddings/otel.d.ts +0 -5
  22. package/dist/endpoints/embeddings/otel.js +0 -29
  23. package/dist/endpoints/embeddings/schema.d.ts +0 -44
  24. package/dist/endpoints/embeddings/schema.js +0 -29
  25. package/dist/endpoints/models/converters.d.ts +0 -6
  26. package/dist/endpoints/models/converters.js +0 -42
  27. package/dist/endpoints/models/handler.d.ts +0 -2
  28. package/dist/endpoints/models/handler.js +0 -29
  29. package/dist/endpoints/models/index.d.ts +0 -3
  30. package/dist/endpoints/models/index.js +0 -3
  31. package/dist/endpoints/models/schema.d.ts +0 -42
  32. package/dist/endpoints/models/schema.js +0 -31
  33. package/dist/errors/ai-sdk.d.ts +0 -2
  34. package/dist/errors/ai-sdk.js +0 -52
  35. package/dist/errors/gateway.d.ts +0 -5
  36. package/dist/errors/gateway.js +0 -13
  37. package/dist/errors/openai.d.ts +0 -20
  38. package/dist/errors/openai.js +0 -40
  39. package/dist/errors/utils.d.ts +0 -22
  40. package/dist/errors/utils.js +0 -44
  41. package/dist/gateway.d.ts +0 -9
  42. package/dist/gateway.js +0 -40
  43. package/dist/index.d.ts +0 -14
  44. package/dist/index.js +0 -13
  45. package/dist/lifecycle.d.ts +0 -2
  46. package/dist/lifecycle.js +0 -98
  47. package/dist/logger/default.d.ts +0 -4
  48. package/dist/logger/default.js +0 -81
  49. package/dist/logger/index.d.ts +0 -11
  50. package/dist/logger/index.js +0 -25
  51. package/dist/middleware/common.d.ts +0 -12
  52. package/dist/middleware/common.js +0 -146
  53. package/dist/middleware/debug.d.ts +0 -3
  54. package/dist/middleware/debug.js +0 -27
  55. package/dist/middleware/matcher.d.ts +0 -28
  56. package/dist/middleware/matcher.js +0 -118
  57. package/dist/middleware/utils.d.ts +0 -2
  58. package/dist/middleware/utils.js +0 -24
  59. package/dist/models/amazon/index.d.ts +0 -2
  60. package/dist/models/amazon/index.js +0 -2
  61. package/dist/models/amazon/middleware.d.ts +0 -3
  62. package/dist/models/amazon/middleware.js +0 -68
  63. package/dist/models/amazon/presets.d.ts +0 -345
  64. package/dist/models/amazon/presets.js +0 -80
  65. package/dist/models/anthropic/index.d.ts +0 -2
  66. package/dist/models/anthropic/index.js +0 -2
  67. package/dist/models/anthropic/middleware.d.ts +0 -5
  68. package/dist/models/anthropic/middleware.js +0 -127
  69. package/dist/models/anthropic/presets.d.ts +0 -711
  70. package/dist/models/anthropic/presets.js +0 -135
  71. package/dist/models/catalog.d.ts +0 -4
  72. package/dist/models/catalog.js +0 -8
  73. package/dist/models/cohere/index.d.ts +0 -2
  74. package/dist/models/cohere/index.js +0 -2
  75. package/dist/models/cohere/middleware.d.ts +0 -3
  76. package/dist/models/cohere/middleware.js +0 -62
  77. package/dist/models/cohere/presets.d.ts +0 -411
  78. package/dist/models/cohere/presets.js +0 -134
  79. package/dist/models/google/index.d.ts +0 -2
  80. package/dist/models/google/index.js +0 -2
  81. package/dist/models/google/middleware.d.ts +0 -8
  82. package/dist/models/google/middleware.js +0 -111
  83. package/dist/models/google/presets.d.ts +0 -375
  84. package/dist/models/google/presets.js +0 -82
  85. package/dist/models/meta/index.d.ts +0 -1
  86. package/dist/models/meta/index.js +0 -1
  87. package/dist/models/meta/presets.d.ts +0 -483
  88. package/dist/models/meta/presets.js +0 -95
  89. package/dist/models/openai/index.d.ts +0 -2
  90. package/dist/models/openai/index.js +0 -2
  91. package/dist/models/openai/middleware.d.ts +0 -4
  92. package/dist/models/openai/middleware.js +0 -88
  93. package/dist/models/openai/presets.d.ts +0 -959
  94. package/dist/models/openai/presets.js +0 -213
  95. package/dist/models/types.d.ts +0 -20
  96. package/dist/models/types.js +0 -84
  97. package/dist/models/voyage/index.d.ts +0 -2
  98. package/dist/models/voyage/index.js +0 -2
  99. package/dist/models/voyage/middleware.d.ts +0 -2
  100. package/dist/models/voyage/middleware.js +0 -19
  101. package/dist/models/voyage/presets.d.ts +0 -436
  102. package/dist/models/voyage/presets.js +0 -85
  103. package/dist/providers/anthropic/canonical.d.ts +0 -3
  104. package/dist/providers/anthropic/canonical.js +0 -9
  105. package/dist/providers/anthropic/index.d.ts +0 -1
  106. package/dist/providers/anthropic/index.js +0 -1
  107. package/dist/providers/bedrock/canonical.d.ts +0 -17
  108. package/dist/providers/bedrock/canonical.js +0 -61
  109. package/dist/providers/bedrock/index.d.ts +0 -2
  110. package/dist/providers/bedrock/index.js +0 -2
  111. package/dist/providers/bedrock/middleware.d.ts +0 -4
  112. package/dist/providers/bedrock/middleware.js +0 -104
  113. package/dist/providers/cohere/canonical.d.ts +0 -3
  114. package/dist/providers/cohere/canonical.js +0 -17
  115. package/dist/providers/cohere/index.d.ts +0 -1
  116. package/dist/providers/cohere/index.js +0 -1
  117. package/dist/providers/groq/canonical.d.ts +0 -3
  118. package/dist/providers/groq/canonical.js +0 -12
  119. package/dist/providers/groq/index.d.ts +0 -1
  120. package/dist/providers/groq/index.js +0 -1
  121. package/dist/providers/openai/canonical.d.ts +0 -3
  122. package/dist/providers/openai/canonical.js +0 -8
  123. package/dist/providers/openai/index.d.ts +0 -1
  124. package/dist/providers/openai/index.js +0 -1
  125. package/dist/providers/registry.d.ts +0 -24
  126. package/dist/providers/registry.js +0 -103
  127. package/dist/providers/types.d.ts +0 -7
  128. package/dist/providers/types.js +0 -11
  129. package/dist/providers/vertex/canonical.d.ts +0 -3
  130. package/dist/providers/vertex/canonical.js +0 -8
  131. package/dist/providers/vertex/index.d.ts +0 -1
  132. package/dist/providers/vertex/index.js +0 -1
  133. package/dist/providers/voyage/canonical.d.ts +0 -3
  134. package/dist/providers/voyage/canonical.js +0 -7
  135. package/dist/providers/voyage/index.d.ts +0 -1
  136. package/dist/providers/voyage/index.js +0 -1
  137. package/dist/telemetry/ai-sdk.d.ts +0 -2
  138. package/dist/telemetry/ai-sdk.js +0 -31
  139. package/dist/telemetry/baggage.d.ts +0 -1
  140. package/dist/telemetry/baggage.js +0 -24
  141. package/dist/telemetry/fetch.d.ts +0 -2
  142. package/dist/telemetry/fetch.js +0 -49
  143. package/dist/telemetry/gen-ai.d.ts +0 -6
  144. package/dist/telemetry/gen-ai.js +0 -78
  145. package/dist/telemetry/http.d.ts +0 -3
  146. package/dist/telemetry/http.js +0 -54
  147. package/dist/telemetry/index.d.ts +0 -1
  148. package/dist/telemetry/index.js +0 -1
  149. package/dist/telemetry/memory.d.ts +0 -2
  150. package/dist/telemetry/memory.js +0 -43
  151. package/dist/telemetry/span.d.ts +0 -13
  152. package/dist/telemetry/span.js +0 -60
  153. package/dist/telemetry/stream.d.ts +0 -3
  154. package/dist/telemetry/stream.js +0 -58
  155. package/dist/types.d.ts +0 -176
  156. package/dist/types.js +0 -1
  157. package/dist/utils/env.d.ts +0 -2
  158. package/dist/utils/env.js +0 -7
  159. package/dist/utils/headers.d.ts +0 -4
  160. package/dist/utils/headers.js +0 -22
  161. package/dist/utils/preset.d.ts +0 -10
  162. package/dist/utils/preset.js +0 -42
  163. package/dist/utils/request.d.ts +0 -2
  164. package/dist/utils/request.js +0 -43
  165. package/dist/utils/response.d.ts +0 -3
  166. package/dist/utils/response.js +0 -70
  167. package/dist/utils/url.d.ts +0 -4
  168. package/dist/utils/url.js +0 -21
@@ -1,298 +0,0 @@
1
- import * as z from "zod";
2
- export const ChatCompletionsCacheControlSchema = z.object({
3
- type: z.literal("ephemeral"),
4
- ttl: z.string().optional(),
5
- });
6
- export const ChatCompletionsContentPartTextSchema = z.object({
7
- type: z.literal("text"),
8
- text: z.string(),
9
- // Extension origin: Anthropic/OpenRouter/Vercel
10
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
11
- });
12
- export const ChatCompletionsContentPartImageSchema = z.object({
13
- type: z.literal("image_url"),
14
- image_url: z.object({
15
- url: z.string(),
16
- detail: z.enum(["low", "high", "auto"]).optional(),
17
- }),
18
- // Extension origin: OpenRouter/Vercel/Anthropic
19
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
20
- });
21
- export const ChatCompletionsContentPartFileSchema = z.object({
22
- type: z.literal("file"),
23
- file: z.object({
24
- data: z.string(),
25
- media_type: z.string(),
26
- filename: z.string().optional(),
27
- }),
28
- // Extension origin: OpenRouter/Vercel/Anthropic
29
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
30
- });
31
- export const ChatCompletionsContentPartAudioSchema = z.object({
32
- type: z.literal("input_audio"),
33
- input_audio: z.object({
34
- data: z.string(),
35
- // only wav and mp3 are official by OpenAI, rest is taken from Gemini support:
36
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding
37
- format: z.enum([
38
- "x-aac",
39
- "flac",
40
- "mp3",
41
- "m4a",
42
- "mpeg",
43
- "mpga",
44
- "mp4",
45
- "ogg",
46
- "pcm",
47
- "wav",
48
- "webm",
49
- ]),
50
- }),
51
- // Extension origin: OpenRouter/Vercel/Anthropic
52
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
53
- });
54
- export const ChatCompletionsContentPartSchema = z.discriminatedUnion("type", [
55
- ChatCompletionsContentPartTextSchema,
56
- ChatCompletionsContentPartImageSchema,
57
- ChatCompletionsContentPartFileSchema,
58
- ChatCompletionsContentPartAudioSchema,
59
- ]);
60
- export const ChatCompletionsToolCallSchema = z.object({
61
- type: z.literal("function"),
62
- id: z.string(),
63
- function: z.object({
64
- arguments: z.string(),
65
- name: z.string(),
66
- }),
67
- // Extension origin: Gemini
68
- extra_content: z
69
- .record(z.string(), z.record(z.string(), z.unknown()))
70
- .optional()
71
- .meta({ extension: true }),
72
- });
73
- export const ChatCompletionsSystemMessageSchema = z.object({
74
- role: z.literal("system"),
75
- content: z.string(),
76
- name: z.string().optional(),
77
- // Extension origin: OpenRouter/Vercel/Anthropic
78
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
79
- });
80
- export const ChatCompletionsUserMessageSchema = z.object({
81
- role: z.literal("user"),
82
- content: z.union([z.string(), z.array(ChatCompletionsContentPartSchema)]),
83
- name: z.string().optional(),
84
- // Extension origin: OpenRouter/Vercel/Anthropic
85
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
86
- });
87
- export const ChatCompletionsReasoningDetailSchema = z.object({
88
- id: z.string().optional(),
89
- index: z.int().nonnegative(),
90
- type: z.string(),
91
- text: z.string().optional(),
92
- signature: z.string().optional(),
93
- data: z.string().optional(),
94
- summary: z.string().optional(),
95
- format: z.string().optional(),
96
- });
97
- export const ChatCompletionsAssistantMessageSchema = z.object({
98
- role: z.literal("assistant"),
99
- content: z
100
- .union([z.string(), z.null(), z.array(ChatCompletionsContentPartTextSchema)])
101
- .optional(),
102
- name: z.string().optional(),
103
- // FUTURE: This should also support Custom Tool Calls
104
- tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
105
- // Extension origin: OpenRouter/Vercel
106
- reasoning: z.string().optional().meta({ extension: true }),
107
- // Extension origin: OpenRouter/Vercel
108
- reasoning_details: z
109
- .array(ChatCompletionsReasoningDetailSchema)
110
- .optional()
111
- .meta({ extension: true }),
112
- // Extension origin: Gemini
113
- extra_content: z
114
- .record(z.string(), z.record(z.string(), z.unknown()))
115
- .optional()
116
- .meta({ extension: true }),
117
- // Extension origin: OpenRouter/Vercel/Anthropic
118
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
119
- });
120
- export const ChatCompletionsToolMessageSchema = z.object({
121
- role: z.literal("tool"),
122
- content: z.union([z.string(), z.array(ChatCompletionsContentPartTextSchema)]),
123
- tool_call_id: z.string(),
124
- });
125
- export const ChatCompletionsMessageSchema = z.discriminatedUnion("role", [
126
- ChatCompletionsSystemMessageSchema,
127
- ChatCompletionsUserMessageSchema,
128
- ChatCompletionsAssistantMessageSchema,
129
- ChatCompletionsToolMessageSchema,
130
- ]);
131
- export const ChatCompletionsToolSchema = z.object({
132
- type: z.literal("function"),
133
- function: z.object({
134
- name: z.string(),
135
- description: z.string().optional(),
136
- parameters: z.record(z.string(), z.unknown()),
137
- strict: z.boolean().optional(),
138
- }),
139
- // FUTURE: cache_control support on tools
140
- });
141
- const ChatCompletionsNamedFunctionToolChoiceSchema = z.object({
142
- type: z.literal("function"),
143
- function: z.object({
144
- name: z.string(),
145
- }),
146
- });
147
- const ChatCompletionsAllowedFunctionToolChoiceSchema = z.object({
148
- type: z.literal("allowed_tools"),
149
- allowed_tools: z.object({
150
- mode: z.enum(["auto", "required"]),
151
- tools: z.array(ChatCompletionsNamedFunctionToolChoiceSchema).nonempty(),
152
- }),
153
- });
154
- export const ChatCompletionsToolChoiceSchema = z.union([
155
- z.enum(["none", "auto", "required", "validated"]),
156
- z.discriminatedUnion("type", [
157
- ChatCompletionsNamedFunctionToolChoiceSchema,
158
- ChatCompletionsAllowedFunctionToolChoiceSchema,
159
- ]),
160
- // FUTURE: Missing CustomTool
161
- ]);
162
- export const ChatCompletionsReasoningEffortSchema = z.enum([
163
- "none",
164
- "minimal",
165
- "low",
166
- "medium",
167
- "high",
168
- "xhigh",
169
- ]);
170
- export const ChatCompletionsReasoningConfigSchema = z.object({
171
- enabled: z.optional(z.boolean()),
172
- effort: z.optional(ChatCompletionsReasoningEffortSchema),
173
- max_tokens: z.optional(z.number()),
174
- exclude: z.optional(z.boolean()),
175
- });
176
- export const ChatCompletionsResponseFormatJsonSchema = z.object({
177
- // FUTURE: consider support for legacy json_object (if demand)
178
- type: z.literal("json_schema"),
179
- json_schema: z.object({
180
- name: z.string(),
181
- description: z.string().optional(),
182
- schema: z.record(z.string(), z.unknown()),
183
- // FUTURE: consider support for non-strict mode (for providers that support it)
184
- strict: z.boolean().optional(),
185
- }),
186
- });
187
- export const ChatCompletionsResponseFormatTextSchema = z.object({
188
- type: z.literal("text"),
189
- });
190
- export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type", [
191
- ChatCompletionsResponseFormatJsonSchema,
192
- ChatCompletionsResponseFormatTextSchema,
193
- ]);
194
- export const ChatCompletionsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
195
- const ChatCompletionsInputsSchema = z.object({
196
- messages: z.array(ChatCompletionsMessageSchema),
197
- tools: z.array(ChatCompletionsToolSchema).optional(),
198
- tool_choice: ChatCompletionsToolChoiceSchema.optional(),
199
- temperature: z.number().min(0).max(2).optional(),
200
- max_tokens: z.int().nonnegative().optional(),
201
- max_completion_tokens: z.int().nonnegative().optional(),
202
- frequency_penalty: z.number().min(-2.0).max(2.0).optional(),
203
- presence_penalty: z.number().min(-2.0).max(2.0).optional(),
204
- seed: z.int().optional(),
205
- stop: z.union([z.string(), z.array(z.string())]).optional(),
206
- top_p: z.number().min(0).max(1.0).optional(),
207
- metadata: ChatCompletionsMetadataSchema.optional(),
208
- response_format: ChatCompletionsResponseFormatSchema.optional(),
209
- reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
210
- prompt_cache_key: z.string().optional(),
211
- prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
212
- // Extension origin: OpenRouter/Vercel/Anthropic
213
- cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
214
- // Extension origin: OpenRouter
215
- reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
216
- // Extension origin: Gemini extra_body
217
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
218
- extra_body: z
219
- .record(z.string(), z.record(z.string(), z.unknown()))
220
- .optional()
221
- .meta({ extension: true }),
222
- });
223
- export const ChatCompletionsBodySchema = z.looseObject({
224
- model: z.string(),
225
- stream: z.boolean().optional(),
226
- ...ChatCompletionsInputsSchema.shape,
227
- });
228
- export const ChatCompletionsFinishReasonSchema = z.enum([
229
- "stop",
230
- "length",
231
- "content_filter",
232
- "tool_calls",
233
- ]);
234
- export const ChatCompletionsChoiceSchema = z.object({
235
- index: z.int().nonnegative(),
236
- message: ChatCompletionsAssistantMessageSchema,
237
- finish_reason: ChatCompletionsFinishReasonSchema,
238
- // FUTURE: model this out
239
- logprobs: z.unknown().optional(),
240
- });
241
- export const ChatCompletionsUsageSchema = z.object({
242
- prompt_tokens: z.int().nonnegative().optional(),
243
- completion_tokens: z.int().nonnegative().optional(),
244
- total_tokens: z.int().nonnegative().optional(),
245
- completion_tokens_details: z
246
- .object({
247
- // FUTURE: add missing properties
248
- reasoning_tokens: z.int().nonnegative().optional(),
249
- })
250
- .optional(),
251
- prompt_tokens_details: z
252
- .object({
253
- // FUTURE: add missing properties
254
- cached_tokens: z.int().nonnegative().optional(),
255
- // Extension origin: OpenRouter
256
- cache_write_tokens: z.int().nonnegative().optional().meta({ extension: true }),
257
- })
258
- .optional(),
259
- });
260
- export const ChatCompletionsSchema = z.object({
261
- id: z.string(),
262
- object: z.literal("chat.completion"),
263
- created: z.int().nonnegative(),
264
- model: z.string(),
265
- choices: z.array(ChatCompletionsChoiceSchema),
266
- usage: ChatCompletionsUsageSchema.nullable(),
267
- // Extension origin: Vercel AI Gateway
268
- provider_metadata: z
269
- .record(z.string(), z.record(z.string(), z.unknown()))
270
- .optional()
271
- .meta({ extension: true }),
272
- });
273
- export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
274
- index: z.int().nonnegative(),
275
- });
276
- export const ChatCompletionsAssistantMessageDeltaSchema = ChatCompletionsAssistantMessageSchema.partial().extend({
277
- tool_calls: z.array(ChatCompletionsToolCallDeltaSchema).optional(),
278
- });
279
- export const ChatCompletionsChoiceDeltaSchema = z.object({
280
- index: z.int().nonnegative(),
281
- delta: ChatCompletionsAssistantMessageDeltaSchema,
282
- finish_reason: ChatCompletionsFinishReasonSchema.nullable(),
283
- // FUTURE: model this out
284
- logprobs: z.unknown().optional(),
285
- });
286
- export const ChatCompletionsChunkSchema = z.object({
287
- id: z.string(),
288
- object: z.literal("chat.completion.chunk"),
289
- created: z.int().nonnegative(),
290
- model: z.string(),
291
- choices: z.array(ChatCompletionsChoiceDeltaSchema),
292
- usage: ChatCompletionsUsageSchema.nullable(),
293
- // Extension origin: Vercel AI Gateway
294
- provider_metadata: z
295
- .record(z.string(), z.record(z.string(), z.unknown()))
296
- .optional()
297
- .meta({ extension: true }),
298
- });
@@ -1,10 +0,0 @@
1
- import type { SharedV3ProviderOptions } from "@ai-sdk/provider";
2
- import type { EmbedManyResult } from "ai";
3
- import type { EmbeddingsInputs, Embeddings } from "./schema";
4
- export type EmbedCallOptions = {
5
- values: string[];
6
- providerOptions: SharedV3ProviderOptions;
7
- };
8
- export declare function convertToEmbedCallOptions(params: EmbeddingsInputs): EmbedCallOptions;
9
- export declare function toEmbeddings(embedManyResult: EmbedManyResult, modelId: string): Embeddings;
10
- export declare function createEmbeddingsResponse(embedManyResult: EmbedManyResult, modelId: string, responseInit?: ResponseInit): Response;
@@ -1,31 +0,0 @@
1
- import { toResponse } from "../../utils/response";
2
- export function convertToEmbedCallOptions(params) {
3
- const { input, ...rest } = params;
4
- return {
5
- values: Array.isArray(input) ? input : [input],
6
- providerOptions: {
7
- unknown: rest,
8
- },
9
- };
10
- }
11
- export function toEmbeddings(embedManyResult, modelId) {
12
- const data = embedManyResult.embeddings.map((embedding, index) => ({
13
- object: "embedding",
14
- embedding,
15
- index,
16
- }));
17
- const usage = {
18
- prompt_tokens: embedManyResult.usage.tokens,
19
- total_tokens: embedManyResult.usage.tokens,
20
- };
21
- return {
22
- object: "list",
23
- data,
24
- model: modelId,
25
- usage,
26
- provider_metadata: embedManyResult.providerMetadata,
27
- };
28
- }
29
- export function createEmbeddingsResponse(embedManyResult, modelId, responseInit) {
30
- return toResponse(toEmbeddings(embedManyResult, modelId), responseInit);
31
- }
@@ -1,2 +0,0 @@
1
- import type { GatewayConfig, Endpoint } from "../../types";
2
- export declare const embeddings: (config: GatewayConfig) => Endpoint;
@@ -1,104 +0,0 @@
1
- import { embedMany, wrapEmbeddingModel } from "ai";
2
- import * as z from "zod/mini";
3
- import { GatewayError } from "../../errors/gateway";
4
- import { winterCgHandler } from "../../lifecycle";
5
- import { logger } from "../../logger";
6
- import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
- import { resolveProvider } from "../../providers/registry";
8
- import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
- import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
- import { prepareForwardHeaders } from "../../utils/request";
11
- import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
12
- import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from "./otel";
13
- import { EmbeddingsBodySchema } from "./schema";
14
- export const embeddings = (config) => {
15
- const hooks = config.hooks;
16
- const handler = async (ctx) => {
17
- const start = performance.now();
18
- ctx.operation = "embeddings";
19
- addSpanEvent("hebo.handler.started");
20
- // Guard: enforce HTTP method early.
21
- if (!ctx.request || ctx.request.method !== "POST") {
22
- throw new GatewayError("Method Not Allowed", 405);
23
- }
24
- // Parse + validate input.
25
- try {
26
- // oxlint-disable-next-line no-unsafe-assignment
27
- ctx.body = await ctx.request.json();
28
- }
29
- catch {
30
- throw new GatewayError("Invalid JSON", 400);
31
- }
32
- logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[chat] EmbeddingsBody");
33
- addSpanEvent("hebo.request.deserialized");
34
- const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
35
- if (!parsed.success) {
36
- // FUTURE: consider adding body shape to metadata
37
- throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
38
- }
39
- ctx.body = parsed.data;
40
- addSpanEvent("hebo.request.parsed");
41
- if (hooks?.before) {
42
- ctx.body = (await hooks.before(ctx)) ?? ctx.body;
43
- addSpanEvent("hebo.hooks.before.completed");
44
- }
45
- // Resolve model + provider (hooks may override defaults).
46
- let inputs;
47
- ({ model: ctx.modelId, ...inputs } = ctx.body);
48
- ctx.resolvedModelId =
49
- (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
50
- logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
51
- addSpanEvent("hebo.model.resolved");
52
- const override = await hooks?.resolveProvider?.(ctx);
53
- ctx.provider =
54
- override ??
55
- resolveProvider({
56
- providers: ctx.providers,
57
- models: ctx.models,
58
- modelId: ctx.resolvedModelId,
59
- operation: ctx.operation,
60
- });
61
- const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
62
- ctx.resolvedProviderId = embeddingModel.provider;
63
- logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
64
- addSpanEvent("hebo.provider.resolved");
65
- const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
66
- const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
67
- setSpanAttributes(genAiGeneralAttrs);
68
- // Convert inputs to AI SDK call options.
69
- // oxlint-disable-next-line no-unsafe-argument
70
- const embedOptions = convertToEmbedCallOptions(inputs);
71
- logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
72
- addSpanEvent("hebo.options.prepared");
73
- setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body, genAiSignalLevel));
74
- // Build middleware chain (model -> forward params -> provider).
75
- const embeddingModelWithMiddleware = wrapEmbeddingModel({
76
- model: embeddingModel,
77
- middleware: modelMiddlewareMatcher.forEmbedding(ctx.resolvedModelId, embeddingModel.provider),
78
- });
79
- // Execute request.
80
- addSpanEvent("hebo.ai-sdk.started");
81
- const result = await embedMany({
82
- model: embeddingModelWithMiddleware,
83
- headers: prepareForwardHeaders(ctx.request),
84
- abortSignal: ctx.request.signal,
85
- ...embedOptions,
86
- });
87
- logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
88
- addSpanEvent("hebo.ai-sdk.completed");
89
- // Transform result.
90
- ctx.result = toEmbeddings(result, ctx.modelId);
91
- logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] Embeddings");
92
- addSpanEvent("hebo.result.transformed");
93
- const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
94
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
95
- setSpanAttributes(genAiResponseAttrs);
96
- if (hooks?.after) {
97
- ctx.result = (await hooks.after(ctx)) ?? ctx.result;
98
- addSpanEvent("hebo.hooks.after.completed");
99
- }
100
- recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
101
- return ctx.result;
102
- };
103
- return { handler: winterCgHandler(handler, config) };
104
- };
@@ -1,4 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
4
- export * from "./otel";
@@ -1,4 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
4
- export * from "./otel";
@@ -1,5 +0,0 @@
1
- import type { Attributes } from "@opentelemetry/api";
2
- import type { Embeddings, EmbeddingsInputs } from "./schema";
3
- import { type TelemetrySignalLevel } from "../../types";
4
- export declare const getEmbeddingsRequestAttributes: (inputs: EmbeddingsInputs, signalLevel?: TelemetrySignalLevel) => Attributes;
5
- export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
@@ -1,29 +0,0 @@
1
- import {} from "../../types";
2
- export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
3
- if (!signalLevel || signalLevel === "off")
4
- return {};
5
- const attrs = {};
6
- if (signalLevel !== "required") {
7
- Object.assign(attrs, {
8
- "gen_ai.embeddings.dimension.count": inputs.dimensions,
9
- });
10
- if (inputs.metadata) {
11
- for (const key in inputs.metadata) {
12
- attrs[`gen_ai.request.metadata.${key}`] = inputs.metadata[key];
13
- }
14
- }
15
- }
16
- return attrs;
17
- };
18
- export const getEmbeddingsResponseAttributes = (embeddings, signalLevel) => {
19
- if (!signalLevel || signalLevel === "off")
20
- return {};
21
- const attrs = {};
22
- if (signalLevel !== "required") {
23
- Object.assign(attrs, {
24
- "gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
25
- "gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
26
- });
27
- }
28
- return attrs;
29
- };
@@ -1,44 +0,0 @@
1
- import * as z from "zod";
2
- export declare const EmbeddingsDimensionsSchema: z.ZodInt;
3
- export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
4
- export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
5
- export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
6
- export declare const EmbeddingsInputsSchema: z.ZodObject<{
7
- input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
8
- dimensions: z.ZodOptional<z.ZodInt>;
9
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
10
- }, z.core.$strip>;
11
- export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
12
- export declare const EmbeddingsBodySchema: z.ZodObject<{
13
- input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
14
- dimensions: z.ZodOptional<z.ZodInt>;
15
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
16
- model: z.ZodString;
17
- }, z.core.$loose>;
18
- export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
19
- export declare const EmbeddingsDataSchema: z.ZodObject<{
20
- object: z.ZodLiteral<"embedding">;
21
- embedding: z.ZodArray<z.ZodNumber>;
22
- index: z.ZodInt;
23
- }, z.core.$strip>;
24
- export type EmbeddingsData = z.infer<typeof EmbeddingsDataSchema>;
25
- export declare const EmbeddingsUsageSchema: z.ZodObject<{
26
- prompt_tokens: z.ZodOptional<z.ZodInt>;
27
- total_tokens: z.ZodOptional<z.ZodInt>;
28
- }, z.core.$strip>;
29
- export type EmbeddingsUsage = z.infer<typeof EmbeddingsUsageSchema>;
30
- export declare const EmbeddingsSchema: z.ZodObject<{
31
- object: z.ZodLiteral<"list">;
32
- data: z.ZodArray<z.ZodObject<{
33
- object: z.ZodLiteral<"embedding">;
34
- embedding: z.ZodArray<z.ZodNumber>;
35
- index: z.ZodInt;
36
- }, z.core.$strip>>;
37
- model: z.ZodString;
38
- usage: z.ZodNullable<z.ZodObject<{
39
- prompt_tokens: z.ZodOptional<z.ZodInt>;
40
- total_tokens: z.ZodOptional<z.ZodInt>;
41
- }, z.core.$strip>>;
42
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
43
- }, z.core.$strip>;
44
- export type Embeddings = z.infer<typeof EmbeddingsSchema>;
@@ -1,29 +0,0 @@
1
- import * as z from "zod";
2
// Requested dimensions: an integer that is non-negative and at most 65536.
- export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
3
// Metadata record: keys are strings of 1 to 64 characters, values are strings of at most 512 characters.
- export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
4
// Embeddings inputs: a required input (a string or an array of strings) plus
// optional dimensions and metadata, reusing the two schemas defined above.
- export const EmbeddingsInputsSchema = z.object({
5
- input: z.union([z.string(), z.array(z.string())]),
6
- dimensions: EmbeddingsDimensionsSchema.optional(),
7
- metadata: EmbeddingsMetadataSchema.optional(),
8
- });
9
// Request body: a loose object made of a required model string plus every field
// of EmbeddingsInputsSchema, spread in from its shape.
- export const EmbeddingsBodySchema = z.looseObject({
10
- model: z.string(),
11
- ...EmbeddingsInputsSchema.shape,
12
- });
13
// One embedding entry: the literal tag "embedding", the vector as an array of
// numbers, and a non-negative integer index.
- export const EmbeddingsDataSchema = z.object({
14
- object: z.literal("embedding"),
15
- embedding: z.array(z.number()),
16
- index: z.int().nonnegative(),
17
- });
18
// Token usage: both counters are optional non-negative integers.
- export const EmbeddingsUsageSchema = z.object({
19
- prompt_tokens: z.int().nonnegative().optional(),
20
- total_tokens: z.int().nonnegative().optional(),
21
- });
22
// Full embeddings response: the literal tag "list", an array of embedding entries,
// the model string, and a usage object that must be present but may be null.
- export const EmbeddingsSchema = z.object({
23
- object: z.literal("list"),
24
- data: z.array(EmbeddingsDataSchema),
25
- model: z.string(),
26
- usage: EmbeddingsUsageSchema.nullable(),
27
- // Extensions
28
// Optional value of unknown type, tagged through meta with extension: true.
- provider_metadata: z.unknown().optional().meta({ extension: true }),
29
- });
@@ -1,6 +0,0 @@
1
- import type { ModelCatalog, CatalogModel } from "../../models/types";
2
- import type { ModelList, Model } from "./schema";
3
// Converts one catalog entry, identified by id, into a Model.
- export declare function toModel(id: string, catalogModel: CatalogModel): Model;
4
// Converts a whole ModelCatalog into a ModelList.
- export declare function toModels(models: ModelCatalog): ModelList;
5
// Builds a Response from a ModelCatalog; responseInit is optional.
- export declare function createModelsResponse(models: ModelCatalog, responseInit?: ResponseInit): Response;
6
// Builds a Response from a single catalog entry; responseInit is optional.
- export declare function createModelResponse(id: string, catalogModel: CatalogModel, responseInit?: ResponseInit): Response;
@@ -1,42 +0,0 @@
1
- import { toResponse } from "../../utils/response";
2
// Builds the model object returned for one catalog entry.
//   created      - the catalog value run through Date.parse and floored to whole seconds;
//                  the current time is used when the value is missing or unparseable.
//   owned_by     - the part of id before the first "/".
//   architecture - the input and output modality lists (empty arrays when absent) plus a
//                  combined modality string built from the first entry of each list.
//   endpoints    - one object with a tag field per provider, or an empty array.
// The remaining catalog fields and then additionalProperties are spread last, so on a
// key collision they override the fields listed above.
- export function toModel(id, catalogModel) {
3
- const { created, providers, modalities, additionalProperties, ...rest } = catalogModel;
4
// Default to the current time, in seconds.
- let createdTimestamp = Math.floor(Date.now() / 1000);
5
- if (created) {
6
- const parsed = Date.parse(created);
7
// Date.parse yields NaN for an unparseable string; keep the default in that case.
- if (!isNaN(parsed)) {
8
- createdTimestamp = Math.floor(parsed / 1000);
9
- }
10
- }
11
- const model = {
12
- id,
13
- object: "model",
14
- created: createdTimestamp,
15
// NOTE(review): split always returns at least one element, so the "system" fallback
// looks unreachable at runtime - confirm the intended owner for ids without a prefix.
- owned_by: id.split("/")[0] ?? "system",
16
- architecture: {
17
- input_modalities: modalities?.input ?? [],
18
// Left undefined when either modality list is missing.
// NOTE(review): empty arrays are truthy, so empty lists produce the string
// "undefined->undefined" here - confirm whether that is intended.
- modality: modalities?.input &&
19
- modalities?.output &&
20
- `${modalities.input?.[0]}->${modalities.output?.[0]}`,
21
- output_modalities: modalities?.output ?? [],
22
- },
23
- endpoints: providers?.map((provider) => ({
24
- tag: provider,
25
- })) || [],
26
- ...rest,
27
- ...additionalProperties,
28
- };
29
- return model;
30
- }
31
// Wraps the catalog in a list object: every [id, catalogModel] entry of models is
// converted with toModel, in Object.entries order, and placed under data.
- export function toModels(models) {
32
- return {
33
- object: "list",
34
- data: Object.entries(models).map(([id, catalogModel]) => toModel(id, catalogModel)),
35
- };
36
- }
37
// Converts the catalog with toModels and passes the result, together with
// responseInit, to toResponse.
- export function createModelsResponse(models, responseInit) {
38
- return toResponse(toModels(models), responseInit);
39
- }
40
// Converts a single catalog entry with toModel and passes the result, together
// with responseInit, to toResponse.
- export function createModelResponse(id, catalogModel, responseInit) {
41
- return toResponse(toModel(id, catalogModel), responseInit);
42
- }
@@ -1,2 +0,0 @@
1
- import type { GatewayConfig, Endpoint } from "../../types";
2
// Factory that takes the gateway config and returns an Endpoint.
- export declare const models: (config: GatewayConfig) => Endpoint;
@@ -1,29 +0,0 @@
1
- import { GatewayError } from "../../errors/gateway";
2
- import { winterCgHandler } from "../../lifecycle";
3
- import { toModels, toModel } from "./converters";
4
// Creates the models endpoint. The inner handler accepts GET requests only and
// throws a 405 GatewayError otherwise. When the URL carries no model ID it returns
// the full list from toModels; otherwise it returns the single matching entry from
// toModel, throwing 400 for an undecodable ID and 404 for an unknown one.
// The handler is wrapped by winterCgHandler together with config.
- export const models = (config) => {
5
- // oxlint-disable-next-line require-await
6
- const handler = async (ctx) => {
7
- ctx.operation = "models";
8
- if (!ctx.request || ctx.request.method !== "GET") {
9
- throw new GatewayError("Method Not Allowed", 405);
10
- }
11
// Text after the first "/models/" in the URL, cut at the first "?".
// NOTE(review): split with a limit discards the remainder, so an unencoded ID that
// itself contains "/models/" would be truncated - confirm this cannot occur.
- const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
12
// A missing or empty ID means the list route.
- if (!rawId) {
13
- return toModels(ctx.models);
14
- }
15
- let modelId = rawId;
16
- try {
17
- modelId = decodeURIComponent(rawId);
18
- }
19
- catch {
20
// decodeURIComponent threw before assigning, so modelId still holds the raw value.
- throw new GatewayError(`Invalid model ID: '${modelId}'`, 400);
21
- }
22
// NOTE(review): if ctx.models is a plain object, inherited property names would
// also pass the check below - verify against how ctx.models is built.
- const model = ctx.models[modelId];
23
- if (!model) {
24
- throw new GatewayError(`Model not found: '${modelId}'`, 404);
25
- }
26
- return toModel(modelId, model);
27
- };
28
- return { handler: winterCgHandler(handler, config) };
29
- };
@@ -1,3 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
@@ -1,3 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";