@hebo-ai/gateway 0.5.2 → 0.6.0-rc0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/README.md +32 -1
  2. package/package.json +17 -12
  3. package/src/endpoints/chat-completions/converters.test.ts +85 -1
  4. package/src/endpoints/chat-completions/converters.ts +139 -18
  5. package/src/endpoints/chat-completions/handler.test.ts +2 -0
  6. package/src/endpoints/chat-completions/index.ts +1 -0
  7. package/src/endpoints/chat-completions/otel.ts +1 -0
  8. package/src/endpoints/chat-completions/schema.ts +38 -4
  9. package/src/endpoints/embeddings/index.ts +1 -0
  10. package/src/lifecycle.ts +2 -2
  11. package/src/models/anthropic/middleware.test.ts +45 -1
  12. package/src/models/anthropic/middleware.ts +21 -1
  13. package/src/models/google/middleware.test.ts +30 -1
  14. package/src/models/google/middleware.ts +20 -1
  15. package/src/models/openai/middleware.test.ts +32 -1
  16. package/src/models/openai/middleware.ts +25 -1
  17. package/src/providers/bedrock/middleware.test.ts +121 -1
  18. package/src/providers/bedrock/middleware.ts +61 -1
  19. package/src/telemetry/fetch.ts +31 -4
  20. package/src/telemetry/index.ts +1 -0
  21. package/dist/config.d.ts +0 -2
  22. package/dist/config.js +0 -81
  23. package/dist/endpoints/chat-completions/converters.d.ts +0 -43
  24. package/dist/endpoints/chat-completions/converters.js +0 -551
  25. package/dist/endpoints/chat-completions/handler.d.ts +0 -2
  26. package/dist/endpoints/chat-completions/handler.js +0 -145
  27. package/dist/endpoints/chat-completions/index.d.ts +0 -3
  28. package/dist/endpoints/chat-completions/index.js +0 -3
  29. package/dist/endpoints/chat-completions/otel.d.ts +0 -6
  30. package/dist/endpoints/chat-completions/otel.js +0 -134
  31. package/dist/endpoints/chat-completions/schema.d.ts +0 -946
  32. package/dist/endpoints/chat-completions/schema.js +0 -257
  33. package/dist/endpoints/embeddings/converters.d.ts +0 -10
  34. package/dist/endpoints/embeddings/converters.js +0 -31
  35. package/dist/endpoints/embeddings/handler.d.ts +0 -2
  36. package/dist/endpoints/embeddings/handler.js +0 -101
  37. package/dist/endpoints/embeddings/index.d.ts +0 -3
  38. package/dist/endpoints/embeddings/index.js +0 -3
  39. package/dist/endpoints/embeddings/otel.d.ts +0 -6
  40. package/dist/endpoints/embeddings/otel.js +0 -35
  41. package/dist/endpoints/embeddings/schema.d.ts +0 -38
  42. package/dist/endpoints/embeddings/schema.js +0 -26
  43. package/dist/endpoints/models/converters.d.ts +0 -6
  44. package/dist/endpoints/models/converters.js +0 -42
  45. package/dist/endpoints/models/handler.d.ts +0 -2
  46. package/dist/endpoints/models/handler.js +0 -29
  47. package/dist/endpoints/models/index.d.ts +0 -3
  48. package/dist/endpoints/models/index.js +0 -3
  49. package/dist/endpoints/models/schema.d.ts +0 -42
  50. package/dist/endpoints/models/schema.js +0 -31
  51. package/dist/errors/ai-sdk.d.ts +0 -2
  52. package/dist/errors/ai-sdk.js +0 -52
  53. package/dist/errors/gateway.d.ts +0 -5
  54. package/dist/errors/gateway.js +0 -13
  55. package/dist/errors/openai.d.ts +0 -20
  56. package/dist/errors/openai.js +0 -40
  57. package/dist/errors/utils.d.ts +0 -22
  58. package/dist/errors/utils.js +0 -44
  59. package/dist/gateway.d.ts +0 -9
  60. package/dist/gateway.js +0 -34
  61. package/dist/index.d.ts +0 -14
  62. package/dist/index.js +0 -13
  63. package/dist/lifecycle.d.ts +0 -2
  64. package/dist/lifecycle.js +0 -94
  65. package/dist/logger/default.d.ts +0 -4
  66. package/dist/logger/default.js +0 -81
  67. package/dist/logger/index.d.ts +0 -14
  68. package/dist/logger/index.js +0 -25
  69. package/dist/middleware/common.d.ts +0 -12
  70. package/dist/middleware/common.js +0 -145
  71. package/dist/middleware/matcher.d.ts +0 -27
  72. package/dist/middleware/matcher.js +0 -112
  73. package/dist/middleware/utils.d.ts +0 -2
  74. package/dist/middleware/utils.js +0 -27
  75. package/dist/models/amazon/index.d.ts +0 -2
  76. package/dist/models/amazon/index.js +0 -2
  77. package/dist/models/amazon/middleware.d.ts +0 -3
  78. package/dist/models/amazon/middleware.js +0 -65
  79. package/dist/models/amazon/presets.d.ts +0 -2390
  80. package/dist/models/amazon/presets.js +0 -80
  81. package/dist/models/anthropic/index.d.ts +0 -2
  82. package/dist/models/anthropic/index.js +0 -2
  83. package/dist/models/anthropic/middleware.d.ts +0 -4
  84. package/dist/models/anthropic/middleware.js +0 -111
  85. package/dist/models/anthropic/presets.d.ts +0 -4802
  86. package/dist/models/anthropic/presets.js +0 -135
  87. package/dist/models/catalog.d.ts +0 -4
  88. package/dist/models/catalog.js +0 -4
  89. package/dist/models/cohere/index.d.ts +0 -2
  90. package/dist/models/cohere/index.js +0 -2
  91. package/dist/models/cohere/middleware.d.ts +0 -3
  92. package/dist/models/cohere/middleware.js +0 -60
  93. package/dist/models/cohere/presets.d.ts +0 -2918
  94. package/dist/models/cohere/presets.js +0 -134
  95. package/dist/models/google/index.d.ts +0 -2
  96. package/dist/models/google/index.js +0 -2
  97. package/dist/models/google/middleware.d.ts +0 -7
  98. package/dist/models/google/middleware.js +0 -103
  99. package/dist/models/google/presets.d.ts +0 -2553
  100. package/dist/models/google/presets.js +0 -83
  101. package/dist/models/meta/index.d.ts +0 -1
  102. package/dist/models/meta/index.js +0 -1
  103. package/dist/models/meta/presets.d.ts +0 -3254
  104. package/dist/models/meta/presets.js +0 -95
  105. package/dist/models/openai/index.d.ts +0 -2
  106. package/dist/models/openai/index.js +0 -2
  107. package/dist/models/openai/middleware.d.ts +0 -3
  108. package/dist/models/openai/middleware.js +0 -62
  109. package/dist/models/openai/presets.d.ts +0 -6634
  110. package/dist/models/openai/presets.js +0 -213
  111. package/dist/models/types.d.ts +0 -20
  112. package/dist/models/types.js +0 -84
  113. package/dist/models/voyage/index.d.ts +0 -2
  114. package/dist/models/voyage/index.js +0 -2
  115. package/dist/models/voyage/middleware.d.ts +0 -2
  116. package/dist/models/voyage/middleware.js +0 -18
  117. package/dist/models/voyage/presets.d.ts +0 -3471
  118. package/dist/models/voyage/presets.js +0 -85
  119. package/dist/providers/anthropic/canonical.d.ts +0 -3
  120. package/dist/providers/anthropic/canonical.js +0 -9
  121. package/dist/providers/anthropic/index.d.ts +0 -1
  122. package/dist/providers/anthropic/index.js +0 -1
  123. package/dist/providers/bedrock/canonical.d.ts +0 -17
  124. package/dist/providers/bedrock/canonical.js +0 -61
  125. package/dist/providers/bedrock/index.d.ts +0 -2
  126. package/dist/providers/bedrock/index.js +0 -2
  127. package/dist/providers/bedrock/middleware.d.ts +0 -3
  128. package/dist/providers/bedrock/middleware.js +0 -55
  129. package/dist/providers/cohere/canonical.d.ts +0 -3
  130. package/dist/providers/cohere/canonical.js +0 -17
  131. package/dist/providers/cohere/index.d.ts +0 -1
  132. package/dist/providers/cohere/index.js +0 -1
  133. package/dist/providers/groq/canonical.d.ts +0 -3
  134. package/dist/providers/groq/canonical.js +0 -12
  135. package/dist/providers/groq/index.d.ts +0 -1
  136. package/dist/providers/groq/index.js +0 -1
  137. package/dist/providers/openai/canonical.d.ts +0 -3
  138. package/dist/providers/openai/canonical.js +0 -8
  139. package/dist/providers/openai/index.d.ts +0 -1
  140. package/dist/providers/openai/index.js +0 -1
  141. package/dist/providers/registry.d.ts +0 -24
  142. package/dist/providers/registry.js +0 -100
  143. package/dist/providers/types.d.ts +0 -7
  144. package/dist/providers/types.js +0 -11
  145. package/dist/providers/vertex/canonical.d.ts +0 -3
  146. package/dist/providers/vertex/canonical.js +0 -8
  147. package/dist/providers/vertex/index.d.ts +0 -1
  148. package/dist/providers/vertex/index.js +0 -1
  149. package/dist/providers/voyage/canonical.d.ts +0 -3
  150. package/dist/providers/voyage/canonical.js +0 -7
  151. package/dist/providers/voyage/index.d.ts +0 -1
  152. package/dist/providers/voyage/index.js +0 -1
  153. package/dist/telemetry/ai-sdk.d.ts +0 -2
  154. package/dist/telemetry/ai-sdk.js +0 -31
  155. package/dist/telemetry/baggage.d.ts +0 -1
  156. package/dist/telemetry/baggage.js +0 -24
  157. package/dist/telemetry/fetch.d.ts +0 -2
  158. package/dist/telemetry/fetch.js +0 -24
  159. package/dist/telemetry/gen-ai.d.ts +0 -5
  160. package/dist/telemetry/gen-ai.js +0 -60
  161. package/dist/telemetry/http.d.ts +0 -3
  162. package/dist/telemetry/http.js +0 -54
  163. package/dist/telemetry/memory.d.ts +0 -2
  164. package/dist/telemetry/memory.js +0 -27
  165. package/dist/telemetry/span.d.ts +0 -13
  166. package/dist/telemetry/span.js +0 -60
  167. package/dist/telemetry/stream.d.ts +0 -3
  168. package/dist/telemetry/stream.js +0 -51
  169. package/dist/types.d.ts +0 -176
  170. package/dist/types.js +0 -1
  171. package/dist/utils/env.d.ts +0 -2
  172. package/dist/utils/env.js +0 -5
  173. package/dist/utils/headers.d.ts +0 -4
  174. package/dist/utils/headers.js +0 -22
  175. package/dist/utils/preset.d.ts +0 -9
  176. package/dist/utils/preset.js +0 -41
  177. package/dist/utils/request.d.ts +0 -2
  178. package/dist/utils/request.js +0 -14
  179. package/dist/utils/response.d.ts +0 -3
  180. package/dist/utils/response.js +0 -68
@@ -1,257 +0,0 @@
1
- import * as z from "zod";
2
- export const ChatCompletionsContentPartTextSchema = z.object({
3
- type: z.literal("text"),
4
- text: z.string(),
5
- });
6
- export const ChatCompletionsContentPartImageSchema = z.object({
7
- type: z.literal("image_url"),
8
- image_url: z.object({
9
- url: z.string(),
10
- detail: z.enum(["low", "high", "auto"]).optional(),
11
- }),
12
- });
13
- export const ChatCompletionsContentPartFileSchema = z.object({
14
- type: z.literal("file"),
15
- file: z.object({
16
- data: z.string(),
17
- media_type: z.string(),
18
- filename: z.string().optional(),
19
- }),
20
- });
21
- export const ChatCompletionsContentPartAudioSchema = z.object({
22
- type: z.literal("input_audio"),
23
- input_audio: z.object({
24
- data: z.string(),
25
- // only wav and mp3 are official by OpenAI, rest is taken from Gemini support:
26
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding
27
- format: z.enum([
28
- "x-aac",
29
- "flac",
30
- "mp3",
31
- "m4a",
32
- "mpeg",
33
- "mpga",
34
- "mp4",
35
- "ogg",
36
- "pcm",
37
- "wav",
38
- "webm",
39
- ]),
40
- }),
41
- });
42
- export const ChatCompletionsContentPartSchema = z.discriminatedUnion("type", [
43
- ChatCompletionsContentPartTextSchema,
44
- ChatCompletionsContentPartImageSchema,
45
- ChatCompletionsContentPartFileSchema,
46
- ChatCompletionsContentPartAudioSchema,
47
- ]);
48
- export const ChatCompletionsToolCallSchema = z.object({
49
- type: z.literal("function"),
50
- id: z.string(),
51
- function: z.object({
52
- arguments: z.string(),
53
- name: z.string(),
54
- }),
55
- extra_content: z
56
- .record(z.string(), z.record(z.string(), z.unknown()))
57
- .optional()
58
- .meta({ extension: true }),
59
- });
60
- export const ChatCompletionsSystemMessageSchema = z.object({
61
- role: z.literal("system"),
62
- content: z.string(),
63
- name: z.string().optional(),
64
- });
65
- export const ChatCompletionsUserMessageSchema = z.object({
66
- role: z.literal("user"),
67
- content: z.union([z.string(), z.array(ChatCompletionsContentPartSchema)]),
68
- name: z.string().optional(),
69
- });
70
- export const ChatCompletionsReasoningDetailSchema = z.object({
71
- id: z.string().optional(),
72
- index: z.int().nonnegative(),
73
- type: z.string(),
74
- text: z.string().optional(),
75
- signature: z.string().optional(),
76
- data: z.string().optional(),
77
- summary: z.string().optional(),
78
- format: z.string().optional(),
79
- });
80
- export const ChatCompletionsAssistantMessageSchema = z.object({
81
- role: z.literal("assistant"),
82
- content: z
83
- .union([z.string(), z.null(), z.array(ChatCompletionsContentPartTextSchema)])
84
- .optional(),
85
- name: z.string().optional(),
86
- // FUTURE: This should also support Custom Tool Calls
87
- tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
88
- // Extensions
89
- reasoning_content: z.string().optional().meta({ extension: true }),
90
- reasoning_details: z
91
- .array(ChatCompletionsReasoningDetailSchema)
92
- .optional()
93
- .meta({ extension: true }),
94
- extra_content: z
95
- .record(z.string(), z.record(z.string(), z.unknown()))
96
- .optional()
97
- .meta({ extension: true }),
98
- });
99
- export const ChatCompletionsToolMessageSchema = z.object({
100
- role: z.literal("tool"),
101
- content: z.union([z.string(), z.array(ChatCompletionsContentPartTextSchema)]),
102
- tool_call_id: z.string(),
103
- });
104
- export const ChatCompletionsMessageSchema = z.discriminatedUnion("role", [
105
- ChatCompletionsSystemMessageSchema,
106
- ChatCompletionsUserMessageSchema,
107
- ChatCompletionsAssistantMessageSchema,
108
- ChatCompletionsToolMessageSchema,
109
- ]);
110
- export const ChatCompletionsToolSchema = z.object({
111
- type: z.literal("function"),
112
- function: z.object({
113
- name: z.string(),
114
- description: z.string().optional(),
115
- parameters: z.record(z.string(), z.unknown()),
116
- strict: z.boolean().optional(),
117
- }),
118
- });
119
- const ChatCompletionsNamedFunctionToolChoiceSchema = z.object({
120
- type: z.literal("function"),
121
- function: z.object({
122
- name: z.string(),
123
- }),
124
- });
125
- const ChatCompletionsAllowedFunctionToolChoiceSchema = z.object({
126
- type: z.literal("allowed_tools"),
127
- allowed_tools: z.object({
128
- mode: z.enum(["auto", "required"]),
129
- tools: z.array(ChatCompletionsNamedFunctionToolChoiceSchema).nonempty(),
130
- }),
131
- });
132
- export const ChatCompletionsToolChoiceSchema = z.union([
133
- z.enum(["none", "auto", "required", "validated"]),
134
- z.discriminatedUnion("type", [
135
- ChatCompletionsNamedFunctionToolChoiceSchema,
136
- ChatCompletionsAllowedFunctionToolChoiceSchema,
137
- ]),
138
- // FUTURE: Missing CustomTool
139
- ]);
140
- export const ChatCompletionsReasoningEffortSchema = z.enum([
141
- "none",
142
- "minimal",
143
- "low",
144
- "medium",
145
- "high",
146
- "xhigh",
147
- "max",
148
- ]);
149
- export const ChatCompletionsReasoningConfigSchema = z.object({
150
- enabled: z.optional(z.boolean()),
151
- effort: z.optional(ChatCompletionsReasoningEffortSchema),
152
- max_tokens: z.optional(z.number()),
153
- exclude: z.optional(z.boolean()),
154
- });
155
- export const ChatCompletionsResponseFormatJsonSchema = z.object({
156
- // FUTURE: consider support for legacy json_object (if demand)
157
- type: z.literal("json_schema"),
158
- json_schema: z.object({
159
- name: z.string(),
160
- description: z.string().optional(),
161
- schema: z.record(z.string(), z.unknown()),
162
- // FUTURE: consider support for non-strict mode (for providers that support it)
163
- strict: z.boolean().optional(),
164
- }),
165
- });
166
- export const ChatCompletionsResponseFormatTextSchema = z.object({
167
- type: z.literal("text"),
168
- });
169
- export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type", [
170
- ChatCompletionsResponseFormatJsonSchema,
171
- ChatCompletionsResponseFormatTextSchema,
172
- ]);
173
- const ChatCompletionsInputsSchema = z.object({
174
- messages: z.array(ChatCompletionsMessageSchema),
175
- tools: z.array(ChatCompletionsToolSchema).optional(),
176
- tool_choice: ChatCompletionsToolChoiceSchema.optional(),
177
- temperature: z.number().min(0).max(2).optional(),
178
- max_tokens: z.int().nonnegative().optional(),
179
- max_completion_tokens: z.int().nonnegative().optional(),
180
- frequency_penalty: z.number().min(-2.0).max(2.0).optional(),
181
- presence_penalty: z.number().min(-2.0).max(2.0).optional(),
182
- seed: z.int().optional(),
183
- stop: z.union([z.string(), z.array(z.string())]).optional(),
184
- top_p: z.number().min(0).max(1.0).optional(),
185
- response_format: ChatCompletionsResponseFormatSchema.optional(),
186
- reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
187
- // Extensions
188
- reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
189
- });
190
- export const ChatCompletionsBodySchema = z.looseObject({
191
- model: z.string(),
192
- stream: z.boolean().optional(),
193
- ...ChatCompletionsInputsSchema.shape,
194
- });
195
- export const ChatCompletionsFinishReasonSchema = z.enum([
196
- "stop",
197
- "length",
198
- "content_filter",
199
- "tool_calls",
200
- ]);
201
- export const ChatCompletionsChoiceSchema = z.object({
202
- index: z.int().nonnegative(),
203
- message: ChatCompletionsAssistantMessageSchema,
204
- finish_reason: ChatCompletionsFinishReasonSchema,
205
- // FUTURE: model this out
206
- logprobs: z.unknown().optional(),
207
- });
208
- export const ChatCompletionsUsageSchema = z.object({
209
- prompt_tokens: z.int().nonnegative().optional(),
210
- completion_tokens: z.int().nonnegative().optional(),
211
- total_tokens: z.int().nonnegative().optional(),
212
- completion_tokens_details: z
213
- .object({
214
- // FUTURE: add missing properties
215
- reasoning_tokens: z.int().nonnegative().optional(),
216
- })
217
- .optional(),
218
- prompt_tokens_details: z
219
- .object({
220
- // FUTURE: add missing properties
221
- cached_tokens: z.int().nonnegative().optional(),
222
- })
223
- .optional(),
224
- });
225
- export const ChatCompletionsSchema = z.object({
226
- id: z.string(),
227
- object: z.literal("chat.completion"),
228
- created: z.int().nonnegative(),
229
- model: z.string(),
230
- choices: z.array(ChatCompletionsChoiceSchema),
231
- usage: ChatCompletionsUsageSchema.nullable(),
232
- // Extensions
233
- provider_metadata: z.unknown().optional().meta({ extension: true }),
234
- });
235
- export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
236
- index: z.int().nonnegative(),
237
- });
238
- export const ChatCompletionsAssistantMessageDeltaSchema = ChatCompletionsAssistantMessageSchema.partial().extend({
239
- tool_calls: z.array(ChatCompletionsToolCallDeltaSchema).optional(),
240
- });
241
- export const ChatCompletionsChoiceDeltaSchema = z.object({
242
- index: z.int().nonnegative(),
243
- delta: ChatCompletionsAssistantMessageDeltaSchema,
244
- finish_reason: ChatCompletionsFinishReasonSchema.nullable(),
245
- // FUTURE: model this out
246
- logprobs: z.unknown().optional(),
247
- });
248
- export const ChatCompletionsChunkSchema = z.object({
249
- id: z.string(),
250
- object: z.literal("chat.completion.chunk"),
251
- created: z.int().nonnegative(),
252
- model: z.string(),
253
- choices: z.array(ChatCompletionsChoiceDeltaSchema),
254
- usage: ChatCompletionsUsageSchema.nullable(),
255
- // Extensions
256
- provider_metadata: z.unknown().optional().meta({ extension: true }),
257
- });
@@ -1,10 +0,0 @@
1
- import type { SharedV3ProviderOptions } from "@ai-sdk/provider";
2
- import type { EmbedManyResult } from "ai";
3
- import type { EmbeddingsInputs, Embeddings } from "./schema";
4
- export type EmbedCallOptions = {
5
- values: string[];
6
- providerOptions: SharedV3ProviderOptions;
7
- };
8
- export declare function convertToEmbedCallOptions(params: EmbeddingsInputs): EmbedCallOptions;
9
- export declare function toEmbeddings(embedManyResult: EmbedManyResult, modelId: string): Embeddings;
10
- export declare function createEmbeddingsResponse(embedManyResult: EmbedManyResult, modelId: string, responseInit?: ResponseInit): Response;
@@ -1,31 +0,0 @@
1
- import { toResponse } from "../../utils/response";
2
- export function convertToEmbedCallOptions(params) {
3
- const { input, ...rest } = params;
4
- return {
5
- values: Array.isArray(input) ? input : [input],
6
- providerOptions: {
7
- unknown: rest,
8
- },
9
- };
10
- }
11
- export function toEmbeddings(embedManyResult, modelId) {
12
- const data = embedManyResult.embeddings.map((embedding, index) => ({
13
- object: "embedding",
14
- embedding,
15
- index,
16
- }));
17
- const usage = {
18
- prompt_tokens: embedManyResult.usage.tokens,
19
- total_tokens: embedManyResult.usage.tokens,
20
- };
21
- return {
22
- object: "list",
23
- data,
24
- model: modelId,
25
- usage,
26
- provider_metadata: embedManyResult.providerMetadata,
27
- };
28
- }
29
- export function createEmbeddingsResponse(embedManyResult, modelId, responseInit) {
30
- return toResponse(toEmbeddings(embedManyResult, modelId), responseInit);
31
- }
@@ -1,2 +0,0 @@
1
- import type { GatewayConfig, Endpoint } from "../../types";
2
- export declare const embeddings: (config: GatewayConfig) => Endpoint;
@@ -1,101 +0,0 @@
1
- import { embedMany, wrapEmbeddingModel } from "ai";
2
- import * as z from "zod/mini";
3
- import { GatewayError } from "../../errors/gateway";
4
- import { winterCgHandler } from "../../lifecycle";
5
- import { logger } from "../../logger";
6
- import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
- import { resolveProvider } from "../../providers/registry";
8
- import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
- import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
- import { prepareForwardHeaders } from "../../utils/request";
11
- import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
12
- import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
13
- import { EmbeddingsBodySchema } from "./schema";
14
- export const embeddings = (config) => {
15
- const hooks = config.hooks;
16
- const handler = async (ctx) => {
17
- const start = performance.now();
18
- ctx.operation = "embeddings";
19
- addSpanEvent("hebo.handler.started");
20
- // Guard: enforce HTTP method early.
21
- if (!ctx.request || ctx.request.method !== "POST") {
22
- throw new GatewayError("Method Not Allowed", 405);
23
- }
24
- // Parse + validate input.
25
- try {
26
- ctx.body = await ctx.request.json();
27
- }
28
- catch {
29
- throw new GatewayError("Invalid JSON", 400);
30
- }
31
- addSpanEvent("hebo.request.deserialized");
32
- const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
33
- if (!parsed.success) {
34
- // FUTURE: consider adding body shape to metadata
35
- throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
36
- }
37
- ctx.body = parsed.data;
38
- addSpanEvent("hebo.request.parsed");
39
- if (hooks?.before) {
40
- ctx.body = (await hooks.before(ctx)) ?? ctx.body;
41
- addSpanEvent("hebo.hooks.before.completed");
42
- }
43
- // Resolve model + provider (hooks may override defaults).
44
- let inputs;
45
- ({ model: ctx.modelId, ...inputs } = ctx.body);
46
- ctx.resolvedModelId =
47
- (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
48
- logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
49
- addSpanEvent("hebo.model.resolved");
50
- const override = await hooks?.resolveProvider?.(ctx);
51
- ctx.provider =
52
- override ??
53
- resolveProvider({
54
- providers: ctx.providers,
55
- models: ctx.models,
56
- modelId: ctx.resolvedModelId,
57
- operation: ctx.operation,
58
- });
59
- const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
60
- ctx.resolvedProviderId = embeddingModel.provider;
61
- logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
62
- addSpanEvent("hebo.provider.resolved");
63
- const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
64
- const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
65
- setSpanAttributes(genAiGeneralAttrs);
66
- // Convert inputs to AI SDK call options.
67
- const embedOptions = convertToEmbedCallOptions(inputs);
68
- logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
69
- addSpanEvent("hebo.options.prepared");
70
- setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
71
- // Build middleware chain (model -> forward params -> provider).
72
- const embeddingModelWithMiddleware = wrapEmbeddingModel({
73
- model: embeddingModel,
74
- middleware: modelMiddlewareMatcher.forEmbedding(ctx.resolvedModelId, embeddingModel.provider),
75
- });
76
- // Execute request.
77
- addSpanEvent("hebo.ai-sdk.started");
78
- const result = await embedMany({
79
- model: embeddingModelWithMiddleware,
80
- headers: prepareForwardHeaders(ctx.request),
81
- abortSignal: ctx.request.signal,
82
- ...embedOptions,
83
- });
84
- logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
85
- addSpanEvent("hebo.ai-sdk.completed");
86
- // Transform result.
87
- ctx.result = toEmbeddings(result, ctx.modelId);
88
- addSpanEvent("hebo.result.transformed");
89
- const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
90
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
91
- setSpanAttributes(genAiResponseAttrs);
92
- if (hooks?.after) {
93
- ctx.result = (await hooks.after(ctx)) ?? ctx.result;
94
- addSpanEvent("hebo.hooks.after.completed");
95
- }
96
- recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
97
- recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
98
- return ctx.result;
99
- };
100
- return { handler: winterCgHandler(handler, config) };
101
- };
@@ -1,3 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
@@ -1,3 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
@@ -1,6 +0,0 @@
1
- import type { Attributes } from "@opentelemetry/api";
2
- import type { Embeddings, EmbeddingsInputs } from "./schema";
3
- import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
4
- export declare const getEmbeddingsGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
5
- export declare const getEmbeddingsRequestAttributes: (inputs: EmbeddingsInputs, signalLevel?: TelemetrySignalLevel) => Attributes;
6
- export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
@@ -1,35 +0,0 @@
1
- import {} from "../../types";
2
- export const getEmbeddingsGeneralAttributes = (ctx, signalLevel) => {
3
- if (!signalLevel || signalLevel === "off")
4
- return {};
5
- const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
6
- return {
7
- "gen_ai.operation.name": ctx.operation,
8
- "gen_ai.request.model": requestModel,
9
- "gen_ai.response.model": ctx.resolvedModelId,
10
- "gen_ai.provider.name": ctx.resolvedProviderId,
11
- };
12
- };
13
- export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
14
- if (!signalLevel || signalLevel === "off")
15
- return {};
16
- const attrs = {};
17
- if (signalLevel !== "required") {
18
- Object.assign(attrs, {
19
- "gen_ai.embeddings.dimension.count": inputs.dimensions,
20
- });
21
- }
22
- return attrs;
23
- };
24
- export const getEmbeddingsResponseAttributes = (embeddings, signalLevel) => {
25
- if (!signalLevel || signalLevel === "off")
26
- return {};
27
- const attrs = {};
28
- if (signalLevel !== "required") {
29
- Object.assign(attrs, {
30
- "gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
31
- "gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
32
- });
33
- }
34
- return attrs;
35
- };
@@ -1,38 +0,0 @@
1
- import * as z from "zod";
2
- export declare const EmbeddingsInputsSchema: z.ZodObject<{
3
- input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
4
- dimensions: z.ZodOptional<z.ZodInt>;
5
- }, z.core.$strip>;
6
- export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
7
- export declare const EmbeddingsBodySchema: z.ZodObject<{
8
- input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
9
- dimensions: z.ZodOptional<z.ZodInt>;
10
- model: z.ZodString;
11
- }, z.core.$loose>;
12
- export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
13
- export declare const EmbeddingsDataSchema: z.ZodObject<{
14
- object: z.ZodLiteral<"embedding">;
15
- embedding: z.ZodArray<z.ZodNumber>;
16
- index: z.ZodInt;
17
- }, z.core.$strip>;
18
- export type EmbeddingsData = z.infer<typeof EmbeddingsDataSchema>;
19
- export declare const EmbeddingsUsageSchema: z.ZodObject<{
20
- prompt_tokens: z.ZodOptional<z.ZodInt>;
21
- total_tokens: z.ZodOptional<z.ZodInt>;
22
- }, z.core.$strip>;
23
- export type EmbeddingsUsage = z.infer<typeof EmbeddingsUsageSchema>;
24
- export declare const EmbeddingsSchema: z.ZodObject<{
25
- object: z.ZodLiteral<"list">;
26
- data: z.ZodArray<z.ZodObject<{
27
- object: z.ZodLiteral<"embedding">;
28
- embedding: z.ZodArray<z.ZodNumber>;
29
- index: z.ZodInt;
30
- }, z.core.$strip>>;
31
- model: z.ZodString;
32
- usage: z.ZodNullable<z.ZodObject<{
33
- prompt_tokens: z.ZodOptional<z.ZodInt>;
34
- total_tokens: z.ZodOptional<z.ZodInt>;
35
- }, z.core.$strip>>;
36
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
37
- }, z.core.$strip>;
38
- export type Embeddings = z.infer<typeof EmbeddingsSchema>;
@@ -1,26 +0,0 @@
1
- import * as z from "zod";
2
- export const EmbeddingsInputsSchema = z.object({
3
- input: z.union([z.string(), z.array(z.string())]),
4
- dimensions: z.int().nonnegative().max(65536).optional(),
5
- });
6
- export const EmbeddingsBodySchema = z.looseObject({
7
- model: z.string(),
8
- ...EmbeddingsInputsSchema.shape,
9
- });
10
- export const EmbeddingsDataSchema = z.object({
11
- object: z.literal("embedding"),
12
- embedding: z.array(z.number()),
13
- index: z.int().nonnegative(),
14
- });
15
- export const EmbeddingsUsageSchema = z.object({
16
- prompt_tokens: z.int().nonnegative().optional(),
17
- total_tokens: z.int().nonnegative().optional(),
18
- });
19
- export const EmbeddingsSchema = z.object({
20
- object: z.literal("list"),
21
- data: z.array(EmbeddingsDataSchema),
22
- model: z.string(),
23
- usage: EmbeddingsUsageSchema.nullable(),
24
- // Extensions
25
- provider_metadata: z.unknown().optional().meta({ extension: true }),
26
- });
@@ -1,6 +0,0 @@
1
- import type { ModelCatalog, CatalogModel } from "../../models/types";
2
- import type { ModelList, Model } from "./schema";
3
- export declare function toModel(id: string, catalogModel: CatalogModel): Model;
4
- export declare function toModels(models: ModelCatalog): ModelList;
5
- export declare function createModelsResponse(models: ModelCatalog, responseInit?: ResponseInit): Response;
6
- export declare function createModelResponse(id: string, catalogModel: CatalogModel, responseInit?: ResponseInit): Response;
@@ -1,42 +0,0 @@
1
- import { toResponse } from "../../utils/response";
2
- export function toModel(id, catalogModel) {
3
- const { created, providers, modalities, additionalProperties, ...rest } = catalogModel;
4
- let createdTimestamp = Math.floor(Date.now() / 1000);
5
- if (created) {
6
- const parsed = Date.parse(created);
7
- if (!isNaN(parsed)) {
8
- createdTimestamp = Math.floor(parsed / 1000);
9
- }
10
- }
11
- const model = {
12
- id,
13
- object: "model",
14
- created: createdTimestamp,
15
- owned_by: id.split("/")[0] || "system",
16
- architecture: {
17
- input_modalities: modalities?.input || [],
18
- modality: modalities?.input &&
19
- modalities?.output &&
20
- `${modalities.input?.[0]}->${modalities.output?.[0]}`,
21
- output_modalities: modalities?.output || [],
22
- },
23
- endpoints: providers?.map((provider) => ({
24
- tag: provider,
25
- })) || [],
26
- ...rest,
27
- ...additionalProperties,
28
- };
29
- return model;
30
- }
31
- export function toModels(models) {
32
- return {
33
- object: "list",
34
- data: Object.entries(models).map(([id, catalogModel]) => toModel(id, catalogModel)),
35
- };
36
- }
37
- export function createModelsResponse(models, responseInit) {
38
- return toResponse(toModels(models), responseInit);
39
- }
40
- export function createModelResponse(id, catalogModel, responseInit) {
41
- return toResponse(toModel(id, catalogModel), responseInit);
42
- }
@@ -1,2 +0,0 @@
1
- import type { GatewayConfig, Endpoint } from "../../types";
2
- export declare const models: (config: GatewayConfig) => Endpoint;
@@ -1,29 +0,0 @@
1
- import { GatewayError } from "../../errors/gateway";
2
- import { winterCgHandler } from "../../lifecycle";
3
- import { toModels, toModel } from "./converters";
4
- export const models = (config) => {
5
- // eslint-disable-next-line require-await
6
- const handler = async (ctx) => {
7
- ctx.operation = "models";
8
- if (!ctx.request || ctx.request.method !== "GET") {
9
- throw new GatewayError("Method Not Allowed", 405);
10
- }
11
- const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
12
- if (!rawId) {
13
- return toModels(ctx.models);
14
- }
15
- let modelId = rawId;
16
- try {
17
- modelId = decodeURIComponent(rawId);
18
- }
19
- catch {
20
- throw new GatewayError(`Invalid model ID: '${modelId}'`, 400);
21
- }
22
- const model = ctx.models[modelId];
23
- if (!model) {
24
- throw new GatewayError(`Model not found: '${modelId}'`, 404);
25
- }
26
- return toModel(modelId, model);
27
- };
28
- return { handler: winterCgHandler(handler, config) };
29
- };
@@ -1,3 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
@@ -1,3 +0,0 @@
1
- export * from "./converters";
2
- export * from "./handler";
3
- export * from "./schema";
@@ -1,42 +0,0 @@
1
- import * as z from "zod";
2
- export declare const ModelSchema: z.ZodObject<{
3
- id: z.ZodString;
4
- object: z.ZodLiteral<"model">;
5
- created: z.ZodInt;
6
- owned_by: z.ZodString;
7
- name: z.ZodOptional<z.ZodString>;
8
- knowledge: z.ZodOptional<z.ZodString>;
9
- context: z.ZodOptional<z.ZodInt>;
10
- architecture: z.ZodOptional<z.ZodObject<{
11
- modality: z.ZodOptional<z.ZodString>;
12
- input_modalities: z.ZodOptional<z.ZodReadonly<z.ZodArray<z.ZodString>>>;
13
- output_modalities: z.ZodOptional<z.ZodReadonly<z.ZodArray<z.ZodString>>>;
14
- }, z.core.$strip>>;
15
- endpoints: z.ZodOptional<z.ZodArray<z.ZodObject<{
16
- tag: z.ZodString;
17
- }, z.core.$strip>>>;
18
- capabilities: z.ZodOptional<z.ZodReadonly<z.ZodArray<z.ZodString>>>;
19
- }, z.core.$loose>;
20
- export type Model = z.infer<typeof ModelSchema>;
21
- export declare const ModelListSchema: z.ZodObject<{
22
- object: z.ZodLiteral<"list">;
23
- data: z.ZodArray<z.ZodObject<{
24
- id: z.ZodString;
25
- object: z.ZodLiteral<"model">;
26
- created: z.ZodInt;
27
- owned_by: z.ZodString;
28
- name: z.ZodOptional<z.ZodString>;
29
- knowledge: z.ZodOptional<z.ZodString>;
30
- context: z.ZodOptional<z.ZodInt>;
31
- architecture: z.ZodOptional<z.ZodObject<{
32
- modality: z.ZodOptional<z.ZodString>;
33
- input_modalities: z.ZodOptional<z.ZodReadonly<z.ZodArray<z.ZodString>>>;
34
- output_modalities: z.ZodOptional<z.ZodReadonly<z.ZodArray<z.ZodString>>>;
35
- }, z.core.$strip>>;
36
- endpoints: z.ZodOptional<z.ZodArray<z.ZodObject<{
37
- tag: z.ZodString;
38
- }, z.core.$strip>>>;
39
- capabilities: z.ZodOptional<z.ZodReadonly<z.ZodArray<z.ZodString>>>;
40
- }, z.core.$loose>>;
41
- }, z.core.$strip>;
42
- export type ModelList = z.infer<typeof ModelListSchema>;