@hebo-ai/gateway 0.4.1 → 0.5.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +2 -3
  2. package/dist/endpoints/chat-completions/converters.d.ts +3 -1
  3. package/dist/endpoints/chat-completions/converters.js +121 -90
  4. package/dist/endpoints/chat-completions/handler.js +2 -4
  5. package/dist/endpoints/chat-completions/otel.js +7 -0
  6. package/dist/endpoints/chat-completions/schema.d.ts +400 -76
  7. package/dist/endpoints/chat-completions/schema.js +80 -36
  8. package/dist/endpoints/embeddings/handler.js +2 -4
  9. package/dist/endpoints/embeddings/schema.d.ts +1 -1
  10. package/dist/endpoints/embeddings/schema.js +1 -1
  11. package/dist/errors/gateway.js +1 -0
  12. package/dist/lifecycle.js +7 -12
  13. package/dist/logger/default.d.ts +0 -1
  14. package/dist/logger/default.js +30 -6
  15. package/dist/middleware/utils.js +1 -0
  16. package/dist/models/amazon/middleware.js +1 -0
  17. package/dist/models/anthropic/middleware.d.ts +2 -0
  18. package/dist/models/anthropic/middleware.js +77 -16
  19. package/dist/models/google/middleware.js +17 -0
  20. package/dist/models/google/presets.d.ts +387 -0
  21. package/dist/models/google/presets.js +9 -2
  22. package/dist/models/openai/middleware.js +1 -0
  23. package/dist/models/types.d.ts +1 -1
  24. package/dist/models/types.js +1 -0
  25. package/dist/providers/bedrock/index.d.ts +1 -0
  26. package/dist/providers/bedrock/index.js +1 -0
  27. package/dist/providers/bedrock/middleware.d.ts +2 -0
  28. package/dist/providers/bedrock/middleware.js +35 -0
  29. package/dist/telemetry/http.js +0 -3
  30. package/dist/types.d.ts +10 -20
  31. package/dist/utils/request.d.ts +1 -3
  32. package/dist/utils/request.js +3 -26
  33. package/dist/utils/response.d.ts +1 -1
  34. package/dist/utils/response.js +3 -3
  35. package/package.json +19 -21
  36. package/src/endpoints/chat-completions/converters.test.ts +219 -0
  37. package/src/endpoints/chat-completions/converters.ts +144 -104
  38. package/src/endpoints/chat-completions/handler.test.ts +87 -0
  39. package/src/endpoints/chat-completions/handler.ts +2 -5
  40. package/src/endpoints/chat-completions/otel.ts +6 -0
  41. package/src/endpoints/chat-completions/schema.ts +85 -43
  42. package/src/endpoints/embeddings/handler.ts +5 -5
  43. package/src/endpoints/embeddings/schema.ts +1 -1
  44. package/src/errors/gateway.ts +2 -0
  45. package/src/lifecycle.ts +7 -11
  46. package/src/logger/default.ts +34 -8
  47. package/src/middleware/utils.ts +1 -0
  48. package/src/models/amazon/middleware.ts +1 -0
  49. package/src/models/anthropic/middleware.test.ts +332 -1
  50. package/src/models/anthropic/middleware.ts +83 -19
  51. package/src/models/google/middleware.test.ts +31 -0
  52. package/src/models/google/middleware.ts +18 -0
  53. package/src/models/google/presets.ts +13 -2
  54. package/src/models/openai/middleware.ts +1 -0
  55. package/src/models/types.ts +1 -0
  56. package/src/providers/bedrock/index.ts +1 -0
  57. package/src/providers/bedrock/middleware.test.ts +73 -0
  58. package/src/providers/bedrock/middleware.ts +43 -0
  59. package/src/telemetry/http.ts +0 -3
  60. package/src/types.ts +19 -23
  61. package/src/utils/request.ts +5 -33
  62. package/src/utils/response.ts +3 -3
package/README.md CHANGED
@@ -286,10 +286,9 @@ const gw = gateway({
286
286
  /**
287
287
  * Runs before any endpoint handler logic.
288
288
  * @param ctx.request Incoming request.
289
- * @returns Optional RequestPatch to merge into headers / override body.
290
- * Returning a Response stops execution of the endpoint.
289
+ * @returns Optional Response to short-circuit the request.
291
290
  */
292
- onRequest: async (ctx: { request: Request }): Promise<RequestPatch | Response | void> => {
291
+ onRequest: async (ctx: { request: Request }): Promise<Response | void> => {
293
292
  // Example Use Cases:
294
293
  // - Verify authentication
295
294
  // - Enforce rate limits
package/dist/endpoints/chat-completions/converters.d.ts CHANGED
@@ -1,11 +1,13 @@
1
1
  import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
2
- import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
2
+ import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
3
+ import { Output } from "ai";
3
4
  import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
4
5
  import { OpenAIError } from "../../errors/openai";
5
6
  export type TextCallOptions = {
6
7
  messages: ModelMessage[];
7
8
  tools?: ToolSet;
8
9
  toolChoice?: ToolChoice<ToolSet>;
10
+ output?: Output.Output;
9
11
  temperature?: number;
10
12
  maxOutputTokens?: number;
11
13
  frequencyPenalty?: number;
package/dist/endpoints/chat-completions/converters.js CHANGED
@@ -1,16 +1,17 @@
1
- import { convertBase64ToUint8Array } from "@ai-sdk/provider-utils";
2
- import { jsonSchema, tool } from "ai";
1
+ import { Output, jsonSchema, tool } from "ai";
2
+ import { z } from "zod";
3
3
  import { GatewayError } from "../../errors/gateway";
4
4
  import { OpenAIError, toOpenAIError } from "../../errors/openai";
5
5
  import { toResponse } from "../../utils/response";
6
6
  // --- Request Flow ---
7
7
  export function convertToTextCallOptions(params) {
8
- const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
8
+ const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
9
9
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
10
10
  return {
11
11
  messages: convertToModelMessages(messages),
12
12
  tools: convertToToolSet(tools),
13
13
  toolChoice: convertToToolChoice(tool_choice),
14
+ output: convertToOutput(response_format),
14
15
  temperature,
15
16
  maxOutputTokens: max_completion_tokens ?? max_tokens,
16
17
  frequencyPenalty: frequency_penalty,
@@ -23,6 +24,17 @@ export function convertToTextCallOptions(params) {
23
24
  },
24
25
  };
25
26
  }
27
+ function convertToOutput(responseFormat) {
28
+ if (!responseFormat || responseFormat.type === "text") {
29
+ return;
30
+ }
31
+ const { name, description, schema } = responseFormat.json_schema;
32
+ return Output.object({
33
+ name,
34
+ description,
35
+ schema: jsonSchema(schema),
36
+ });
37
+ }
26
38
  export function convertToModelMessages(messages) {
27
39
  const modelMessages = [];
28
40
  const toolById = indexToolMessages(messages);
@@ -63,61 +75,66 @@ export function fromChatCompletionsUserMessage(message) {
63
75
  export function fromChatCompletionsAssistantMessage(message) {
64
76
  const { tool_calls, role, content, extra_content, reasoning_details } = message;
65
77
  const parts = [];
66
- if (Array.isArray(parts)) {
67
- if (reasoning_details?.length) {
68
- for (const detail of reasoning_details) {
69
- if (detail.text && detail.type === "reasoning.text") {
70
- parts.push({
71
- type: "reasoning",
72
- text: detail.text,
73
- providerOptions: detail.signature
74
- ? {
75
- unknown: {
76
- signature: detail.signature,
77
- },
78
- }
79
- : undefined,
80
- });
81
- }
82
- else if (detail.type === "reasoning.encrypted" && detail.data) {
83
- parts.push({
84
- type: "reasoning",
85
- text: "",
86
- providerOptions: {
78
+ if (reasoning_details?.length) {
79
+ for (const detail of reasoning_details) {
80
+ if (detail.text && detail.type === "reasoning.text") {
81
+ parts.push({
82
+ type: "reasoning",
83
+ text: detail.text,
84
+ providerOptions: detail.signature
85
+ ? {
87
86
  unknown: {
88
- redactedData: detail.data,
87
+ signature: detail.signature,
89
88
  },
89
+ }
90
+ : undefined,
91
+ });
92
+ }
93
+ else if (detail.type === "reasoning.encrypted" && detail.data) {
94
+ parts.push({
95
+ type: "reasoning",
96
+ text: "",
97
+ providerOptions: {
98
+ unknown: {
99
+ redactedData: detail.data,
90
100
  },
91
- });
92
- }
101
+ },
102
+ });
93
103
  }
94
104
  }
95
- if (tool_calls?.length) {
96
- for (const tc of tool_calls) {
97
- // eslint-disable-next-line no-shadow
98
- const { id, function: fn, extra_content } = tc;
99
- const out = {
100
- type: "tool-call",
101
- toolCallId: id,
102
- toolName: fn.name,
103
- input: parseToolOutput(fn.arguments).value,
104
- };
105
- if (extra_content) {
106
- out.providerOptions = extra_content;
107
- }
108
- parts.push(out);
105
+ }
106
+ if (content !== undefined && content !== null) {
107
+ const inputContent = typeof content === "string"
108
+ ? [{ type: "text", text: content }]
109
+ : content;
110
+ for (const part of inputContent) {
111
+ if (part.type === "text") {
112
+ parts.push({
113
+ type: "text",
114
+ text: part.text,
115
+ });
109
116
  }
110
117
  }
111
- else if (content !== undefined && content !== null) {
112
- parts.push({
113
- type: "text",
114
- text: content,
115
- });
118
+ }
119
+ if (tool_calls?.length) {
120
+ for (const tc of tool_calls) {
121
+ // eslint-disable-next-line no-shadow
122
+ const { id, function: fn, extra_content } = tc;
123
+ const out = {
124
+ type: "tool-call",
125
+ toolCallId: id,
126
+ toolName: fn.name,
127
+ input: parseJsonOrText(fn.arguments).value,
128
+ };
129
+ if (extra_content) {
130
+ out.providerOptions = extra_content;
131
+ }
132
+ parts.push(out);
116
133
  }
117
134
  }
118
135
  const out = {
119
- role: role,
120
- content: Array.isArray(parts) && parts.length > 0 ? parts : (content ?? ""),
136
+ role,
137
+ content: parts.length > 0 ? parts : (content ?? ""),
121
138
  };
122
139
  if (extra_content) {
123
140
  out.providerOptions = extra_content;
@@ -137,52 +154,50 @@ export function fromChatCompletionsToolResultMessage(message, toolById) {
137
154
  type: "tool-result",
138
155
  toolCallId: tc.id,
139
156
  toolName: tc.function.name,
140
- output: parseToolOutput(toolMsg.content),
157
+ output: parseToolResult(toolMsg.content),
141
158
  });
142
159
  }
143
160
  return toolResultParts.length > 0 ? { role: "tool", content: toolResultParts } : undefined;
144
161
  }
145
162
  export function fromChatCompletionsContent(content) {
146
163
  return content.map((part) => {
147
- if (part.type === "image_url") {
148
- const url = part.image_url.url;
149
- if (url.startsWith("data:")) {
150
- const { mimeType, base64Data } = parseDataUrl(url);
151
- return mimeType.startsWith("image/")
152
- ? {
153
- type: "image",
154
- image: convertBase64ToUint8Array(base64Data),
155
- mediaType: mimeType,
156
- }
157
- : {
158
- type: "file",
159
- data: convertBase64ToUint8Array(base64Data),
160
- mediaType: mimeType,
161
- };
162
- }
163
- return {
164
- type: "image",
165
- image: new URL(url),
166
- };
164
+ switch (part.type) {
165
+ case "image_url":
166
+ return fromImageUrlPart(part.image_url.url);
167
+ case "file":
168
+ return fromFilePart(part.file.data, part.file.media_type, part.file.filename);
169
+ case "input_audio":
170
+ return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`);
171
+ default:
172
+ return part;
167
173
  }
168
- if (part.type === "file") {
169
- let { data, media_type, filename } = part.file;
170
- return media_type.startsWith("image/")
171
- ? {
172
- type: "image",
173
- image: convertBase64ToUint8Array(data),
174
- mediaType: media_type,
175
- }
176
- : {
177
- type: "file",
178
- data: convertBase64ToUint8Array(data),
179
- filename,
180
- mediaType: media_type,
181
- };
182
- }
183
- return part;
184
174
  });
185
175
  }
176
+ function fromImageUrlPart(url) {
177
+ if (url.startsWith("data:")) {
178
+ const { mimeType, base64Data } = parseDataUrl(url);
179
+ return fromFilePart(base64Data, mimeType);
180
+ }
181
+ return {
182
+ type: "image",
183
+ image: new URL(url),
184
+ };
185
+ }
186
+ function fromFilePart(base64Data, mediaType, filename) {
187
+ if (mediaType.startsWith("image/")) {
188
+ return {
189
+ type: "image",
190
+ image: z.util.base64ToUint8Array(base64Data),
191
+ mediaType,
192
+ };
193
+ }
194
+ return {
195
+ type: "file",
196
+ data: z.util.base64ToUint8Array(base64Data),
197
+ filename,
198
+ mediaType,
199
+ };
200
+ }
186
201
  export const convertToToolSet = (tools) => {
187
202
  if (!tools) {
188
203
  return;
@@ -203,12 +218,28 @@ export const convertToToolChoice = (toolChoice) => {
203
218
  if (toolChoice === "none" || toolChoice === "auto" || toolChoice === "required") {
204
219
  return toolChoice;
205
220
  }
221
+ // FUTURE: this is right now google specific, which is not supported by AI SDK, until then, we temporarily map it to auto for now https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
222
+ if (toolChoice === "validated") {
223
+ return "auto";
224
+ }
206
225
  return {
207
226
  type: "tool",
208
227
  toolName: toolChoice.function.name,
209
228
  };
210
229
  };
211
- function parseToolOutput(content) {
230
+ function parseToolResult(content) {
231
+ if (Array.isArray(content)) {
232
+ return {
233
+ type: "content",
234
+ value: content.map((part) => ({
235
+ type: "text",
236
+ text: part.text,
237
+ })),
238
+ };
239
+ }
240
+ return parseJsonOrText(content);
241
+ }
242
+ function parseJsonOrText(content) {
212
243
  try {
213
244
  return { type: "json", value: JSON.parse(content) };
214
245
  }
@@ -255,7 +286,6 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
255
286
  }
256
287
  // --- Response Flow ---
257
288
  export function toChatCompletions(result, model) {
258
- const finish_reason = toChatCompletionsFinishReason(result.finishReason);
259
289
  return {
260
290
  id: "chatcmpl-" + crypto.randomUUID(),
261
291
  object: "chat.completion",
@@ -265,7 +295,7 @@ export function toChatCompletions(result, model) {
265
295
  {
266
296
  index: 0,
267
297
  message: toChatCompletionsAssistantMessage(result),
268
- finish_reason,
298
+ finish_reason: toChatCompletionsFinishReason(result.finishReason),
269
299
  },
270
300
  ],
271
301
  usage: result.totalUsage ? toChatCompletionsUsage(result.totalUsage) : null,
@@ -287,6 +317,7 @@ export class ChatCompletionsStream extends TransformStream {
287
317
  const creationTime = Math.floor(Date.now() / 1000);
288
318
  let toolCallIndexCounter = 0;
289
319
  const reasoningIdToIndex = new Map();
320
+ let finishProviderMetadata;
290
321
  const createChunk = (delta, provider_metadata, finish_reason, usage) => {
291
322
  if (provider_metadata) {
292
323
  delta.extra_content = provider_metadata;
@@ -340,11 +371,11 @@ export class ChatCompletionsStream extends TransformStream {
340
371
  break;
341
372
  }
342
373
  case "finish-step": {
343
- controller.enqueue(createChunk({}, part.providerMetadata, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.usage)));
374
+ finishProviderMetadata = part.providerMetadata;
344
375
  break;
345
376
  }
346
377
  case "finish": {
347
- controller.enqueue(createChunk({}, undefined, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.totalUsage)));
378
+ controller.enqueue(createChunk({}, finishProviderMetadata, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.totalUsage)));
348
379
  break;
349
380
  }
350
381
  case "error": {
package/dist/endpoints/chat-completions/handler.js CHANGED
@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
7
  import { resolveProvider } from "../../providers/registry";
8
8
  import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
9
  import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
- import { resolveRequestId } from "../../utils/headers";
11
10
  import { prepareForwardHeaders } from "../../utils/request";
12
11
  import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
13
12
  import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
@@ -22,7 +21,6 @@ export const chatCompletions = (config) => {
22
21
  if (!ctx.request || ctx.request.method !== "POST") {
23
22
  throw new GatewayError("Method Not Allowed", 405);
24
23
  }
25
- const requestId = resolveRequestId(ctx.request);
26
24
  // Parse + validate input.
27
25
  try {
28
26
  ctx.body = await ctx.request.json();
@@ -68,7 +66,7 @@ export const chatCompletions = (config) => {
68
66
  // Convert inputs to AI SDK call options.
69
67
  const textOptions = convertToTextCallOptions(inputs);
70
68
  logger.trace({
71
- requestId,
69
+ requestId: ctx.requestId,
72
70
  options: textOptions,
73
71
  }, "[chat] AI SDK options");
74
72
  addSpanEvent("hebo.options.prepared");
@@ -127,7 +125,7 @@ export const chatCompletions = (config) => {
127
125
  },
128
126
  ...textOptions,
129
127
  });
130
- logger.trace({ requestId, result }, "[chat] AI SDK result");
128
+ logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
131
129
  addSpanEvent("hebo.ai-sdk.completed");
132
130
  // Transform result.
133
131
  ctx.result = toChatCompletions(result, ctx.resolvedModelId);
package/dist/endpoints/chat-completions/otel.js CHANGED
@@ -32,6 +32,13 @@ const toMessageParts = (message) => {
32
32
  else if (part.type === "image_url") {
33
33
  parts.push({ type: "image", content: part.image_url.url });
34
34
  }
35
+ else if (part.type === "input_audio") {
36
+ parts.push({
37
+ type: "audio",
38
+ content: "[REDACTED_BINARY_DATA]",
39
+ format: part.input_audio.format,
40
+ });
41
+ }
35
42
  else {
36
43
  parts.push({
37
44
  type: "file",