@hebo-ai/gateway 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +8 -6
  2. package/dist/endpoints/chat-completions/converters.d.ts +3 -1
  3. package/dist/endpoints/chat-completions/converters.js +121 -90
  4. package/dist/endpoints/chat-completions/otel.js +7 -0
  5. package/dist/endpoints/chat-completions/schema.d.ts +400 -76
  6. package/dist/endpoints/chat-completions/schema.js +80 -36
  7. package/dist/endpoints/embeddings/schema.d.ts +1 -1
  8. package/dist/endpoints/embeddings/schema.js +1 -1
  9. package/dist/errors/gateway.js +1 -0
  10. package/dist/logger/default.d.ts +0 -1
  11. package/dist/logger/default.js +30 -6
  12. package/dist/middleware/utils.js +1 -0
  13. package/dist/models/amazon/middleware.js +1 -0
  14. package/dist/models/anthropic/middleware.d.ts +2 -0
  15. package/dist/models/anthropic/middleware.js +77 -16
  16. package/dist/models/google/middleware.js +17 -0
  17. package/dist/models/google/presets.d.ts +387 -0
  18. package/dist/models/google/presets.js +9 -2
  19. package/dist/models/openai/middleware.js +1 -0
  20. package/dist/models/types.d.ts +1 -1
  21. package/dist/models/types.js +1 -0
  22. package/dist/providers/bedrock/index.d.ts +1 -0
  23. package/dist/providers/bedrock/index.js +1 -0
  24. package/dist/providers/bedrock/middleware.d.ts +2 -0
  25. package/dist/providers/bedrock/middleware.js +35 -0
  26. package/package.json +19 -21
  27. package/src/endpoints/chat-completions/converters.test.ts +219 -0
  28. package/src/endpoints/chat-completions/converters.ts +144 -104
  29. package/src/endpoints/chat-completions/handler.test.ts +87 -0
  30. package/src/endpoints/chat-completions/otel.ts +6 -0
  31. package/src/endpoints/chat-completions/schema.ts +85 -43
  32. package/src/endpoints/embeddings/schema.ts +1 -1
  33. package/src/errors/gateway.ts +2 -0
  34. package/src/logger/default.ts +34 -8
  35. package/src/middleware/utils.ts +1 -0
  36. package/src/models/amazon/middleware.ts +1 -0
  37. package/src/models/anthropic/middleware.test.ts +332 -1
  38. package/src/models/anthropic/middleware.ts +83 -19
  39. package/src/models/google/middleware.test.ts +31 -0
  40. package/src/models/google/middleware.ts +18 -0
  41. package/src/models/google/presets.ts +13 -2
  42. package/src/models/openai/middleware.ts +1 -0
  43. package/src/models/types.ts +1 -0
  44. package/src/providers/bedrock/index.ts +1 -0
  45. package/src/providers/bedrock/middleware.test.ts +73 -0
  46. package/src/providers/bedrock/middleware.ts +43 -0
package/README.md CHANGED
@@ -32,7 +32,7 @@ bun install @hebo-ai/gateway
32
32
  - Quickstart
33
33
  - [Setup A Gateway Instance](#setup-a-gateway-instance) | [Mount Route Handlers](#mount-route-handlers) | [Call the Gateway](#call-the-gateway)
34
34
  - Configuration Reference
35
- - [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Logger](#logger-settings) | [Telemetry](#telemetry-settings)
35
+ - [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Logger](#logger-settings) | [Observability](#observability)
36
36
  - Framework Support
37
37
  - [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
38
38
  - Runtime Support
@@ -540,13 +540,14 @@ Normalization rules:
540
540
 
541
541
  - `enabled` -> fall-back to model default if none provided
542
542
  - `max_tokens`: fall-back to model default if model supports
543
- - `effort` -> budget = percentage of `max_tokens`
543
+ - `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`, `max`
544
+ - Generic `effort` -> budget = percentage of `max_tokens`
544
545
  - `none`: 0%
545
546
  - `minimal`: 10%
546
547
  - `low`: 20%
547
548
  - `medium`: 50% (default)
548
549
  - `high`: 80%
549
- - `xhigh`: 95%
550
+ - `xhigh` / `max`: 95%
550
551
 
551
552
  Reasoning output is surfaced as extension to the `completion` object.
552
553
 
@@ -602,9 +603,9 @@ const gw = gateway({
602
603
  > [!TIP]
603
604
  > For production workloads, we recommend `pino` for better logging performance and lower overhead.
604
605
 
605
- ### Telemetry Settings
606
+ ### Observability
606
607
 
607
- Hebo Gateway can forward telemetry settings via the `telemetry` config field.
608
+ Hebo Gateway can forward traces & metrics via the `telemetry` config field.
608
609
 
609
610
  ```ts
610
611
  import { gateway } from "@hebo-ai/gateway";
@@ -633,8 +634,9 @@ const gw = gateway({
633
634
  });
634
635
  ```
635
636
 
636
- Attribute names and span semantics follow OpenTelemetry GenAI semantic conventions:
637
+ Attribute names and span & metrics semantics follow OpenTelemetry GenAI semantic conventions:
637
638
  https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
639
+ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
638
640
 
639
641
  > [!TIP]
640
642
  > To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
package/dist/endpoints/chat-completions/converters.d.ts CHANGED
@@ -1,11 +1,13 @@
1
1
  import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
2
- import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
2
+ import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
3
+ import { Output } from "ai";
3
4
  import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
4
5
  import { OpenAIError } from "../../errors/openai";
5
6
  export type TextCallOptions = {
6
7
  messages: ModelMessage[];
7
8
  tools?: ToolSet;
8
9
  toolChoice?: ToolChoice<ToolSet>;
10
+ output?: Output.Output;
9
11
  temperature?: number;
10
12
  maxOutputTokens?: number;
11
13
  frequencyPenalty?: number;
package/dist/endpoints/chat-completions/converters.js CHANGED
@@ -1,16 +1,17 @@
1
- import { convertBase64ToUint8Array } from "@ai-sdk/provider-utils";
2
- import { jsonSchema, tool } from "ai";
1
+ import { Output, jsonSchema, tool } from "ai";
2
+ import { z } from "zod";
3
3
  import { GatewayError } from "../../errors/gateway";
4
4
  import { OpenAIError, toOpenAIError } from "../../errors/openai";
5
5
  import { toResponse } from "../../utils/response";
6
6
  // --- Request Flow ---
7
7
  export function convertToTextCallOptions(params) {
8
- const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
8
+ const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
9
9
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
10
10
  return {
11
11
  messages: convertToModelMessages(messages),
12
12
  tools: convertToToolSet(tools),
13
13
  toolChoice: convertToToolChoice(tool_choice),
14
+ output: convertToOutput(response_format),
14
15
  temperature,
15
16
  maxOutputTokens: max_completion_tokens ?? max_tokens,
16
17
  frequencyPenalty: frequency_penalty,
@@ -23,6 +24,17 @@ export function convertToTextCallOptions(params) {
23
24
  },
24
25
  };
25
26
  }
27
+ function convertToOutput(responseFormat) {
28
+ if (!responseFormat || responseFormat.type === "text") {
29
+ return;
30
+ }
31
+ const { name, description, schema } = responseFormat.json_schema;
32
+ return Output.object({
33
+ name,
34
+ description,
35
+ schema: jsonSchema(schema),
36
+ });
37
+ }
26
38
  export function convertToModelMessages(messages) {
27
39
  const modelMessages = [];
28
40
  const toolById = indexToolMessages(messages);
@@ -63,61 +75,66 @@ export function fromChatCompletionsUserMessage(message) {
63
75
  export function fromChatCompletionsAssistantMessage(message) {
64
76
  const { tool_calls, role, content, extra_content, reasoning_details } = message;
65
77
  const parts = [];
66
- if (Array.isArray(parts)) {
67
- if (reasoning_details?.length) {
68
- for (const detail of reasoning_details) {
69
- if (detail.text && detail.type === "reasoning.text") {
70
- parts.push({
71
- type: "reasoning",
72
- text: detail.text,
73
- providerOptions: detail.signature
74
- ? {
75
- unknown: {
76
- signature: detail.signature,
77
- },
78
- }
79
- : undefined,
80
- });
81
- }
82
- else if (detail.type === "reasoning.encrypted" && detail.data) {
83
- parts.push({
84
- type: "reasoning",
85
- text: "",
86
- providerOptions: {
78
+ if (reasoning_details?.length) {
79
+ for (const detail of reasoning_details) {
80
+ if (detail.text && detail.type === "reasoning.text") {
81
+ parts.push({
82
+ type: "reasoning",
83
+ text: detail.text,
84
+ providerOptions: detail.signature
85
+ ? {
87
86
  unknown: {
88
- redactedData: detail.data,
87
+ signature: detail.signature,
89
88
  },
89
+ }
90
+ : undefined,
91
+ });
92
+ }
93
+ else if (detail.type === "reasoning.encrypted" && detail.data) {
94
+ parts.push({
95
+ type: "reasoning",
96
+ text: "",
97
+ providerOptions: {
98
+ unknown: {
99
+ redactedData: detail.data,
90
100
  },
91
- });
92
- }
101
+ },
102
+ });
93
103
  }
94
104
  }
95
- if (tool_calls?.length) {
96
- for (const tc of tool_calls) {
97
- // eslint-disable-next-line no-shadow
98
- const { id, function: fn, extra_content } = tc;
99
- const out = {
100
- type: "tool-call",
101
- toolCallId: id,
102
- toolName: fn.name,
103
- input: parseToolOutput(fn.arguments).value,
104
- };
105
- if (extra_content) {
106
- out.providerOptions = extra_content;
107
- }
108
- parts.push(out);
105
+ }
106
+ if (content !== undefined && content !== null) {
107
+ const inputContent = typeof content === "string"
108
+ ? [{ type: "text", text: content }]
109
+ : content;
110
+ for (const part of inputContent) {
111
+ if (part.type === "text") {
112
+ parts.push({
113
+ type: "text",
114
+ text: part.text,
115
+ });
109
116
  }
110
117
  }
111
- else if (content !== undefined && content !== null) {
112
- parts.push({
113
- type: "text",
114
- text: content,
115
- });
118
+ }
119
+ if (tool_calls?.length) {
120
+ for (const tc of tool_calls) {
121
+ // eslint-disable-next-line no-shadow
122
+ const { id, function: fn, extra_content } = tc;
123
+ const out = {
124
+ type: "tool-call",
125
+ toolCallId: id,
126
+ toolName: fn.name,
127
+ input: parseJsonOrText(fn.arguments).value,
128
+ };
129
+ if (extra_content) {
130
+ out.providerOptions = extra_content;
131
+ }
132
+ parts.push(out);
116
133
  }
117
134
  }
118
135
  const out = {
119
- role: role,
120
- content: Array.isArray(parts) && parts.length > 0 ? parts : (content ?? ""),
136
+ role,
137
+ content: parts.length > 0 ? parts : (content ?? ""),
121
138
  };
122
139
  if (extra_content) {
123
140
  out.providerOptions = extra_content;
@@ -137,52 +154,50 @@ export function fromChatCompletionsToolResultMessage(message, toolById) {
137
154
  type: "tool-result",
138
155
  toolCallId: tc.id,
139
156
  toolName: tc.function.name,
140
- output: parseToolOutput(toolMsg.content),
157
+ output: parseToolResult(toolMsg.content),
141
158
  });
142
159
  }
143
160
  return toolResultParts.length > 0 ? { role: "tool", content: toolResultParts } : undefined;
144
161
  }
145
162
  export function fromChatCompletionsContent(content) {
146
163
  return content.map((part) => {
147
- if (part.type === "image_url") {
148
- const url = part.image_url.url;
149
- if (url.startsWith("data:")) {
150
- const { mimeType, base64Data } = parseDataUrl(url);
151
- return mimeType.startsWith("image/")
152
- ? {
153
- type: "image",
154
- image: convertBase64ToUint8Array(base64Data),
155
- mediaType: mimeType,
156
- }
157
- : {
158
- type: "file",
159
- data: convertBase64ToUint8Array(base64Data),
160
- mediaType: mimeType,
161
- };
162
- }
163
- return {
164
- type: "image",
165
- image: new URL(url),
166
- };
164
+ switch (part.type) {
165
+ case "image_url":
166
+ return fromImageUrlPart(part.image_url.url);
167
+ case "file":
168
+ return fromFilePart(part.file.data, part.file.media_type, part.file.filename);
169
+ case "input_audio":
170
+ return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`);
171
+ default:
172
+ return part;
167
173
  }
168
- if (part.type === "file") {
169
- let { data, media_type, filename } = part.file;
170
- return media_type.startsWith("image/")
171
- ? {
172
- type: "image",
173
- image: convertBase64ToUint8Array(data),
174
- mediaType: media_type,
175
- }
176
- : {
177
- type: "file",
178
- data: convertBase64ToUint8Array(data),
179
- filename,
180
- mediaType: media_type,
181
- };
182
- }
183
- return part;
184
174
  });
185
175
  }
176
+ function fromImageUrlPart(url) {
177
+ if (url.startsWith("data:")) {
178
+ const { mimeType, base64Data } = parseDataUrl(url);
179
+ return fromFilePart(base64Data, mimeType);
180
+ }
181
+ return {
182
+ type: "image",
183
+ image: new URL(url),
184
+ };
185
+ }
186
+ function fromFilePart(base64Data, mediaType, filename) {
187
+ if (mediaType.startsWith("image/")) {
188
+ return {
189
+ type: "image",
190
+ image: z.util.base64ToUint8Array(base64Data),
191
+ mediaType,
192
+ };
193
+ }
194
+ return {
195
+ type: "file",
196
+ data: z.util.base64ToUint8Array(base64Data),
197
+ filename,
198
+ mediaType,
199
+ };
200
+ }
186
201
  export const convertToToolSet = (tools) => {
187
202
  if (!tools) {
188
203
  return;
@@ -203,12 +218,28 @@ export const convertToToolChoice = (toolChoice) => {
203
218
  if (toolChoice === "none" || toolChoice === "auto" || toolChoice === "required") {
204
219
  return toolChoice;
205
220
  }
221
+ // FUTURE: this is right now google specific, which is not supported by AI SDK, until then, we temporarily map it to auto for now https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
222
+ if (toolChoice === "validated") {
223
+ return "auto";
224
+ }
206
225
  return {
207
226
  type: "tool",
208
227
  toolName: toolChoice.function.name,
209
228
  };
210
229
  };
211
- function parseToolOutput(content) {
230
+ function parseToolResult(content) {
231
+ if (Array.isArray(content)) {
232
+ return {
233
+ type: "content",
234
+ value: content.map((part) => ({
235
+ type: "text",
236
+ text: part.text,
237
+ })),
238
+ };
239
+ }
240
+ return parseJsonOrText(content);
241
+ }
242
+ function parseJsonOrText(content) {
212
243
  try {
213
244
  return { type: "json", value: JSON.parse(content) };
214
245
  }
@@ -255,7 +286,6 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
255
286
  }
256
287
  // --- Response Flow ---
257
288
  export function toChatCompletions(result, model) {
258
- const finish_reason = toChatCompletionsFinishReason(result.finishReason);
259
289
  return {
260
290
  id: "chatcmpl-" + crypto.randomUUID(),
261
291
  object: "chat.completion",
@@ -265,7 +295,7 @@ export function toChatCompletions(result, model) {
265
295
  {
266
296
  index: 0,
267
297
  message: toChatCompletionsAssistantMessage(result),
268
- finish_reason,
298
+ finish_reason: toChatCompletionsFinishReason(result.finishReason),
269
299
  },
270
300
  ],
271
301
  usage: result.totalUsage ? toChatCompletionsUsage(result.totalUsage) : null,
@@ -287,6 +317,7 @@ export class ChatCompletionsStream extends TransformStream {
287
317
  const creationTime = Math.floor(Date.now() / 1000);
288
318
  let toolCallIndexCounter = 0;
289
319
  const reasoningIdToIndex = new Map();
320
+ let finishProviderMetadata;
290
321
  const createChunk = (delta, provider_metadata, finish_reason, usage) => {
291
322
  if (provider_metadata) {
292
323
  delta.extra_content = provider_metadata;
@@ -340,11 +371,11 @@ export class ChatCompletionsStream extends TransformStream {
340
371
  break;
341
372
  }
342
373
  case "finish-step": {
343
- controller.enqueue(createChunk({}, part.providerMetadata, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.usage)));
374
+ finishProviderMetadata = part.providerMetadata;
344
375
  break;
345
376
  }
346
377
  case "finish": {
347
- controller.enqueue(createChunk({}, undefined, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.totalUsage)));
378
+ controller.enqueue(createChunk({}, finishProviderMetadata, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.totalUsage)));
348
379
  break;
349
380
  }
350
381
  case "error": {
package/dist/endpoints/chat-completions/otel.js CHANGED
@@ -32,6 +32,13 @@ const toMessageParts = (message) => {
32
32
  else if (part.type === "image_url") {
33
33
  parts.push({ type: "image", content: part.image_url.url });
34
34
  }
35
+ else if (part.type === "input_audio") {
36
+ parts.push({
37
+ type: "audio",
38
+ content: "[REDACTED_BINARY_DATA]",
39
+ format: part.input_audio.format,
40
+ });
41
+ }
35
42
  else {
36
43
  parts.push({
37
44
  type: "file",