@hebo-ai/gateway 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -6
- package/dist/endpoints/chat-completions/converters.d.ts +3 -1
- package/dist/endpoints/chat-completions/converters.js +121 -90
- package/dist/endpoints/chat-completions/otel.js +7 -0
- package/dist/endpoints/chat-completions/schema.d.ts +400 -76
- package/dist/endpoints/chat-completions/schema.js +80 -36
- package/dist/endpoints/embeddings/schema.d.ts +1 -1
- package/dist/endpoints/embeddings/schema.js +1 -1
- package/dist/errors/gateway.js +1 -0
- package/dist/logger/default.d.ts +0 -1
- package/dist/logger/default.js +30 -6
- package/dist/middleware/utils.js +1 -0
- package/dist/models/amazon/middleware.js +1 -0
- package/dist/models/anthropic/middleware.d.ts +2 -0
- package/dist/models/anthropic/middleware.js +77 -16
- package/dist/models/google/middleware.js +17 -0
- package/dist/models/google/presets.d.ts +387 -0
- package/dist/models/google/presets.js +9 -2
- package/dist/models/openai/middleware.js +1 -0
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/index.d.ts +1 -0
- package/dist/providers/bedrock/index.js +1 -0
- package/dist/providers/bedrock/middleware.d.ts +2 -0
- package/dist/providers/bedrock/middleware.js +35 -0
- package/package.json +19 -21
- package/src/endpoints/chat-completions/converters.test.ts +219 -0
- package/src/endpoints/chat-completions/converters.ts +144 -104
- package/src/endpoints/chat-completions/handler.test.ts +87 -0
- package/src/endpoints/chat-completions/otel.ts +6 -0
- package/src/endpoints/chat-completions/schema.ts +85 -43
- package/src/endpoints/embeddings/schema.ts +1 -1
- package/src/errors/gateway.ts +2 -0
- package/src/logger/default.ts +34 -8
- package/src/middleware/utils.ts +1 -0
- package/src/models/amazon/middleware.ts +1 -0
- package/src/models/anthropic/middleware.test.ts +332 -1
- package/src/models/anthropic/middleware.ts +83 -19
- package/src/models/google/middleware.test.ts +31 -0
- package/src/models/google/middleware.ts +18 -0
- package/src/models/google/presets.ts +13 -2
- package/src/models/openai/middleware.ts +1 -0
- package/src/models/types.ts +1 -0
- package/src/providers/bedrock/index.ts +1 -0
- package/src/providers/bedrock/middleware.test.ts +73 -0
- package/src/providers/bedrock/middleware.ts +43 -0
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ bun install @hebo-ai/gateway
|
|
|
32
32
|
- Quickstart
|
|
33
33
|
- [Setup A Gateway Instance](#setup-a-gateway-instance) | [Mount Route Handlers](#mount-route-handlers) | [Call the Gateway](#call-the-gateway)
|
|
34
34
|
- Configuration Reference
|
|
35
|
-
- [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Logger](#logger-settings) | [
|
|
35
|
+
- [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Logger](#logger-settings) | [Observability](#observability)
|
|
36
36
|
- Framework Support
|
|
37
37
|
- [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
|
|
38
38
|
- Runtime Support
|
|
@@ -540,13 +540,14 @@ Normalization rules:
|
|
|
540
540
|
|
|
541
541
|
- `enabled` -> fall-back to model default if none provided
|
|
542
542
|
- `max_tokens`: fall-back to model default if model supports
|
|
543
|
-
- `effort`
|
|
543
|
+
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`, `max`
|
|
544
|
+
- Generic `effort` -> budget = percentage of `max_tokens`
|
|
544
545
|
- `none`: 0%
|
|
545
546
|
- `minimal`: 10%
|
|
546
547
|
- `low`: 20%
|
|
547
548
|
- `medium`: 50% (default)
|
|
548
549
|
- `high`: 80%
|
|
549
|
-
- `xhigh`: 95%
|
|
550
|
+
- `xhigh` / `max`: 95%
|
|
550
551
|
|
|
551
552
|
Reasoning output is surfaced as extension to the `completion` object.
|
|
552
553
|
|
|
@@ -602,9 +603,9 @@ const gw = gateway({
|
|
|
602
603
|
> [!TIP]
|
|
603
604
|
> For production workloads, we recommend `pino` for better logging performance and lower overhead.
|
|
604
605
|
|
|
605
|
-
###
|
|
606
|
+
### Observability
|
|
606
607
|
|
|
607
|
-
Hebo Gateway can forward
|
|
608
|
+
Hebo Gateway can forward traces & metrics via the `telemetry` config field.
|
|
608
609
|
|
|
609
610
|
```ts
|
|
610
611
|
import { gateway } from "@hebo-ai/gateway";
|
|
@@ -633,8 +634,9 @@ const gw = gateway({
|
|
|
633
634
|
});
|
|
634
635
|
```
|
|
635
636
|
|
|
636
|
-
Attribute names and span semantics follow OpenTelemetry GenAI semantic conventions:
|
|
637
|
+
Attribute names and span & metrics semantics follow OpenTelemetry GenAI semantic conventions:
|
|
637
638
|
https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
|
|
639
|
+
https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
|
|
638
640
|
|
|
639
641
|
> [!TIP]
|
|
640
642
|
> To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
|
|
2
|
-
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage,
|
|
2
|
+
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
|
+
import { Output } from "ai";
|
|
3
4
|
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
4
5
|
import { OpenAIError } from "../../errors/openai";
|
|
5
6
|
export type TextCallOptions = {
|
|
6
7
|
messages: ModelMessage[];
|
|
7
8
|
tools?: ToolSet;
|
|
8
9
|
toolChoice?: ToolChoice<ToolSet>;
|
|
10
|
+
output?: Output.Output;
|
|
9
11
|
temperature?: number;
|
|
10
12
|
maxOutputTokens?: number;
|
|
11
13
|
frequencyPenalty?: number;
|
|
@@ -1,16 +1,17 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { Output, jsonSchema, tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
3
|
import { GatewayError } from "../../errors/gateway";
|
|
4
4
|
import { OpenAIError, toOpenAIError } from "../../errors/openai";
|
|
5
5
|
import { toResponse } from "../../utils/response";
|
|
6
6
|
// --- Request Flow ---
|
|
7
7
|
export function convertToTextCallOptions(params) {
|
|
8
|
-
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
8
|
+
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
9
9
|
Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
|
|
10
10
|
return {
|
|
11
11
|
messages: convertToModelMessages(messages),
|
|
12
12
|
tools: convertToToolSet(tools),
|
|
13
13
|
toolChoice: convertToToolChoice(tool_choice),
|
|
14
|
+
output: convertToOutput(response_format),
|
|
14
15
|
temperature,
|
|
15
16
|
maxOutputTokens: max_completion_tokens ?? max_tokens,
|
|
16
17
|
frequencyPenalty: frequency_penalty,
|
|
@@ -23,6 +24,17 @@ export function convertToTextCallOptions(params) {
|
|
|
23
24
|
},
|
|
24
25
|
};
|
|
25
26
|
}
|
|
27
|
+
function convertToOutput(responseFormat) {
|
|
28
|
+
if (!responseFormat || responseFormat.type === "text") {
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
31
|
+
const { name, description, schema } = responseFormat.json_schema;
|
|
32
|
+
return Output.object({
|
|
33
|
+
name,
|
|
34
|
+
description,
|
|
35
|
+
schema: jsonSchema(schema),
|
|
36
|
+
});
|
|
37
|
+
}
|
|
26
38
|
export function convertToModelMessages(messages) {
|
|
27
39
|
const modelMessages = [];
|
|
28
40
|
const toolById = indexToolMessages(messages);
|
|
@@ -63,61 +75,66 @@ export function fromChatCompletionsUserMessage(message) {
|
|
|
63
75
|
export function fromChatCompletionsAssistantMessage(message) {
|
|
64
76
|
const { tool_calls, role, content, extra_content, reasoning_details } = message;
|
|
65
77
|
const parts = [];
|
|
66
|
-
if (
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
? {
|
|
75
|
-
unknown: {
|
|
76
|
-
signature: detail.signature,
|
|
77
|
-
},
|
|
78
|
-
}
|
|
79
|
-
: undefined,
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
else if (detail.type === "reasoning.encrypted" && detail.data) {
|
|
83
|
-
parts.push({
|
|
84
|
-
type: "reasoning",
|
|
85
|
-
text: "",
|
|
86
|
-
providerOptions: {
|
|
78
|
+
if (reasoning_details?.length) {
|
|
79
|
+
for (const detail of reasoning_details) {
|
|
80
|
+
if (detail.text && detail.type === "reasoning.text") {
|
|
81
|
+
parts.push({
|
|
82
|
+
type: "reasoning",
|
|
83
|
+
text: detail.text,
|
|
84
|
+
providerOptions: detail.signature
|
|
85
|
+
? {
|
|
87
86
|
unknown: {
|
|
88
|
-
|
|
87
|
+
signature: detail.signature,
|
|
89
88
|
},
|
|
89
|
+
}
|
|
90
|
+
: undefined,
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
else if (detail.type === "reasoning.encrypted" && detail.data) {
|
|
94
|
+
parts.push({
|
|
95
|
+
type: "reasoning",
|
|
96
|
+
text: "",
|
|
97
|
+
providerOptions: {
|
|
98
|
+
unknown: {
|
|
99
|
+
redactedData: detail.data,
|
|
90
100
|
},
|
|
91
|
-
}
|
|
92
|
-
}
|
|
101
|
+
},
|
|
102
|
+
});
|
|
93
103
|
}
|
|
94
104
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
out.providerOptions = extra_content;
|
|
107
|
-
}
|
|
108
|
-
parts.push(out);
|
|
105
|
+
}
|
|
106
|
+
if (content !== undefined && content !== null) {
|
|
107
|
+
const inputContent = typeof content === "string"
|
|
108
|
+
? [{ type: "text", text: content }]
|
|
109
|
+
: content;
|
|
110
|
+
for (const part of inputContent) {
|
|
111
|
+
if (part.type === "text") {
|
|
112
|
+
parts.push({
|
|
113
|
+
type: "text",
|
|
114
|
+
text: part.text,
|
|
115
|
+
});
|
|
109
116
|
}
|
|
110
117
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
}
|
|
118
|
+
}
|
|
119
|
+
if (tool_calls?.length) {
|
|
120
|
+
for (const tc of tool_calls) {
|
|
121
|
+
// eslint-disable-next-line no-shadow
|
|
122
|
+
const { id, function: fn, extra_content } = tc;
|
|
123
|
+
const out = {
|
|
124
|
+
type: "tool-call",
|
|
125
|
+
toolCallId: id,
|
|
126
|
+
toolName: fn.name,
|
|
127
|
+
input: parseJsonOrText(fn.arguments).value,
|
|
128
|
+
};
|
|
129
|
+
if (extra_content) {
|
|
130
|
+
out.providerOptions = extra_content;
|
|
131
|
+
}
|
|
132
|
+
parts.push(out);
|
|
116
133
|
}
|
|
117
134
|
}
|
|
118
135
|
const out = {
|
|
119
|
-
role
|
|
120
|
-
content:
|
|
136
|
+
role,
|
|
137
|
+
content: parts.length > 0 ? parts : (content ?? ""),
|
|
121
138
|
};
|
|
122
139
|
if (extra_content) {
|
|
123
140
|
out.providerOptions = extra_content;
|
|
@@ -137,52 +154,50 @@ export function fromChatCompletionsToolResultMessage(message, toolById) {
|
|
|
137
154
|
type: "tool-result",
|
|
138
155
|
toolCallId: tc.id,
|
|
139
156
|
toolName: tc.function.name,
|
|
140
|
-
output:
|
|
157
|
+
output: parseToolResult(toolMsg.content),
|
|
141
158
|
});
|
|
142
159
|
}
|
|
143
160
|
return toolResultParts.length > 0 ? { role: "tool", content: toolResultParts } : undefined;
|
|
144
161
|
}
|
|
145
162
|
export function fromChatCompletionsContent(content) {
|
|
146
163
|
return content.map((part) => {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
return
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
}
|
|
157
|
-
: {
|
|
158
|
-
type: "file",
|
|
159
|
-
data: convertBase64ToUint8Array(base64Data),
|
|
160
|
-
mediaType: mimeType,
|
|
161
|
-
};
|
|
162
|
-
}
|
|
163
|
-
return {
|
|
164
|
-
type: "image",
|
|
165
|
-
image: new URL(url),
|
|
166
|
-
};
|
|
164
|
+
switch (part.type) {
|
|
165
|
+
case "image_url":
|
|
166
|
+
return fromImageUrlPart(part.image_url.url);
|
|
167
|
+
case "file":
|
|
168
|
+
return fromFilePart(part.file.data, part.file.media_type, part.file.filename);
|
|
169
|
+
case "input_audio":
|
|
170
|
+
return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`);
|
|
171
|
+
default:
|
|
172
|
+
return part;
|
|
167
173
|
}
|
|
168
|
-
if (part.type === "file") {
|
|
169
|
-
let { data, media_type, filename } = part.file;
|
|
170
|
-
return media_type.startsWith("image/")
|
|
171
|
-
? {
|
|
172
|
-
type: "image",
|
|
173
|
-
image: convertBase64ToUint8Array(data),
|
|
174
|
-
mediaType: media_type,
|
|
175
|
-
}
|
|
176
|
-
: {
|
|
177
|
-
type: "file",
|
|
178
|
-
data: convertBase64ToUint8Array(data),
|
|
179
|
-
filename,
|
|
180
|
-
mediaType: media_type,
|
|
181
|
-
};
|
|
182
|
-
}
|
|
183
|
-
return part;
|
|
184
174
|
});
|
|
185
175
|
}
|
|
176
|
+
function fromImageUrlPart(url) {
|
|
177
|
+
if (url.startsWith("data:")) {
|
|
178
|
+
const { mimeType, base64Data } = parseDataUrl(url);
|
|
179
|
+
return fromFilePart(base64Data, mimeType);
|
|
180
|
+
}
|
|
181
|
+
return {
|
|
182
|
+
type: "image",
|
|
183
|
+
image: new URL(url),
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
function fromFilePart(base64Data, mediaType, filename) {
|
|
187
|
+
if (mediaType.startsWith("image/")) {
|
|
188
|
+
return {
|
|
189
|
+
type: "image",
|
|
190
|
+
image: z.util.base64ToUint8Array(base64Data),
|
|
191
|
+
mediaType,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
return {
|
|
195
|
+
type: "file",
|
|
196
|
+
data: z.util.base64ToUint8Array(base64Data),
|
|
197
|
+
filename,
|
|
198
|
+
mediaType,
|
|
199
|
+
};
|
|
200
|
+
}
|
|
186
201
|
export const convertToToolSet = (tools) => {
|
|
187
202
|
if (!tools) {
|
|
188
203
|
return;
|
|
@@ -203,12 +218,28 @@ export const convertToToolChoice = (toolChoice) => {
|
|
|
203
218
|
if (toolChoice === "none" || toolChoice === "auto" || toolChoice === "required") {
|
|
204
219
|
return toolChoice;
|
|
205
220
|
}
|
|
221
|
+
// FUTURE: this is right now google specific, which is not supported by AI SDK, until then, we temporarily map it to auto for now https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
222
|
+
if (toolChoice === "validated") {
|
|
223
|
+
return "auto";
|
|
224
|
+
}
|
|
206
225
|
return {
|
|
207
226
|
type: "tool",
|
|
208
227
|
toolName: toolChoice.function.name,
|
|
209
228
|
};
|
|
210
229
|
};
|
|
211
|
-
function
|
|
230
|
+
function parseToolResult(content) {
|
|
231
|
+
if (Array.isArray(content)) {
|
|
232
|
+
return {
|
|
233
|
+
type: "content",
|
|
234
|
+
value: content.map((part) => ({
|
|
235
|
+
type: "text",
|
|
236
|
+
text: part.text,
|
|
237
|
+
})),
|
|
238
|
+
};
|
|
239
|
+
}
|
|
240
|
+
return parseJsonOrText(content);
|
|
241
|
+
}
|
|
242
|
+
function parseJsonOrText(content) {
|
|
212
243
|
try {
|
|
213
244
|
return { type: "json", value: JSON.parse(content) };
|
|
214
245
|
}
|
|
@@ -255,7 +286,6 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
|
|
|
255
286
|
}
|
|
256
287
|
// --- Response Flow ---
|
|
257
288
|
export function toChatCompletions(result, model) {
|
|
258
|
-
const finish_reason = toChatCompletionsFinishReason(result.finishReason);
|
|
259
289
|
return {
|
|
260
290
|
id: "chatcmpl-" + crypto.randomUUID(),
|
|
261
291
|
object: "chat.completion",
|
|
@@ -265,7 +295,7 @@ export function toChatCompletions(result, model) {
|
|
|
265
295
|
{
|
|
266
296
|
index: 0,
|
|
267
297
|
message: toChatCompletionsAssistantMessage(result),
|
|
268
|
-
finish_reason,
|
|
298
|
+
finish_reason: toChatCompletionsFinishReason(result.finishReason),
|
|
269
299
|
},
|
|
270
300
|
],
|
|
271
301
|
usage: result.totalUsage ? toChatCompletionsUsage(result.totalUsage) : null,
|
|
@@ -287,6 +317,7 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
287
317
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
288
318
|
let toolCallIndexCounter = 0;
|
|
289
319
|
const reasoningIdToIndex = new Map();
|
|
320
|
+
let finishProviderMetadata;
|
|
290
321
|
const createChunk = (delta, provider_metadata, finish_reason, usage) => {
|
|
291
322
|
if (provider_metadata) {
|
|
292
323
|
delta.extra_content = provider_metadata;
|
|
@@ -340,11 +371,11 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
340
371
|
break;
|
|
341
372
|
}
|
|
342
373
|
case "finish-step": {
|
|
343
|
-
|
|
374
|
+
finishProviderMetadata = part.providerMetadata;
|
|
344
375
|
break;
|
|
345
376
|
}
|
|
346
377
|
case "finish": {
|
|
347
|
-
controller.enqueue(createChunk({},
|
|
378
|
+
controller.enqueue(createChunk({}, finishProviderMetadata, toChatCompletionsFinishReason(part.finishReason), toChatCompletionsUsage(part.totalUsage)));
|
|
348
379
|
break;
|
|
349
380
|
}
|
|
350
381
|
case "error": {
|
|
@@ -32,6 +32,13 @@ const toMessageParts = (message) => {
|
|
|
32
32
|
else if (part.type === "image_url") {
|
|
33
33
|
parts.push({ type: "image", content: part.image_url.url });
|
|
34
34
|
}
|
|
35
|
+
else if (part.type === "input_audio") {
|
|
36
|
+
parts.push({
|
|
37
|
+
type: "audio",
|
|
38
|
+
content: "[REDACTED_BINARY_DATA]",
|
|
39
|
+
format: part.input_audio.format,
|
|
40
|
+
});
|
|
41
|
+
}
|
|
35
42
|
else {
|
|
36
43
|
parts.push({
|
|
37
44
|
type: "file",
|