@hebo-ai/gateway 0.5.2 → 0.6.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -1
- package/dist/endpoints/chat-completions/converters.js +100 -18
- package/dist/endpoints/chat-completions/handler.js +6 -2
- package/dist/endpoints/chat-completions/index.d.ts +1 -0
- package/dist/endpoints/chat-completions/index.js +1 -0
- package/dist/endpoints/chat-completions/otel.js +1 -0
- package/dist/endpoints/chat-completions/schema.d.ts +244 -0
- package/dist/endpoints/chat-completions/schema.js +36 -4
- package/dist/endpoints/embeddings/handler.js +3 -1
- package/dist/endpoints/embeddings/index.d.ts +1 -0
- package/dist/endpoints/embeddings/index.js +1 -0
- package/dist/lifecycle.js +2 -2
- package/dist/middleware/debug.d.ts +3 -0
- package/dist/middleware/debug.js +27 -0
- package/dist/middleware/matcher.js +2 -0
- package/dist/models/anthropic/middleware.d.ts +1 -0
- package/dist/models/anthropic/middleware.js +17 -1
- package/dist/models/google/middleware.d.ts +1 -0
- package/dist/models/google/middleware.js +18 -1
- package/dist/models/openai/middleware.d.ts +1 -0
- package/dist/models/openai/middleware.js +23 -1
- package/dist/providers/bedrock/middleware.d.ts +1 -0
- package/dist/providers/bedrock/middleware.js +52 -1
- package/dist/telemetry/fetch.d.ts +1 -1
- package/dist/telemetry/fetch.js +23 -3
- package/dist/telemetry/index.d.ts +1 -0
- package/dist/telemetry/index.js +1 -0
- package/package.json +17 -12
- package/src/endpoints/chat-completions/converters.test.ts +85 -1
- package/src/endpoints/chat-completions/converters.ts +139 -18
- package/src/endpoints/chat-completions/handler.test.ts +2 -0
- package/src/endpoints/chat-completions/index.ts +1 -0
- package/src/endpoints/chat-completions/otel.ts +1 -0
- package/src/endpoints/chat-completions/schema.ts +38 -4
- package/src/endpoints/embeddings/index.ts +1 -0
- package/src/lifecycle.ts +2 -2
- package/src/models/anthropic/middleware.test.ts +45 -1
- package/src/models/anthropic/middleware.ts +21 -1
- package/src/models/google/middleware.test.ts +30 -1
- package/src/models/google/middleware.ts +20 -1
- package/src/models/openai/middleware.test.ts +32 -1
- package/src/models/openai/middleware.ts +25 -1
- package/src/providers/bedrock/middleware.test.ts +121 -1
- package/src/providers/bedrock/middleware.ts +61 -1
- package/src/telemetry/fetch.ts +31 -4
- package/src/telemetry/index.ts +1 -0
package/README.md
CHANGED
|
@@ -38,7 +38,7 @@ bun install @hebo-ai/gateway
|
|
|
38
38
|
- Runtime Support
|
|
39
39
|
- [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
|
|
40
40
|
- OpenAI Extensions
|
|
41
|
-
- [Reasoning](#reasoning)
|
|
41
|
+
- [Reasoning](#reasoning) | [Prompt Caching](#prompt-caching)
|
|
42
42
|
- Advanced Usage
|
|
43
43
|
- [Passing Framework State to Hooks](#passing-framework-state-to-hooks) | [Selective Route Mounting](#selective-route-mounting) | [Low-level Schemas & Converters](#low-level-schemas--converters)
|
|
44
44
|
|
|
@@ -565,6 +565,37 @@ Advanced models (like Anthropic Claude 3.7 or Gemini 3) surface structured reaso
|
|
|
565
565
|
|
|
566
566
|
For **Gemini 3** models, returning the thought signature via `extra_content` is mandatory to resume the chain-of-thought; failing to do so may result in errors or degraded performance.
|
|
567
567
|
|
|
568
|
+
### Prompt Caching
|
|
569
|
+
|
|
570
|
+
The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.
|
|
571
|
+
|
|
572
|
+
Accepted request fields:
|
|
573
|
+
|
|
574
|
+
- `prompt_cache_key` + `prompt_cache_retention` (OpenAI style)
|
|
575
|
+
- `cache_control` (OpenRouter / Vercel / Claude style)
|
|
576
|
+
- `cached_content` (Gemini style)
|
|
577
|
+
|
|
578
|
+
```json
|
|
579
|
+
{
|
|
580
|
+
"model": "anthropic/claude-sonnet-4.6",
|
|
581
|
+
"messages": [
|
|
582
|
+
{
|
|
583
|
+
"role": "system",
|
|
584
|
+
"content": "Reusable policy and instructions",
|
|
585
|
+
"cache_control": { "type": "ephemeral", "ttl": "1h" }
|
|
586
|
+
},
|
|
587
|
+
{ "role": "user", "content": "Apply policy to this request." }
|
|
588
|
+
]
|
|
589
|
+
}
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
Provider behavior:
|
|
593
|
+
|
|
594
|
+
- **OpenAI-compatible**: forwards `prompt_cache_key` and `prompt_cache_retention` as native provider options.
|
|
595
|
+
- **Anthropic Claude**: maps top-level caching to Anthropic cache control, while message/part `cache_control` breakpoints are preserved.
|
|
596
|
+
- **Google Gemini**: maps `cached_content` to Gemini `cachedContent`.
|
|
597
|
+
- **Amazon Nova (Bedrock)**: maps `cache_control` to Bedrock `cachePoints` and inserts an automatic cache point on a stable prefix when none is provided.
|
|
598
|
+
|
|
568
599
|
## 🧪 Advanced Usage
|
|
569
600
|
|
|
570
601
|
### Logger Settings
|
|
package/dist/endpoints/chat-completions/converters.js
CHANGED
|
@@ -5,8 +5,9 @@ import { OpenAIError, toOpenAIError } from "../../errors/openai";
|
|
|
5
5
|
import { toResponse } from "../../utils/response";
|
|
6
6
|
// --- Request Flow ---
|
|
7
7
|
export function convertToTextCallOptions(params) {
|
|
8
|
-
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
8
|
+
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, cached_content, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
9
9
|
Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
|
|
10
|
+
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control));
|
|
10
11
|
const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
|
|
11
12
|
return {
|
|
12
13
|
messages: convertToModelMessages(messages),
|
|
@@ -44,6 +45,11 @@ export function convertToModelMessages(messages) {
|
|
|
44
45
|
if (message.role === "tool")
|
|
45
46
|
continue;
|
|
46
47
|
if (message.role === "system") {
|
|
48
|
+
if (message.cache_control) {
|
|
49
|
+
message.providerOptions = {
|
|
50
|
+
unknown: { cache_control: message.cache_control },
|
|
51
|
+
};
|
|
52
|
+
}
|
|
47
53
|
modelMessages.push(message);
|
|
48
54
|
continue;
|
|
49
55
|
}
|
|
@@ -67,15 +73,21 @@ function indexToolMessages(messages) {
|
|
|
67
73
|
return map;
|
|
68
74
|
}
|
|
69
75
|
export function fromChatCompletionsUserMessage(message) {
|
|
70
|
-
|
|
76
|
+
const out = {
|
|
71
77
|
role: "user",
|
|
72
78
|
content: Array.isArray(message.content)
|
|
73
79
|
? fromChatCompletionsContent(message.content)
|
|
74
80
|
: message.content,
|
|
75
81
|
};
|
|
82
|
+
if (message.cache_control) {
|
|
83
|
+
out.providerOptions = {
|
|
84
|
+
unknown: { cache_control: message.cache_control },
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
return out;
|
|
76
88
|
}
|
|
77
89
|
export function fromChatCompletionsAssistantMessage(message) {
|
|
78
|
-
const { tool_calls, role, content, extra_content, reasoning_details } = message;
|
|
90
|
+
const { tool_calls, role, content, extra_content, reasoning_details, cache_control } = message;
|
|
79
91
|
const parts = [];
|
|
80
92
|
if (reasoning_details?.length) {
|
|
81
93
|
for (const detail of reasoning_details) {
|
|
@@ -111,10 +123,16 @@ export function fromChatCompletionsAssistantMessage(message) {
|
|
|
111
123
|
: content;
|
|
112
124
|
for (const part of inputContent) {
|
|
113
125
|
if (part.type === "text") {
|
|
114
|
-
|
|
126
|
+
const textPart = {
|
|
115
127
|
type: "text",
|
|
116
128
|
text: part.text,
|
|
117
|
-
}
|
|
129
|
+
};
|
|
130
|
+
if (part.cache_control) {
|
|
131
|
+
textPart.providerOptions = {
|
|
132
|
+
unknown: { cache_control: part.cache_control },
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
parts.push(textPart);
|
|
118
136
|
}
|
|
119
137
|
}
|
|
120
138
|
}
|
|
@@ -141,6 +159,9 @@ export function fromChatCompletionsAssistantMessage(message) {
|
|
|
141
159
|
if (extra_content) {
|
|
142
160
|
out.providerOptions = extra_content;
|
|
143
161
|
}
|
|
162
|
+
if (cache_control) {
|
|
163
|
+
((out.providerOptions ??= { unknown: {} })["unknown"] ??= {})["cache_control"] = cache_control;
|
|
164
|
+
}
|
|
144
165
|
return out;
|
|
145
166
|
}
|
|
146
167
|
export function fromChatCompletionsToolResultMessage(message, toolById) {
|
|
@@ -165,40 +186,68 @@ export function fromChatCompletionsContent(content) {
|
|
|
165
186
|
return content.map((part) => {
|
|
166
187
|
switch (part.type) {
|
|
167
188
|
case "image_url":
|
|
168
|
-
return fromImageUrlPart(part.image_url.url);
|
|
189
|
+
return fromImageUrlPart(part.image_url.url, part.cache_control);
|
|
169
190
|
case "file":
|
|
170
|
-
return fromFilePart(part.file.data, part.file.media_type, part.file.filename);
|
|
191
|
+
return fromFilePart(part.file.data, part.file.media_type, part.file.filename, part.cache_control);
|
|
171
192
|
case "input_audio":
|
|
172
|
-
return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`);
|
|
173
|
-
default:
|
|
174
|
-
|
|
193
|
+
return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`, undefined, part.cache_control);
|
|
194
|
+
default: {
|
|
195
|
+
const out = {
|
|
196
|
+
type: "text",
|
|
197
|
+
text: part.text,
|
|
198
|
+
};
|
|
199
|
+
if (part.cache_control) {
|
|
200
|
+
out.providerOptions = {
|
|
201
|
+
unknown: { cache_control: part.cache_control },
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
return out;
|
|
205
|
+
}
|
|
175
206
|
}
|
|
176
207
|
});
|
|
177
208
|
}
|
|
178
|
-
function fromImageUrlPart(url) {
|
|
209
|
+
function fromImageUrlPart(url, cacheControl) {
|
|
179
210
|
if (url.startsWith("data:")) {
|
|
180
211
|
const { mimeType, base64Data } = parseDataUrl(url);
|
|
181
|
-
return fromFilePart(base64Data, mimeType);
|
|
212
|
+
return fromFilePart(base64Data, mimeType, undefined, cacheControl);
|
|
182
213
|
}
|
|
183
|
-
|
|
214
|
+
const out = {
|
|
184
215
|
type: "image",
|
|
185
216
|
image: new URL(url),
|
|
186
217
|
};
|
|
218
|
+
if (cacheControl) {
|
|
219
|
+
out.providerOptions = {
|
|
220
|
+
unknown: { cache_control: cacheControl },
|
|
221
|
+
};
|
|
222
|
+
}
|
|
223
|
+
return out;
|
|
187
224
|
}
|
|
188
|
-
function fromFilePart(base64Data, mediaType, filename) {
|
|
225
|
+
function fromFilePart(base64Data, mediaType, filename, cacheControl) {
|
|
189
226
|
if (mediaType.startsWith("image/")) {
|
|
190
|
-
|
|
227
|
+
const out = {
|
|
191
228
|
type: "image",
|
|
192
229
|
image: z.util.base64ToUint8Array(base64Data),
|
|
193
230
|
mediaType,
|
|
194
231
|
};
|
|
232
|
+
if (cacheControl) {
|
|
233
|
+
out.providerOptions = {
|
|
234
|
+
unknown: { cache_control: cacheControl },
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
return out;
|
|
195
238
|
}
|
|
196
|
-
|
|
239
|
+
const out = {
|
|
197
240
|
type: "file",
|
|
198
241
|
data: z.util.base64ToUint8Array(base64Data),
|
|
199
242
|
filename,
|
|
200
243
|
mediaType,
|
|
201
244
|
};
|
|
245
|
+
if (cacheControl) {
|
|
246
|
+
out.providerOptions = {
|
|
247
|
+
unknown: { cache_control: cacheControl },
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
return out;
|
|
202
251
|
}
|
|
203
252
|
export const convertToToolSet = (tools) => {
|
|
204
253
|
if (!tools) {
|
|
@@ -295,6 +344,31 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
|
|
|
295
344
|
}
|
|
296
345
|
return out;
|
|
297
346
|
}
|
|
347
|
+
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control) {
|
|
348
|
+
const out = {};
|
|
349
|
+
const syncedCacheKey = prompt_cache_key ?? cached_content;
|
|
350
|
+
const syncedCachedContent = cached_content ?? prompt_cache_key;
|
|
351
|
+
let syncedCacheRetention = prompt_cache_retention;
|
|
352
|
+
if (!syncedCacheRetention && cache_control?.ttl) {
|
|
353
|
+
syncedCacheRetention = cache_control.ttl === "24h" ? "24h" : "in_memory";
|
|
354
|
+
}
|
|
355
|
+
let syncedCacheControl = cache_control;
|
|
356
|
+
if (!syncedCacheControl && syncedCacheRetention) {
|
|
357
|
+
syncedCacheControl = {
|
|
358
|
+
type: "ephemeral",
|
|
359
|
+
ttl: syncedCacheRetention === "24h" ? "24h" : "5m",
|
|
360
|
+
};
|
|
361
|
+
}
|
|
362
|
+
if (syncedCacheKey)
|
|
363
|
+
out["prompt_cache_key"] = syncedCacheKey;
|
|
364
|
+
if (syncedCacheRetention)
|
|
365
|
+
out["prompt_cache_retention"] = syncedCacheRetention;
|
|
366
|
+
if (syncedCachedContent)
|
|
367
|
+
out["cached_content"] = syncedCachedContent;
|
|
368
|
+
if (syncedCacheControl)
|
|
369
|
+
out["cache_control"] = syncedCacheControl;
|
|
370
|
+
return out;
|
|
371
|
+
}
|
|
298
372
|
// --- Response Flow ---
|
|
299
373
|
export function toChatCompletions(result, model) {
|
|
300
374
|
return {
|
|
@@ -494,8 +568,16 @@ export function toChatCompletionsUsage(usage) {
|
|
|
494
568
|
if (reasoning !== undefined)
|
|
495
569
|
out.completion_tokens_details = { reasoning_tokens: reasoning };
|
|
496
570
|
const cached = usage.inputTokenDetails?.cacheReadTokens;
|
|
497
|
-
|
|
498
|
-
|
|
571
|
+
const cacheWrite = usage.inputTokenDetails?.cacheWriteTokens;
|
|
572
|
+
if (cached !== undefined || cacheWrite !== undefined) {
|
|
573
|
+
out.prompt_tokens_details = {};
|
|
574
|
+
if (cached !== undefined) {
|
|
575
|
+
out.prompt_tokens_details.cached_tokens = cached;
|
|
576
|
+
}
|
|
577
|
+
if (cacheWrite !== undefined) {
|
|
578
|
+
out.prompt_tokens_details.cache_write_tokens = cacheWrite;
|
|
579
|
+
}
|
|
580
|
+
}
|
|
499
581
|
return out;
|
|
500
582
|
}
|
|
501
583
|
export function toChatCompletionsToolCall(id, name, args, providerMetadata) {
|
|
package/dist/endpoints/chat-completions/handler.js
CHANGED
|
@@ -28,6 +28,7 @@ export const chatCompletions = (config) => {
|
|
|
28
28
|
catch {
|
|
29
29
|
throw new GatewayError("Invalid JSON", 400);
|
|
30
30
|
}
|
|
31
|
+
logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
|
|
31
32
|
addSpanEvent("hebo.request.deserialized");
|
|
32
33
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
33
34
|
if (!parsed.success) {
|
|
@@ -37,7 +38,8 @@ export const chatCompletions = (config) => {
|
|
|
37
38
|
ctx.body = parsed.data;
|
|
38
39
|
addSpanEvent("hebo.request.parsed");
|
|
39
40
|
if (hooks?.before) {
|
|
40
|
-
ctx.body =
|
|
41
|
+
ctx.body =
|
|
42
|
+
(await hooks.before(ctx)) ?? ctx.body;
|
|
41
43
|
addSpanEvent("hebo.hooks.before.completed");
|
|
42
44
|
}
|
|
43
45
|
// Resolve model + provider (hooks may override defaults).
|
|
@@ -70,7 +72,7 @@ export const chatCompletions = (config) => {
|
|
|
70
72
|
options: textOptions,
|
|
71
73
|
}, "[chat] AI SDK options");
|
|
72
74
|
addSpanEvent("hebo.options.prepared");
|
|
73
|
-
setSpanAttributes(getChatRequestAttributes(
|
|
75
|
+
setSpanAttributes(getChatRequestAttributes(ctx.body, genAiSignalLevel));
|
|
74
76
|
// Build middleware chain (model -> forward params -> provider).
|
|
75
77
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
76
78
|
model: languageModel,
|
|
@@ -93,6 +95,7 @@ export const chatCompletions = (config) => {
|
|
|
93
95
|
onFinish: (res) => {
|
|
94
96
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
95
97
|
const streamResult = toChatCompletions(res, ctx.resolvedModelId);
|
|
98
|
+
logger.trace({ requestId: ctx.requestId, result: streamResult }, "[chat] ChatCompletions");
|
|
96
99
|
addSpanEvent("hebo.result.transformed");
|
|
97
100
|
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
98
101
|
setSpanAttributes(genAiResponseAttrs);
|
|
@@ -129,6 +132,7 @@ export const chatCompletions = (config) => {
|
|
|
129
132
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
130
133
|
// Transform result.
|
|
131
134
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
135
|
+
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
|
|
132
136
|
addSpanEvent("hebo.result.transformed");
|
|
133
137
|
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
|
|
134
138
|
setSpanAttributes(genAiResponseAttrs);
|
|
package/dist/endpoints/chat-completions/otel.js
CHANGED
|
@@ -77,6 +77,7 @@ export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
|
77
77
|
}
|
|
78
78
|
if (signalLevel !== "required") {
|
|
79
79
|
Object.assign(attrs, {
|
|
80
|
+
// FUTURE: add reasoning info
|
|
80
81
|
"gen_ai.request.stream": inputs.stream,
|
|
81
82
|
"gen_ai.request.frequency_penalty": inputs.frequency_penalty,
|
|
82
83
|
"gen_ai.request.max_tokens": inputs.max_completion_tokens,
|