@hebo-ai/gateway 0.5.1 → 0.6.0-rc0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -1
- package/package.json +17 -12
- package/src/endpoints/chat-completions/converters.test.ts +85 -1
- package/src/endpoints/chat-completions/converters.ts +139 -18
- package/src/endpoints/chat-completions/handler.test.ts +2 -0
- package/src/endpoints/chat-completions/handler.ts +10 -3
- package/src/endpoints/chat-completions/index.ts +1 -0
- package/src/endpoints/chat-completions/otel.ts +1 -0
- package/src/endpoints/chat-completions/schema.ts +38 -4
- package/src/endpoints/embeddings/handler.ts +5 -3
- package/src/endpoints/embeddings/index.ts +1 -0
- package/src/lifecycle.ts +2 -2
- package/src/middleware/debug.ts +37 -0
- package/src/middleware/matcher.ts +4 -0
- package/src/models/anthropic/middleware.test.ts +45 -1
- package/src/models/anthropic/middleware.ts +21 -1
- package/src/models/google/middleware.test.ts +30 -1
- package/src/models/google/middleware.ts +20 -1
- package/src/models/openai/middleware.test.ts +32 -1
- package/src/models/openai/middleware.ts +25 -1
- package/src/providers/bedrock/middleware.test.ts +121 -1
- package/src/providers/bedrock/middleware.ts +61 -1
- package/src/telemetry/fetch.ts +31 -4
- package/src/telemetry/index.ts +1 -0
- package/dist/config.d.ts +0 -2
- package/dist/config.js +0 -81
- package/dist/endpoints/chat-completions/converters.d.ts +0 -43
- package/dist/endpoints/chat-completions/converters.js +0 -551
- package/dist/endpoints/chat-completions/handler.d.ts +0 -2
- package/dist/endpoints/chat-completions/handler.js +0 -145
- package/dist/endpoints/chat-completions/index.d.ts +0 -3
- package/dist/endpoints/chat-completions/index.js +0 -3
- package/dist/endpoints/chat-completions/otel.d.ts +0 -6
- package/dist/endpoints/chat-completions/otel.js +0 -134
- package/dist/endpoints/chat-completions/schema.d.ts +0 -946
- package/dist/endpoints/chat-completions/schema.js +0 -257
- package/dist/endpoints/embeddings/converters.d.ts +0 -10
- package/dist/endpoints/embeddings/converters.js +0 -31
- package/dist/endpoints/embeddings/handler.d.ts +0 -2
- package/dist/endpoints/embeddings/handler.js +0 -101
- package/dist/endpoints/embeddings/index.d.ts +0 -3
- package/dist/endpoints/embeddings/index.js +0 -3
- package/dist/endpoints/embeddings/otel.d.ts +0 -6
- package/dist/endpoints/embeddings/otel.js +0 -35
- package/dist/endpoints/embeddings/schema.d.ts +0 -38
- package/dist/endpoints/embeddings/schema.js +0 -26
- package/dist/endpoints/models/converters.d.ts +0 -6
- package/dist/endpoints/models/converters.js +0 -42
- package/dist/endpoints/models/handler.d.ts +0 -2
- package/dist/endpoints/models/handler.js +0 -29
- package/dist/endpoints/models/index.d.ts +0 -3
- package/dist/endpoints/models/index.js +0 -3
- package/dist/endpoints/models/schema.d.ts +0 -42
- package/dist/endpoints/models/schema.js +0 -31
- package/dist/errors/ai-sdk.d.ts +0 -2
- package/dist/errors/ai-sdk.js +0 -52
- package/dist/errors/gateway.d.ts +0 -5
- package/dist/errors/gateway.js +0 -13
- package/dist/errors/openai.d.ts +0 -20
- package/dist/errors/openai.js +0 -40
- package/dist/errors/utils.d.ts +0 -22
- package/dist/errors/utils.js +0 -44
- package/dist/gateway.d.ts +0 -9
- package/dist/gateway.js +0 -34
- package/dist/index.d.ts +0 -14
- package/dist/index.js +0 -13
- package/dist/lifecycle.d.ts +0 -2
- package/dist/lifecycle.js +0 -94
- package/dist/logger/default.d.ts +0 -4
- package/dist/logger/default.js +0 -81
- package/dist/logger/index.d.ts +0 -14
- package/dist/logger/index.js +0 -25
- package/dist/middleware/common.d.ts +0 -12
- package/dist/middleware/common.js +0 -145
- package/dist/middleware/matcher.d.ts +0 -27
- package/dist/middleware/matcher.js +0 -112
- package/dist/middleware/utils.d.ts +0 -2
- package/dist/middleware/utils.js +0 -27
- package/dist/models/amazon/index.d.ts +0 -2
- package/dist/models/amazon/index.js +0 -2
- package/dist/models/amazon/middleware.d.ts +0 -3
- package/dist/models/amazon/middleware.js +0 -65
- package/dist/models/amazon/presets.d.ts +0 -2390
- package/dist/models/amazon/presets.js +0 -80
- package/dist/models/anthropic/index.d.ts +0 -2
- package/dist/models/anthropic/index.js +0 -2
- package/dist/models/anthropic/middleware.d.ts +0 -4
- package/dist/models/anthropic/middleware.js +0 -111
- package/dist/models/anthropic/presets.d.ts +0 -4802
- package/dist/models/anthropic/presets.js +0 -135
- package/dist/models/catalog.d.ts +0 -4
- package/dist/models/catalog.js +0 -4
- package/dist/models/cohere/index.d.ts +0 -2
- package/dist/models/cohere/index.js +0 -2
- package/dist/models/cohere/middleware.d.ts +0 -3
- package/dist/models/cohere/middleware.js +0 -60
- package/dist/models/cohere/presets.d.ts +0 -2918
- package/dist/models/cohere/presets.js +0 -134
- package/dist/models/google/index.d.ts +0 -2
- package/dist/models/google/index.js +0 -2
- package/dist/models/google/middleware.d.ts +0 -7
- package/dist/models/google/middleware.js +0 -103
- package/dist/models/google/presets.d.ts +0 -2553
- package/dist/models/google/presets.js +0 -83
- package/dist/models/meta/index.d.ts +0 -1
- package/dist/models/meta/index.js +0 -1
- package/dist/models/meta/presets.d.ts +0 -3254
- package/dist/models/meta/presets.js +0 -95
- package/dist/models/openai/index.d.ts +0 -2
- package/dist/models/openai/index.js +0 -2
- package/dist/models/openai/middleware.d.ts +0 -3
- package/dist/models/openai/middleware.js +0 -62
- package/dist/models/openai/presets.d.ts +0 -6634
- package/dist/models/openai/presets.js +0 -213
- package/dist/models/types.d.ts +0 -20
- package/dist/models/types.js +0 -84
- package/dist/models/voyage/index.d.ts +0 -2
- package/dist/models/voyage/index.js +0 -2
- package/dist/models/voyage/middleware.d.ts +0 -2
- package/dist/models/voyage/middleware.js +0 -18
- package/dist/models/voyage/presets.d.ts +0 -3471
- package/dist/models/voyage/presets.js +0 -85
- package/dist/providers/anthropic/canonical.d.ts +0 -3
- package/dist/providers/anthropic/canonical.js +0 -9
- package/dist/providers/anthropic/index.d.ts +0 -1
- package/dist/providers/anthropic/index.js +0 -1
- package/dist/providers/bedrock/canonical.d.ts +0 -17
- package/dist/providers/bedrock/canonical.js +0 -61
- package/dist/providers/bedrock/index.d.ts +0 -2
- package/dist/providers/bedrock/index.js +0 -2
- package/dist/providers/bedrock/middleware.d.ts +0 -3
- package/dist/providers/bedrock/middleware.js +0 -55
- package/dist/providers/cohere/canonical.d.ts +0 -3
- package/dist/providers/cohere/canonical.js +0 -17
- package/dist/providers/cohere/index.d.ts +0 -1
- package/dist/providers/cohere/index.js +0 -1
- package/dist/providers/groq/canonical.d.ts +0 -3
- package/dist/providers/groq/canonical.js +0 -12
- package/dist/providers/groq/index.d.ts +0 -1
- package/dist/providers/groq/index.js +0 -1
- package/dist/providers/openai/canonical.d.ts +0 -3
- package/dist/providers/openai/canonical.js +0 -8
- package/dist/providers/openai/index.d.ts +0 -1
- package/dist/providers/openai/index.js +0 -1
- package/dist/providers/registry.d.ts +0 -24
- package/dist/providers/registry.js +0 -100
- package/dist/providers/types.d.ts +0 -7
- package/dist/providers/types.js +0 -11
- package/dist/providers/vertex/canonical.d.ts +0 -3
- package/dist/providers/vertex/canonical.js +0 -8
- package/dist/providers/vertex/index.d.ts +0 -1
- package/dist/providers/vertex/index.js +0 -1
- package/dist/providers/voyage/canonical.d.ts +0 -3
- package/dist/providers/voyage/canonical.js +0 -7
- package/dist/providers/voyage/index.d.ts +0 -1
- package/dist/providers/voyage/index.js +0 -1
- package/dist/telemetry/ai-sdk.d.ts +0 -2
- package/dist/telemetry/ai-sdk.js +0 -31
- package/dist/telemetry/baggage.d.ts +0 -1
- package/dist/telemetry/baggage.js +0 -24
- package/dist/telemetry/fetch.d.ts +0 -2
- package/dist/telemetry/fetch.js +0 -24
- package/dist/telemetry/gen-ai.d.ts +0 -5
- package/dist/telemetry/gen-ai.js +0 -60
- package/dist/telemetry/http.d.ts +0 -3
- package/dist/telemetry/http.js +0 -54
- package/dist/telemetry/memory.d.ts +0 -2
- package/dist/telemetry/memory.js +0 -27
- package/dist/telemetry/span.d.ts +0 -13
- package/dist/telemetry/span.js +0 -60
- package/dist/telemetry/stream.d.ts +0 -3
- package/dist/telemetry/stream.js +0 -51
- package/dist/types.d.ts +0 -176
- package/dist/types.js +0 -1
- package/dist/utils/env.d.ts +0 -2
- package/dist/utils/env.js +0 -5
- package/dist/utils/headers.d.ts +0 -4
- package/dist/utils/headers.js +0 -22
- package/dist/utils/preset.d.ts +0 -9
- package/dist/utils/preset.js +0 -41
- package/dist/utils/request.d.ts +0 -2
- package/dist/utils/request.js +0 -14
- package/dist/utils/response.d.ts +0 -3
- package/dist/utils/response.js +0 -68
package/README.md
CHANGED
|
@@ -38,7 +38,7 @@ bun install @hebo-ai/gateway
|
|
|
38
38
|
- Runtime Support
|
|
39
39
|
- [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
|
|
40
40
|
- OpenAI Extensions
|
|
41
|
-
- [Reasoning](#reasoning)
|
|
41
|
+
- [Reasoning](#reasoning) | [Prompt Caching](#prompt-caching)
|
|
42
42
|
- Advanced Usage
|
|
43
43
|
- [Passing Framework State to Hooks](#passing-framework-state-to-hooks) | [Selective Route Mounting](#selective-route-mounting) | [Low-level Schemas & Converters](#low-level-schemas--converters)
|
|
44
44
|
|
|
@@ -565,6 +565,37 @@ Advanced models (like Anthropic Claude 3.7 or Gemini 3) surface structured reaso
|
|
|
565
565
|
|
|
566
566
|
For **Gemini 3** models, returning the thought signature via `extra_content` is mandatory to resume the chain-of-thought; failing to do so may result in errors or degraded performance.
|
|
567
567
|
|
|
568
|
+
### Prompt Caching
|
|
569
|
+
|
|
570
|
+
The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.
|
|
571
|
+
|
|
572
|
+
Accepted request fields:
|
|
573
|
+
|
|
574
|
+
- `prompt_cache_key` + `prompt_cache_retention` (OpenAI style)
|
|
575
|
+
- `cache_control` (OpenRouter / Vercel / Claude style)
|
|
576
|
+
- `cached_content` (Gemini style)
|
|
577
|
+
|
|
578
|
+
```json
|
|
579
|
+
{
|
|
580
|
+
"model": "anthropic/claude-sonnet-4.6",
|
|
581
|
+
"messages": [
|
|
582
|
+
{
|
|
583
|
+
"role": "system",
|
|
584
|
+
"content": "Reusable policy and instructions",
|
|
585
|
+
"cache_control": { "type": "ephemeral", "ttl": "1h" }
|
|
586
|
+
},
|
|
587
|
+
{ "role": "user", "content": "Apply policy to this request." }
|
|
588
|
+
]
|
|
589
|
+
}
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
Provider behavior:
|
|
593
|
+
|
|
594
|
+
- **OpenAI-compatible**: forwards `prompt_cache_key` and `prompt_cache_retention` as native provider options.
|
|
595
|
+
- **Anthropic Claude**: maps top-level caching to Anthropic cache control, while message/part `cache_control` breakpoints are preserved.
|
|
596
|
+
- **Google Gemini**: maps `cached_content` to Gemini `cachedContent`.
|
|
597
|
+
- **Amazon Nova (Bedrock)**: maps `cache_control` to Bedrock `cachePoints` and inserts an automatic cache point on a stable prefix when none is provided.
|
|
598
|
+
|
|
568
599
|
## 🧪 Advanced Usage
|
|
569
600
|
|
|
570
601
|
### Logger Settings
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.5.1",
|
|
3
|
+
"version": "0.6.0-rc0",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -140,6 +140,11 @@
|
|
|
140
140
|
"types": "./dist/providers/voyage/index.d.ts",
|
|
141
141
|
"import": "./dist/providers/voyage/index.js",
|
|
142
142
|
"dev-source": "./src/providers/voyage/index.ts"
|
|
143
|
+
},
|
|
144
|
+
"./telemetry": {
|
|
145
|
+
"types": "./dist/telemetry/index.d.ts",
|
|
146
|
+
"import": "./dist/telemetry/index.js",
|
|
147
|
+
"dev-source": "./src/telemetry/index.ts"
|
|
143
148
|
}
|
|
144
149
|
},
|
|
145
150
|
"scripts": {
|
|
@@ -155,33 +160,33 @@
|
|
|
155
160
|
},
|
|
156
161
|
"dependencies": {
|
|
157
162
|
"@ai-sdk/provider": "^3.0.8",
|
|
158
|
-
"ai": "^6.0.
|
|
163
|
+
"ai": "^6.0.101",
|
|
159
164
|
"zod": "^4.3.6"
|
|
160
165
|
},
|
|
161
166
|
"devDependencies": {
|
|
162
|
-
"@ai-sdk/amazon-bedrock": "^4.0.
|
|
163
|
-
"@ai-sdk/anthropic": "^3.0.
|
|
167
|
+
"@ai-sdk/amazon-bedrock": "^4.0.65",
|
|
168
|
+
"@ai-sdk/anthropic": "^3.0.47",
|
|
164
169
|
"@ai-sdk/cohere": "^3.0.21",
|
|
165
|
-
"@ai-sdk/google-vertex": "^4.0.
|
|
170
|
+
"@ai-sdk/google-vertex": "^4.0.63",
|
|
166
171
|
"@ai-sdk/groq": "^3.0.24",
|
|
167
|
-
"@ai-sdk/openai": "^3.0.
|
|
168
|
-
"@aws-sdk/credential-providers": "^3.
|
|
172
|
+
"@ai-sdk/openai": "^3.0.34",
|
|
173
|
+
"@aws-sdk/credential-providers": "^3.998.0",
|
|
169
174
|
"@langfuse/otel": "^4.6.1",
|
|
170
175
|
"@mjackson/node-fetch-server": "^0.7.0",
|
|
171
176
|
"@opentelemetry/api": "^1.9.0",
|
|
172
177
|
"@opentelemetry/context-async-hooks": "^2.5.1",
|
|
173
178
|
"@opentelemetry/sdk-trace-base": "^2.5.1",
|
|
174
|
-
"@tanstack/react-router": "^1.
|
|
175
|
-
"@tanstack/react-start": "^1.
|
|
179
|
+
"@tanstack/react-router": "^1.163.2",
|
|
180
|
+
"@tanstack/react-start": "^1.163.2",
|
|
176
181
|
"@types/bun": "latest",
|
|
177
182
|
"@types/react": "^19.2.14",
|
|
178
183
|
"@types/react-dom": "^19.2.3",
|
|
179
|
-
"elysia": "^1.4.
|
|
180
|
-
"hono": "^4.12.
|
|
184
|
+
"elysia": "^1.4.26",
|
|
185
|
+
"hono": "^4.12.2",
|
|
181
186
|
"lefthook": "^2.1.1",
|
|
182
187
|
"next": "^16.1.6",
|
|
183
188
|
"oxfmt": "^0.24.0",
|
|
184
|
-
"oxlint": "^1.
|
|
189
|
+
"oxlint": "^1.50.0",
|
|
185
190
|
"pino": "^10.3.1",
|
|
186
191
|
"typescript": "^5.9.3",
|
|
187
192
|
"vite": "^7.3.1",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { GenerateTextResult, ToolSet, Output } from "ai";
|
|
1
|
+
import type { GenerateTextResult, ToolSet, Output, LanguageModelUsage } from "ai";
|
|
2
2
|
|
|
3
3
|
import { describe, expect, test } from "bun:test";
|
|
4
4
|
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
convertToTextCallOptions,
|
|
9
9
|
toChatCompletionsAssistantMessage,
|
|
10
10
|
toChatCompletionsToolCall,
|
|
11
|
+
toChatCompletionsUsage,
|
|
11
12
|
fromChatCompletionsAssistantMessage,
|
|
12
13
|
fromChatCompletionsToolResultMessage,
|
|
13
14
|
} from "./converters";
|
|
@@ -520,6 +521,89 @@ describe("Chat Completions Converters", () => {
|
|
|
520
521
|
expect(result.tools).toBeDefined();
|
|
521
522
|
expect(Object.keys(result.tools!)).toEqual(["get_weather"]);
|
|
522
523
|
});
|
|
524
|
+
|
|
525
|
+
test("should map prompt cache options into providerOptions.unknown", () => {
|
|
526
|
+
const result = convertToTextCallOptions({
|
|
527
|
+
messages: [{ role: "system", content: "You are concise." }],
|
|
528
|
+
prompt_cache_key: "tenant:docs:v1",
|
|
529
|
+
prompt_cache_retention: "24h",
|
|
530
|
+
});
|
|
531
|
+
|
|
532
|
+
expect(result.providerOptions).toEqual({
|
|
533
|
+
unknown: {
|
|
534
|
+
prompt_cache_key: "tenant:docs:v1",
|
|
535
|
+
prompt_cache_retention: "24h",
|
|
536
|
+
cached_content: "tenant:docs:v1",
|
|
537
|
+
cache_control: {
|
|
538
|
+
type: "ephemeral",
|
|
539
|
+
ttl: "24h",
|
|
540
|
+
},
|
|
541
|
+
},
|
|
542
|
+
});
|
|
543
|
+
});
|
|
544
|
+
|
|
545
|
+
test("should sync retention from cache_control ttl", () => {
|
|
546
|
+
const result = convertToTextCallOptions({
|
|
547
|
+
messages: [{ role: "system", content: "You are concise." }],
|
|
548
|
+
cache_control: {
|
|
549
|
+
type: "ephemeral",
|
|
550
|
+
ttl: "5m",
|
|
551
|
+
},
|
|
552
|
+
});
|
|
553
|
+
|
|
554
|
+
expect(result.providerOptions).toEqual({
|
|
555
|
+
unknown: {
|
|
556
|
+
prompt_cache_retention: "in_memory",
|
|
557
|
+
cache_control: {
|
|
558
|
+
type: "ephemeral",
|
|
559
|
+
ttl: "5m",
|
|
560
|
+
},
|
|
561
|
+
},
|
|
562
|
+
});
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
test("should preserve cache_control on message and content parts", () => {
|
|
566
|
+
const result = convertToTextCallOptions({
|
|
567
|
+
messages: [
|
|
568
|
+
{
|
|
569
|
+
role: "system",
|
|
570
|
+
content: "Policy block",
|
|
571
|
+
cache_control: { type: "ephemeral", ttl: "1h" },
|
|
572
|
+
},
|
|
573
|
+
{
|
|
574
|
+
role: "user",
|
|
575
|
+
content: [{ type: "text", text: "Question", cache_control: { type: "ephemeral" } }],
|
|
576
|
+
},
|
|
577
|
+
],
|
|
578
|
+
});
|
|
579
|
+
|
|
580
|
+
expect((result.messages[0] as any).providerOptions.unknown.cache_control).toEqual({
|
|
581
|
+
type: "ephemeral",
|
|
582
|
+
ttl: "1h",
|
|
583
|
+
});
|
|
584
|
+
expect((result.messages[1] as any).content[0].providerOptions.unknown.cache_control).toEqual({
|
|
585
|
+
type: "ephemeral",
|
|
586
|
+
});
|
|
587
|
+
});
|
|
588
|
+
});
|
|
589
|
+
|
|
590
|
+
describe("toChatCompletionsUsage", () => {
|
|
591
|
+
test("should include cached token details", () => {
|
|
592
|
+
const usage = toChatCompletionsUsage({
|
|
593
|
+
inputTokens: 100,
|
|
594
|
+
outputTokens: 20,
|
|
595
|
+
totalTokens: 120,
|
|
596
|
+
inputTokenDetails: {
|
|
597
|
+
cacheReadTokens: 60,
|
|
598
|
+
cacheWriteTokens: 10,
|
|
599
|
+
},
|
|
600
|
+
} as LanguageModelUsage);
|
|
601
|
+
|
|
602
|
+
expect(usage.prompt_tokens_details).toEqual({
|
|
603
|
+
cached_tokens: 60,
|
|
604
|
+
cache_write_tokens: 10,
|
|
605
|
+
});
|
|
606
|
+
});
|
|
523
607
|
});
|
|
524
608
|
|
|
525
609
|
describe("toChatCompletionsToolCall", () => {
|
|
@@ -17,6 +17,9 @@ import type {
|
|
|
17
17
|
AssistantModelMessage,
|
|
18
18
|
ToolModelMessage,
|
|
19
19
|
UserModelMessage,
|
|
20
|
+
TextPart,
|
|
21
|
+
ImagePart,
|
|
22
|
+
FilePart,
|
|
20
23
|
} from "ai";
|
|
21
24
|
|
|
22
25
|
import { Output, jsonSchema, tool } from "ai";
|
|
@@ -28,7 +31,6 @@ import type {
|
|
|
28
31
|
ChatCompletionsToolChoice,
|
|
29
32
|
ChatCompletionsContentPart,
|
|
30
33
|
ChatCompletionsMessage,
|
|
31
|
-
ChatCompletionsSystemMessage,
|
|
32
34
|
ChatCompletionsUserMessage,
|
|
33
35
|
ChatCompletionsAssistantMessage,
|
|
34
36
|
ChatCompletionsToolMessage,
|
|
@@ -46,6 +48,7 @@ import type {
|
|
|
46
48
|
ChatCompletionsReasoningDetail,
|
|
47
49
|
ChatCompletionsResponseFormat,
|
|
48
50
|
ChatCompletionsContentPartText,
|
|
51
|
+
ChatCompletionsCacheControl,
|
|
49
52
|
} from "./schema";
|
|
50
53
|
|
|
51
54
|
import { GatewayError } from "../../errors/gateway";
|
|
@@ -81,6 +84,10 @@ export function convertToTextCallOptions(params: ChatCompletionsInputs): TextCal
|
|
|
81
84
|
response_format,
|
|
82
85
|
reasoning_effort,
|
|
83
86
|
reasoning,
|
|
87
|
+
prompt_cache_key,
|
|
88
|
+
prompt_cache_retention,
|
|
89
|
+
cached_content,
|
|
90
|
+
cache_control,
|
|
84
91
|
frequency_penalty,
|
|
85
92
|
presence_penalty,
|
|
86
93
|
seed,
|
|
@@ -90,6 +97,15 @@ export function convertToTextCallOptions(params: ChatCompletionsInputs): TextCal
|
|
|
90
97
|
} = params;
|
|
91
98
|
|
|
92
99
|
Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
|
|
100
|
+
Object.assign(
|
|
101
|
+
rest,
|
|
102
|
+
parsePromptCachingOptions(
|
|
103
|
+
prompt_cache_key,
|
|
104
|
+
prompt_cache_retention,
|
|
105
|
+
cached_content,
|
|
106
|
+
cache_control,
|
|
107
|
+
),
|
|
108
|
+
);
|
|
93
109
|
|
|
94
110
|
const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
|
|
95
111
|
|
|
@@ -133,7 +149,12 @@ export function convertToModelMessages(messages: ChatCompletionsMessage[]): Mode
|
|
|
133
149
|
if (message.role === "tool") continue;
|
|
134
150
|
|
|
135
151
|
if (message.role === "system") {
|
|
136
|
-
|
|
152
|
+
if (message.cache_control) {
|
|
153
|
+
(message as ModelMessage).providerOptions = {
|
|
154
|
+
unknown: { cache_control: message.cache_control },
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
modelMessages.push(message);
|
|
137
158
|
continue;
|
|
138
159
|
}
|
|
139
160
|
|
|
@@ -161,18 +182,24 @@ function indexToolMessages(messages: ChatCompletionsMessage[]) {
|
|
|
161
182
|
export function fromChatCompletionsUserMessage(
|
|
162
183
|
message: ChatCompletionsUserMessage,
|
|
163
184
|
): UserModelMessage {
|
|
164
|
-
|
|
185
|
+
const out: UserModelMessage = {
|
|
165
186
|
role: "user",
|
|
166
187
|
content: Array.isArray(message.content)
|
|
167
188
|
? fromChatCompletionsContent(message.content)
|
|
168
189
|
: message.content,
|
|
169
190
|
};
|
|
191
|
+
if (message.cache_control) {
|
|
192
|
+
out.providerOptions = {
|
|
193
|
+
unknown: { cache_control: message.cache_control },
|
|
194
|
+
};
|
|
195
|
+
}
|
|
196
|
+
return out;
|
|
170
197
|
}
|
|
171
198
|
|
|
172
199
|
export function fromChatCompletionsAssistantMessage(
|
|
173
200
|
message: ChatCompletionsAssistantMessage,
|
|
174
201
|
): AssistantModelMessage {
|
|
175
|
-
const { tool_calls, role, content, extra_content, reasoning_details } = message;
|
|
202
|
+
const { tool_calls, role, content, extra_content, reasoning_details, cache_control } = message;
|
|
176
203
|
|
|
177
204
|
const parts: AssistantContent = [];
|
|
178
205
|
|
|
@@ -211,10 +238,16 @@ export function fromChatCompletionsAssistantMessage(
|
|
|
211
238
|
: content;
|
|
212
239
|
for (const part of inputContent) {
|
|
213
240
|
if (part.type === "text") {
|
|
214
|
-
|
|
241
|
+
const textPart: TextPart = {
|
|
215
242
|
type: "text",
|
|
216
243
|
text: part.text,
|
|
217
|
-
}
|
|
244
|
+
};
|
|
245
|
+
if (part.cache_control) {
|
|
246
|
+
textPart.providerOptions = {
|
|
247
|
+
unknown: { cache_control: part.cache_control },
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
parts.push(textPart);
|
|
218
251
|
}
|
|
219
252
|
}
|
|
220
253
|
}
|
|
@@ -245,6 +278,10 @@ export function fromChatCompletionsAssistantMessage(
|
|
|
245
278
|
out.providerOptions = extra_content as SharedV3ProviderOptions;
|
|
246
279
|
}
|
|
247
280
|
|
|
281
|
+
if (cache_control) {
|
|
282
|
+
((out.providerOptions ??= { unknown: {} })["unknown"] ??= {})["cache_control"] = cache_control;
|
|
283
|
+
}
|
|
284
|
+
|
|
248
285
|
return out;
|
|
249
286
|
}
|
|
250
287
|
|
|
@@ -275,44 +312,87 @@ export function fromChatCompletionsContent(content: ChatCompletionsContentPart[]
|
|
|
275
312
|
return content.map((part) => {
|
|
276
313
|
switch (part.type) {
|
|
277
314
|
case "image_url":
|
|
278
|
-
return fromImageUrlPart(part.image_url.url);
|
|
315
|
+
return fromImageUrlPart(part.image_url.url, part.cache_control);
|
|
279
316
|
case "file":
|
|
280
|
-
return fromFilePart(
|
|
317
|
+
return fromFilePart(
|
|
318
|
+
part.file.data,
|
|
319
|
+
part.file.media_type,
|
|
320
|
+
part.file.filename,
|
|
321
|
+
part.cache_control,
|
|
322
|
+
);
|
|
281
323
|
case "input_audio":
|
|
282
|
-
return fromFilePart(
|
|
283
|
-
|
|
284
|
-
|
|
324
|
+
return fromFilePart(
|
|
325
|
+
part.input_audio.data,
|
|
326
|
+
`audio/${part.input_audio.format}`,
|
|
327
|
+
undefined,
|
|
328
|
+
part.cache_control,
|
|
329
|
+
);
|
|
330
|
+
default: {
|
|
331
|
+
const out: TextPart = {
|
|
332
|
+
type: "text" as const,
|
|
333
|
+
text: part.text,
|
|
334
|
+
};
|
|
335
|
+
if (part.cache_control) {
|
|
336
|
+
out.providerOptions = {
|
|
337
|
+
unknown: { cache_control: part.cache_control },
|
|
338
|
+
};
|
|
339
|
+
}
|
|
340
|
+
return out;
|
|
341
|
+
}
|
|
285
342
|
}
|
|
286
343
|
});
|
|
287
344
|
}
|
|
288
345
|
|
|
289
|
-
function fromImageUrlPart(url: string) {
|
|
346
|
+
function fromImageUrlPart(url: string, cacheControl?: ChatCompletionsCacheControl) {
|
|
290
347
|
if (url.startsWith("data:")) {
|
|
291
348
|
const { mimeType, base64Data } = parseDataUrl(url);
|
|
292
|
-
return fromFilePart(base64Data, mimeType);
|
|
349
|
+
return fromFilePart(base64Data, mimeType, undefined, cacheControl);
|
|
293
350
|
}
|
|
294
351
|
|
|
295
|
-
|
|
352
|
+
const out: ImagePart = {
|
|
296
353
|
type: "image" as const,
|
|
297
354
|
image: new URL(url),
|
|
298
355
|
};
|
|
356
|
+
if (cacheControl) {
|
|
357
|
+
out.providerOptions = {
|
|
358
|
+
unknown: { cache_control: cacheControl },
|
|
359
|
+
};
|
|
360
|
+
}
|
|
361
|
+
return out;
|
|
299
362
|
}
|
|
300
363
|
|
|
301
|
-
function fromFilePart(
|
|
364
|
+
function fromFilePart(
|
|
365
|
+
base64Data: string,
|
|
366
|
+
mediaType: string,
|
|
367
|
+
filename?: string,
|
|
368
|
+
cacheControl?: ChatCompletionsCacheControl,
|
|
369
|
+
) {
|
|
302
370
|
if (mediaType.startsWith("image/")) {
|
|
303
|
-
|
|
371
|
+
const out: ImagePart = {
|
|
304
372
|
type: "image" as const,
|
|
305
373
|
image: z.util.base64ToUint8Array(base64Data),
|
|
306
374
|
mediaType,
|
|
307
375
|
};
|
|
376
|
+
if (cacheControl) {
|
|
377
|
+
out.providerOptions = {
|
|
378
|
+
unknown: { cache_control: cacheControl },
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
return out;
|
|
308
382
|
}
|
|
309
383
|
|
|
310
|
-
|
|
384
|
+
const out: FilePart = {
|
|
311
385
|
type: "file" as const,
|
|
312
386
|
data: z.util.base64ToUint8Array(base64Data),
|
|
313
387
|
filename,
|
|
314
388
|
mediaType,
|
|
315
389
|
};
|
|
390
|
+
if (cacheControl) {
|
|
391
|
+
out.providerOptions = {
|
|
392
|
+
unknown: { cache_control: cacheControl },
|
|
393
|
+
};
|
|
394
|
+
}
|
|
395
|
+
return out;
|
|
316
396
|
}
|
|
317
397
|
|
|
318
398
|
export const convertToToolSet = (tools: ChatCompletionsTool[] | undefined): ToolSet | undefined => {
|
|
@@ -438,6 +518,38 @@ function parseReasoningOptions(
|
|
|
438
518
|
return out;
|
|
439
519
|
}
|
|
440
520
|
|
|
521
|
+
function parsePromptCachingOptions(
|
|
522
|
+
prompt_cache_key: string | undefined,
|
|
523
|
+
prompt_cache_retention: "in_memory" | "24h" | undefined,
|
|
524
|
+
cached_content: string | undefined,
|
|
525
|
+
cache_control: ChatCompletionsCacheControl | undefined,
|
|
526
|
+
) {
|
|
527
|
+
const out: Record<string, unknown> = {};
|
|
528
|
+
|
|
529
|
+
const syncedCacheKey = prompt_cache_key ?? cached_content;
|
|
530
|
+
const syncedCachedContent = cached_content ?? prompt_cache_key;
|
|
531
|
+
|
|
532
|
+
let syncedCacheRetention = prompt_cache_retention;
|
|
533
|
+
if (!syncedCacheRetention && cache_control?.ttl) {
|
|
534
|
+
syncedCacheRetention = cache_control.ttl === "24h" ? "24h" : "in_memory";
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
let syncedCacheControl = cache_control;
|
|
538
|
+
if (!syncedCacheControl && syncedCacheRetention) {
|
|
539
|
+
syncedCacheControl = {
|
|
540
|
+
type: "ephemeral",
|
|
541
|
+
ttl: syncedCacheRetention === "24h" ? "24h" : "5m",
|
|
542
|
+
};
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
if (syncedCacheKey) out["prompt_cache_key"] = syncedCacheKey;
|
|
546
|
+
if (syncedCacheRetention) out["prompt_cache_retention"] = syncedCacheRetention;
|
|
547
|
+
if (syncedCachedContent) out["cached_content"] = syncedCachedContent;
|
|
548
|
+
if (syncedCacheControl) out["cache_control"] = syncedCacheControl;
|
|
549
|
+
|
|
550
|
+
return out;
|
|
551
|
+
}
|
|
552
|
+
|
|
441
553
|
// --- Response Flow ---
|
|
442
554
|
|
|
443
555
|
export function toChatCompletions(
|
|
@@ -731,7 +843,16 @@ export function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletio
|
|
|
731
843
|
if (reasoning !== undefined) out.completion_tokens_details = { reasoning_tokens: reasoning };
|
|
732
844
|
|
|
733
845
|
const cached = usage.inputTokenDetails?.cacheReadTokens;
|
|
734
|
-
|
|
846
|
+
const cacheWrite = usage.inputTokenDetails?.cacheWriteTokens;
|
|
847
|
+
if (cached !== undefined || cacheWrite !== undefined) {
|
|
848
|
+
out.prompt_tokens_details = {};
|
|
849
|
+
if (cached !== undefined) {
|
|
850
|
+
out.prompt_tokens_details.cached_tokens = cached;
|
|
851
|
+
}
|
|
852
|
+
if (cacheWrite !== undefined) {
|
|
853
|
+
out.prompt_tokens_details.cache_write_tokens = cacheWrite;
|
|
854
|
+
}
|
|
855
|
+
}
|
|
735
856
|
|
|
736
857
|
return out;
|
|
737
858
|
}
|
|
@@ -197,6 +197,7 @@ describe("Chat Completions Handler", () => {
|
|
|
197
197
|
},
|
|
198
198
|
prompt_tokens_details: {
|
|
199
199
|
cached_tokens: 20,
|
|
200
|
+
cache_write_tokens: 0,
|
|
200
201
|
},
|
|
201
202
|
},
|
|
202
203
|
provider_metadata: { provider: { key: "value" } },
|
|
@@ -284,6 +285,7 @@ describe("Chat Completions Handler", () => {
|
|
|
284
285
|
},
|
|
285
286
|
prompt_tokens_details: {
|
|
286
287
|
cached_tokens: 20,
|
|
288
|
+
cache_write_tokens: 0,
|
|
287
289
|
},
|
|
288
290
|
},
|
|
289
291
|
provider_metadata: { provider: { key: "value" } },
|
|
@@ -36,7 +36,7 @@ import {
|
|
|
36
36
|
getChatRequestAttributes,
|
|
37
37
|
getChatResponseAttributes,
|
|
38
38
|
} from "./otel";
|
|
39
|
-
import { ChatCompletionsBodySchema } from "./schema";
|
|
39
|
+
import { ChatCompletionsBodySchema, type ChatCompletionsBody } from "./schema";
|
|
40
40
|
|
|
41
41
|
export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
42
42
|
const hooks = config.hooks;
|
|
@@ -57,6 +57,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
57
57
|
} catch {
|
|
58
58
|
throw new GatewayError("Invalid JSON", 400);
|
|
59
59
|
}
|
|
60
|
+
logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
|
|
60
61
|
addSpanEvent("hebo.request.deserialized");
|
|
61
62
|
|
|
62
63
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
@@ -68,7 +69,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
68
69
|
addSpanEvent("hebo.request.parsed");
|
|
69
70
|
|
|
70
71
|
if (hooks?.before) {
|
|
71
|
-
ctx.body =
|
|
72
|
+
ctx.body =
|
|
73
|
+
((await hooks.before(ctx as BeforeHookContext)) as ChatCompletionsBody) ?? ctx.body;
|
|
72
74
|
addSpanEvent("hebo.hooks.before.completed");
|
|
73
75
|
}
|
|
74
76
|
|
|
@@ -110,7 +112,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
110
112
|
"[chat] AI SDK options",
|
|
111
113
|
);
|
|
112
114
|
addSpanEvent("hebo.options.prepared");
|
|
113
|
-
setSpanAttributes(getChatRequestAttributes(
|
|
115
|
+
setSpanAttributes(getChatRequestAttributes(ctx.body, genAiSignalLevel));
|
|
114
116
|
|
|
115
117
|
// Build middleware chain (model -> forward params -> provider).
|
|
116
118
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
@@ -138,6 +140,10 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
138
140
|
res as unknown as GenerateTextResult<ToolSet, Output.Output>,
|
|
139
141
|
ctx.resolvedModelId!,
|
|
140
142
|
);
|
|
143
|
+
logger.trace(
|
|
144
|
+
{ requestId: ctx.requestId, result: streamResult },
|
|
145
|
+
"[chat] ChatCompletions",
|
|
146
|
+
);
|
|
141
147
|
addSpanEvent("hebo.result.transformed");
|
|
142
148
|
|
|
143
149
|
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
@@ -180,6 +186,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
180
186
|
|
|
181
187
|
// Transform result.
|
|
182
188
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
189
|
+
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
|
|
183
190
|
addSpanEvent("hebo.result.transformed");
|
|
184
191
|
|
|
185
192
|
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
|
|
@@ -98,6 +98,7 @@ export const getChatRequestAttributes = (
|
|
|
98
98
|
|
|
99
99
|
if (signalLevel !== "required") {
|
|
100
100
|
Object.assign(attrs, {
|
|
101
|
+
// FUTURE: add reasoning info
|
|
101
102
|
"gen_ai.request.stream": inputs.stream,
|
|
102
103
|
"gen_ai.request.frequency_penalty": inputs.frequency_penalty,
|
|
103
104
|
"gen_ai.request.max_tokens": inputs.max_completion_tokens,
|