@hebo-ai/gateway 0.5.2 → 0.6.0-rc0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/README.md +32 -1
  2. package/package.json +17 -12
  3. package/src/endpoints/chat-completions/converters.test.ts +85 -1
  4. package/src/endpoints/chat-completions/converters.ts +139 -18
  5. package/src/endpoints/chat-completions/handler.test.ts +2 -0
  6. package/src/endpoints/chat-completions/index.ts +1 -0
  7. package/src/endpoints/chat-completions/otel.ts +1 -0
  8. package/src/endpoints/chat-completions/schema.ts +38 -4
  9. package/src/endpoints/embeddings/index.ts +1 -0
  10. package/src/lifecycle.ts +2 -2
  11. package/src/models/anthropic/middleware.test.ts +45 -1
  12. package/src/models/anthropic/middleware.ts +21 -1
  13. package/src/models/google/middleware.test.ts +30 -1
  14. package/src/models/google/middleware.ts +20 -1
  15. package/src/models/openai/middleware.test.ts +32 -1
  16. package/src/models/openai/middleware.ts +25 -1
  17. package/src/providers/bedrock/middleware.test.ts +121 -1
  18. package/src/providers/bedrock/middleware.ts +61 -1
  19. package/src/telemetry/fetch.ts +31 -4
  20. package/src/telemetry/index.ts +1 -0
  21. package/dist/config.d.ts +0 -2
  22. package/dist/config.js +0 -81
  23. package/dist/endpoints/chat-completions/converters.d.ts +0 -43
  24. package/dist/endpoints/chat-completions/converters.js +0 -551
  25. package/dist/endpoints/chat-completions/handler.d.ts +0 -2
  26. package/dist/endpoints/chat-completions/handler.js +0 -145
  27. package/dist/endpoints/chat-completions/index.d.ts +0 -3
  28. package/dist/endpoints/chat-completions/index.js +0 -3
  29. package/dist/endpoints/chat-completions/otel.d.ts +0 -6
  30. package/dist/endpoints/chat-completions/otel.js +0 -134
  31. package/dist/endpoints/chat-completions/schema.d.ts +0 -946
  32. package/dist/endpoints/chat-completions/schema.js +0 -257
  33. package/dist/endpoints/embeddings/converters.d.ts +0 -10
  34. package/dist/endpoints/embeddings/converters.js +0 -31
  35. package/dist/endpoints/embeddings/handler.d.ts +0 -2
  36. package/dist/endpoints/embeddings/handler.js +0 -101
  37. package/dist/endpoints/embeddings/index.d.ts +0 -3
  38. package/dist/endpoints/embeddings/index.js +0 -3
  39. package/dist/endpoints/embeddings/otel.d.ts +0 -6
  40. package/dist/endpoints/embeddings/otel.js +0 -35
  41. package/dist/endpoints/embeddings/schema.d.ts +0 -38
  42. package/dist/endpoints/embeddings/schema.js +0 -26
  43. package/dist/endpoints/models/converters.d.ts +0 -6
  44. package/dist/endpoints/models/converters.js +0 -42
  45. package/dist/endpoints/models/handler.d.ts +0 -2
  46. package/dist/endpoints/models/handler.js +0 -29
  47. package/dist/endpoints/models/index.d.ts +0 -3
  48. package/dist/endpoints/models/index.js +0 -3
  49. package/dist/endpoints/models/schema.d.ts +0 -42
  50. package/dist/endpoints/models/schema.js +0 -31
  51. package/dist/errors/ai-sdk.d.ts +0 -2
  52. package/dist/errors/ai-sdk.js +0 -52
  53. package/dist/errors/gateway.d.ts +0 -5
  54. package/dist/errors/gateway.js +0 -13
  55. package/dist/errors/openai.d.ts +0 -20
  56. package/dist/errors/openai.js +0 -40
  57. package/dist/errors/utils.d.ts +0 -22
  58. package/dist/errors/utils.js +0 -44
  59. package/dist/gateway.d.ts +0 -9
  60. package/dist/gateway.js +0 -34
  61. package/dist/index.d.ts +0 -14
  62. package/dist/index.js +0 -13
  63. package/dist/lifecycle.d.ts +0 -2
  64. package/dist/lifecycle.js +0 -94
  65. package/dist/logger/default.d.ts +0 -4
  66. package/dist/logger/default.js +0 -81
  67. package/dist/logger/index.d.ts +0 -14
  68. package/dist/logger/index.js +0 -25
  69. package/dist/middleware/common.d.ts +0 -12
  70. package/dist/middleware/common.js +0 -145
  71. package/dist/middleware/matcher.d.ts +0 -27
  72. package/dist/middleware/matcher.js +0 -112
  73. package/dist/middleware/utils.d.ts +0 -2
  74. package/dist/middleware/utils.js +0 -27
  75. package/dist/models/amazon/index.d.ts +0 -2
  76. package/dist/models/amazon/index.js +0 -2
  77. package/dist/models/amazon/middleware.d.ts +0 -3
  78. package/dist/models/amazon/middleware.js +0 -65
  79. package/dist/models/amazon/presets.d.ts +0 -2390
  80. package/dist/models/amazon/presets.js +0 -80
  81. package/dist/models/anthropic/index.d.ts +0 -2
  82. package/dist/models/anthropic/index.js +0 -2
  83. package/dist/models/anthropic/middleware.d.ts +0 -4
  84. package/dist/models/anthropic/middleware.js +0 -111
  85. package/dist/models/anthropic/presets.d.ts +0 -4802
  86. package/dist/models/anthropic/presets.js +0 -135
  87. package/dist/models/catalog.d.ts +0 -4
  88. package/dist/models/catalog.js +0 -4
  89. package/dist/models/cohere/index.d.ts +0 -2
  90. package/dist/models/cohere/index.js +0 -2
  91. package/dist/models/cohere/middleware.d.ts +0 -3
  92. package/dist/models/cohere/middleware.js +0 -60
  93. package/dist/models/cohere/presets.d.ts +0 -2918
  94. package/dist/models/cohere/presets.js +0 -134
  95. package/dist/models/google/index.d.ts +0 -2
  96. package/dist/models/google/index.js +0 -2
  97. package/dist/models/google/middleware.d.ts +0 -7
  98. package/dist/models/google/middleware.js +0 -103
  99. package/dist/models/google/presets.d.ts +0 -2553
  100. package/dist/models/google/presets.js +0 -83
  101. package/dist/models/meta/index.d.ts +0 -1
  102. package/dist/models/meta/index.js +0 -1
  103. package/dist/models/meta/presets.d.ts +0 -3254
  104. package/dist/models/meta/presets.js +0 -95
  105. package/dist/models/openai/index.d.ts +0 -2
  106. package/dist/models/openai/index.js +0 -2
  107. package/dist/models/openai/middleware.d.ts +0 -3
  108. package/dist/models/openai/middleware.js +0 -62
  109. package/dist/models/openai/presets.d.ts +0 -6634
  110. package/dist/models/openai/presets.js +0 -213
  111. package/dist/models/types.d.ts +0 -20
  112. package/dist/models/types.js +0 -84
  113. package/dist/models/voyage/index.d.ts +0 -2
  114. package/dist/models/voyage/index.js +0 -2
  115. package/dist/models/voyage/middleware.d.ts +0 -2
  116. package/dist/models/voyage/middleware.js +0 -18
  117. package/dist/models/voyage/presets.d.ts +0 -3471
  118. package/dist/models/voyage/presets.js +0 -85
  119. package/dist/providers/anthropic/canonical.d.ts +0 -3
  120. package/dist/providers/anthropic/canonical.js +0 -9
  121. package/dist/providers/anthropic/index.d.ts +0 -1
  122. package/dist/providers/anthropic/index.js +0 -1
  123. package/dist/providers/bedrock/canonical.d.ts +0 -17
  124. package/dist/providers/bedrock/canonical.js +0 -61
  125. package/dist/providers/bedrock/index.d.ts +0 -2
  126. package/dist/providers/bedrock/index.js +0 -2
  127. package/dist/providers/bedrock/middleware.d.ts +0 -3
  128. package/dist/providers/bedrock/middleware.js +0 -55
  129. package/dist/providers/cohere/canonical.d.ts +0 -3
  130. package/dist/providers/cohere/canonical.js +0 -17
  131. package/dist/providers/cohere/index.d.ts +0 -1
  132. package/dist/providers/cohere/index.js +0 -1
  133. package/dist/providers/groq/canonical.d.ts +0 -3
  134. package/dist/providers/groq/canonical.js +0 -12
  135. package/dist/providers/groq/index.d.ts +0 -1
  136. package/dist/providers/groq/index.js +0 -1
  137. package/dist/providers/openai/canonical.d.ts +0 -3
  138. package/dist/providers/openai/canonical.js +0 -8
  139. package/dist/providers/openai/index.d.ts +0 -1
  140. package/dist/providers/openai/index.js +0 -1
  141. package/dist/providers/registry.d.ts +0 -24
  142. package/dist/providers/registry.js +0 -100
  143. package/dist/providers/types.d.ts +0 -7
  144. package/dist/providers/types.js +0 -11
  145. package/dist/providers/vertex/canonical.d.ts +0 -3
  146. package/dist/providers/vertex/canonical.js +0 -8
  147. package/dist/providers/vertex/index.d.ts +0 -1
  148. package/dist/providers/vertex/index.js +0 -1
  149. package/dist/providers/voyage/canonical.d.ts +0 -3
  150. package/dist/providers/voyage/canonical.js +0 -7
  151. package/dist/providers/voyage/index.d.ts +0 -1
  152. package/dist/providers/voyage/index.js +0 -1
  153. package/dist/telemetry/ai-sdk.d.ts +0 -2
  154. package/dist/telemetry/ai-sdk.js +0 -31
  155. package/dist/telemetry/baggage.d.ts +0 -1
  156. package/dist/telemetry/baggage.js +0 -24
  157. package/dist/telemetry/fetch.d.ts +0 -2
  158. package/dist/telemetry/fetch.js +0 -24
  159. package/dist/telemetry/gen-ai.d.ts +0 -5
  160. package/dist/telemetry/gen-ai.js +0 -60
  161. package/dist/telemetry/http.d.ts +0 -3
  162. package/dist/telemetry/http.js +0 -54
  163. package/dist/telemetry/memory.d.ts +0 -2
  164. package/dist/telemetry/memory.js +0 -27
  165. package/dist/telemetry/span.d.ts +0 -13
  166. package/dist/telemetry/span.js +0 -60
  167. package/dist/telemetry/stream.d.ts +0 -3
  168. package/dist/telemetry/stream.js +0 -51
  169. package/dist/types.d.ts +0 -176
  170. package/dist/types.js +0 -1
  171. package/dist/utils/env.d.ts +0 -2
  172. package/dist/utils/env.js +0 -5
  173. package/dist/utils/headers.d.ts +0 -4
  174. package/dist/utils/headers.js +0 -22
  175. package/dist/utils/preset.d.ts +0 -9
  176. package/dist/utils/preset.js +0 -41
  177. package/dist/utils/request.d.ts +0 -2
  178. package/dist/utils/request.js +0 -14
  179. package/dist/utils/response.d.ts +0 -3
  180. package/dist/utils/response.js +0 -68
package/README.md CHANGED
@@ -38,7 +38,7 @@ bun install @hebo-ai/gateway
38
38
  - Runtime Support
39
39
  - [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
40
40
  - OpenAI Extensions
41
- - [Reasoning](#reasoning)
41
+ - [Reasoning](#reasoning) | [Prompt Caching](#prompt-caching)
42
42
  - Advanced Usage
43
43
  - [Passing Framework State to Hooks](#passing-framework-state-to-hooks) | [Selective Route Mounting](#selective-route-mounting) | [Low-level Schemas & Converters](#low-level-schemas--converters)
44
44
 
@@ -565,6 +565,37 @@ Advanced models (like Anthropic Claude 3.7 or Gemini 3) surface structured reaso
565
565
 
566
566
  For **Gemini 3** models, returning the thought signature via `extra_content` is mandatory to resume the chain-of-thought; failing to do so may result in errors or degraded performance.
567
567
 
568
+ ### Prompt Caching
569
+
570
+ The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.
571
+
572
+ Accepted request fields:
573
+
574
+ - `prompt_cache_key` + `prompt_cache_retention` (OpenAI style)
575
+ - `cache_control` (OpenRouter / Vercel / Claude style)
576
+ - `cached_content` (Gemini style)
577
+
578
+ ```json
579
+ {
580
+ "model": "anthropic/claude-sonnet-4.6",
581
+ "messages": [
582
+ {
583
+ "role": "system",
584
+ "content": "Reusable policy and instructions",
585
+ "cache_control": { "type": "ephemeral", "ttl": "1h" }
586
+ },
587
+ { "role": "user", "content": "Apply policy to this request." }
588
+ ]
589
+ }
590
+ ```
591
+
592
+ Provider behavior:
593
+
594
+ - **OpenAI-compatible**: forwards `prompt_cache_key` and `prompt_cache_retention` as native provider options.
595
+ - **Anthropic Claude**: maps top-level caching to Anthropic cache control, while message/part `cache_control` breakpoints are preserved.
596
+ - **Google Gemini**: maps `cached_content` to Gemini `cachedContent`.
597
+ - **Amazon Nova (Bedrock)**: maps `cache_control` to Bedrock `cachePoints` and inserts an automatic cache point on a stable prefix when none is provided.
598
+
568
599
  ## 🧪 Advanced Usage
569
600
 
570
601
  ### Logger Settings
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.5.2",
3
+ "version": "0.6.0-rc0",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
5
5
  "keywords": [
6
6
  "ai",
@@ -140,6 +140,11 @@
140
140
  "types": "./dist/providers/voyage/index.d.ts",
141
141
  "import": "./dist/providers/voyage/index.js",
142
142
  "dev-source": "./src/providers/voyage/index.ts"
143
+ },
144
+ "./telemetry": {
145
+ "types": "./dist/telemetry/index.d.ts",
146
+ "import": "./dist/telemetry/index.js",
147
+ "dev-source": "./src/telemetry/index.ts"
143
148
  }
144
149
  },
145
150
  "scripts": {
@@ -155,33 +160,33 @@
155
160
  },
156
161
  "dependencies": {
157
162
  "@ai-sdk/provider": "^3.0.8",
158
- "ai": "^6.0.97",
163
+ "ai": "^6.0.101",
159
164
  "zod": "^4.3.6"
160
165
  },
161
166
  "devDependencies": {
162
- "@ai-sdk/amazon-bedrock": "^4.0.63",
163
- "@ai-sdk/anthropic": "^3.0.46",
167
+ "@ai-sdk/amazon-bedrock": "^4.0.65",
168
+ "@ai-sdk/anthropic": "^3.0.47",
164
169
  "@ai-sdk/cohere": "^3.0.21",
165
- "@ai-sdk/google-vertex": "^4.0.61",
170
+ "@ai-sdk/google-vertex": "^4.0.63",
166
171
  "@ai-sdk/groq": "^3.0.24",
167
- "@ai-sdk/openai": "^3.0.30",
168
- "@aws-sdk/credential-providers": "^3.995.0",
172
+ "@ai-sdk/openai": "^3.0.34",
173
+ "@aws-sdk/credential-providers": "^3.998.0",
169
174
  "@langfuse/otel": "^4.6.1",
170
175
  "@mjackson/node-fetch-server": "^0.7.0",
171
176
  "@opentelemetry/api": "^1.9.0",
172
177
  "@opentelemetry/context-async-hooks": "^2.5.1",
173
178
  "@opentelemetry/sdk-trace-base": "^2.5.1",
174
- "@tanstack/react-router": "^1.161.3",
175
- "@tanstack/react-start": "^1.161.3",
179
+ "@tanstack/react-router": "^1.163.2",
180
+ "@tanstack/react-start": "^1.163.2",
176
181
  "@types/bun": "latest",
177
182
  "@types/react": "^19.2.14",
178
183
  "@types/react-dom": "^19.2.3",
179
- "elysia": "^1.4.25",
180
- "hono": "^4.12.0",
184
+ "elysia": "^1.4.26",
185
+ "hono": "^4.12.2",
181
186
  "lefthook": "^2.1.1",
182
187
  "next": "^16.1.6",
183
188
  "oxfmt": "^0.24.0",
184
- "oxlint": "^1.49.0",
189
+ "oxlint": "^1.50.0",
185
190
  "pino": "^10.3.1",
186
191
  "typescript": "^5.9.3",
187
192
  "vite": "^7.3.1",
@@ -1,4 +1,4 @@
1
- import type { GenerateTextResult, ToolSet, Output } from "ai";
1
+ import type { GenerateTextResult, ToolSet, Output, LanguageModelUsage } from "ai";
2
2
 
3
3
  import { describe, expect, test } from "bun:test";
4
4
 
@@ -8,6 +8,7 @@ import {
8
8
  convertToTextCallOptions,
9
9
  toChatCompletionsAssistantMessage,
10
10
  toChatCompletionsToolCall,
11
+ toChatCompletionsUsage,
11
12
  fromChatCompletionsAssistantMessage,
12
13
  fromChatCompletionsToolResultMessage,
13
14
  } from "./converters";
@@ -520,6 +521,89 @@ describe("Chat Completions Converters", () => {
520
521
  expect(result.tools).toBeDefined();
521
522
  expect(Object.keys(result.tools!)).toEqual(["get_weather"]);
522
523
  });
524
+
525
+ test("should map prompt cache options into providerOptions.unknown", () => {
526
+ const result = convertToTextCallOptions({
527
+ messages: [{ role: "system", content: "You are concise." }],
528
+ prompt_cache_key: "tenant:docs:v1",
529
+ prompt_cache_retention: "24h",
530
+ });
531
+
532
+ expect(result.providerOptions).toEqual({
533
+ unknown: {
534
+ prompt_cache_key: "tenant:docs:v1",
535
+ prompt_cache_retention: "24h",
536
+ cached_content: "tenant:docs:v1",
537
+ cache_control: {
538
+ type: "ephemeral",
539
+ ttl: "24h",
540
+ },
541
+ },
542
+ });
543
+ });
544
+
545
+ test("should sync retention from cache_control ttl", () => {
546
+ const result = convertToTextCallOptions({
547
+ messages: [{ role: "system", content: "You are concise." }],
548
+ cache_control: {
549
+ type: "ephemeral",
550
+ ttl: "5m",
551
+ },
552
+ });
553
+
554
+ expect(result.providerOptions).toEqual({
555
+ unknown: {
556
+ prompt_cache_retention: "in_memory",
557
+ cache_control: {
558
+ type: "ephemeral",
559
+ ttl: "5m",
560
+ },
561
+ },
562
+ });
563
+ });
564
+
565
+ test("should preserve cache_control on message and content parts", () => {
566
+ const result = convertToTextCallOptions({
567
+ messages: [
568
+ {
569
+ role: "system",
570
+ content: "Policy block",
571
+ cache_control: { type: "ephemeral", ttl: "1h" },
572
+ },
573
+ {
574
+ role: "user",
575
+ content: [{ type: "text", text: "Question", cache_control: { type: "ephemeral" } }],
576
+ },
577
+ ],
578
+ });
579
+
580
+ expect((result.messages[0] as any).providerOptions.unknown.cache_control).toEqual({
581
+ type: "ephemeral",
582
+ ttl: "1h",
583
+ });
584
+ expect((result.messages[1] as any).content[0].providerOptions.unknown.cache_control).toEqual({
585
+ type: "ephemeral",
586
+ });
587
+ });
588
+ });
589
+
590
+ describe("toChatCompletionsUsage", () => {
591
+ test("should include cached token details", () => {
592
+ const usage = toChatCompletionsUsage({
593
+ inputTokens: 100,
594
+ outputTokens: 20,
595
+ totalTokens: 120,
596
+ inputTokenDetails: {
597
+ cacheReadTokens: 60,
598
+ cacheWriteTokens: 10,
599
+ },
600
+ } as LanguageModelUsage);
601
+
602
+ expect(usage.prompt_tokens_details).toEqual({
603
+ cached_tokens: 60,
604
+ cache_write_tokens: 10,
605
+ });
606
+ });
523
607
  });
524
608
 
525
609
  describe("toChatCompletionsToolCall", () => {
@@ -17,6 +17,9 @@ import type {
17
17
  AssistantModelMessage,
18
18
  ToolModelMessage,
19
19
  UserModelMessage,
20
+ TextPart,
21
+ ImagePart,
22
+ FilePart,
20
23
  } from "ai";
21
24
 
22
25
  import { Output, jsonSchema, tool } from "ai";
@@ -28,7 +31,6 @@ import type {
28
31
  ChatCompletionsToolChoice,
29
32
  ChatCompletionsContentPart,
30
33
  ChatCompletionsMessage,
31
- ChatCompletionsSystemMessage,
32
34
  ChatCompletionsUserMessage,
33
35
  ChatCompletionsAssistantMessage,
34
36
  ChatCompletionsToolMessage,
@@ -46,6 +48,7 @@ import type {
46
48
  ChatCompletionsReasoningDetail,
47
49
  ChatCompletionsResponseFormat,
48
50
  ChatCompletionsContentPartText,
51
+ ChatCompletionsCacheControl,
49
52
  } from "./schema";
50
53
 
51
54
  import { GatewayError } from "../../errors/gateway";
@@ -81,6 +84,10 @@ export function convertToTextCallOptions(params: ChatCompletionsInputs): TextCal
81
84
  response_format,
82
85
  reasoning_effort,
83
86
  reasoning,
87
+ prompt_cache_key,
88
+ prompt_cache_retention,
89
+ cached_content,
90
+ cache_control,
84
91
  frequency_penalty,
85
92
  presence_penalty,
86
93
  seed,
@@ -90,6 +97,15 @@ export function convertToTextCallOptions(params: ChatCompletionsInputs): TextCal
90
97
  } = params;
91
98
 
92
99
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
100
+ Object.assign(
101
+ rest,
102
+ parsePromptCachingOptions(
103
+ prompt_cache_key,
104
+ prompt_cache_retention,
105
+ cached_content,
106
+ cache_control,
107
+ ),
108
+ );
93
109
 
94
110
  const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
95
111
 
@@ -133,7 +149,12 @@ export function convertToModelMessages(messages: ChatCompletionsMessage[]): Mode
133
149
  if (message.role === "tool") continue;
134
150
 
135
151
  if (message.role === "system") {
136
- modelMessages.push(message satisfies ChatCompletionsSystemMessage);
152
+ if (message.cache_control) {
153
+ (message as ModelMessage).providerOptions = {
154
+ unknown: { cache_control: message.cache_control },
155
+ };
156
+ }
157
+ modelMessages.push(message);
137
158
  continue;
138
159
  }
139
160
 
@@ -161,18 +182,24 @@ function indexToolMessages(messages: ChatCompletionsMessage[]) {
161
182
  export function fromChatCompletionsUserMessage(
162
183
  message: ChatCompletionsUserMessage,
163
184
  ): UserModelMessage {
164
- return {
185
+ const out: UserModelMessage = {
165
186
  role: "user",
166
187
  content: Array.isArray(message.content)
167
188
  ? fromChatCompletionsContent(message.content)
168
189
  : message.content,
169
190
  };
191
+ if (message.cache_control) {
192
+ out.providerOptions = {
193
+ unknown: { cache_control: message.cache_control },
194
+ };
195
+ }
196
+ return out;
170
197
  }
171
198
 
172
199
  export function fromChatCompletionsAssistantMessage(
173
200
  message: ChatCompletionsAssistantMessage,
174
201
  ): AssistantModelMessage {
175
- const { tool_calls, role, content, extra_content, reasoning_details } = message;
202
+ const { tool_calls, role, content, extra_content, reasoning_details, cache_control } = message;
176
203
 
177
204
  const parts: AssistantContent = [];
178
205
 
@@ -211,10 +238,16 @@ export function fromChatCompletionsAssistantMessage(
211
238
  : content;
212
239
  for (const part of inputContent) {
213
240
  if (part.type === "text") {
214
- parts.push({
241
+ const textPart: TextPart = {
215
242
  type: "text",
216
243
  text: part.text,
217
- });
244
+ };
245
+ if (part.cache_control) {
246
+ textPart.providerOptions = {
247
+ unknown: { cache_control: part.cache_control },
248
+ };
249
+ }
250
+ parts.push(textPart);
218
251
  }
219
252
  }
220
253
  }
@@ -245,6 +278,10 @@ export function fromChatCompletionsAssistantMessage(
245
278
  out.providerOptions = extra_content as SharedV3ProviderOptions;
246
279
  }
247
280
 
281
+ if (cache_control) {
282
+ ((out.providerOptions ??= { unknown: {} })["unknown"] ??= {})["cache_control"] = cache_control;
283
+ }
284
+
248
285
  return out;
249
286
  }
250
287
 
@@ -275,44 +312,87 @@ export function fromChatCompletionsContent(content: ChatCompletionsContentPart[]
275
312
  return content.map((part) => {
276
313
  switch (part.type) {
277
314
  case "image_url":
278
- return fromImageUrlPart(part.image_url.url);
315
+ return fromImageUrlPart(part.image_url.url, part.cache_control);
279
316
  case "file":
280
- return fromFilePart(part.file.data, part.file.media_type, part.file.filename);
317
+ return fromFilePart(
318
+ part.file.data,
319
+ part.file.media_type,
320
+ part.file.filename,
321
+ part.cache_control,
322
+ );
281
323
  case "input_audio":
282
- return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`);
283
- default:
284
- return part;
324
+ return fromFilePart(
325
+ part.input_audio.data,
326
+ `audio/${part.input_audio.format}`,
327
+ undefined,
328
+ part.cache_control,
329
+ );
330
+ default: {
331
+ const out: TextPart = {
332
+ type: "text" as const,
333
+ text: part.text,
334
+ };
335
+ if (part.cache_control) {
336
+ out.providerOptions = {
337
+ unknown: { cache_control: part.cache_control },
338
+ };
339
+ }
340
+ return out;
341
+ }
285
342
  }
286
343
  });
287
344
  }
288
345
 
289
- function fromImageUrlPart(url: string) {
346
+ function fromImageUrlPart(url: string, cacheControl?: ChatCompletionsCacheControl) {
290
347
  if (url.startsWith("data:")) {
291
348
  const { mimeType, base64Data } = parseDataUrl(url);
292
- return fromFilePart(base64Data, mimeType);
349
+ return fromFilePart(base64Data, mimeType, undefined, cacheControl);
293
350
  }
294
351
 
295
- return {
352
+ const out: ImagePart = {
296
353
  type: "image" as const,
297
354
  image: new URL(url),
298
355
  };
356
+ if (cacheControl) {
357
+ out.providerOptions = {
358
+ unknown: { cache_control: cacheControl },
359
+ };
360
+ }
361
+ return out;
299
362
  }
300
363
 
301
- function fromFilePart(base64Data: string, mediaType: string, filename?: string) {
364
+ function fromFilePart(
365
+ base64Data: string,
366
+ mediaType: string,
367
+ filename?: string,
368
+ cacheControl?: ChatCompletionsCacheControl,
369
+ ) {
302
370
  if (mediaType.startsWith("image/")) {
303
- return {
371
+ const out: ImagePart = {
304
372
  type: "image" as const,
305
373
  image: z.util.base64ToUint8Array(base64Data),
306
374
  mediaType,
307
375
  };
376
+ if (cacheControl) {
377
+ out.providerOptions = {
378
+ unknown: { cache_control: cacheControl },
379
+ };
380
+ }
381
+ return out;
308
382
  }
309
383
 
310
- return {
384
+ const out: FilePart = {
311
385
  type: "file" as const,
312
386
  data: z.util.base64ToUint8Array(base64Data),
313
387
  filename,
314
388
  mediaType,
315
389
  };
390
+ if (cacheControl) {
391
+ out.providerOptions = {
392
+ unknown: { cache_control: cacheControl },
393
+ };
394
+ }
395
+ return out;
316
396
  }
317
397
 
318
398
  export const convertToToolSet = (tools: ChatCompletionsTool[] | undefined): ToolSet | undefined => {
@@ -438,6 +518,38 @@ function parseReasoningOptions(
438
518
  return out;
439
519
  }
440
520
 
521
+ function parsePromptCachingOptions(
522
+ prompt_cache_key: string | undefined,
523
+ prompt_cache_retention: "in_memory" | "24h" | undefined,
524
+ cached_content: string | undefined,
525
+ cache_control: ChatCompletionsCacheControl | undefined,
526
+ ) {
527
+ const out: Record<string, unknown> = {};
528
+
529
+ const syncedCacheKey = prompt_cache_key ?? cached_content;
530
+ const syncedCachedContent = cached_content ?? prompt_cache_key;
531
+
532
+ let syncedCacheRetention = prompt_cache_retention;
533
+ if (!syncedCacheRetention && cache_control?.ttl) {
534
+ syncedCacheRetention = cache_control.ttl === "24h" ? "24h" : "in_memory";
535
+ }
536
+
537
+ let syncedCacheControl = cache_control;
538
+ if (!syncedCacheControl && syncedCacheRetention) {
539
+ syncedCacheControl = {
540
+ type: "ephemeral",
541
+ ttl: syncedCacheRetention === "24h" ? "24h" : "5m",
542
+ };
543
+ }
544
+
545
+ if (syncedCacheKey) out["prompt_cache_key"] = syncedCacheKey;
546
+ if (syncedCacheRetention) out["prompt_cache_retention"] = syncedCacheRetention;
547
+ if (syncedCachedContent) out["cached_content"] = syncedCachedContent;
548
+ if (syncedCacheControl) out["cache_control"] = syncedCacheControl;
549
+
550
+ return out;
551
+ }
552
+
441
553
  // --- Response Flow ---
442
554
 
443
555
  export function toChatCompletions(
@@ -731,7 +843,16 @@ export function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletio
731
843
  if (reasoning !== undefined) out.completion_tokens_details = { reasoning_tokens: reasoning };
732
844
 
733
845
  const cached = usage.inputTokenDetails?.cacheReadTokens;
734
- if (cached !== undefined) out.prompt_tokens_details = { cached_tokens: cached };
846
+ const cacheWrite = usage.inputTokenDetails?.cacheWriteTokens;
847
+ if (cached !== undefined || cacheWrite !== undefined) {
848
+ out.prompt_tokens_details = {};
849
+ if (cached !== undefined) {
850
+ out.prompt_tokens_details.cached_tokens = cached;
851
+ }
852
+ if (cacheWrite !== undefined) {
853
+ out.prompt_tokens_details.cache_write_tokens = cacheWrite;
854
+ }
855
+ }
735
856
 
736
857
  return out;
737
858
  }
@@ -197,6 +197,7 @@ describe("Chat Completions Handler", () => {
197
197
  },
198
198
  prompt_tokens_details: {
199
199
  cached_tokens: 20,
200
+ cache_write_tokens: 0,
200
201
  },
201
202
  },
202
203
  provider_metadata: { provider: { key: "value" } },
@@ -284,6 +285,7 @@ describe("Chat Completions Handler", () => {
284
285
  },
285
286
  prompt_tokens_details: {
286
287
  cached_tokens: 20,
288
+ cache_write_tokens: 0,
287
289
  },
288
290
  },
289
291
  provider_metadata: { provider: { key: "value" } },
@@ -1,3 +1,4 @@
1
1
  export * from "./converters";
2
2
  export * from "./handler";
3
3
  export * from "./schema";
4
+ export * from "./otel";
@@ -98,6 +98,7 @@ export const getChatRequestAttributes = (
98
98
 
99
99
  if (signalLevel !== "required") {
100
100
  Object.assign(attrs, {
101
+ // FUTURE: add reasoning info
101
102
  "gen_ai.request.stream": inputs.stream,
102
103
  "gen_ai.request.frequency_penalty": inputs.frequency_penalty,
103
104
  "gen_ai.request.max_tokens": inputs.max_completion_tokens,
@@ -1,8 +1,16 @@
1
1
  import * as z from "zod";
2
2
 
3
+ export const ChatCompletionsCacheControlSchema = z.object({
4
+ type: z.literal("ephemeral"),
5
+ ttl: z.string().optional(),
6
+ });
7
+ export type ChatCompletionsCacheControl = z.infer<typeof ChatCompletionsCacheControlSchema>;
8
+
3
9
  export const ChatCompletionsContentPartTextSchema = z.object({
4
10
  type: z.literal("text"),
5
11
  text: z.string(),
12
+ // Extension origin: Anthropic/OpenRouter/Vercel
13
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
6
14
  });
7
15
  export type ChatCompletionsContentPartText = z.infer<typeof ChatCompletionsContentPartTextSchema>;
8
16
 
@@ -12,6 +20,8 @@ export const ChatCompletionsContentPartImageSchema = z.object({
12
20
  url: z.string(),
13
21
  detail: z.enum(["low", "high", "auto"]).optional(),
14
22
  }),
23
+ // Extension origin: OpenRouter/Vercel/Anthropic
24
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
15
25
  });
16
26
 
17
27
  export const ChatCompletionsContentPartFileSchema = z.object({
@@ -21,6 +31,8 @@ export const ChatCompletionsContentPartFileSchema = z.object({
21
31
  media_type: z.string(),
22
32
  filename: z.string().optional(),
23
33
  }),
34
+ // Extension origin: OpenRouter/Vercel/Anthropic
35
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
24
36
  });
25
37
 
26
38
  export const ChatCompletionsContentPartAudioSchema = z.object({
@@ -43,6 +55,8 @@ export const ChatCompletionsContentPartAudioSchema = z.object({
43
55
  "webm",
44
56
  ]),
45
57
  }),
58
+ // Extension origin: OpenRouter/Vercel/Anthropic
59
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
46
60
  });
47
61
 
48
62
  export const ChatCompletionsContentPartSchema = z.discriminatedUnion("type", [
@@ -60,6 +74,7 @@ export const ChatCompletionsToolCallSchema = z.object({
60
74
  arguments: z.string(),
61
75
  name: z.string(),
62
76
  }),
77
+ // Extension origin: Gemini
63
78
  extra_content: z
64
79
  .record(z.string(), z.record(z.string(), z.unknown()))
65
80
  .optional()
@@ -71,6 +86,8 @@ export const ChatCompletionsSystemMessageSchema = z.object({
71
86
  role: z.literal("system"),
72
87
  content: z.string(),
73
88
  name: z.string().optional(),
89
+ // Extension origin: OpenRouter/Vercel/Anthropic
90
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
74
91
  });
75
92
  export type ChatCompletionsSystemMessage = z.infer<typeof ChatCompletionsSystemMessageSchema>;
76
93
 
@@ -78,6 +95,8 @@ export const ChatCompletionsUserMessageSchema = z.object({
78
95
  role: z.literal("user"),
79
96
  content: z.union([z.string(), z.array(ChatCompletionsContentPartSchema)]),
80
97
  name: z.string().optional(),
98
+ // Extension origin: OpenRouter/Vercel/Anthropic
99
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
81
100
  });
82
101
  export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
83
102
 
@@ -101,16 +120,20 @@ export const ChatCompletionsAssistantMessageSchema = z.object({
101
120
  name: z.string().optional(),
102
121
  // FUTURE: This should also support Custom Tool Calls
103
122
  tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
104
- // Extensions
123
+ // Extension origin: OpenRouter/Vercel - TODO: should be "reasoning"?
105
124
  reasoning_content: z.string().optional().meta({ extension: true }),
125
+ // Extension origin: OpenRouter/Vercel
106
126
  reasoning_details: z
107
127
  .array(ChatCompletionsReasoningDetailSchema)
108
128
  .optional()
109
129
  .meta({ extension: true }),
130
+ // Extension origin: Gemini
110
131
  extra_content: z
111
132
  .record(z.string(), z.record(z.string(), z.unknown()))
112
133
  .optional()
113
134
  .meta({ extension: true }),
135
+ // Extension origin: OpenRouter/Vercel/Anthropic
136
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
114
137
  });
115
138
  export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
116
139
 
@@ -137,6 +160,7 @@ export const ChatCompletionsToolSchema = z.object({
137
160
  parameters: z.record(z.string(), z.unknown()),
138
161
  strict: z.boolean().optional(),
139
162
  }),
163
+ // FUTURE: cache_control support on tools
140
164
  });
141
165
  export type ChatCompletionsTool = z.infer<typeof ChatCompletionsToolSchema>;
142
166
 
@@ -167,11 +191,13 @@ export type ChatCompletionsToolChoice = z.infer<typeof ChatCompletionsToolChoice
167
191
 
168
192
  export const ChatCompletionsReasoningEffortSchema = z.enum([
169
193
  "none",
194
+ // Extension origin: Gemini
170
195
  "minimal",
171
196
  "low",
172
197
  "medium",
173
198
  "high",
174
199
  "xhigh",
200
+ // Extension origin: Anthropic
175
201
  "max",
176
202
  ]);
177
203
  export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
@@ -218,7 +244,13 @@ const ChatCompletionsInputsSchema = z.object({
218
244
  top_p: z.number().min(0).max(1.0).optional(),
219
245
  response_format: ChatCompletionsResponseFormatSchema.optional(),
220
246
  reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
221
- // Extensions
247
+ prompt_cache_key: z.string().optional(),
248
+ prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
249
+ // Extension origin: Gemini explicit cache handle
250
+ cached_content: z.string().optional().meta({ extension: true }),
251
+ // Extension origin: OpenRouter/Vercel/Anthropic
252
+ cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
253
+ // Extension origin: OpenRouter
222
254
  reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
223
255
  });
224
256
  export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
@@ -261,6 +293,8 @@ export const ChatCompletionsUsageSchema = z.object({
261
293
  .object({
262
294
  // FUTURE: add missing properties
263
295
  cached_tokens: z.int().nonnegative().optional(),
296
+ // Extension origin: OpenRouter
297
+ cache_write_tokens: z.int().nonnegative().optional().meta({ extension: true }),
264
298
  })
265
299
  .optional(),
266
300
  });
@@ -273,7 +307,7 @@ export const ChatCompletionsSchema = z.object({
273
307
  model: z.string(),
274
308
  choices: z.array(ChatCompletionsChoiceSchema),
275
309
  usage: ChatCompletionsUsageSchema.nullable(),
276
- // Extensions
310
+ // Extension origin: Vercel AI Gateway
277
311
  provider_metadata: z.unknown().optional().meta({ extension: true }),
278
312
  });
279
313
  export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
@@ -307,7 +341,7 @@ export const ChatCompletionsChunkSchema = z.object({
307
341
  model: z.string(),
308
342
  choices: z.array(ChatCompletionsChoiceDeltaSchema),
309
343
  usage: ChatCompletionsUsageSchema.nullable(),
310
- // Extensions
344
+ // Extension origin: Vercel AI Gateway
311
345
  provider_metadata: z.unknown().optional().meta({ extension: true }),
312
346
  });
313
347
  export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
@@ -1,3 +1,4 @@
1
1
  export * from "./converters";
2
2
  export * from "./handler";
3
3
  export * from "./schema";
4
+ export * from "./otel";