@hebo-ai/gateway 0.5.2 → 0.6.0-rc0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/README.md +32 -1
  2. package/package.json +17 -12
  3. package/src/endpoints/chat-completions/converters.test.ts +85 -1
  4. package/src/endpoints/chat-completions/converters.ts +139 -18
  5. package/src/endpoints/chat-completions/handler.test.ts +2 -0
  6. package/src/endpoints/chat-completions/index.ts +1 -0
  7. package/src/endpoints/chat-completions/otel.ts +1 -0
  8. package/src/endpoints/chat-completions/schema.ts +38 -4
  9. package/src/endpoints/embeddings/index.ts +1 -0
  10. package/src/lifecycle.ts +2 -2
  11. package/src/models/anthropic/middleware.test.ts +45 -1
  12. package/src/models/anthropic/middleware.ts +21 -1
  13. package/src/models/google/middleware.test.ts +30 -1
  14. package/src/models/google/middleware.ts +20 -1
  15. package/src/models/openai/middleware.test.ts +32 -1
  16. package/src/models/openai/middleware.ts +25 -1
  17. package/src/providers/bedrock/middleware.test.ts +121 -1
  18. package/src/providers/bedrock/middleware.ts +61 -1
  19. package/src/telemetry/fetch.ts +31 -4
  20. package/src/telemetry/index.ts +1 -0
  21. package/dist/config.d.ts +0 -2
  22. package/dist/config.js +0 -81
  23. package/dist/endpoints/chat-completions/converters.d.ts +0 -43
  24. package/dist/endpoints/chat-completions/converters.js +0 -551
  25. package/dist/endpoints/chat-completions/handler.d.ts +0 -2
  26. package/dist/endpoints/chat-completions/handler.js +0 -145
  27. package/dist/endpoints/chat-completions/index.d.ts +0 -3
  28. package/dist/endpoints/chat-completions/index.js +0 -3
  29. package/dist/endpoints/chat-completions/otel.d.ts +0 -6
  30. package/dist/endpoints/chat-completions/otel.js +0 -134
  31. package/dist/endpoints/chat-completions/schema.d.ts +0 -946
  32. package/dist/endpoints/chat-completions/schema.js +0 -257
  33. package/dist/endpoints/embeddings/converters.d.ts +0 -10
  34. package/dist/endpoints/embeddings/converters.js +0 -31
  35. package/dist/endpoints/embeddings/handler.d.ts +0 -2
  36. package/dist/endpoints/embeddings/handler.js +0 -101
  37. package/dist/endpoints/embeddings/index.d.ts +0 -3
  38. package/dist/endpoints/embeddings/index.js +0 -3
  39. package/dist/endpoints/embeddings/otel.d.ts +0 -6
  40. package/dist/endpoints/embeddings/otel.js +0 -35
  41. package/dist/endpoints/embeddings/schema.d.ts +0 -38
  42. package/dist/endpoints/embeddings/schema.js +0 -26
  43. package/dist/endpoints/models/converters.d.ts +0 -6
  44. package/dist/endpoints/models/converters.js +0 -42
  45. package/dist/endpoints/models/handler.d.ts +0 -2
  46. package/dist/endpoints/models/handler.js +0 -29
  47. package/dist/endpoints/models/index.d.ts +0 -3
  48. package/dist/endpoints/models/index.js +0 -3
  49. package/dist/endpoints/models/schema.d.ts +0 -42
  50. package/dist/endpoints/models/schema.js +0 -31
  51. package/dist/errors/ai-sdk.d.ts +0 -2
  52. package/dist/errors/ai-sdk.js +0 -52
  53. package/dist/errors/gateway.d.ts +0 -5
  54. package/dist/errors/gateway.js +0 -13
  55. package/dist/errors/openai.d.ts +0 -20
  56. package/dist/errors/openai.js +0 -40
  57. package/dist/errors/utils.d.ts +0 -22
  58. package/dist/errors/utils.js +0 -44
  59. package/dist/gateway.d.ts +0 -9
  60. package/dist/gateway.js +0 -34
  61. package/dist/index.d.ts +0 -14
  62. package/dist/index.js +0 -13
  63. package/dist/lifecycle.d.ts +0 -2
  64. package/dist/lifecycle.js +0 -94
  65. package/dist/logger/default.d.ts +0 -4
  66. package/dist/logger/default.js +0 -81
  67. package/dist/logger/index.d.ts +0 -14
  68. package/dist/logger/index.js +0 -25
  69. package/dist/middleware/common.d.ts +0 -12
  70. package/dist/middleware/common.js +0 -145
  71. package/dist/middleware/matcher.d.ts +0 -27
  72. package/dist/middleware/matcher.js +0 -112
  73. package/dist/middleware/utils.d.ts +0 -2
  74. package/dist/middleware/utils.js +0 -27
  75. package/dist/models/amazon/index.d.ts +0 -2
  76. package/dist/models/amazon/index.js +0 -2
  77. package/dist/models/amazon/middleware.d.ts +0 -3
  78. package/dist/models/amazon/middleware.js +0 -65
  79. package/dist/models/amazon/presets.d.ts +0 -2390
  80. package/dist/models/amazon/presets.js +0 -80
  81. package/dist/models/anthropic/index.d.ts +0 -2
  82. package/dist/models/anthropic/index.js +0 -2
  83. package/dist/models/anthropic/middleware.d.ts +0 -4
  84. package/dist/models/anthropic/middleware.js +0 -111
  85. package/dist/models/anthropic/presets.d.ts +0 -4802
  86. package/dist/models/anthropic/presets.js +0 -135
  87. package/dist/models/catalog.d.ts +0 -4
  88. package/dist/models/catalog.js +0 -4
  89. package/dist/models/cohere/index.d.ts +0 -2
  90. package/dist/models/cohere/index.js +0 -2
  91. package/dist/models/cohere/middleware.d.ts +0 -3
  92. package/dist/models/cohere/middleware.js +0 -60
  93. package/dist/models/cohere/presets.d.ts +0 -2918
  94. package/dist/models/cohere/presets.js +0 -134
  95. package/dist/models/google/index.d.ts +0 -2
  96. package/dist/models/google/index.js +0 -2
  97. package/dist/models/google/middleware.d.ts +0 -7
  98. package/dist/models/google/middleware.js +0 -103
  99. package/dist/models/google/presets.d.ts +0 -2553
  100. package/dist/models/google/presets.js +0 -83
  101. package/dist/models/meta/index.d.ts +0 -1
  102. package/dist/models/meta/index.js +0 -1
  103. package/dist/models/meta/presets.d.ts +0 -3254
  104. package/dist/models/meta/presets.js +0 -95
  105. package/dist/models/openai/index.d.ts +0 -2
  106. package/dist/models/openai/index.js +0 -2
  107. package/dist/models/openai/middleware.d.ts +0 -3
  108. package/dist/models/openai/middleware.js +0 -62
  109. package/dist/models/openai/presets.d.ts +0 -6634
  110. package/dist/models/openai/presets.js +0 -213
  111. package/dist/models/types.d.ts +0 -20
  112. package/dist/models/types.js +0 -84
  113. package/dist/models/voyage/index.d.ts +0 -2
  114. package/dist/models/voyage/index.js +0 -2
  115. package/dist/models/voyage/middleware.d.ts +0 -2
  116. package/dist/models/voyage/middleware.js +0 -18
  117. package/dist/models/voyage/presets.d.ts +0 -3471
  118. package/dist/models/voyage/presets.js +0 -85
  119. package/dist/providers/anthropic/canonical.d.ts +0 -3
  120. package/dist/providers/anthropic/canonical.js +0 -9
  121. package/dist/providers/anthropic/index.d.ts +0 -1
  122. package/dist/providers/anthropic/index.js +0 -1
  123. package/dist/providers/bedrock/canonical.d.ts +0 -17
  124. package/dist/providers/bedrock/canonical.js +0 -61
  125. package/dist/providers/bedrock/index.d.ts +0 -2
  126. package/dist/providers/bedrock/index.js +0 -2
  127. package/dist/providers/bedrock/middleware.d.ts +0 -3
  128. package/dist/providers/bedrock/middleware.js +0 -55
  129. package/dist/providers/cohere/canonical.d.ts +0 -3
  130. package/dist/providers/cohere/canonical.js +0 -17
  131. package/dist/providers/cohere/index.d.ts +0 -1
  132. package/dist/providers/cohere/index.js +0 -1
  133. package/dist/providers/groq/canonical.d.ts +0 -3
  134. package/dist/providers/groq/canonical.js +0 -12
  135. package/dist/providers/groq/index.d.ts +0 -1
  136. package/dist/providers/groq/index.js +0 -1
  137. package/dist/providers/openai/canonical.d.ts +0 -3
  138. package/dist/providers/openai/canonical.js +0 -8
  139. package/dist/providers/openai/index.d.ts +0 -1
  140. package/dist/providers/openai/index.js +0 -1
  141. package/dist/providers/registry.d.ts +0 -24
  142. package/dist/providers/registry.js +0 -100
  143. package/dist/providers/types.d.ts +0 -7
  144. package/dist/providers/types.js +0 -11
  145. package/dist/providers/vertex/canonical.d.ts +0 -3
  146. package/dist/providers/vertex/canonical.js +0 -8
  147. package/dist/providers/vertex/index.d.ts +0 -1
  148. package/dist/providers/vertex/index.js +0 -1
  149. package/dist/providers/voyage/canonical.d.ts +0 -3
  150. package/dist/providers/voyage/canonical.js +0 -7
  151. package/dist/providers/voyage/index.d.ts +0 -1
  152. package/dist/providers/voyage/index.js +0 -1
  153. package/dist/telemetry/ai-sdk.d.ts +0 -2
  154. package/dist/telemetry/ai-sdk.js +0 -31
  155. package/dist/telemetry/baggage.d.ts +0 -1
  156. package/dist/telemetry/baggage.js +0 -24
  157. package/dist/telemetry/fetch.d.ts +0 -2
  158. package/dist/telemetry/fetch.js +0 -24
  159. package/dist/telemetry/gen-ai.d.ts +0 -5
  160. package/dist/telemetry/gen-ai.js +0 -60
  161. package/dist/telemetry/http.d.ts +0 -3
  162. package/dist/telemetry/http.js +0 -54
  163. package/dist/telemetry/memory.d.ts +0 -2
  164. package/dist/telemetry/memory.js +0 -27
  165. package/dist/telemetry/span.d.ts +0 -13
  166. package/dist/telemetry/span.js +0 -60
  167. package/dist/telemetry/stream.d.ts +0 -3
  168. package/dist/telemetry/stream.js +0 -51
  169. package/dist/types.d.ts +0 -176
  170. package/dist/types.js +0 -1
  171. package/dist/utils/env.d.ts +0 -2
  172. package/dist/utils/env.js +0 -5
  173. package/dist/utils/headers.d.ts +0 -4
  174. package/dist/utils/headers.js +0 -22
  175. package/dist/utils/preset.d.ts +0 -9
  176. package/dist/utils/preset.js +0 -41
  177. package/dist/utils/request.d.ts +0 -2
  178. package/dist/utils/request.js +0 -14
  179. package/dist/utils/response.d.ts +0 -3
  180. package/dist/utils/response.js +0 -68
package/src/lifecycle.ts CHANGED
@@ -10,7 +10,7 @@ import { GatewayError } from "./errors/gateway";
10
10
  import { toOpenAIErrorResponse } from "./errors/openai";
11
11
  import { logger } from "./logger";
12
12
  import { getBaggageAttributes } from "./telemetry/baggage";
13
- import { initFetch } from "./telemetry/fetch";
13
+ import { instrumentFetch } from "./telemetry/fetch";
14
14
  import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
15
15
  import { recordV8jsMemory } from "./telemetry/memory";
16
16
  import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
@@ -27,7 +27,7 @@ export const winterCgHandler = (
27
27
  if (parsedConfig.telemetry?.enabled) {
28
28
  setSpanTracer(parsedConfig.telemetry?.tracer);
29
29
  setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
30
- initFetch(parsedConfig.telemetry?.signals?.hebo);
30
+ instrumentFetch(parsedConfig.telemetry?.signals?.hebo);
31
31
  }
32
32
 
33
33
  return async (request: Request, state?: Record<string, unknown>): Promise<Response> => {
package/src/models/anthropic/middleware.test.ts CHANGED
@@ -3,7 +3,7 @@ import { expect, test } from "bun:test";
3
3
 
4
4
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
5
5
  import { CANONICAL_MODEL_IDS } from "../../models/types";
6
- import { claudeReasoningMiddleware } from "./middleware";
6
+ import { claudePromptCachingMiddleware, claudeReasoningMiddleware } from "./middleware";
7
7
 
8
8
  test("claudeReasoningMiddleware > matching patterns", () => {
9
9
  const matching = [
@@ -27,6 +27,7 @@ test("claudeReasoningMiddleware > matching patterns", () => {
27
27
  for (const id of matching) {
28
28
  const middleware = modelMiddlewareMatcher.resolve({ kind: "text", modelId: id });
29
29
  expect(middleware).toContain(claudeReasoningMiddleware);
30
+ expect(middleware).toContain(claudePromptCachingMiddleware);
30
31
  }
31
32
 
32
33
  for (const id of nonMatching) {
@@ -35,6 +36,49 @@ test("claudeReasoningMiddleware > matching patterns", () => {
35
36
  }
36
37
  });
37
38
 
39
+ test("claudePromptCachingMiddleware > should not auto-enable top-level cache control", async () => {
40
+ const params = {
41
+ prompt: [],
42
+ providerOptions: {
43
+ unknown: {},
44
+ },
45
+ };
46
+
47
+ const result = await claudePromptCachingMiddleware.transformParams!({
48
+ type: "generate",
49
+ params,
50
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
51
+ });
52
+
53
+ expect(result.providerOptions).toEqual({
54
+ unknown: {},
55
+ });
56
+ });
57
+
58
+ test("claudePromptCachingMiddleware > should map cache_control ttl", async () => {
59
+ const params = {
60
+ prompt: [],
61
+ providerOptions: {
62
+ unknown: {
63
+ cache_control: { type: "ephemeral", ttl: "1h" },
64
+ },
65
+ },
66
+ };
67
+
68
+ const result = await claudePromptCachingMiddleware.transformParams!({
69
+ type: "generate",
70
+ params,
71
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
72
+ });
73
+
74
+ expect(result.providerOptions).toEqual({
75
+ anthropic: {
76
+ cacheControl: { type: "ephemeral", ttl: "1h" },
77
+ },
78
+ unknown: {},
79
+ });
80
+ });
81
+
38
82
  test("claudeReasoningMiddleware > should transform reasoning_effort string to thinking budget", async () => {
39
83
  const params = {
40
84
  prompt: [],
package/src/models/anthropic/middleware.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import type { LanguageModelMiddleware } from "ai";
2
2
 
3
3
  import type {
4
+ ChatCompletionsCacheControl,
4
5
  ChatCompletionsReasoningConfig,
5
6
  ChatCompletionsReasoningEffort,
6
7
  } from "../../endpoints/chat-completions/schema";
@@ -123,6 +124,25 @@ export const claudeReasoningMiddleware: LanguageModelMiddleware = {
123
124
  },
124
125
  };
125
126
 
127
+ // https://platform.claude.com/docs/en/build-with-claude/prompt-caching
128
+ export const claudePromptCachingMiddleware: LanguageModelMiddleware = {
129
+ specificationVersion: "v3",
130
+ // eslint-disable-next-line require-await
131
+ transformParams: async ({ params }) => {
132
+ const unknown = params.providerOptions?.["unknown"];
133
+ if (!unknown) return params;
134
+
135
+ const cacheControl = unknown["cache_control"] as ChatCompletionsCacheControl;
136
+ if (cacheControl) {
137
+ (params.providerOptions!["anthropic"] ??= {})["cacheControl"] = cacheControl;
138
+ }
139
+
140
+ delete unknown["cache_control"];
141
+
142
+ return params;
143
+ },
144
+ };
145
+
126
146
  modelMiddlewareMatcher.useForModel(["anthropic/claude-*3*7*", "anthropic/claude-*4*"], {
127
- language: [claudeReasoningMiddleware],
147
+ language: [claudeReasoningMiddleware, claudePromptCachingMiddleware],
128
148
  });
package/src/models/google/middleware.test.ts CHANGED
@@ -4,7 +4,11 @@ import { expect, test } from "bun:test";
4
4
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
5
5
  import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
6
6
  import { CANONICAL_MODEL_IDS } from "../../models/types";
7
- import { geminiDimensionsMiddleware, geminiReasoningMiddleware } from "./middleware";
7
+ import {
8
+ geminiDimensionsMiddleware,
9
+ geminiPromptCachingMiddleware,
10
+ geminiReasoningMiddleware,
11
+ } from "./middleware";
8
12
 
9
13
  test("geminiReasoningMiddleware > matching patterns", () => {
10
14
  const matching = [
@@ -21,6 +25,7 @@ test("geminiReasoningMiddleware > matching patterns", () => {
21
25
  for (const id of matching) {
22
26
  const middleware = modelMiddlewareMatcher.resolve({ kind: "text", modelId: id });
23
27
  expect(middleware).toContain(geminiReasoningMiddleware);
28
+ expect(middleware).toContain(geminiPromptCachingMiddleware);
24
29
  }
25
30
 
26
31
  for (const id of nonMatching) {
@@ -29,6 +34,30 @@ test("geminiReasoningMiddleware > matching patterns", () => {
29
34
  }
30
35
  });
31
36
 
37
+ test("geminiPromptCachingMiddleware > should map normalized cached_content", async () => {
38
+ const params = {
39
+ prompt: [],
40
+ providerOptions: {
41
+ unknown: {
42
+ cached_content: "cachedContents/reusable",
43
+ },
44
+ },
45
+ };
46
+
47
+ const result = await geminiPromptCachingMiddleware.transformParams!({
48
+ type: "generate",
49
+ params,
50
+ model: new MockLanguageModelV3({ modelId: "google/gemini-2.5-flash" }),
51
+ });
52
+
53
+ expect(result.providerOptions).toEqual({
54
+ google: {
55
+ cachedContent: "cachedContents/reusable",
56
+ },
57
+ unknown: {},
58
+ });
59
+ });
60
+
32
61
  test("geminiDimensionsMiddleware > matching patterns", () => {
33
62
  const matching = ["google/gemini-embedding-001"];
34
63
  const nonMatching = [
package/src/models/google/middleware.ts CHANGED
@@ -123,10 +123,29 @@ export const geminiReasoningMiddleware: LanguageModelMiddleware = {
123
123
  },
124
124
  };
125
125
 
126
+ // https://ai.google.dev/gemini-api/docs/caching
127
+ // FUTURE: auto-create cached_content for message-level cache_control blocks
128
+ export const geminiPromptCachingMiddleware: LanguageModelMiddleware = {
129
+ specificationVersion: "v3",
130
+ // eslint-disable-next-line require-await
131
+ transformParams: async ({ params }) => {
132
+ const unknown = params.providerOptions?.["unknown"];
133
+ if (!unknown) return params;
134
+
135
+ const cachedContent = unknown["cached_content"] as string | undefined;
136
+ if (cachedContent) {
137
+ (params.providerOptions!["google"] ??= {})["cachedContent"] = cachedContent;
138
+ }
139
+
140
+ delete unknown["cached_content"];
141
+ return params;
142
+ },
143
+ };
144
+
126
145
  modelMiddlewareMatcher.useForModel("google/gemini-*embedding-*", {
127
146
  embedding: [geminiDimensionsMiddleware],
128
147
  });
129
148
 
130
149
  modelMiddlewareMatcher.useForModel(["google/gemini-2*", "google/gemini-3*"], {
131
- language: [geminiReasoningMiddleware],
150
+ language: [geminiReasoningMiddleware, geminiPromptCachingMiddleware],
132
151
  });
package/src/models/openai/middleware.test.ts CHANGED
@@ -3,7 +3,11 @@ import { expect, test } from "bun:test";
3
3
 
4
4
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
5
5
  import { CANONICAL_MODEL_IDS } from "../../models/types";
6
- import { openAIDimensionsMiddleware, openAIReasoningMiddleware } from "./middleware";
6
+ import {
7
+ openAIDimensionsMiddleware,
8
+ openAIPromptCachingMiddleware,
9
+ openAIReasoningMiddleware,
10
+ } from "./middleware";
7
11
 
8
12
  test("openAI middleware > matching patterns", () => {
9
13
  const languageMatching = [
@@ -21,6 +25,7 @@ test("openAI middleware > matching patterns", () => {
21
25
  for (const id of languageMatching) {
22
26
  const middleware = modelMiddlewareMatcher.resolve({ kind: "text", modelId: id });
23
27
  expect(middleware).toContain(openAIReasoningMiddleware);
28
+ expect(middleware).toContain(openAIPromptCachingMiddleware);
24
29
  }
25
30
 
26
31
  for (const id of languageNonMatching) {
@@ -46,6 +51,32 @@ test("openAI middleware > matching patterns", () => {
46
51
  }
47
52
  });
48
53
 
54
+ test("openAIPromptCachingMiddleware > should map key and retention", async () => {
55
+ const params = {
56
+ prompt: [],
57
+ providerOptions: {
58
+ unknown: {
59
+ prompt_cache_key: "tenant:shared:legal-v1",
60
+ prompt_cache_retention: "24h",
61
+ },
62
+ },
63
+ };
64
+
65
+ const result = await openAIPromptCachingMiddleware.transformParams!({
66
+ type: "generate",
67
+ params,
68
+ model: new MockLanguageModelV3({ modelId: "openai/gpt-5" }),
69
+ });
70
+
71
+ expect(result.providerOptions).toEqual({
72
+ openai: {
73
+ promptCacheKey: "tenant:shared:legal-v1",
74
+ promptCacheRetention: "24h",
75
+ },
76
+ unknown: {},
77
+ });
78
+ });
79
+
49
80
  test("openAIReasoningMiddleware > should map reasoning effort to OpenAI provider options", async () => {
50
81
  const params = {
51
82
  prompt: [],
package/src/models/openai/middleware.ts CHANGED
@@ -70,10 +70,34 @@ export const openAIReasoningMiddleware: LanguageModelMiddleware = {
70
70
  },
71
71
  };
72
72
 
73
+ // https://developers.openai.com/api/docs/guides/prompt-caching/
74
+ export const openAIPromptCachingMiddleware: LanguageModelMiddleware = {
75
+ specificationVersion: "v3",
76
+ // eslint-disable-next-line require-await
77
+ transformParams: async ({ params }) => {
78
+ const unknown = params.providerOptions?.["unknown"];
79
+ if (!unknown) return params;
80
+
81
+ const key = unknown["prompt_cache_key"] as string | undefined;
82
+ const retention = unknown["prompt_cache_retention"] as "in_memory" | "24h" | undefined;
83
+
84
+ if (key || retention) {
85
+ const target = (params.providerOptions!["openai"] ??= {});
86
+ if (key) target["promptCacheKey"] = key;
87
+ if (retention) target["promptCacheRetention"] = retention;
88
+ }
89
+
90
+ delete unknown["prompt_cache_key"];
91
+ delete unknown["prompt_cache_retention"];
92
+
93
+ return params;
94
+ },
95
+ };
96
+
73
97
  modelMiddlewareMatcher.useForModel("openai/text-embedding-*", {
74
98
  embedding: [openAIDimensionsMiddleware],
75
99
  });
76
100
 
77
101
  modelMiddlewareMatcher.useForModel("openai/gpt-*", {
78
- language: [openAIReasoningMiddleware],
102
+ language: [openAIReasoningMiddleware, openAIPromptCachingMiddleware],
79
103
  });
package/src/providers/bedrock/middleware.test.ts CHANGED
@@ -2,7 +2,11 @@ import { MockLanguageModelV3 } from "ai/test";
2
2
  import { expect, test } from "bun:test";
3
3
 
4
4
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
5
- import { bedrockClaudeReasoningMiddleware, bedrockGptReasoningMiddleware } from "./middleware";
5
+ import {
6
+ bedrockClaudeReasoningMiddleware,
7
+ bedrockGptReasoningMiddleware,
8
+ bedrockPromptCachingMiddleware,
9
+ } from "./middleware";
6
10
 
7
11
  test("bedrock middlewares > matching provider resolves GPT middleware", () => {
8
12
  const middleware = modelMiddlewareMatcher.resolve({
@@ -24,6 +28,26 @@ test("bedrock middlewares > matching provider resolves Claude middleware", () =>
24
28
  expect(middleware).toContain(bedrockClaudeReasoningMiddleware);
25
29
  });
26
30
 
31
+ test("bedrock middlewares > matching provider resolves prompt caching middleware for Claude", () => {
32
+ const middleware = modelMiddlewareMatcher.resolve({
33
+ kind: "text",
34
+ modelId: "anthropic/claude-opus-4.6",
35
+ providerId: "amazon-bedrock",
36
+ });
37
+
38
+ expect(middleware).toContain(bedrockPromptCachingMiddleware);
39
+ });
40
+
41
+ test("bedrock middlewares > matching provider resolves prompt caching middleware for Nova", () => {
42
+ const middleware = modelMiddlewareMatcher.resolve({
43
+ kind: "text",
44
+ modelId: "amazon/nova-2-lite",
45
+ providerId: "amazon-bedrock",
46
+ });
47
+
48
+ expect(middleware).toContain(bedrockPromptCachingMiddleware);
49
+ });
50
+
27
51
  test("bedrockGptReasoningMiddleware > should map reasoningEffort into reasoningConfig", async () => {
28
52
  const params = {
29
53
  prompt: [],
@@ -181,3 +205,99 @@ test("bedrockClaudeReasoningMiddleware > should not set maxReasoningEffort for C
181
205
  },
182
206
  });
183
207
  });
208
+
209
+ test("bedrockPromptCachingMiddleware > should map message and part cacheControl to cachePoint", async () => {
210
+ const params = {
211
+ prompt: [
212
+ {
213
+ role: "system",
214
+ content: [
215
+ {
216
+ type: "text",
217
+ text: "Policy",
218
+ providerOptions: {
219
+ bedrock: {
220
+ cacheControl: { type: "ephemeral", ttl: "1h" },
221
+ },
222
+ },
223
+ },
224
+ ],
225
+ providerOptions: {
226
+ bedrock: {
227
+ cacheControl: { type: "ephemeral", ttl: "1h" },
228
+ },
229
+ },
230
+ },
231
+ ],
232
+ providerOptions: {
233
+ bedrock: {},
234
+ },
235
+ };
236
+
237
+ const result = await bedrockPromptCachingMiddleware.transformParams!({
238
+ type: "generate",
239
+ params: params as any,
240
+ model: new MockLanguageModelV3({ modelId: "amazon/nova-2-lite" }),
241
+ });
242
+
243
+ expect((result.prompt[0] as any).providerOptions.bedrock.cachePoint).toEqual({
244
+ type: "default",
245
+ });
246
+ expect((result.prompt[0] as any).providerOptions.bedrock.cacheControl).toBeUndefined();
247
+ expect((result.prompt[0] as any).content[0].providerOptions.bedrock.cachePoint).toEqual({
248
+ type: "default",
249
+ });
250
+ expect((result.prompt[0] as any).content[0].providerOptions.bedrock.cacheControl).toBeUndefined();
251
+ });
252
+
253
+ test("bedrockPromptCachingMiddleware > should fallback from top-level cacheControl", async () => {
254
+ const params = {
255
+ prompt: [
256
+ {
257
+ role: "system",
258
+ content: "Reusable context",
259
+ },
260
+ {
261
+ role: "user",
262
+ content: "Question",
263
+ },
264
+ ],
265
+ providerOptions: {
266
+ bedrock: {
267
+ cacheControl: { type: "ephemeral", ttl: "1h" },
268
+ },
269
+ },
270
+ };
271
+
272
+ const result = await bedrockPromptCachingMiddleware.transformParams!({
273
+ type: "generate",
274
+ params: params as any,
275
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
276
+ });
277
+
278
+ expect((result.prompt[1] as any).providerOptions).toBeUndefined();
279
+ expect((result.providerOptions as any).bedrock.cacheControl).toBeUndefined();
280
+ });
281
+
282
+ test("bedrockPromptCachingMiddleware > should skip non-claude non-nova models", async () => {
283
+ const params = {
284
+ prompt: [{ role: "user", content: "Hello" }],
285
+ providerOptions: {
286
+ bedrock: {
287
+ cacheControl: { type: "ephemeral", ttl: "1h" },
288
+ },
289
+ },
290
+ };
291
+
292
+ const result = await bedrockPromptCachingMiddleware.transformParams!({
293
+ type: "generate",
294
+ params: params as any,
295
+ model: new MockLanguageModelV3({ modelId: "openai/gpt-oss-20b" }),
296
+ });
297
+
298
+ expect((result.providerOptions as any).bedrock.cacheControl).toEqual({
299
+ type: "ephemeral",
300
+ ttl: "1h",
301
+ });
302
+ expect((result.prompt[0] as any).providerOptions).toBeUndefined();
303
+ });
package/src/providers/bedrock/middleware.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import type { LanguageModelMiddleware } from "ai";
2
2
 
3
+ import type { ChatCompletionsCacheControl } from "../../endpoints/chat-completions/schema";
4
+
3
5
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
4
6
 
5
7
  const isClaude46 = (modelId: string) => modelId.includes("-4-6");
@@ -63,6 +65,64 @@ export const bedrockClaudeReasoningMiddleware: LanguageModelMiddleware = {
63
65
  },
64
66
  };
65
67
 
68
+ function toBedrockCachePoint(modelId: string, cacheControl?: ChatCompletionsCacheControl) {
69
+ const out: { type: "default"; ttl?: string } = { type: "default" };
70
+ // Nova currently only supports 5m
71
+ if (cacheControl?.ttl && !modelId.includes("nova")) {
72
+ out.ttl = cacheControl.ttl;
73
+ }
74
+ return out;
75
+ }
76
+
77
+ // https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
78
+ export const bedrockPromptCachingMiddleware: LanguageModelMiddleware = {
79
+ specificationVersion: "v3",
80
+ // eslint-disable-next-line require-await
81
+ transformParams: async ({ params, model }) => {
82
+ if (!model.modelId.includes("nova") && !model.modelId.includes("claude")) return params;
83
+
84
+ let hasExplicitCacheControl = false;
85
+ let lastCacheableBlock;
86
+
87
+ const processCacheControl = (providerOptions?: Record<string, any>) => {
88
+ if (!providerOptions) return;
89
+
90
+ const entryBedrock = providerOptions["bedrock"] as Record<string, unknown> | undefined;
91
+ const entryCacheControl = entryBedrock?.["cacheControl"] as ChatCompletionsCacheControl;
92
+ if (!entryBedrock || !entryCacheControl) return;
93
+
94
+ hasExplicitCacheControl = true;
95
+ entryBedrock["cachePoint"] = toBedrockCachePoint(model.modelId, entryCacheControl);
96
+ delete entryBedrock["cacheControl"];
97
+ };
98
+
99
+ for (const message of params.prompt) {
100
+ processCacheControl(message["providerOptions"]);
101
+
102
+ if (!Array.isArray(message["content"])) continue;
103
+ for (const part of message["content"]) {
104
+ processCacheControl(part["providerOptions"]);
105
+ }
106
+ lastCacheableBlock = message;
107
+ }
108
+
109
+ const bedrock = params.providerOptions?.["bedrock"];
110
+ const cacheControl = bedrock?.["cacheControl"] as ChatCompletionsCacheControl;
111
+ if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
112
+ ((lastCacheableBlock["providerOptions"] ??= {})["bedrock"] ??= {})["cachePoint"] =
113
+ toBedrockCachePoint(model.modelId, cacheControl);
114
+ }
115
+
116
+ delete bedrock?.["cacheControl"];
117
+
118
+ return params;
119
+ },
120
+ };
121
+
66
122
  modelMiddlewareMatcher.useForProvider("amazon-bedrock", {
67
- language: [bedrockGptReasoningMiddleware, bedrockClaudeReasoningMiddleware],
123
+ language: [
124
+ bedrockGptReasoningMiddleware,
125
+ bedrockClaudeReasoningMiddleware,
126
+ bedrockPromptCachingMiddleware,
127
+ ],
68
128
  });
package/src/telemetry/fetch.ts CHANGED
@@ -1,8 +1,8 @@
1
- import { SpanKind } from "@opentelemetry/api";
1
+ import { SpanKind, type Attributes } from "@opentelemetry/api";
2
2
 
3
3
  import type { TelemetrySignalLevel } from "../types";
4
4
 
5
- import { withSpan } from "./span";
5
+ import { setSpanAttributes, withSpan } from "./span";
6
6
 
7
7
  const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
8
8
 
@@ -13,6 +13,21 @@ type GlobalFetchState = typeof globalThis & {
13
13
  const g = globalThis as GlobalFetchState;
14
14
  let fetchTracingEnabled = false;
15
15
 
16
+ const isRequest = (value: unknown): value is Request =>
17
+ typeof Request !== "undefined" && value instanceof Request;
18
+
19
+ const getRequestAttributes = (input: RequestInfo | URL, init?: RequestInit): Attributes => {
20
+ const attrs: Attributes = {
21
+ "http.request.method": init?.method ?? (isRequest(input) ? input.method : "GET"),
22
+ };
23
+
24
+ if (input instanceof URL) attrs["url.full"] = input.href;
25
+ else if (typeof input === "string") attrs["url.full"] = input;
26
+ else if (isRequest(input)) attrs["url.full"] = input.url;
27
+
28
+ return attrs;
29
+ };
30
+
16
31
  const shouldTraceFetch = (init?: RequestInit): boolean =>
17
32
  typeof (init?.headers as any)?.["user-agent"] === "string" &&
18
33
  (init!.headers as any)["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
@@ -22,10 +37,22 @@ const otelFetch = (input: RequestInfo | URL, init?: RequestInit) => {
22
37
 
23
38
  if (!fetchTracingEnabled) return original(input, init);
24
39
  if (!shouldTraceFetch(init)) return original(input, init);
25
- return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
40
+
41
+ return withSpan(
42
+ "fetch",
43
+ async () => {
44
+ const response = await original(input, init);
45
+ setSpanAttributes({ "http.response.status_code": response.status });
46
+ return response;
47
+ },
48
+ {
49
+ kind: SpanKind.CLIENT,
50
+ attributes: getRequestAttributes(input, init),
51
+ },
52
+ );
26
53
  };
27
54
 
28
- export const initFetch = (level?: TelemetrySignalLevel) => {
55
+ export const instrumentFetch = (level?: TelemetrySignalLevel) => {
29
56
  fetchTracingEnabled = level === "full";
30
57
  if (!fetchTracingEnabled) return;
31
58
  if (g[ORIGINAL_FETCH_KEY]) return;
package/src/telemetry/index.ts ADDED
@@ -0,0 +1 @@
1
+ export * from "./fetch";
package/dist/config.d.ts DELETED
@@ -1,2 +0,0 @@
1
- import { type GatewayConfig, type GatewayConfigParsed } from "./types";
2
- export declare const parseConfig: (config: GatewayConfig) => GatewayConfigParsed;
package/dist/config.js DELETED
@@ -1,81 +0,0 @@
1
- import { isLogger, logger, setLoggerInstance } from "./logger";
2
- import { createDefaultLogger } from "./logger/default";
3
- import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
4
- import { kParsed, } from "./types";
5
- export const parseConfig = (config) => {
6
- // If it has been parsed before, just return.
7
- if (kParsed in config)
8
- return config;
9
- const providers = config.providers ?? {};
10
- const parsedProviders = {};
11
- const models = config.models ?? {};
12
- // Set the global logger instance.
13
- if (config.logger === undefined) {
14
- setLoggerInstance(createDefaultLogger({}));
15
- }
16
- else if (config.logger !== null) {
17
- setLoggerInstance(isLogger(config.logger) ? config.logger : createDefaultLogger(config.logger));
18
- logger.info(isLogger(config.logger)
19
- ? `[logger] custom logger configured`
20
- : `[logger] logger configured: level=${config.logger.level}`);
21
- }
22
- // Strip providers that are not configured.
23
- for (const id in providers) {
24
- const provider = providers[id];
25
- if (provider === undefined) {
26
- logger.warn(`[config] ${id} provider removed (undefined)`);
27
- continue;
28
- }
29
- parsedProviders[id] = provider;
30
- }
31
- if (Object.keys(parsedProviders).length === 0) {
32
- throw new Error("No providers configured (config.providers is empty)");
33
- }
34
- // Strip providers that are not configured from models.
35
- const parsedModels = {};
36
- const warnings = new Set();
37
- for (const id in models) {
38
- const model = models[id];
39
- const kept = [];
40
- for (const p of model.providers) {
41
- if (p in parsedProviders)
42
- kept.push(p);
43
- else
44
- warnings.add(p);
45
- }
46
- if (kept.length > 0)
47
- parsedModels[id] = { ...model, providers: kept };
48
- }
49
- for (const warning of warnings) {
50
- logger.warn(`[config] ${warning} provider removed (not configured)`);
51
- }
52
- if (Object.keys(parsedModels).length === 0) {
53
- throw new Error("No models configured (config.models is empty)");
54
- }
55
- // Default for the telemetry settings.
56
- const telemetryEnabled = config.telemetry?.enabled ?? false;
57
- const telemetrySignals = telemetryEnabled
58
- ? {
59
- http: config.telemetry?.signals?.http ?? "recommended",
60
- gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
61
- hebo: config.telemetry?.signals?.hebo ?? "off",
62
- }
63
- : {
64
- http: "off",
65
- gen_ai: "off",
66
- hebo: "off",
67
- };
68
- installAiSdkWarningLogger(telemetrySignals.gen_ai);
69
- // Return parsed config.
70
- return {
71
- ...config,
72
- telemetry: {
73
- ...config.telemetry,
74
- enabled: telemetryEnabled,
75
- signals: telemetrySignals,
76
- },
77
- providers: parsedProviders,
78
- models: parsedModels,
79
- [kParsed]: true,
80
- };
81
- };