@hebo-ai/gateway 0.5.2 → 0.6.0-rc0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -1
- package/package.json +17 -12
- package/src/endpoints/chat-completions/converters.test.ts +85 -1
- package/src/endpoints/chat-completions/converters.ts +139 -18
- package/src/endpoints/chat-completions/handler.test.ts +2 -0
- package/src/endpoints/chat-completions/index.ts +1 -0
- package/src/endpoints/chat-completions/otel.ts +1 -0
- package/src/endpoints/chat-completions/schema.ts +38 -4
- package/src/endpoints/embeddings/index.ts +1 -0
- package/src/lifecycle.ts +2 -2
- package/src/models/anthropic/middleware.test.ts +45 -1
- package/src/models/anthropic/middleware.ts +21 -1
- package/src/models/google/middleware.test.ts +30 -1
- package/src/models/google/middleware.ts +20 -1
- package/src/models/openai/middleware.test.ts +32 -1
- package/src/models/openai/middleware.ts +25 -1
- package/src/providers/bedrock/middleware.test.ts +121 -1
- package/src/providers/bedrock/middleware.ts +61 -1
- package/src/telemetry/fetch.ts +31 -4
- package/src/telemetry/index.ts +1 -0
- package/dist/config.d.ts +0 -2
- package/dist/config.js +0 -81
- package/dist/endpoints/chat-completions/converters.d.ts +0 -43
- package/dist/endpoints/chat-completions/converters.js +0 -551
- package/dist/endpoints/chat-completions/handler.d.ts +0 -2
- package/dist/endpoints/chat-completions/handler.js +0 -145
- package/dist/endpoints/chat-completions/index.d.ts +0 -3
- package/dist/endpoints/chat-completions/index.js +0 -3
- package/dist/endpoints/chat-completions/otel.d.ts +0 -6
- package/dist/endpoints/chat-completions/otel.js +0 -134
- package/dist/endpoints/chat-completions/schema.d.ts +0 -946
- package/dist/endpoints/chat-completions/schema.js +0 -257
- package/dist/endpoints/embeddings/converters.d.ts +0 -10
- package/dist/endpoints/embeddings/converters.js +0 -31
- package/dist/endpoints/embeddings/handler.d.ts +0 -2
- package/dist/endpoints/embeddings/handler.js +0 -101
- package/dist/endpoints/embeddings/index.d.ts +0 -3
- package/dist/endpoints/embeddings/index.js +0 -3
- package/dist/endpoints/embeddings/otel.d.ts +0 -6
- package/dist/endpoints/embeddings/otel.js +0 -35
- package/dist/endpoints/embeddings/schema.d.ts +0 -38
- package/dist/endpoints/embeddings/schema.js +0 -26
- package/dist/endpoints/models/converters.d.ts +0 -6
- package/dist/endpoints/models/converters.js +0 -42
- package/dist/endpoints/models/handler.d.ts +0 -2
- package/dist/endpoints/models/handler.js +0 -29
- package/dist/endpoints/models/index.d.ts +0 -3
- package/dist/endpoints/models/index.js +0 -3
- package/dist/endpoints/models/schema.d.ts +0 -42
- package/dist/endpoints/models/schema.js +0 -31
- package/dist/errors/ai-sdk.d.ts +0 -2
- package/dist/errors/ai-sdk.js +0 -52
- package/dist/errors/gateway.d.ts +0 -5
- package/dist/errors/gateway.js +0 -13
- package/dist/errors/openai.d.ts +0 -20
- package/dist/errors/openai.js +0 -40
- package/dist/errors/utils.d.ts +0 -22
- package/dist/errors/utils.js +0 -44
- package/dist/gateway.d.ts +0 -9
- package/dist/gateway.js +0 -34
- package/dist/index.d.ts +0 -14
- package/dist/index.js +0 -13
- package/dist/lifecycle.d.ts +0 -2
- package/dist/lifecycle.js +0 -94
- package/dist/logger/default.d.ts +0 -4
- package/dist/logger/default.js +0 -81
- package/dist/logger/index.d.ts +0 -14
- package/dist/logger/index.js +0 -25
- package/dist/middleware/common.d.ts +0 -12
- package/dist/middleware/common.js +0 -145
- package/dist/middleware/matcher.d.ts +0 -27
- package/dist/middleware/matcher.js +0 -112
- package/dist/middleware/utils.d.ts +0 -2
- package/dist/middleware/utils.js +0 -27
- package/dist/models/amazon/index.d.ts +0 -2
- package/dist/models/amazon/index.js +0 -2
- package/dist/models/amazon/middleware.d.ts +0 -3
- package/dist/models/amazon/middleware.js +0 -65
- package/dist/models/amazon/presets.d.ts +0 -2390
- package/dist/models/amazon/presets.js +0 -80
- package/dist/models/anthropic/index.d.ts +0 -2
- package/dist/models/anthropic/index.js +0 -2
- package/dist/models/anthropic/middleware.d.ts +0 -4
- package/dist/models/anthropic/middleware.js +0 -111
- package/dist/models/anthropic/presets.d.ts +0 -4802
- package/dist/models/anthropic/presets.js +0 -135
- package/dist/models/catalog.d.ts +0 -4
- package/dist/models/catalog.js +0 -4
- package/dist/models/cohere/index.d.ts +0 -2
- package/dist/models/cohere/index.js +0 -2
- package/dist/models/cohere/middleware.d.ts +0 -3
- package/dist/models/cohere/middleware.js +0 -60
- package/dist/models/cohere/presets.d.ts +0 -2918
- package/dist/models/cohere/presets.js +0 -134
- package/dist/models/google/index.d.ts +0 -2
- package/dist/models/google/index.js +0 -2
- package/dist/models/google/middleware.d.ts +0 -7
- package/dist/models/google/middleware.js +0 -103
- package/dist/models/google/presets.d.ts +0 -2553
- package/dist/models/google/presets.js +0 -83
- package/dist/models/meta/index.d.ts +0 -1
- package/dist/models/meta/index.js +0 -1
- package/dist/models/meta/presets.d.ts +0 -3254
- package/dist/models/meta/presets.js +0 -95
- package/dist/models/openai/index.d.ts +0 -2
- package/dist/models/openai/index.js +0 -2
- package/dist/models/openai/middleware.d.ts +0 -3
- package/dist/models/openai/middleware.js +0 -62
- package/dist/models/openai/presets.d.ts +0 -6634
- package/dist/models/openai/presets.js +0 -213
- package/dist/models/types.d.ts +0 -20
- package/dist/models/types.js +0 -84
- package/dist/models/voyage/index.d.ts +0 -2
- package/dist/models/voyage/index.js +0 -2
- package/dist/models/voyage/middleware.d.ts +0 -2
- package/dist/models/voyage/middleware.js +0 -18
- package/dist/models/voyage/presets.d.ts +0 -3471
- package/dist/models/voyage/presets.js +0 -85
- package/dist/providers/anthropic/canonical.d.ts +0 -3
- package/dist/providers/anthropic/canonical.js +0 -9
- package/dist/providers/anthropic/index.d.ts +0 -1
- package/dist/providers/anthropic/index.js +0 -1
- package/dist/providers/bedrock/canonical.d.ts +0 -17
- package/dist/providers/bedrock/canonical.js +0 -61
- package/dist/providers/bedrock/index.d.ts +0 -2
- package/dist/providers/bedrock/index.js +0 -2
- package/dist/providers/bedrock/middleware.d.ts +0 -3
- package/dist/providers/bedrock/middleware.js +0 -55
- package/dist/providers/cohere/canonical.d.ts +0 -3
- package/dist/providers/cohere/canonical.js +0 -17
- package/dist/providers/cohere/index.d.ts +0 -1
- package/dist/providers/cohere/index.js +0 -1
- package/dist/providers/groq/canonical.d.ts +0 -3
- package/dist/providers/groq/canonical.js +0 -12
- package/dist/providers/groq/index.d.ts +0 -1
- package/dist/providers/groq/index.js +0 -1
- package/dist/providers/openai/canonical.d.ts +0 -3
- package/dist/providers/openai/canonical.js +0 -8
- package/dist/providers/openai/index.d.ts +0 -1
- package/dist/providers/openai/index.js +0 -1
- package/dist/providers/registry.d.ts +0 -24
- package/dist/providers/registry.js +0 -100
- package/dist/providers/types.d.ts +0 -7
- package/dist/providers/types.js +0 -11
- package/dist/providers/vertex/canonical.d.ts +0 -3
- package/dist/providers/vertex/canonical.js +0 -8
- package/dist/providers/vertex/index.d.ts +0 -1
- package/dist/providers/vertex/index.js +0 -1
- package/dist/providers/voyage/canonical.d.ts +0 -3
- package/dist/providers/voyage/canonical.js +0 -7
- package/dist/providers/voyage/index.d.ts +0 -1
- package/dist/providers/voyage/index.js +0 -1
- package/dist/telemetry/ai-sdk.d.ts +0 -2
- package/dist/telemetry/ai-sdk.js +0 -31
- package/dist/telemetry/baggage.d.ts +0 -1
- package/dist/telemetry/baggage.js +0 -24
- package/dist/telemetry/fetch.d.ts +0 -2
- package/dist/telemetry/fetch.js +0 -24
- package/dist/telemetry/gen-ai.d.ts +0 -5
- package/dist/telemetry/gen-ai.js +0 -60
- package/dist/telemetry/http.d.ts +0 -3
- package/dist/telemetry/http.js +0 -54
- package/dist/telemetry/memory.d.ts +0 -2
- package/dist/telemetry/memory.js +0 -27
- package/dist/telemetry/span.d.ts +0 -13
- package/dist/telemetry/span.js +0 -60
- package/dist/telemetry/stream.d.ts +0 -3
- package/dist/telemetry/stream.js +0 -51
- package/dist/types.d.ts +0 -176
- package/dist/types.js +0 -1
- package/dist/utils/env.d.ts +0 -2
- package/dist/utils/env.js +0 -5
- package/dist/utils/headers.d.ts +0 -4
- package/dist/utils/headers.js +0 -22
- package/dist/utils/preset.d.ts +0 -9
- package/dist/utils/preset.js +0 -41
- package/dist/utils/request.d.ts +0 -2
- package/dist/utils/request.js +0 -14
- package/dist/utils/response.d.ts +0 -3
- package/dist/utils/response.js +0 -68
package/src/lifecycle.ts
CHANGED
|
@@ -10,7 +10,7 @@ import { GatewayError } from "./errors/gateway";
|
|
|
10
10
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
11
11
|
import { logger } from "./logger";
|
|
12
12
|
import { getBaggageAttributes } from "./telemetry/baggage";
|
|
13
|
-
import {
|
|
13
|
+
import { instrumentFetch } from "./telemetry/fetch";
|
|
14
14
|
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
15
15
|
import { recordV8jsMemory } from "./telemetry/memory";
|
|
16
16
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
@@ -27,7 +27,7 @@ export const winterCgHandler = (
|
|
|
27
27
|
if (parsedConfig.telemetry?.enabled) {
|
|
28
28
|
setSpanTracer(parsedConfig.telemetry?.tracer);
|
|
29
29
|
setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
|
|
30
|
-
|
|
30
|
+
instrumentFetch(parsedConfig.telemetry?.signals?.hebo);
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
return async (request: Request, state?: Record<string, unknown>): Promise<Response> => {
|
|
@@ -3,7 +3,7 @@ import { expect, test } from "bun:test";
|
|
|
3
3
|
|
|
4
4
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
5
5
|
import { CANONICAL_MODEL_IDS } from "../../models/types";
|
|
6
|
-
import { claudeReasoningMiddleware } from "./middleware";
|
|
6
|
+
import { claudePromptCachingMiddleware, claudeReasoningMiddleware } from "./middleware";
|
|
7
7
|
|
|
8
8
|
test("claudeReasoningMiddleware > matching patterns", () => {
|
|
9
9
|
const matching = [
|
|
@@ -27,6 +27,7 @@ test("claudeReasoningMiddleware > matching patterns", () => {
|
|
|
27
27
|
for (const id of matching) {
|
|
28
28
|
const middleware = modelMiddlewareMatcher.resolve({ kind: "text", modelId: id });
|
|
29
29
|
expect(middleware).toContain(claudeReasoningMiddleware);
|
|
30
|
+
expect(middleware).toContain(claudePromptCachingMiddleware);
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
for (const id of nonMatching) {
|
|
@@ -35,6 +36,49 @@ test("claudeReasoningMiddleware > matching patterns", () => {
|
|
|
35
36
|
}
|
|
36
37
|
});
|
|
37
38
|
|
|
39
|
+
test("claudePromptCachingMiddleware > should not auto-enable top-level cache control", async () => {
|
|
40
|
+
const params = {
|
|
41
|
+
prompt: [],
|
|
42
|
+
providerOptions: {
|
|
43
|
+
unknown: {},
|
|
44
|
+
},
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
const result = await claudePromptCachingMiddleware.transformParams!({
|
|
48
|
+
type: "generate",
|
|
49
|
+
params,
|
|
50
|
+
model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
expect(result.providerOptions).toEqual({
|
|
54
|
+
unknown: {},
|
|
55
|
+
});
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test("claudePromptCachingMiddleware > should map cache_control ttl", async () => {
|
|
59
|
+
const params = {
|
|
60
|
+
prompt: [],
|
|
61
|
+
providerOptions: {
|
|
62
|
+
unknown: {
|
|
63
|
+
cache_control: { type: "ephemeral", ttl: "1h" },
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
const result = await claudePromptCachingMiddleware.transformParams!({
|
|
69
|
+
type: "generate",
|
|
70
|
+
params,
|
|
71
|
+
model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
expect(result.providerOptions).toEqual({
|
|
75
|
+
anthropic: {
|
|
76
|
+
cacheControl: { type: "ephemeral", ttl: "1h" },
|
|
77
|
+
},
|
|
78
|
+
unknown: {},
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
|
|
38
82
|
test("claudeReasoningMiddleware > should transform reasoning_effort string to thinking budget", async () => {
|
|
39
83
|
const params = {
|
|
40
84
|
prompt: [],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { LanguageModelMiddleware } from "ai";
|
|
2
2
|
|
|
3
3
|
import type {
|
|
4
|
+
ChatCompletionsCacheControl,
|
|
4
5
|
ChatCompletionsReasoningConfig,
|
|
5
6
|
ChatCompletionsReasoningEffort,
|
|
6
7
|
} from "../../endpoints/chat-completions/schema";
|
|
@@ -123,6 +124,25 @@ export const claudeReasoningMiddleware: LanguageModelMiddleware = {
|
|
|
123
124
|
},
|
|
124
125
|
};
|
|
125
126
|
|
|
127
|
+
// https://platform.claude.com/docs/en/build-with-claude/prompt-caching
|
|
128
|
+
export const claudePromptCachingMiddleware: LanguageModelMiddleware = {
|
|
129
|
+
specificationVersion: "v3",
|
|
130
|
+
// eslint-disable-next-line require-await
|
|
131
|
+
transformParams: async ({ params }) => {
|
|
132
|
+
const unknown = params.providerOptions?.["unknown"];
|
|
133
|
+
if (!unknown) return params;
|
|
134
|
+
|
|
135
|
+
const cacheControl = unknown["cache_control"] as ChatCompletionsCacheControl;
|
|
136
|
+
if (cacheControl) {
|
|
137
|
+
(params.providerOptions!["anthropic"] ??= {})["cacheControl"] = cacheControl;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
delete unknown["cache_control"];
|
|
141
|
+
|
|
142
|
+
return params;
|
|
143
|
+
},
|
|
144
|
+
};
|
|
145
|
+
|
|
126
146
|
modelMiddlewareMatcher.useForModel(["anthropic/claude-*3*7*", "anthropic/claude-*4*"], {
|
|
127
|
-
language: [claudeReasoningMiddleware],
|
|
147
|
+
language: [claudeReasoningMiddleware, claudePromptCachingMiddleware],
|
|
128
148
|
});
|
|
@@ -4,7 +4,11 @@ import { expect, test } from "bun:test";
|
|
|
4
4
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
5
5
|
import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
|
|
6
6
|
import { CANONICAL_MODEL_IDS } from "../../models/types";
|
|
7
|
-
import {
|
|
7
|
+
import {
|
|
8
|
+
geminiDimensionsMiddleware,
|
|
9
|
+
geminiPromptCachingMiddleware,
|
|
10
|
+
geminiReasoningMiddleware,
|
|
11
|
+
} from "./middleware";
|
|
8
12
|
|
|
9
13
|
test("geminiReasoningMiddleware > matching patterns", () => {
|
|
10
14
|
const matching = [
|
|
@@ -21,6 +25,7 @@ test("geminiReasoningMiddleware > matching patterns", () => {
|
|
|
21
25
|
for (const id of matching) {
|
|
22
26
|
const middleware = modelMiddlewareMatcher.resolve({ kind: "text", modelId: id });
|
|
23
27
|
expect(middleware).toContain(geminiReasoningMiddleware);
|
|
28
|
+
expect(middleware).toContain(geminiPromptCachingMiddleware);
|
|
24
29
|
}
|
|
25
30
|
|
|
26
31
|
for (const id of nonMatching) {
|
|
@@ -29,6 +34,30 @@ test("geminiReasoningMiddleware > matching patterns", () => {
|
|
|
29
34
|
}
|
|
30
35
|
});
|
|
31
36
|
|
|
37
|
+
test("geminiPromptCachingMiddleware > should map normalized cached_content", async () => {
|
|
38
|
+
const params = {
|
|
39
|
+
prompt: [],
|
|
40
|
+
providerOptions: {
|
|
41
|
+
unknown: {
|
|
42
|
+
cached_content: "cachedContents/reusable",
|
|
43
|
+
},
|
|
44
|
+
},
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
const result = await geminiPromptCachingMiddleware.transformParams!({
|
|
48
|
+
type: "generate",
|
|
49
|
+
params,
|
|
50
|
+
model: new MockLanguageModelV3({ modelId: "google/gemini-2.5-flash" }),
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
expect(result.providerOptions).toEqual({
|
|
54
|
+
google: {
|
|
55
|
+
cachedContent: "cachedContents/reusable",
|
|
56
|
+
},
|
|
57
|
+
unknown: {},
|
|
58
|
+
});
|
|
59
|
+
});
|
|
60
|
+
|
|
32
61
|
test("geminiDimensionsMiddleware > matching patterns", () => {
|
|
33
62
|
const matching = ["google/gemini-embedding-001"];
|
|
34
63
|
const nonMatching = [
|
|
@@ -123,10 +123,29 @@ export const geminiReasoningMiddleware: LanguageModelMiddleware = {
|
|
|
123
123
|
},
|
|
124
124
|
};
|
|
125
125
|
|
|
126
|
+
// https://ai.google.dev/gemini-api/docs/caching
|
|
127
|
+
// FUTURE: auto-create cached_content for message-level cache_control blocks
|
|
128
|
+
export const geminiPromptCachingMiddleware: LanguageModelMiddleware = {
|
|
129
|
+
specificationVersion: "v3",
|
|
130
|
+
// eslint-disable-next-line require-await
|
|
131
|
+
transformParams: async ({ params }) => {
|
|
132
|
+
const unknown = params.providerOptions?.["unknown"];
|
|
133
|
+
if (!unknown) return params;
|
|
134
|
+
|
|
135
|
+
const cachedContent = unknown["cached_content"] as string | undefined;
|
|
136
|
+
if (cachedContent) {
|
|
137
|
+
(params.providerOptions!["google"] ??= {})["cachedContent"] = cachedContent;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
delete unknown["cached_content"];
|
|
141
|
+
return params;
|
|
142
|
+
},
|
|
143
|
+
};
|
|
144
|
+
|
|
126
145
|
modelMiddlewareMatcher.useForModel("google/gemini-*embedding-*", {
|
|
127
146
|
embedding: [geminiDimensionsMiddleware],
|
|
128
147
|
});
|
|
129
148
|
|
|
130
149
|
modelMiddlewareMatcher.useForModel(["google/gemini-2*", "google/gemini-3*"], {
|
|
131
|
-
language: [geminiReasoningMiddleware],
|
|
150
|
+
language: [geminiReasoningMiddleware, geminiPromptCachingMiddleware],
|
|
132
151
|
});
|
|
@@ -3,7 +3,11 @@ import { expect, test } from "bun:test";
|
|
|
3
3
|
|
|
4
4
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
5
5
|
import { CANONICAL_MODEL_IDS } from "../../models/types";
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
openAIDimensionsMiddleware,
|
|
8
|
+
openAIPromptCachingMiddleware,
|
|
9
|
+
openAIReasoningMiddleware,
|
|
10
|
+
} from "./middleware";
|
|
7
11
|
|
|
8
12
|
test("openAI middleware > matching patterns", () => {
|
|
9
13
|
const languageMatching = [
|
|
@@ -21,6 +25,7 @@ test("openAI middleware > matching patterns", () => {
|
|
|
21
25
|
for (const id of languageMatching) {
|
|
22
26
|
const middleware = modelMiddlewareMatcher.resolve({ kind: "text", modelId: id });
|
|
23
27
|
expect(middleware).toContain(openAIReasoningMiddleware);
|
|
28
|
+
expect(middleware).toContain(openAIPromptCachingMiddleware);
|
|
24
29
|
}
|
|
25
30
|
|
|
26
31
|
for (const id of languageNonMatching) {
|
|
@@ -46,6 +51,32 @@ test("openAI middleware > matching patterns", () => {
|
|
|
46
51
|
}
|
|
47
52
|
});
|
|
48
53
|
|
|
54
|
+
test("openAIPromptCachingMiddleware > should map key and retention", async () => {
|
|
55
|
+
const params = {
|
|
56
|
+
prompt: [],
|
|
57
|
+
providerOptions: {
|
|
58
|
+
unknown: {
|
|
59
|
+
prompt_cache_key: "tenant:shared:legal-v1",
|
|
60
|
+
prompt_cache_retention: "24h",
|
|
61
|
+
},
|
|
62
|
+
},
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
const result = await openAIPromptCachingMiddleware.transformParams!({
|
|
66
|
+
type: "generate",
|
|
67
|
+
params,
|
|
68
|
+
model: new MockLanguageModelV3({ modelId: "openai/gpt-5" }),
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
expect(result.providerOptions).toEqual({
|
|
72
|
+
openai: {
|
|
73
|
+
promptCacheKey: "tenant:shared:legal-v1",
|
|
74
|
+
promptCacheRetention: "24h",
|
|
75
|
+
},
|
|
76
|
+
unknown: {},
|
|
77
|
+
});
|
|
78
|
+
});
|
|
79
|
+
|
|
49
80
|
test("openAIReasoningMiddleware > should map reasoning effort to OpenAI provider options", async () => {
|
|
50
81
|
const params = {
|
|
51
82
|
prompt: [],
|
|
@@ -70,10 +70,34 @@ export const openAIReasoningMiddleware: LanguageModelMiddleware = {
|
|
|
70
70
|
},
|
|
71
71
|
};
|
|
72
72
|
|
|
73
|
+
// https://developers.openai.com/api/docs/guides/prompt-caching/
|
|
74
|
+
export const openAIPromptCachingMiddleware: LanguageModelMiddleware = {
|
|
75
|
+
specificationVersion: "v3",
|
|
76
|
+
// eslint-disable-next-line require-await
|
|
77
|
+
transformParams: async ({ params }) => {
|
|
78
|
+
const unknown = params.providerOptions?.["unknown"];
|
|
79
|
+
if (!unknown) return params;
|
|
80
|
+
|
|
81
|
+
const key = unknown["prompt_cache_key"] as string | undefined;
|
|
82
|
+
const retention = unknown["prompt_cache_retention"] as "in_memory" | "24h" | undefined;
|
|
83
|
+
|
|
84
|
+
if (key || retention) {
|
|
85
|
+
const target = (params.providerOptions!["openai"] ??= {});
|
|
86
|
+
if (key) target["promptCacheKey"] = key;
|
|
87
|
+
if (retention) target["promptCacheRetention"] = retention;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
delete unknown["prompt_cache_key"];
|
|
91
|
+
delete unknown["prompt_cache_retention"];
|
|
92
|
+
|
|
93
|
+
return params;
|
|
94
|
+
},
|
|
95
|
+
};
|
|
96
|
+
|
|
73
97
|
modelMiddlewareMatcher.useForModel("openai/text-embedding-*", {
|
|
74
98
|
embedding: [openAIDimensionsMiddleware],
|
|
75
99
|
});
|
|
76
100
|
|
|
77
101
|
modelMiddlewareMatcher.useForModel("openai/gpt-*", {
|
|
78
|
-
language: [openAIReasoningMiddleware],
|
|
102
|
+
language: [openAIReasoningMiddleware, openAIPromptCachingMiddleware],
|
|
79
103
|
});
|
|
@@ -2,7 +2,11 @@ import { MockLanguageModelV3 } from "ai/test";
|
|
|
2
2
|
import { expect, test } from "bun:test";
|
|
3
3
|
|
|
4
4
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
5
|
-
import {
|
|
5
|
+
import {
|
|
6
|
+
bedrockClaudeReasoningMiddleware,
|
|
7
|
+
bedrockGptReasoningMiddleware,
|
|
8
|
+
bedrockPromptCachingMiddleware,
|
|
9
|
+
} from "./middleware";
|
|
6
10
|
|
|
7
11
|
test("bedrock middlewares > matching provider resolves GPT middleware", () => {
|
|
8
12
|
const middleware = modelMiddlewareMatcher.resolve({
|
|
@@ -24,6 +28,26 @@ test("bedrock middlewares > matching provider resolves Claude middleware", () =>
|
|
|
24
28
|
expect(middleware).toContain(bedrockClaudeReasoningMiddleware);
|
|
25
29
|
});
|
|
26
30
|
|
|
31
|
+
test("bedrock middlewares > matching provider resolves prompt caching middleware for Claude", () => {
|
|
32
|
+
const middleware = modelMiddlewareMatcher.resolve({
|
|
33
|
+
kind: "text",
|
|
34
|
+
modelId: "anthropic/claude-opus-4.6",
|
|
35
|
+
providerId: "amazon-bedrock",
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
expect(middleware).toContain(bedrockPromptCachingMiddleware);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test("bedrock middlewares > matching provider resolves prompt caching middleware for Nova", () => {
|
|
42
|
+
const middleware = modelMiddlewareMatcher.resolve({
|
|
43
|
+
kind: "text",
|
|
44
|
+
modelId: "amazon/nova-2-lite",
|
|
45
|
+
providerId: "amazon-bedrock",
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
expect(middleware).toContain(bedrockPromptCachingMiddleware);
|
|
49
|
+
});
|
|
50
|
+
|
|
27
51
|
test("bedrockGptReasoningMiddleware > should map reasoningEffort into reasoningConfig", async () => {
|
|
28
52
|
const params = {
|
|
29
53
|
prompt: [],
|
|
@@ -181,3 +205,99 @@ test("bedrockClaudeReasoningMiddleware > should not set maxReasoningEffort for C
|
|
|
181
205
|
},
|
|
182
206
|
});
|
|
183
207
|
});
|
|
208
|
+
|
|
209
|
+
test("bedrockPromptCachingMiddleware > should map message and part cacheControl to cachePoint", async () => {
|
|
210
|
+
const params = {
|
|
211
|
+
prompt: [
|
|
212
|
+
{
|
|
213
|
+
role: "system",
|
|
214
|
+
content: [
|
|
215
|
+
{
|
|
216
|
+
type: "text",
|
|
217
|
+
text: "Policy",
|
|
218
|
+
providerOptions: {
|
|
219
|
+
bedrock: {
|
|
220
|
+
cacheControl: { type: "ephemeral", ttl: "1h" },
|
|
221
|
+
},
|
|
222
|
+
},
|
|
223
|
+
},
|
|
224
|
+
],
|
|
225
|
+
providerOptions: {
|
|
226
|
+
bedrock: {
|
|
227
|
+
cacheControl: { type: "ephemeral", ttl: "1h" },
|
|
228
|
+
},
|
|
229
|
+
},
|
|
230
|
+
},
|
|
231
|
+
],
|
|
232
|
+
providerOptions: {
|
|
233
|
+
bedrock: {},
|
|
234
|
+
},
|
|
235
|
+
};
|
|
236
|
+
|
|
237
|
+
const result = await bedrockPromptCachingMiddleware.transformParams!({
|
|
238
|
+
type: "generate",
|
|
239
|
+
params: params as any,
|
|
240
|
+
model: new MockLanguageModelV3({ modelId: "amazon/nova-2-lite" }),
|
|
241
|
+
});
|
|
242
|
+
|
|
243
|
+
expect((result.prompt[0] as any).providerOptions.bedrock.cachePoint).toEqual({
|
|
244
|
+
type: "default",
|
|
245
|
+
});
|
|
246
|
+
expect((result.prompt[0] as any).providerOptions.bedrock.cacheControl).toBeUndefined();
|
|
247
|
+
expect((result.prompt[0] as any).content[0].providerOptions.bedrock.cachePoint).toEqual({
|
|
248
|
+
type: "default",
|
|
249
|
+
});
|
|
250
|
+
expect((result.prompt[0] as any).content[0].providerOptions.bedrock.cacheControl).toBeUndefined();
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
test("bedrockPromptCachingMiddleware > should fallback from top-level cacheControl", async () => {
|
|
254
|
+
const params = {
|
|
255
|
+
prompt: [
|
|
256
|
+
{
|
|
257
|
+
role: "system",
|
|
258
|
+
content: "Reusable context",
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
role: "user",
|
|
262
|
+
content: "Question",
|
|
263
|
+
},
|
|
264
|
+
],
|
|
265
|
+
providerOptions: {
|
|
266
|
+
bedrock: {
|
|
267
|
+
cacheControl: { type: "ephemeral", ttl: "1h" },
|
|
268
|
+
},
|
|
269
|
+
},
|
|
270
|
+
};
|
|
271
|
+
|
|
272
|
+
const result = await bedrockPromptCachingMiddleware.transformParams!({
|
|
273
|
+
type: "generate",
|
|
274
|
+
params: params as any,
|
|
275
|
+
model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
expect((result.prompt[1] as any).providerOptions).toBeUndefined();
|
|
279
|
+
expect((result.providerOptions as any).bedrock.cacheControl).toBeUndefined();
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
test("bedrockPromptCachingMiddleware > should skip non-claude non-nova models", async () => {
|
|
283
|
+
const params = {
|
|
284
|
+
prompt: [{ role: "user", content: "Hello" }],
|
|
285
|
+
providerOptions: {
|
|
286
|
+
bedrock: {
|
|
287
|
+
cacheControl: { type: "ephemeral", ttl: "1h" },
|
|
288
|
+
},
|
|
289
|
+
},
|
|
290
|
+
};
|
|
291
|
+
|
|
292
|
+
const result = await bedrockPromptCachingMiddleware.transformParams!({
|
|
293
|
+
type: "generate",
|
|
294
|
+
params: params as any,
|
|
295
|
+
model: new MockLanguageModelV3({ modelId: "openai/gpt-oss-20b" }),
|
|
296
|
+
});
|
|
297
|
+
|
|
298
|
+
expect((result.providerOptions as any).bedrock.cacheControl).toEqual({
|
|
299
|
+
type: "ephemeral",
|
|
300
|
+
ttl: "1h",
|
|
301
|
+
});
|
|
302
|
+
expect((result.prompt[0] as any).providerOptions).toBeUndefined();
|
|
303
|
+
});
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type { LanguageModelMiddleware } from "ai";
|
|
2
2
|
|
|
3
|
+
import type { ChatCompletionsCacheControl } from "../../endpoints/chat-completions/schema";
|
|
4
|
+
|
|
3
5
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
4
6
|
|
|
5
7
|
const isClaude46 = (modelId: string) => modelId.includes("-4-6");
|
|
@@ -63,6 +65,64 @@ export const bedrockClaudeReasoningMiddleware: LanguageModelMiddleware = {
|
|
|
63
65
|
},
|
|
64
66
|
};
|
|
65
67
|
|
|
68
|
+
function toBedrockCachePoint(modelId: string, cacheControl?: ChatCompletionsCacheControl) {
|
|
69
|
+
const out: { type: "default"; ttl?: string } = { type: "default" };
|
|
70
|
+
// Nova currently only supports 5m
|
|
71
|
+
if (cacheControl?.ttl && !modelId.includes("nova")) {
|
|
72
|
+
out.ttl = cacheControl.ttl;
|
|
73
|
+
}
|
|
74
|
+
return out;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
|
|
78
|
+
export const bedrockPromptCachingMiddleware: LanguageModelMiddleware = {
|
|
79
|
+
specificationVersion: "v3",
|
|
80
|
+
// eslint-disable-next-line require-await
|
|
81
|
+
transformParams: async ({ params, model }) => {
|
|
82
|
+
if (!model.modelId.includes("nova") && !model.modelId.includes("claude")) return params;
|
|
83
|
+
|
|
84
|
+
let hasExplicitCacheControl = false;
|
|
85
|
+
let lastCacheableBlock;
|
|
86
|
+
|
|
87
|
+
const processCacheControl = (providerOptions?: Record<string, any>) => {
|
|
88
|
+
if (!providerOptions) return;
|
|
89
|
+
|
|
90
|
+
const entryBedrock = providerOptions["bedrock"] as Record<string, unknown> | undefined;
|
|
91
|
+
const entryCacheControl = entryBedrock?.["cacheControl"] as ChatCompletionsCacheControl;
|
|
92
|
+
if (!entryBedrock || !entryCacheControl) return;
|
|
93
|
+
|
|
94
|
+
hasExplicitCacheControl = true;
|
|
95
|
+
entryBedrock["cachePoint"] = toBedrockCachePoint(model.modelId, entryCacheControl);
|
|
96
|
+
delete entryBedrock["cacheControl"];
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
for (const message of params.prompt) {
|
|
100
|
+
processCacheControl(message["providerOptions"]);
|
|
101
|
+
|
|
102
|
+
if (!Array.isArray(message["content"])) continue;
|
|
103
|
+
for (const part of message["content"]) {
|
|
104
|
+
processCacheControl(part["providerOptions"]);
|
|
105
|
+
}
|
|
106
|
+
lastCacheableBlock = message;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const bedrock = params.providerOptions?.["bedrock"];
|
|
110
|
+
const cacheControl = bedrock?.["cacheControl"] as ChatCompletionsCacheControl;
|
|
111
|
+
if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
|
|
112
|
+
((lastCacheableBlock["providerOptions"] ??= {})["bedrock"] ??= {})["cachePoint"] =
|
|
113
|
+
toBedrockCachePoint(model.modelId, cacheControl);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
delete bedrock?.["cacheControl"];
|
|
117
|
+
|
|
118
|
+
return params;
|
|
119
|
+
},
|
|
120
|
+
};
|
|
121
|
+
|
|
66
122
|
modelMiddlewareMatcher.useForProvider("amazon-bedrock", {
|
|
67
|
-
language: [
|
|
123
|
+
language: [
|
|
124
|
+
bedrockGptReasoningMiddleware,
|
|
125
|
+
bedrockClaudeReasoningMiddleware,
|
|
126
|
+
bedrockPromptCachingMiddleware,
|
|
127
|
+
],
|
|
68
128
|
});
|
package/src/telemetry/fetch.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { SpanKind } from "@opentelemetry/api";
|
|
1
|
+
import { SpanKind, type Attributes } from "@opentelemetry/api";
|
|
2
2
|
|
|
3
3
|
import type { TelemetrySignalLevel } from "../types";
|
|
4
4
|
|
|
5
|
-
import { withSpan } from "./span";
|
|
5
|
+
import { setSpanAttributes, withSpan } from "./span";
|
|
6
6
|
|
|
7
7
|
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
8
8
|
|
|
@@ -13,6 +13,21 @@ type GlobalFetchState = typeof globalThis & {
|
|
|
13
13
|
const g = globalThis as GlobalFetchState;
|
|
14
14
|
let fetchTracingEnabled = false;
|
|
15
15
|
|
|
16
|
+
const isRequest = (value: unknown): value is Request =>
|
|
17
|
+
typeof Request !== "undefined" && value instanceof Request;
|
|
18
|
+
|
|
19
|
+
const getRequestAttributes = (input: RequestInfo | URL, init?: RequestInit): Attributes => {
|
|
20
|
+
const attrs: Attributes = {
|
|
21
|
+
"http.request.method": init?.method ?? (isRequest(input) ? input.method : "GET"),
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
if (input instanceof URL) attrs["url.full"] = input.href;
|
|
25
|
+
else if (typeof input === "string") attrs["url.full"] = input;
|
|
26
|
+
else if (isRequest(input)) attrs["url.full"] = input.url;
|
|
27
|
+
|
|
28
|
+
return attrs;
|
|
29
|
+
};
|
|
30
|
+
|
|
16
31
|
const shouldTraceFetch = (init?: RequestInit): boolean =>
|
|
17
32
|
typeof (init?.headers as any)?.["user-agent"] === "string" &&
|
|
18
33
|
(init!.headers as any)["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
|
|
@@ -22,10 +37,22 @@ const otelFetch = (input: RequestInfo | URL, init?: RequestInit) => {
|
|
|
22
37
|
|
|
23
38
|
if (!fetchTracingEnabled) return original(input, init);
|
|
24
39
|
if (!shouldTraceFetch(init)) return original(input, init);
|
|
25
|
-
|
|
40
|
+
|
|
41
|
+
return withSpan(
|
|
42
|
+
"fetch",
|
|
43
|
+
async () => {
|
|
44
|
+
const response = await original(input, init);
|
|
45
|
+
setSpanAttributes({ "http.response.status_code": response.status });
|
|
46
|
+
return response;
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
kind: SpanKind.CLIENT,
|
|
50
|
+
attributes: getRequestAttributes(input, init),
|
|
51
|
+
},
|
|
52
|
+
);
|
|
26
53
|
};
|
|
27
54
|
|
|
28
|
-
export const
|
|
55
|
+
export const instrumentFetch = (level?: TelemetrySignalLevel) => {
|
|
29
56
|
fetchTracingEnabled = level === "full";
|
|
30
57
|
if (!fetchTracingEnabled) return;
|
|
31
58
|
if (g[ORIGINAL_FETCH_KEY]) return;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./fetch";
|
package/dist/config.d.ts
DELETED
package/dist/config.js
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
2
|
-
import { createDefaultLogger } from "./logger/default";
|
|
3
|
-
import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
|
|
4
|
-
import { kParsed, } from "./types";
|
|
5
|
-
export const parseConfig = (config) => {
|
|
6
|
-
// If it has been parsed before, just return.
|
|
7
|
-
if (kParsed in config)
|
|
8
|
-
return config;
|
|
9
|
-
const providers = config.providers ?? {};
|
|
10
|
-
const parsedProviders = {};
|
|
11
|
-
const models = config.models ?? {};
|
|
12
|
-
// Set the global logger instance.
|
|
13
|
-
if (config.logger === undefined) {
|
|
14
|
-
setLoggerInstance(createDefaultLogger({}));
|
|
15
|
-
}
|
|
16
|
-
else if (config.logger !== null) {
|
|
17
|
-
setLoggerInstance(isLogger(config.logger) ? config.logger : createDefaultLogger(config.logger));
|
|
18
|
-
logger.info(isLogger(config.logger)
|
|
19
|
-
? `[logger] custom logger configured`
|
|
20
|
-
: `[logger] logger configured: level=${config.logger.level}`);
|
|
21
|
-
}
|
|
22
|
-
// Strip providers that are not configured.
|
|
23
|
-
for (const id in providers) {
|
|
24
|
-
const provider = providers[id];
|
|
25
|
-
if (provider === undefined) {
|
|
26
|
-
logger.warn(`[config] ${id} provider removed (undefined)`);
|
|
27
|
-
continue;
|
|
28
|
-
}
|
|
29
|
-
parsedProviders[id] = provider;
|
|
30
|
-
}
|
|
31
|
-
if (Object.keys(parsedProviders).length === 0) {
|
|
32
|
-
throw new Error("No providers configured (config.providers is empty)");
|
|
33
|
-
}
|
|
34
|
-
// Strip providers that are not configured from models.
|
|
35
|
-
const parsedModels = {};
|
|
36
|
-
const warnings = new Set();
|
|
37
|
-
for (const id in models) {
|
|
38
|
-
const model = models[id];
|
|
39
|
-
const kept = [];
|
|
40
|
-
for (const p of model.providers) {
|
|
41
|
-
if (p in parsedProviders)
|
|
42
|
-
kept.push(p);
|
|
43
|
-
else
|
|
44
|
-
warnings.add(p);
|
|
45
|
-
}
|
|
46
|
-
if (kept.length > 0)
|
|
47
|
-
parsedModels[id] = { ...model, providers: kept };
|
|
48
|
-
}
|
|
49
|
-
for (const warning of warnings) {
|
|
50
|
-
logger.warn(`[config] ${warning} provider removed (not configured)`);
|
|
51
|
-
}
|
|
52
|
-
if (Object.keys(parsedModels).length === 0) {
|
|
53
|
-
throw new Error("No models configured (config.models is empty)");
|
|
54
|
-
}
|
|
55
|
-
// Default for the telemetry settings.
|
|
56
|
-
const telemetryEnabled = config.telemetry?.enabled ?? false;
|
|
57
|
-
const telemetrySignals = telemetryEnabled
|
|
58
|
-
? {
|
|
59
|
-
http: config.telemetry?.signals?.http ?? "recommended",
|
|
60
|
-
gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
|
|
61
|
-
hebo: config.telemetry?.signals?.hebo ?? "off",
|
|
62
|
-
}
|
|
63
|
-
: {
|
|
64
|
-
http: "off",
|
|
65
|
-
gen_ai: "off",
|
|
66
|
-
hebo: "off",
|
|
67
|
-
};
|
|
68
|
-
installAiSdkWarningLogger(telemetrySignals.gen_ai);
|
|
69
|
-
// Return parsed config.
|
|
70
|
-
return {
|
|
71
|
-
...config,
|
|
72
|
-
telemetry: {
|
|
73
|
-
...config.telemetry,
|
|
74
|
-
enabled: telemetryEnabled,
|
|
75
|
-
signals: telemetrySignals,
|
|
76
|
-
},
|
|
77
|
-
providers: parsedProviders,
|
|
78
|
-
models: parsedModels,
|
|
79
|
-
[kParsed]: true,
|
|
80
|
-
};
|
|
81
|
-
};
|