@hebo-ai/gateway 0.6.2-rc0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/endpoints/chat-completions/converters.js +26 -21
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/otel.js +1 -1
- package/dist/endpoints/chat-completions/schema.d.ts +4 -18
- package/dist/endpoints/chat-completions/schema.js +14 -17
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/embeddings/otel.js +5 -0
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/converters.js +3 -3
- package/dist/lifecycle.js +2 -2
- package/dist/logger/default.js +3 -3
- package/dist/logger/index.d.ts +2 -5
- package/dist/middleware/common.js +1 -0
- package/dist/middleware/utils.js +0 -3
- package/dist/models/amazon/middleware.js +8 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/catalog.js +5 -1
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/openai/middleware.js +13 -9
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.js +21 -23
- package/dist/providers/registry.js +3 -0
- package/dist/telemetry/fetch.js +7 -2
- package/dist/telemetry/gen-ai.js +15 -12
- package/dist/telemetry/memory.d.ts +1 -1
- package/dist/telemetry/memory.js +30 -14
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.js +30 -23
- package/dist/utils/env.js +4 -2
- package/dist/utils/preset.js +1 -0
- package/dist/utils/response.js +3 -1
- package/package.json +36 -50
- package/src/config.ts +0 -98
- package/src/endpoints/chat-completions/converters.test.ts +0 -631
- package/src/endpoints/chat-completions/converters.ts +0 -899
- package/src/endpoints/chat-completions/handler.test.ts +0 -391
- package/src/endpoints/chat-completions/handler.ts +0 -201
- package/src/endpoints/chat-completions/index.ts +0 -4
- package/src/endpoints/chat-completions/otel.test.ts +0 -315
- package/src/endpoints/chat-completions/otel.ts +0 -214
- package/src/endpoints/chat-completions/schema.ts +0 -364
- package/src/endpoints/embeddings/converters.ts +0 -51
- package/src/endpoints/embeddings/handler.test.ts +0 -133
- package/src/endpoints/embeddings/handler.ts +0 -137
- package/src/endpoints/embeddings/index.ts +0 -4
- package/src/endpoints/embeddings/otel.ts +0 -40
- package/src/endpoints/embeddings/schema.ts +0 -36
- package/src/endpoints/models/converters.ts +0 -56
- package/src/endpoints/models/handler.test.ts +0 -122
- package/src/endpoints/models/handler.ts +0 -37
- package/src/endpoints/models/index.ts +0 -3
- package/src/endpoints/models/schema.ts +0 -37
- package/src/errors/ai-sdk.ts +0 -99
- package/src/errors/gateway.ts +0 -17
- package/src/errors/openai.ts +0 -57
- package/src/errors/utils.ts +0 -47
- package/src/gateway.ts +0 -50
- package/src/index.ts +0 -19
- package/src/lifecycle.ts +0 -135
- package/src/logger/default.ts +0 -105
- package/src/logger/index.ts +0 -42
- package/src/middleware/common.test.ts +0 -215
- package/src/middleware/common.ts +0 -163
- package/src/middleware/debug.ts +0 -37
- package/src/middleware/matcher.ts +0 -161
- package/src/middleware/utils.ts +0 -34
- package/src/models/amazon/index.ts +0 -2
- package/src/models/amazon/middleware.test.ts +0 -133
- package/src/models/amazon/middleware.ts +0 -79
- package/src/models/amazon/presets.ts +0 -104
- package/src/models/anthropic/index.ts +0 -2
- package/src/models/anthropic/middleware.test.ts +0 -643
- package/src/models/anthropic/middleware.ts +0 -148
- package/src/models/anthropic/presets.ts +0 -191
- package/src/models/catalog.ts +0 -13
- package/src/models/cohere/index.ts +0 -2
- package/src/models/cohere/middleware.test.ts +0 -138
- package/src/models/cohere/middleware.ts +0 -76
- package/src/models/cohere/presets.ts +0 -186
- package/src/models/google/index.ts +0 -2
- package/src/models/google/middleware.test.ts +0 -298
- package/src/models/google/middleware.ts +0 -137
- package/src/models/google/presets.ts +0 -118
- package/src/models/meta/index.ts +0 -1
- package/src/models/meta/presets.ts +0 -143
- package/src/models/openai/index.ts +0 -2
- package/src/models/openai/middleware.test.ts +0 -189
- package/src/models/openai/middleware.ts +0 -103
- package/src/models/openai/presets.ts +0 -280
- package/src/models/types.ts +0 -114
- package/src/models/voyage/index.ts +0 -2
- package/src/models/voyage/middleware.test.ts +0 -28
- package/src/models/voyage/middleware.ts +0 -23
- package/src/models/voyage/presets.ts +0 -126
- package/src/providers/anthropic/canonical.ts +0 -17
- package/src/providers/anthropic/index.ts +0 -1
- package/src/providers/bedrock/canonical.ts +0 -87
- package/src/providers/bedrock/index.ts +0 -2
- package/src/providers/bedrock/middleware.test.ts +0 -303
- package/src/providers/bedrock/middleware.ts +0 -128
- package/src/providers/cohere/canonical.ts +0 -26
- package/src/providers/cohere/index.ts +0 -1
- package/src/providers/groq/canonical.ts +0 -21
- package/src/providers/groq/index.ts +0 -1
- package/src/providers/openai/canonical.ts +0 -16
- package/src/providers/openai/index.ts +0 -1
- package/src/providers/registry.test.ts +0 -44
- package/src/providers/registry.ts +0 -165
- package/src/providers/types.ts +0 -20
- package/src/providers/vertex/canonical.ts +0 -17
- package/src/providers/vertex/index.ts +0 -1
- package/src/providers/voyage/canonical.ts +0 -16
- package/src/providers/voyage/index.ts +0 -1
- package/src/telemetry/ai-sdk.ts +0 -46
- package/src/telemetry/baggage.ts +0 -27
- package/src/telemetry/fetch.ts +0 -62
- package/src/telemetry/gen-ai.ts +0 -113
- package/src/telemetry/http.ts +0 -62
- package/src/telemetry/index.ts +0 -1
- package/src/telemetry/memory.ts +0 -36
- package/src/telemetry/span.ts +0 -85
- package/src/telemetry/stream.ts +0 -64
- package/src/types.ts +0 -223
- package/src/utils/env.ts +0 -7
- package/src/utils/headers.ts +0 -27
- package/src/utils/preset.ts +0 -65
- package/src/utils/request.test.ts +0 -75
- package/src/utils/request.ts +0 -52
- package/src/utils/response.ts +0 -84
- package/src/utils/url.ts +0 -26
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
import type { CanonicalProviderId } from "../../providers/types";
|
|
2
|
-
import type { CanonicalModelId, CatalogModel } from "../types";
|
|
3
|
-
|
|
4
|
-
import { presetFor, type DeepPartial } from "../../utils/preset";
|
|
5
|
-
|
|
6
|
-
/**
 * Fields shared by the text-only Cohere Command presets in this file.
 * Individual presets spread this object first and then override or add fields.
 */
const COMMAND_BASE = {
  // Text in, text out.
  modalities: { input: ["text"] as const, output: ["text"] as const },
  capabilities: [
    "tool_call",
    "structured_output",
    "reasoning",
    "temperature",
  ] as const,
  providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
  knowledge: "2024-06",
} satisfies DeepPartial<CatalogModel>;
|
|
15
|
-
|
|
16
|
-
/**
 * Fields shared by Cohere Command presets that also accept image input.
 * Unlike `COMMAND_BASE`, the capability list here has no "tool_call" entry.
 */
const COMMAND_VISION_BASE = {
  // Text and image in, text out.
  modalities: { input: ["text", "image"] as const, output: ["text"] as const },
  capabilities: ["structured_output", "reasoning", "temperature"] as const,
  providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
  knowledge: "2024-06",
} satisfies DeepPartial<CatalogModel>;
|
|
25
|
-
|
|
26
|
-
/** Fields shared by the Cohere Embed v3 presets: text/image input, embedding output. */
const EMBED_V3_BASE = {
  modalities: { input: ["text", "image"] as const, output: ["embedding"] as const },
  providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
} satisfies DeepPartial<CatalogModel>;
|
|
33
|
-
|
|
34
|
-
/** Fields for the Cohere Embed v4 preset: text/image/pdf input, embedding output. */
const EMBED_V4_BASE = {
  modalities: {
    input: ["text", "image", "pdf"] as const,
    output: ["embedding"] as const,
  },
  providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
} satisfies DeepPartial<CatalogModel>;
|
|
41
|
-
|
|
42
|
-
// --- Command A family -------------------------------------------------------
// Every preset spreads its base object first, so the fields listed after the
// spread override or extend the shared defaults.

/** Preset for "cohere/command-a", built on `COMMAND_BASE`. */
export const commandA = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-a" as const,
  {
    ...COMMAND_BASE,
    name: "Cohere Command A",
    created: "2025-03-13",
    context: 256_000,
  } satisfies CatalogModel,
);

/** Preset for "cohere/command-a-reasoning", built on `COMMAND_BASE`. */
export const commandAReasoning = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-a-reasoning" as const,
  {
    ...COMMAND_BASE,
    name: "Cohere Command A Reasoning",
    created: "2025-08-21",
    context: 256_000,
  } satisfies CatalogModel,
);

/** Preset for "cohere/command-a-translate", built on `COMMAND_BASE`. */
export const commandATranslate = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-a-translate" as const,
  {
    ...COMMAND_BASE,
    name: "Cohere Command A Translate",
    created: "2025-08-28",
    context: 8_000,
  } satisfies CatalogModel,
);

/** Preset for "cohere/command-a-vision", built on `COMMAND_VISION_BASE` (image input). */
export const commandAVision = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-a-vision" as const,
  {
    ...COMMAND_VISION_BASE,
    name: "Cohere Command A Vision",
    created: "2025-07-31",
    context: 128_000,
  } satisfies CatalogModel,
);
|
|
78
|
-
|
|
79
|
-
// --- Command R family -------------------------------------------------------
// `commandR` and `commandRPlus` replace the base provider list with
// ["cohere", "bedrock"]; `commandR7b` keeps the provider list from `COMMAND_BASE`.

/** Preset for "cohere/command-r", built on `COMMAND_BASE`. */
export const commandR = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-r" as const,
  {
    ...COMMAND_BASE,
    name: "Cohere Command R",
    created: "2024-08-01",
    context: 128_000,
    providers: ["cohere", "bedrock"],
  } satisfies CatalogModel,
);

/** Preset for "cohere/command-r-plus", built on `COMMAND_BASE`. */
export const commandRPlus = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-r-plus" as const,
  {
    ...COMMAND_BASE,
    name: "Cohere Command R+",
    created: "2024-08-01",
    context: 128_000,
    providers: ["cohere", "bedrock"],
  } satisfies CatalogModel,
);

/** Preset for "cohere/command-r7b", built on `COMMAND_BASE`. */
export const commandR7b = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/command-r7b" as const,
  {
    ...COMMAND_BASE,
    name: "Cohere Command R7B",
    created: "2024-12-13",
    context: 128_000,
  } satisfies CatalogModel,
);
|
|
107
|
-
|
|
108
|
-
// --- Embedding presets ------------------------------------------------------
// Each preset spreads its base object and then sets `providers` explicitly:
// the "light" v3 variants list only "cohere", the others add "bedrock".

/** Preset for "cohere/embed-v4.0", built on `EMBED_V4_BASE`. */
export const embed4 = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/embed-v4.0" as const,
  {
    ...EMBED_V4_BASE,
    name: "Cohere 4 Embeddings",
    created: "2025-04-15",
    context: 128_000,
    providers: ["cohere", "bedrock"],
  } satisfies CatalogModel,
);

/** Preset for "cohere/embed-english-v3.0", built on `EMBED_V3_BASE`. */
export const embedEnglishV3 = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/embed-english-v3.0" as const,
  {
    ...EMBED_V3_BASE,
    name: "Cohere Embed English v3",
    created: "2024-02-07",
    context: 512,
    providers: ["cohere", "bedrock"],
  } satisfies CatalogModel,
);

/** Preset for "cohere/embed-english-light-v3.0", built on `EMBED_V3_BASE`. */
export const embedEnglishLightV3 = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/embed-english-light-v3.0" as const,
  {
    ...EMBED_V3_BASE,
    name: "Cohere Embed English Light v3",
    created: "2024-02-07",
    context: 512,
    providers: ["cohere"],
  } satisfies CatalogModel,
);

/** Preset for "cohere/embed-multilingual-v3.0", built on `EMBED_V3_BASE`. */
export const embedMultilingualV3 = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/embed-multilingual-v3.0" as const,
  {
    ...EMBED_V3_BASE,
    name: "Cohere Embed Multilingual v3",
    created: "2024-02-07",
    context: 512,
    providers: ["cohere", "bedrock"],
  } satisfies CatalogModel,
);

/** Preset for "cohere/embed-multilingual-light-v3.0", built on `EMBED_V3_BASE`. */
export const embedMultilingualLightV3 = presetFor<CanonicalModelId, CatalogModel>()(
  "cohere/embed-multilingual-light-v3.0" as const,
  {
    ...EMBED_V3_BASE,
    name: "Cohere Embed Multilingual Light v3",
    created: "2024-02-07",
    context: 512,
    providers: ["cohere"],
  } satisfies CatalogModel,
);
|
|
159
|
-
|
|
160
|
-
/** Command presets bucketed by family; key order (A, then R) fixes the order of `command.all`. */
const commandAtomic = {
  A: [commandA, commandAReasoning, commandATranslate, commandAVision],
  R: [commandR, commandRPlus, commandR7b],
} as const;

/** Cross-family groupings of Command presets; currently none are defined. */
const commandGroups = {} as const;

/**
 * Public lookup of Cohere Command presets: the family buckets, any groups,
 * `latest` (only `commandA`), and `all` (every bucket flattened in key order).
 */
export const command = {
  ...commandAtomic,
  ...commandGroups,
  latest: [commandA],
  all: Object.values(commandAtomic).flat(),
} as const;
|
|
173
|
-
|
|
174
|
-
/** Embed presets bucketed by version; key order (v4, then v3) fixes the order of `embed.all`. */
const embedAtomic = {
  v4: [embed4],
  v3: [
    embedEnglishV3,
    embedEnglishLightV3,
    embedMultilingualV3,
    embedMultilingualLightV3,
  ],
} as const;

/** Cross-version groupings of Embed presets; currently none are defined. */
const embedGroups = {} as const;

/**
 * Public lookup of Cohere Embed presets: the version buckets, any groups,
 * `latest` (only `embed4`), and `all` (every bucket flattened in key order).
 */
export const embed = {
  ...embedAtomic,
  ...embedGroups,
  latest: [embed4],
  all: Object.values(embedAtomic).flat(),
} as const;
|
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
import { MockLanguageModelV3 } from "ai/test";
|
|
2
|
-
import { expect, test } from "bun:test";
|
|
3
|
-
|
|
4
|
-
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
5
|
-
import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
|
|
6
|
-
import { CANONICAL_MODEL_IDS } from "../../models/types";
|
|
7
|
-
import {
|
|
8
|
-
geminiDimensionsMiddleware,
|
|
9
|
-
geminiPromptCachingMiddleware,
|
|
10
|
-
geminiReasoningMiddleware,
|
|
11
|
-
} from "./middleware";
|
|
12
|
-
|
|
13
|
-
// Gemini 2.x / 3.x ids must resolve to both the reasoning and the prompt-caching
// middleware; Gemini 1.5 ids must not resolve to the reasoning middleware.
test("geminiReasoningMiddleware > matching patterns", () => {
  const resolveFor = (modelId: string) =>
    modelMiddlewareMatcher.resolve({ kind: "text", modelId });

  const expectedMatches = [
    "google/gemini-2.5-flash-lite",
    "google/gemini-2.5-flash",
    "google/gemini-2.5-pro",
    "google/gemini-3-flash-preview",
    "google/gemini-3.1-flash-lite-preview",
    "google/gemini-3.1-pro-preview",
  ] satisfies (typeof CANONICAL_MODEL_IDS)[number][];

  const expectedMisses = ["google/gemini-1.5-pro", "google/gemini-1.5-flash"];

  for (const modelId of expectedMatches) {
    const resolved = resolveFor(modelId);
    expect(resolved).toContain(geminiReasoningMiddleware);
    expect(resolved).toContain(geminiPromptCachingMiddleware);
  }

  for (const modelId of expectedMisses) {
    expect(resolveFor(modelId)).not.toContain(geminiReasoningMiddleware);
  }
});
|
|
36
|
-
|
|
37
|
-
// The normalized `cached_content` option must move from the `unknown` bucket to
// `google.cachedContent`, leaving the `unknown` bucket empty.
test("geminiPromptCachingMiddleware > should map normalized cached_content", async () => {
  const callOptions = {
    prompt: [],
    providerOptions: {
      unknown: { cached_content: "cachedContents/reusable" },
    },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-2.5-flash" });

  const transformed = await geminiPromptCachingMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  expect(transformed.providerOptions).toEqual({
    google: { cachedContent: "cachedContents/reusable" },
    unknown: {},
  });
});
|
|
60
|
-
|
|
61
|
-
// Only the Gemini embedding id may resolve to the dimensions middleware.
test("geminiDimensionsMiddleware > matching patterns", () => {
  const resolveFor = (modelId: string) =>
    modelMiddlewareMatcher.resolve({ kind: "embedding", modelId });

  const expectedMatches = ["google/gemini-embedding-001"];
  const expectedMisses = [
    "google/gemini-3-flash-preview",
    "google/embedding-001",
  ] satisfies (typeof CANONICAL_MODEL_IDS)[number][];

  for (const modelId of expectedMatches) {
    expect(resolveFor(modelId)).toContain(geminiDimensionsMiddleware);
  }

  for (const modelId of expectedMisses) {
    expect(resolveFor(modelId)).not.toContain(geminiDimensionsMiddleware);
  }
});
|
|
78
|
-
|
|
79
|
-
// Gemini 3 Flash with an explicit "medium" effort: the effort becomes
// `thinkingLevel` and unrelated call options (maxOutputTokens) are left alone.
test("geminiReasoningMiddleware > should enable thinking for Gemini 3 Flash effort", async () => {
  const reasoning = { enabled: true, effort: "medium" };
  const callOptions = {
    prompt: [],
    maxOutputTokens: 2000,
    providerOptions: { unknown: { reasoning } },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-3-flash-preview" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const expectedThinking = { includeThoughts: true, thinkingLevel: "medium" };
  expect(transformed).toEqual({
    prompt: [],
    maxOutputTokens: 2000,
    providerOptions: {
      google: { thinkingConfig: expectedThinking },
      unknown: {},
    },
  });
});
|
|
110
|
-
|
|
111
|
-
// Gemini 3.1 Pro passes a "medium" effort through unchanged as `thinkingLevel`.
test("geminiReasoningMiddleware > should map medium effort for Gemini 3.1 Pro", async () => {
  const reasoning = { enabled: true, effort: "medium" };
  const callOptions = {
    prompt: [],
    providerOptions: { unknown: { reasoning } },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-3.1-pro-preview" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const expectedThinking = { includeThoughts: true, thinkingLevel: "medium" };
  expect(transformed).toEqual({
    prompt: [],
    providerOptions: {
      google: { thinkingConfig: expectedThinking },
      unknown: {},
    },
  });
});
|
|
140
|
-
|
|
141
|
-
// Gemini 3.1 Flash Lite: a "none" effort is normalized to the "minimal" level
// and thoughts are not included because reasoning is disabled.
test("geminiReasoningMiddleware > should normalize none effort for Gemini 3.1 Flash Lite", async () => {
  const reasoning = { enabled: false, effort: "none" };
  const callOptions = {
    prompt: [],
    providerOptions: { unknown: { reasoning } },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-3.1-flash-lite-preview" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const expectedThinking = { includeThoughts: false, thinkingLevel: "minimal" };
  expect(transformed).toEqual({
    prompt: [],
    providerOptions: {
      google: { thinkingConfig: expectedThinking },
      unknown: {},
    },
  });
});
|
|
170
|
-
|
|
171
|
-
// Gemini 2.x uses a token budget instead of a level: the expected budget is the
// shared helper's result for "medium" effort against the 65536-token default.
test("geminiReasoningMiddleware > should use budget for Gemini 2", async () => {
  const reasoning = { enabled: true, effort: "medium" };
  const callOptions = {
    prompt: [],
    providerOptions: { unknown: { reasoning } },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-2.5-flash" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const expectedThinking = {
    includeThoughts: true,
    thinkingBudget: calculateReasoningBudgetFromEffort("medium", 65536),
  };
  expect(transformed).toEqual({
    prompt: [],
    providerOptions: {
      google: { thinkingConfig: expectedThinking },
      unknown: {},
    },
  });
});
|
|
200
|
-
|
|
201
|
-
// Disabled reasoning on Gemini 3 Flash: level drops to "minimal" and thoughts
// are excluded from the response.
test("geminiReasoningMiddleware > should handle disabled reasoning", async () => {
  const reasoning = { enabled: false, effort: "none" };
  const callOptions = {
    prompt: [],
    providerOptions: { unknown: { reasoning } },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-3-flash-preview" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const expectedThinking = { includeThoughts: false, thinkingLevel: "minimal" };
  expect(transformed).toEqual({
    prompt: [],
    providerOptions: {
      google: { thinkingConfig: expectedThinking },
      unknown: {},
    },
  });
});
|
|
230
|
-
|
|
231
|
-
// Reasoning enabled without an effort on Gemini 3 Flash: no `thinkingLevel` is
// written, only `includeThoughts`.
test("geminiReasoningMiddleware > should default reasoning effort for Gemini 3 Flash", async () => {
  const reasoning = { enabled: true };
  const callOptions = {
    prompt: [],
    providerOptions: { unknown: { reasoning } },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-3-flash-preview" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  expect(transformed).toEqual({
    prompt: [],
    providerOptions: {
      google: { thinkingConfig: { includeThoughts: true } },
      unknown: {},
    },
  });
});
|
|
259
|
-
|
|
260
|
-
// Gemini 2.5 Pro keeps a thinking budget of 128 even when reasoning is disabled.
test("geminiReasoningMiddleware > Gemini 2.5 Pro should have minimum budget even if reasoning is disabled", async () => {
  const callOptions = {
    prompt: [],
    providerOptions: {
      unknown: { reasoning: { enabled: false } },
    },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-2.5-pro" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  expect(transformed.providerOptions?.google?.thinkingConfig?.thinkingBudget).toBe(128);
  expect(transformed.providerOptions?.google?.thinkingConfig?.includeThoughts).toBe(false);
});
|
|
279
|
-
|
|
280
|
-
// Gemini 2.0 Flash has no enforced minimum: disabled reasoning yields a budget of 0.
test("geminiReasoningMiddleware > Gemini 2.0 Flash should NOT have forced minimum budget", async () => {
  const callOptions = {
    prompt: [],
    providerOptions: {
      unknown: { reasoning: { enabled: false, effort: "none" } },
    },
  };
  const model = new MockLanguageModelV3({ modelId: "google/gemini-2.0-flash" });

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  expect(transformed.providerOptions?.google?.thinkingConfig?.thinkingBudget).toBe(0);
  expect(transformed.providerOptions?.google?.thinkingConfig?.includeThoughts).toBe(false);
});
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
|
|
2
|
-
|
|
3
|
-
import type {
|
|
4
|
-
ChatCompletionsReasoningConfig,
|
|
5
|
-
ChatCompletionsReasoningEffort,
|
|
6
|
-
} from "../../endpoints/chat-completions/schema";
|
|
7
|
-
|
|
8
|
-
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
9
|
-
import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
|
|
10
|
-
|
|
11
|
-
/**
 * Embedding middleware that renames the OpenAI-style `dimensions` option to
 * Google's `outputDimensionality`.
 *
 * Reads `providerOptions.unknown.dimensions`; when it is truthy, writes it to
 * `providerOptions.google.outputDimensionality` (creating the `google` bucket
 * if needed) and removes the original key. `params` is mutated in place and
 * returned. A missing `unknown` bucket or a falsy value leaves `params` untouched.
 */
export const geminiDimensionsMiddleware: EmbeddingModelMiddleware = {
  specificationVersion: "v3",
  // oxlint-disable-next-line require-await
  transformParams: async ({ params }) => {
    const options = params.providerOptions;
    const generic = options?.["unknown"];
    if (!options || !generic) return params;

    const requested = generic["dimensions"] as number;
    if (!requested) return params;

    const google = (options["google"] ??= {});
    google["outputDimensionality"] = requested;
    delete generic["dimensions"];

    return params;
  },
};
|
|
28
|
-
|
|
29
|
-
// https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
/**
 * Maps a gateway reasoning effort onto the effort value used for the given
 * Gemini model id.
 *
 * - ids containing "gemini-3.1-pro": none/minimal/low -> "low", medium -> "medium",
 *   high/xhigh/max -> "high"
 * - ids containing "gemini-3-flash" or "gemini-3.1-flash": none/minimal -> "minimal",
 *   low -> "low", medium -> "medium", high/xhigh/max -> "high"
 * - any other id, or an effort outside the list above: returned unchanged
 *
 * @param effort  Requested reasoning effort.
 * @param modelId Model id; matched by substring.
 * @returns The mapped effort, or `effort` itself when no mapping applies.
 */
export function mapGeminiReasoningEffort(
  effort: ChatCompletionsReasoningEffort,
  modelId: string,
): ChatCompletionsReasoningEffort | undefined {
  // Both model families share the same ceiling ("high") and pass-through values
  // ("low", "medium"); they differ only in the lowest level they accept.
  const clampTo = (
    lowest: ChatCompletionsReasoningEffort,
  ): ChatCompletionsReasoningEffort | undefined => {
    if (effort === "none" || effort === "minimal") return lowest;
    if (effort === "low" || effort === "medium") return effort;
    if (effort === "high" || effort === "xhigh" || effort === "max") return "high";
    return undefined; // unrecognized effort: let the caller fall through
  };

  if (modelId.includes("gemini-3.1-pro")) {
    const level = clampTo("low");
    if (level !== undefined) return level;
  }

  const isGemini3Flash =
    modelId.includes("gemini-3-flash") || modelId.includes("gemini-3.1-flash");
  if (isGemini3Flash) {
    const level = clampTo("minimal");
    if (level !== undefined) return level;
  }

  return effort;
}
|
|
67
|
-
|
|
68
|
-
/** Output-token figure used to size the thinking budget when the request sets no `maxOutputTokens`. */
export const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
/** Minimum passed to the budget calculation for model ids containing "gemini-2.5-pro". */
export const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;

/**
 * Language-model middleware that translates the gateway's `reasoning` option
 * (read from `providerOptions.unknown.reasoning`) into Google's `thinkingConfig`
 * under `providerOptions.google`.
 *
 * - Model ids containing "gemini-2": writes `thinkingBudget`, taken from
 *   `reasoning.max_tokens` or else computed from `reasoning.effort` (default
 *   "none") and the output-token limit. When reasoning is enabled but neither
 *   `max_tokens` nor `effort` is given, no budget is written, so the request
 *   does not carry the "none"-effort budget while asking for thoughts. This
 *   mirrors the Gemini 3 branch, which also writes nothing without an effort.
 * - Model ids containing "gemini-3" with an effort: writes `thinkingLevel`
 *   via `mapGeminiReasoningEffort`.
 * - Always: sets `includeThoughts` (true only when reasoning is enabled and
 *   not excluded) and removes `reasoning` from the `unknown` bucket.
 *
 * `params` is mutated in place and returned. Requests without an `unknown`
 * bucket or without a `reasoning` entry are returned untouched.
 */
export const geminiReasoningMiddleware: LanguageModelMiddleware = {
  specificationVersion: "v3",
  // oxlint-disable-next-line require-await
  transformParams: async ({ params, model }) => {
    const unknown = params.providerOptions?.["unknown"];
    if (!unknown) return params;

    const reasoning = unknown["reasoning"] as ChatCompletionsReasoningConfig | undefined;
    if (!reasoning) return params;

    const target = (params.providerOptions!["google"] ??= {});
    const modelId = model.modelId;

    if (modelId.includes("gemini-2")) {
      // Enabled with no sizing hint: leave the budget unset instead of falling
      // back to the "none" effort.
      const hasSizingHint = reasoning.max_tokens != null || reasoning.effort != null;

      if (hasSizingHint || !reasoning.enabled) {
        const is25Pro = modelId.includes("gemini-2.5-pro");

        target["thinkingConfig"] = {
          thinkingBudget:
            reasoning.max_tokens ??
            calculateReasoningBudgetFromEffort(
              reasoning.effort ?? "none",
              params.maxOutputTokens ?? GEMINI_DEFAULT_MAX_OUTPUT_TOKENS,
              is25Pro ? GEMINI_2_5_PRO_MIN_THINKING_BUDGET : 0,
            ),
        };
      }
    } else if (modelId.includes("gemini-3") && reasoning.effort) {
      target["thinkingConfig"] = {
        thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
      };
      // FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
    }

    // Create thinkingConfig if no branch above did, then record whether
    // thoughts should be returned.
    ((target["thinkingConfig"] ??= {}) as Record<string, unknown>)["includeThoughts"] =
      reasoning.enabled ? !reasoning.exclude : false;

    delete unknown["reasoning"];

    return params;
  },
};
|
|
111
|
-
|
|
112
|
-
// https://ai.google.dev/gemini-api/docs/caching
// FUTURE: auto-create cached_content for message-level cache_control blocks
/**
 * Language-model middleware that renames the normalized `cached_content`
 * option to Google's `cachedContent`.
 *
 * Reads `providerOptions.unknown.cached_content`; a truthy value is copied to
 * `providerOptions.google.cachedContent` (creating the `google` bucket if
 * needed). The `cached_content` key is always removed from the `unknown`
 * bucket, even when its value was falsy. `params` is mutated in place and
 * returned; requests without an `unknown` bucket are returned untouched.
 */
export const geminiPromptCachingMiddleware: LanguageModelMiddleware = {
  specificationVersion: "v3",
  // oxlint-disable-next-line require-await
  transformParams: async ({ params }) => {
    const options = params.providerOptions;
    const generic = options?.["unknown"];
    if (!options || !generic) return params;

    const cacheName = generic["cached_content"] as string | undefined;
    delete generic["cached_content"];

    if (cacheName) {
      const google = (options["google"] ??= {});
      google["cachedContent"] = cacheName;
    }

    return params;
  },
};
|
|
130
|
-
|
|
131
|
-
// Module-level registration: importing this file attaches the middleware above
// to the shared matcher.

// Gemini embedding ids get the `dimensions` -> `outputDimensionality` rename.
modelMiddlewareMatcher.useForModel("google/gemini-*embedding-*", {
  embedding: [geminiDimensionsMiddleware],
});

// Gemini 2.x and 3.x ids get reasoning translation followed by prompt caching.
modelMiddlewareMatcher.useForModel(
  ["google/gemini-2*", "google/gemini-3*"],
  {
    language: [geminiReasoningMiddleware, geminiPromptCachingMiddleware],
  },
);
|