@hebo-ai/gateway 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +8 -6
  2. package/dist/endpoints/chat-completions/converters.d.ts +3 -1
  3. package/dist/endpoints/chat-completions/converters.js +121 -90
  4. package/dist/endpoints/chat-completions/otel.js +7 -0
  5. package/dist/endpoints/chat-completions/schema.d.ts +400 -76
  6. package/dist/endpoints/chat-completions/schema.js +80 -36
  7. package/dist/endpoints/embeddings/schema.d.ts +1 -1
  8. package/dist/endpoints/embeddings/schema.js +1 -1
  9. package/dist/errors/gateway.js +1 -0
  10. package/dist/logger/default.d.ts +0 -1
  11. package/dist/logger/default.js +30 -6
  12. package/dist/middleware/utils.js +1 -0
  13. package/dist/models/amazon/middleware.js +1 -0
  14. package/dist/models/anthropic/middleware.d.ts +2 -0
  15. package/dist/models/anthropic/middleware.js +77 -16
  16. package/dist/models/google/middleware.js +17 -0
  17. package/dist/models/google/presets.d.ts +387 -0
  18. package/dist/models/google/presets.js +9 -2
  19. package/dist/models/openai/middleware.js +1 -0
  20. package/dist/models/types.d.ts +1 -1
  21. package/dist/models/types.js +1 -0
  22. package/dist/providers/bedrock/index.d.ts +1 -0
  23. package/dist/providers/bedrock/index.js +1 -0
  24. package/dist/providers/bedrock/middleware.d.ts +2 -0
  25. package/dist/providers/bedrock/middleware.js +35 -0
  26. package/package.json +19 -21
  27. package/src/endpoints/chat-completions/converters.test.ts +219 -0
  28. package/src/endpoints/chat-completions/converters.ts +144 -104
  29. package/src/endpoints/chat-completions/handler.test.ts +87 -0
  30. package/src/endpoints/chat-completions/otel.ts +6 -0
  31. package/src/endpoints/chat-completions/schema.ts +85 -43
  32. package/src/endpoints/embeddings/schema.ts +1 -1
  33. package/src/errors/gateway.ts +2 -0
  34. package/src/logger/default.ts +34 -8
  35. package/src/middleware/utils.ts +1 -0
  36. package/src/models/amazon/middleware.ts +1 -0
  37. package/src/models/anthropic/middleware.test.ts +332 -1
  38. package/src/models/anthropic/middleware.ts +83 -19
  39. package/src/models/google/middleware.test.ts +31 -0
  40. package/src/models/google/middleware.ts +18 -0
  41. package/src/models/google/presets.ts +13 -2
  42. package/src/models/openai/middleware.ts +1 -0
  43. package/src/models/types.ts +1 -0
  44. package/src/providers/bedrock/index.ts +1 -0
  45. package/src/providers/bedrock/middleware.test.ts +73 -0
  46. package/src/providers/bedrock/middleware.ts +43 -0
@@ -8,6 +8,7 @@ import { claudeReasoningMiddleware } from "./middleware";
8
8
  test("claudeReasoningMiddleware > matching patterns", () => {
9
9
  const matching = [
10
10
  "anthropic/claude-opus-4.6",
11
+ "anthropic/claude-sonnet-4.6",
11
12
  "anthropic/claude-sonnet-3.7",
12
13
  "anthropic/claude-opus-4.5",
13
14
  "anthropic/claude-sonnet-4.5",
@@ -124,7 +125,7 @@ test("claudeReasoningMiddleware > should transform reasoning object to thinking
124
125
  anthropic: {
125
126
  thinking: {
126
127
  type: "enabled",
127
- budgetTokens: 2000,
128
+ budgetTokens: 32000,
128
129
  },
129
130
  },
130
131
  unknown: {},
@@ -262,3 +263,333 @@ test("claudeReasoningMiddleware > should clamp max_tokens for Opus 4", async ()
262
263
 
263
264
  expect(result.providerOptions?.anthropic?.thinking?.budgetTokens).toBe(32000);
264
265
  });
266
+
267
+ test("claudeReasoningMiddleware > should pass through max effort for Claude 4.6", async () => {
268
+ const params = {
269
+ prompt: [],
270
+ providerOptions: {
271
+ unknown: {
272
+ reasoning: {
273
+ enabled: true,
274
+ effort: "max",
275
+ },
276
+ },
277
+ },
278
+ };
279
+
280
+ const result = await claudeReasoningMiddleware.transformParams!({
281
+ type: "generate",
282
+ params,
283
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
284
+ });
285
+
286
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
287
+ type: "adaptive",
288
+ });
289
+ expect(result.providerOptions?.anthropic?.effort).toBe("max");
290
+ });
291
+
292
+ test("claudeReasoningMiddleware > should map xhigh effort to max for Claude Opus 4.6", async () => {
293
+ const params = {
294
+ prompt: [],
295
+ providerOptions: {
296
+ unknown: {
297
+ reasoning: {
298
+ enabled: true,
299
+ effort: "xhigh",
300
+ },
301
+ },
302
+ },
303
+ };
304
+
305
+ const result = await claudeReasoningMiddleware.transformParams!({
306
+ type: "generate",
307
+ params,
308
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
309
+ });
310
+
311
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
312
+ type: "adaptive",
313
+ });
314
+ expect(result.providerOptions?.anthropic?.effort).toBe("max");
315
+ });
316
+
317
+ test("claudeReasoningMiddleware > should map max effort to high for Claude Sonnet 4.6", async () => {
318
+ const params = {
319
+ prompt: [],
320
+ providerOptions: {
321
+ unknown: {
322
+ reasoning: {
323
+ enabled: true,
324
+ effort: "max",
325
+ },
326
+ },
327
+ },
328
+ };
329
+
330
+ const result = await claudeReasoningMiddleware.transformParams!({
331
+ type: "generate",
332
+ params,
333
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
334
+ });
335
+
336
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
337
+ type: "adaptive",
338
+ });
339
+ expect(result.providerOptions?.anthropic?.effort).toBe("high");
340
+ });
341
+
342
+ test("claudeReasoningMiddleware > should map minimal effort to low for Claude Sonnet 4.6", async () => {
343
+ const params = {
344
+ prompt: [],
345
+ providerOptions: {
346
+ unknown: {
347
+ reasoning: {
348
+ enabled: true,
349
+ effort: "minimal",
350
+ },
351
+ },
352
+ },
353
+ };
354
+
355
+ const result = await claudeReasoningMiddleware.transformParams!({
356
+ type: "generate",
357
+ params,
358
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
359
+ });
360
+
361
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
362
+ type: "adaptive",
363
+ });
364
+ expect(result.providerOptions?.anthropic?.effort).toBe("low");
365
+ });
366
+
367
+ test("claudeReasoningMiddleware > should use manual thinking for Claude Sonnet 4.6 when max_tokens is provided", async () => {
368
+ const params = {
369
+ prompt: [],
370
+ providerOptions: {
371
+ unknown: {
372
+ reasoning: {
373
+ enabled: true,
374
+ effort: "medium",
375
+ max_tokens: 2000,
376
+ },
377
+ },
378
+ },
379
+ };
380
+
381
+ const result = await claudeReasoningMiddleware.transformParams!({
382
+ type: "generate",
383
+ params,
384
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" }),
385
+ });
386
+
387
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
388
+ type: "enabled",
389
+ budgetTokens: 2000,
390
+ });
391
+ expect(result.providerOptions?.anthropic?.effort).toBe("medium");
392
+ });
393
+
394
+ test("claudeReasoningMiddleware > should map none effort to low for Claude Sonnet 4.5", async () => {
395
+ const params = {
396
+ prompt: [],
397
+ providerOptions: {
398
+ unknown: {
399
+ reasoning: {
400
+ enabled: true,
401
+ effort: "none",
402
+ },
403
+ },
404
+ },
405
+ };
406
+
407
+ const result = await claudeReasoningMiddleware.transformParams!({
408
+ type: "generate",
409
+ params,
410
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" }),
411
+ });
412
+
413
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
414
+ type: "enabled",
415
+ });
416
+ expect(result.providerOptions?.anthropic?.effort).toBe("low");
417
+ });
418
+
419
+ test("claudeReasoningMiddleware > should include effort and max_tokens for Claude 4.6", async () => {
420
+ const params = {
421
+ prompt: [],
422
+ providerOptions: {
423
+ unknown: {
424
+ reasoning: {
425
+ enabled: true,
426
+ effort: "medium",
427
+ max_tokens: 2000,
428
+ },
429
+ },
430
+ },
431
+ };
432
+
433
+ const result = await claudeReasoningMiddleware.transformParams!({
434
+ type: "generate",
435
+ params,
436
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
437
+ });
438
+
439
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
440
+ type: "adaptive",
441
+ budgetTokens: 2000,
442
+ });
443
+ expect(result.providerOptions?.anthropic?.effort).toBe("medium");
444
+ });
445
+
446
+ test("claudeReasoningMiddleware > should clamp max_tokens to 128k for Claude Opus 4.6", async () => {
447
+ const params = {
448
+ prompt: [],
449
+ providerOptions: {
450
+ unknown: {
451
+ reasoning: {
452
+ enabled: true,
453
+ effort: "medium",
454
+ max_tokens: 200000,
455
+ },
456
+ },
457
+ },
458
+ };
459
+
460
+ const result = await claudeReasoningMiddleware.transformParams!({
461
+ type: "generate",
462
+ params,
463
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
464
+ });
465
+
466
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
467
+ type: "adaptive",
468
+ budgetTokens: 128000,
469
+ });
470
+ expect(result.providerOptions?.anthropic?.effort).toBe("medium");
471
+ });
472
+
473
+ test("claudeReasoningMiddleware > should include effort and max_tokens for Claude Sonnet 4.5", async () => {
474
+ const params = {
475
+ prompt: [],
476
+ providerOptions: {
477
+ unknown: {
478
+ reasoning: {
479
+ enabled: true,
480
+ effort: "medium",
481
+ max_tokens: 2000,
482
+ },
483
+ },
484
+ },
485
+ };
486
+
487
+ const result = await claudeReasoningMiddleware.transformParams!({
488
+ type: "generate",
489
+ params,
490
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" }),
491
+ });
492
+
493
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
494
+ type: "enabled",
495
+ budgetTokens: 2000,
496
+ });
497
+ expect(result.providerOptions?.anthropic?.effort).toBe("medium");
498
+ });
499
+
500
+ test("claudeReasoningMiddleware > should map max effort to high for Claude Sonnet 4.5", async () => {
501
+ const params = {
502
+ prompt: [],
503
+ providerOptions: {
504
+ unknown: {
505
+ reasoning: {
506
+ enabled: true,
507
+ effort: "max",
508
+ },
509
+ },
510
+ },
511
+ };
512
+
513
+ const result = await claudeReasoningMiddleware.transformParams!({
514
+ type: "generate",
515
+ params,
516
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" }),
517
+ });
518
+
519
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
520
+ type: "enabled",
521
+ });
522
+ expect(result.providerOptions?.anthropic?.effort).toBe("high");
523
+ });
524
+
525
+ test("claudeReasoningMiddleware > should map xhigh effort to high for Claude Sonnet 4.5", async () => {
526
+ const params = {
527
+ prompt: [],
528
+ providerOptions: {
529
+ unknown: {
530
+ reasoning: {
531
+ enabled: true,
532
+ effort: "xhigh",
533
+ },
534
+ },
535
+ },
536
+ };
537
+
538
+ const result = await claudeReasoningMiddleware.transformParams!({
539
+ type: "generate",
540
+ params,
541
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" }),
542
+ });
543
+
544
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
545
+ type: "enabled",
546
+ });
547
+ expect(result.providerOptions?.anthropic?.effort).toBe("high");
548
+ });
549
+
550
+ test("claudeReasoningMiddleware > should keep xhigh as budget for non-4.6 models", async () => {
551
+ const params = {
552
+ prompt: [],
553
+ providerOptions: {
554
+ unknown: {
555
+ reasoning: {
556
+ enabled: true,
557
+ effort: "xhigh",
558
+ },
559
+ },
560
+ },
561
+ };
562
+
563
+ const result = await claudeReasoningMiddleware.transformParams!({
564
+ type: "generate",
565
+ params,
566
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4" }),
567
+ });
568
+
569
+ expect(result.providerOptions?.anthropic?.thinking?.budgetTokens).toBe(60800);
570
+ });
571
+
572
+ test("claudeReasoningMiddleware > should map xhigh effort for Claude Opus 4.5 without default budget", async () => {
573
+ const params = {
574
+ prompt: [],
575
+ providerOptions: {
576
+ unknown: {
577
+ reasoning: {
578
+ enabled: true,
579
+ effort: "xhigh",
580
+ },
581
+ },
582
+ },
583
+ };
584
+
585
+ const result = await claudeReasoningMiddleware.transformParams!({
586
+ type: "generate",
587
+ params,
588
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.5" }),
589
+ });
590
+
591
+ expect(result.providerOptions?.anthropic?.thinking).toEqual({
592
+ type: "enabled",
593
+ });
594
+ expect(result.providerOptions?.anthropic?.effort).toBe("high");
595
+ });
@@ -1,21 +1,66 @@
1
1
  import type { LanguageModelMiddleware } from "ai";
2
2
 
3
- import type { ChatCompletionsReasoningConfig } from "../../endpoints/chat-completions/schema";
3
+ import type {
4
+ ChatCompletionsReasoningConfig,
5
+ ChatCompletionsReasoningEffort,
6
+ } from "../../endpoints/chat-completions/schema";
4
7
 
5
8
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
6
9
  import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
7
10
 
8
- const CLAUDE_MAX_OUTPUT_TOKENS = 64000;
9
- const CLAUDE_OPUS_4_MAX_OUTPUT_TOKENS = 32000;
11
+ const isClaude = (family: "opus" | "sonnet" | "haiku", version: string) => {
12
+ const dashed = version.replace(".", "-");
10
13
 
11
- function getMaxOutputTokens(modelId: string): number {
12
- if (!modelId.includes("opus-4")) return CLAUDE_MAX_OUTPUT_TOKENS;
13
- if (modelId.includes("opus-4.5") || modelId.includes("opus-4-5")) {
14
- return CLAUDE_MAX_OUTPUT_TOKENS;
14
+ return (modelId: string) =>
15
+ modelId.includes(`claude-${family}-${version}`) ||
16
+ modelId.includes(`claude-${family}-${dashed}`);
17
+ };
18
+
19
+ const isOpus46 = isClaude("opus", "4.6");
20
+ const isOpus45 = isClaude("opus", "4.5");
21
+ const isOpus4 = isClaude("opus", "4");
22
+ const isSonnet46 = isClaude("sonnet", "4.6");
23
+ const isSonnet45 = isClaude("sonnet", "4.5");
24
+
25
+ export function mapClaudeReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string) {
26
+ if (isOpus46(modelId)) {
27
+ switch (effort) {
28
+ case "none":
29
+ case "minimal":
30
+ case "low":
31
+ return "low";
32
+ case "medium":
33
+ return "medium";
34
+ case "high":
35
+ return "high";
36
+ case "xhigh":
37
+ case "max":
38
+ return "max";
39
+ }
40
+ }
41
+
42
+ switch (effort) {
43
+ case "none":
44
+ case "minimal":
45
+ case "low":
46
+ return "low";
47
+ case "medium":
48
+ return "medium";
49
+ case "high":
50
+ case "xhigh":
51
+ case "max":
52
+ return "high";
15
53
  }
16
- return CLAUDE_OPUS_4_MAX_OUTPUT_TOKENS;
17
54
  }
18
55
 
56
+ function getMaxOutputTokens(modelId: string): number {
57
+ if (isOpus46(modelId)) return 128_000;
58
+ if (isOpus45(modelId)) return 64_000;
59
+ if (isOpus4(modelId)) return 32_000;
60
+ return 64_000;
61
+ }
62
+
63
+ // https://platform.claude.com/docs/en/build-with-claude/effort
19
64
  export const claudeReasoningMiddleware: LanguageModelMiddleware = {
20
65
  specificationVersion: "v3",
21
66
  // eslint-disable-next-line require-await
@@ -27,23 +72,42 @@ export const claudeReasoningMiddleware: LanguageModelMiddleware = {
27
72
  if (!reasoning) return params;
28
73
 
29
74
  const target = (params.providerOptions!["anthropic"] ??= {});
75
+ const modelId = model.modelId;
76
+ const clampedMaxTokens =
77
+ reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
30
78
 
31
79
  if (!reasoning.enabled) {
32
80
  target["thinking"] = { type: "disabled" };
33
- } else if (reasoning.max_tokens) {
34
- target["thinking"] = {
35
- type: "enabled",
36
- budgetTokens: Math.min(reasoning.max_tokens, getMaxOutputTokens(model.modelId)),
37
- };
38
81
  } else if (reasoning.effort) {
39
- // FUTURE: warn that reasoning.max_tokens was computed
82
+ if (isOpus46(modelId)) {
83
+ target["thinking"] = clampedMaxTokens
84
+ ? { type: "adaptive", budgetTokens: clampedMaxTokens }
85
+ : { type: "adaptive" };
86
+ target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
87
+ } else if (isSonnet46(modelId)) {
88
+ target["thinking"] = clampedMaxTokens
89
+ ? { type: "enabled", budgetTokens: clampedMaxTokens }
90
+ : { type: "adaptive" };
91
+ target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
92
+ } else if (isOpus45(modelId) || isSonnet45(modelId)) {
93
+ target["thinking"] = { type: "enabled" };
94
+ if (clampedMaxTokens) target["thinking"]["budgetTokens"] = clampedMaxTokens;
95
+ target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
96
+ } else {
97
+ // FUTURE: warn that reasoning.max_tokens was computed
98
+ target["thinking"] = {
99
+ type: "enabled",
100
+ budgetTokens: calculateReasoningBudgetFromEffort(
101
+ reasoning.effort,
102
+ params.maxOutputTokens ?? getMaxOutputTokens(modelId),
103
+ 1024,
104
+ ),
105
+ };
106
+ }
107
+ } else if (clampedMaxTokens) {
40
108
  target["thinking"] = {
41
109
  type: "enabled",
42
- budgetTokens: calculateReasoningBudgetFromEffort(
43
- reasoning.effort,
44
- params.maxOutputTokens ?? getMaxOutputTokens(model.modelId),
45
- 1024,
46
- ),
110
+ budgetTokens: clampedMaxTokens,
47
111
  };
48
112
  } else {
49
113
  target["thinking"] = { type: "enabled" };
@@ -13,6 +13,7 @@ test("geminiReasoningMiddleware > matching patterns", () => {
13
13
  "google/gemini-2.5-pro",
14
14
  "google/gemini-3-flash-preview",
15
15
  "google/gemini-3-pro-preview",
16
+ "google/gemini-3.1-pro-preview",
16
17
  ] satisfies (typeof CANONICAL_MODEL_IDS)[number][];
17
18
 
18
19
  const nonMatching = ["google/gemini-1.5-pro", "google/gemini-1.5-flash"];
@@ -108,6 +109,36 @@ test("geminiReasoningMiddleware > should map effort for Gemini 3 Pro", async ()
108
109
  });
109
110
  });
110
111
 
112
+ test("geminiReasoningMiddleware > should map medium effort for Gemini 3.1 Pro", async () => {
113
+ const params = {
114
+ prompt: [],
115
+ providerOptions: {
116
+ unknown: {
117
+ reasoning: { enabled: true, effort: "medium" },
118
+ },
119
+ },
120
+ };
121
+
122
+ const result = await geminiReasoningMiddleware.transformParams!({
123
+ type: "generate",
124
+ params,
125
+ model: new MockLanguageModelV3({ modelId: "google/gemini-3.1-pro-preview" }),
126
+ });
127
+
128
+ expect(result).toEqual({
129
+ prompt: [],
130
+ providerOptions: {
131
+ google: {
132
+ thinkingConfig: {
133
+ includeThoughts: true,
134
+ thinkingLevel: "medium",
135
+ },
136
+ },
137
+ unknown: {},
138
+ },
139
+ });
140
+ });
141
+
111
142
  test("geminiReasoningMiddleware > should use budget for Gemini 2", async () => {
112
143
  const params = {
113
144
  prompt: [],
@@ -26,10 +26,26 @@ export const geminiDimensionsMiddleware: EmbeddingModelMiddleware = {
26
26
  },
27
27
  };
28
28
 
29
+ // https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
29
30
  export function mapGeminiReasoningEffort(
30
31
  effort: ChatCompletionsReasoningEffort,
31
32
  modelId: string,
32
33
  ): ChatCompletionsReasoningEffort | undefined {
34
+ if (modelId.includes("gemini-3.1-pro")) {
35
+ switch (effort) {
36
+ case "none":
37
+ case "minimal":
38
+ case "low":
39
+ return "low";
40
+ case "medium":
41
+ return "medium";
42
+ case "high":
43
+ case "xhigh":
44
+ case "max":
45
+ return "high";
46
+ }
47
+ }
48
+
33
49
  if (modelId.includes("gemini-3-pro")) {
34
50
  switch (effort) {
35
51
  case "none":
@@ -39,6 +55,7 @@ export function mapGeminiReasoningEffort(
39
55
  case "medium":
40
56
  case "high":
41
57
  case "xhigh":
58
+ case "max":
42
59
  return "high";
43
60
  }
44
61
  }
@@ -54,6 +71,7 @@ export function mapGeminiReasoningEffort(
54
71
  return "medium";
55
72
  case "high":
56
73
  case "xhigh":
74
+ case "max":
57
75
  return "high";
58
76
  }
59
77
  }
@@ -57,6 +57,16 @@ export const gemini3ProPreview = presetFor<CanonicalModelId, CatalogModel>()(
57
57
  } satisfies DeepPartial<CatalogModel>,
58
58
  );
59
59
 
60
+ export const gemini31ProPreview = presetFor<CanonicalModelId, CatalogModel>()(
61
+ "google/gemini-3.1-pro-preview" as const,
62
+ {
63
+ ...GEMINI_BASE,
64
+ name: "Gemini 3.1 Pro (Preview)",
65
+ created: "2026-02-19",
66
+ knowledge: "2025-01",
67
+ } satisfies DeepPartial<CatalogModel>,
68
+ );
69
+
60
70
  export const gemini25FlashLite = presetFor<CanonicalModelId, CatalogModel>()(
61
71
  "google/gemini-2.5-flash-lite" as const,
62
72
  {
@@ -90,19 +100,20 @@ export const gemini25Pro = presetFor<CanonicalModelId, CatalogModel>()(
90
100
  const geminiAtomic = {
91
101
  "v2.5": [gemini25FlashLite, gemini25Flash, gemini25Pro],
92
102
  "v3-preview": [gemini3FlashPreview, gemini3ProPreview],
103
+ "v3.1-preview": [gemini31ProPreview],
93
104
  embeddings: [geminiEmbedding001],
94
105
  } as const;
95
106
 
96
107
  const geminiGroups = {
97
108
  "v2.x": [...geminiAtomic["v2.5"]],
98
- "v3.x": [...geminiAtomic["v3-preview"]],
109
+ "v3.x": [...geminiAtomic["v3-preview"], ...geminiAtomic["v3.1-preview"]],
99
110
  } as const;
100
111
 
101
112
  export const gemini = {
102
113
  ...geminiAtomic,
103
114
  ...geminiGroups,
104
115
  latest: [...geminiAtomic["v2.5"]],
105
- preview: [...geminiAtomic["v3-preview"]],
116
+ preview: [...geminiAtomic["v3-preview"], ...geminiAtomic["v3.1-preview"]],
106
117
  embeddings: [...geminiAtomic["embeddings"]],
107
118
  all: Object.values(geminiAtomic).flat(),
108
119
  } as const;
@@ -33,6 +33,7 @@ function mapGptOssReasoningEffort(
33
33
  return "medium";
34
34
  case "high":
35
35
  case "xhigh":
36
+ case "max":
36
37
  return "high";
37
38
  default:
38
39
  return "low";
@@ -46,6 +46,7 @@ export const CANONICAL_MODEL_IDS = [
46
46
  "google/gemini-2.5-pro",
47
47
  "google/gemini-3-flash-preview",
48
48
  "google/gemini-3-pro-preview",
49
+ "google/gemini-3.1-pro-preview",
49
50
  "google/embedding-001",
50
51
  // Meta
51
52
  "meta/llama-3.1-8b",
@@ -1 +1,2 @@
1
1
  export * from "./canonical";
2
+ export * from "./middleware";
@@ -0,0 +1,73 @@
1
+ import { MockLanguageModelV3 } from "ai/test";
2
+ import { expect, test } from "bun:test";
3
+
4
+ import { modelMiddlewareMatcher } from "../../middleware/matcher";
5
+ import { bedrockAnthropicReasoningMiddleware } from "./middleware";
6
+
7
+ test("bedrockAnthropicReasoningMiddleware > matching provider", () => {
8
+ const middleware = modelMiddlewareMatcher.resolve({
9
+ kind: "text",
10
+ modelId: "anthropic/claude-opus-4.6",
11
+ providerId: "amazon-bedrock",
12
+ });
13
+
14
+ expect(middleware).toContain(bedrockAnthropicReasoningMiddleware);
15
+ });
16
+
17
+ test("bedrockAnthropicReasoningMiddleware > should map thinking/effort into reasoningConfig", async () => {
18
+ const params = {
19
+ prompt: [],
20
+ providerOptions: {
21
+ bedrock: {
22
+ thinking: {
23
+ type: "adaptive",
24
+ budgetTokens: 4096,
25
+ },
26
+ effort: "max",
27
+ },
28
+ },
29
+ };
30
+
31
+ const result = await bedrockAnthropicReasoningMiddleware.transformParams!({
32
+ type: "generate",
33
+ params,
34
+ model: new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" }),
35
+ });
36
+
37
+ expect(result.providerOptions?.bedrock).toEqual({
38
+ reasoningConfig: {
39
+ type: "adaptive",
40
+ budgetTokens: 4096,
41
+ maxReasoningEffort: "max",
42
+ },
43
+ });
44
+ });
45
+
46
+ test("bedrockAnthropicReasoningMiddleware > should skip non-anthropic models", async () => {
47
+ const params = {
48
+ prompt: [],
49
+ providerOptions: {
50
+ bedrock: {
51
+ thinking: {
52
+ type: "enabled",
53
+ budgetTokens: 4096,
54
+ },
55
+ effort: "high",
56
+ },
57
+ },
58
+ };
59
+
60
+ const result = await bedrockAnthropicReasoningMiddleware.transformParams!({
61
+ type: "generate",
62
+ params,
63
+ model: new MockLanguageModelV3({ modelId: "openai/gpt-oss-20b" }),
64
+ });
65
+
66
+ expect(result.providerOptions?.bedrock).toEqual({
67
+ thinking: {
68
+ type: "enabled",
69
+ budgetTokens: 4096,
70
+ },
71
+ effort: "high",
72
+ });
73
+ });