@hebo-ai/gateway 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -6
- package/dist/endpoints/chat-completions/converters.d.ts +3 -1
- package/dist/endpoints/chat-completions/converters.js +121 -90
- package/dist/endpoints/chat-completions/otel.js +7 -0
- package/dist/endpoints/chat-completions/schema.d.ts +400 -76
- package/dist/endpoints/chat-completions/schema.js +80 -36
- package/dist/endpoints/embeddings/schema.d.ts +1 -1
- package/dist/endpoints/embeddings/schema.js +1 -1
- package/dist/errors/gateway.js +1 -0
- package/dist/logger/default.d.ts +0 -1
- package/dist/logger/default.js +30 -6
- package/dist/middleware/utils.js +1 -0
- package/dist/models/amazon/middleware.js +1 -0
- package/dist/models/anthropic/middleware.d.ts +2 -0
- package/dist/models/anthropic/middleware.js +77 -16
- package/dist/models/google/middleware.js +17 -0
- package/dist/models/google/presets.d.ts +387 -0
- package/dist/models/google/presets.js +9 -2
- package/dist/models/openai/middleware.js +1 -0
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/index.d.ts +1 -0
- package/dist/providers/bedrock/index.js +1 -0
- package/dist/providers/bedrock/middleware.d.ts +2 -0
- package/dist/providers/bedrock/middleware.js +35 -0
- package/package.json +19 -21
- package/src/endpoints/chat-completions/converters.test.ts +219 -0
- package/src/endpoints/chat-completions/converters.ts +144 -104
- package/src/endpoints/chat-completions/handler.test.ts +87 -0
- package/src/endpoints/chat-completions/otel.ts +6 -0
- package/src/endpoints/chat-completions/schema.ts +85 -43
- package/src/endpoints/embeddings/schema.ts +1 -1
- package/src/errors/gateway.ts +2 -0
- package/src/logger/default.ts +34 -8
- package/src/middleware/utils.ts +1 -0
- package/src/models/amazon/middleware.ts +1 -0
- package/src/models/anthropic/middleware.test.ts +332 -1
- package/src/models/anthropic/middleware.ts +83 -19
- package/src/models/google/middleware.test.ts +31 -0
- package/src/models/google/middleware.ts +18 -0
- package/src/models/google/presets.ts +13 -2
- package/src/models/openai/middleware.ts +1 -0
- package/src/models/types.ts +1 -0
- package/src/providers/bedrock/index.ts +1 -0
- package/src/providers/bedrock/middleware.test.ts +73 -0
- package/src/providers/bedrock/middleware.ts +43 -0
|
@@ -8,6 +8,7 @@ import { claudeReasoningMiddleware } from "./middleware";
|
|
|
8
8
|
test("claudeReasoningMiddleware > matching patterns", () => {
|
|
9
9
|
const matching = [
|
|
10
10
|
"anthropic/claude-opus-4.6",
|
|
11
|
+
"anthropic/claude-sonnet-4.6",
|
|
11
12
|
"anthropic/claude-sonnet-3.7",
|
|
12
13
|
"anthropic/claude-opus-4.5",
|
|
13
14
|
"anthropic/claude-sonnet-4.5",
|
|
@@ -124,7 +125,7 @@ test("claudeReasoningMiddleware > should transform reasoning object to thinking
|
|
|
124
125
|
anthropic: {
|
|
125
126
|
thinking: {
|
|
126
127
|
type: "enabled",
|
|
127
|
-
budgetTokens:
|
|
128
|
+
budgetTokens: 32000,
|
|
128
129
|
},
|
|
129
130
|
},
|
|
130
131
|
unknown: {},
|
|
@@ -262,3 +263,333 @@ test("claudeReasoningMiddleware > should clamp max_tokens for Opus 4", async ()
|
|
|
262
263
|
|
|
263
264
|
expect(result.providerOptions?.anthropic?.thinking?.budgetTokens).toBe(32000);
|
|
264
265
|
});
|
|
266
|
+
|
|
267
|
+
// Opus 4.6 with effort "max": the test expects adaptive thinking and effort "max" on the anthropic options.
test("claudeReasoningMiddleware > should pass through max effort for Claude 4.6", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" });
  const reasoning = { enabled: true, effort: "max" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "adaptive" });
  expect(anthropic?.effort).toBe("max");
});
|
|
291
|
+
|
|
292
|
+
// Opus 4.6 with effort "xhigh": the test expects adaptive thinking and effort "max".
test("claudeReasoningMiddleware > should map xhigh effort to max for Claude Opus 4.6", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" });
  const reasoning = { enabled: true, effort: "xhigh" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "adaptive" });
  expect(anthropic?.effort).toBe("max");
});
|
|
316
|
+
|
|
317
|
+
// Sonnet 4.6 with effort "max": the test expects adaptive thinking and effort "high".
test("claudeReasoningMiddleware > should map max effort to high for Claude Sonnet 4.6", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" });
  const reasoning = { enabled: true, effort: "max" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "adaptive" });
  expect(anthropic?.effort).toBe("high");
});
|
|
341
|
+
|
|
342
|
+
// Sonnet 4.6 with effort "minimal": the test expects adaptive thinking and effort "low".
test("claudeReasoningMiddleware > should map minimal effort to low for Claude Sonnet 4.6", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" });
  const reasoning = { enabled: true, effort: "minimal" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "adaptive" });
  expect(anthropic?.effort).toBe("low");
});
|
|
366
|
+
|
|
367
|
+
// Sonnet 4.6 with both effort and max_tokens: the test expects "enabled" thinking carrying the
// requested budget, with the effort still set.
test("claudeReasoningMiddleware > should use manual thinking for Claude Sonnet 4.6 when max_tokens is provided", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.6" });
  const reasoning = { enabled: true, effort: "medium", max_tokens: 2000 };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "enabled", budgetTokens: 2000 });
  expect(anthropic?.effort).toBe("medium");
});
|
|
393
|
+
|
|
394
|
+
// Sonnet 4.5 with effort "none" (reasoning still enabled): the test expects "enabled" thinking
// without a budget and effort "low".
test("claudeReasoningMiddleware > should map none effort to low for Claude Sonnet 4.5", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" });
  const reasoning = { enabled: true, effort: "none" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "enabled" });
  expect(anthropic?.effort).toBe("low");
});
|
|
418
|
+
|
|
419
|
+
// Opus 4.6 with both effort and max_tokens: the test expects adaptive thinking that carries the
// budget, plus the effort.
test("claudeReasoningMiddleware > should include effort and max_tokens for Claude 4.6", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" });
  const reasoning = { enabled: true, effort: "medium", max_tokens: 2000 };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "adaptive", budgetTokens: 2000 });
  expect(anthropic?.effort).toBe("medium");
});
|
|
445
|
+
|
|
446
|
+
// Opus 4.6 with max_tokens of 200000: the test expects the budget to come back as 128000.
test("claudeReasoningMiddleware > should clamp max_tokens to 128k for Claude Opus 4.6", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" });
  const reasoning = { enabled: true, effort: "medium", max_tokens: 200000 };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "adaptive", budgetTokens: 128000 });
  expect(anthropic?.effort).toBe("medium");
});
|
|
472
|
+
|
|
473
|
+
// Sonnet 4.5 with both effort and max_tokens: the test expects "enabled" thinking with the budget,
// plus the effort.
test("claudeReasoningMiddleware > should include effort and max_tokens for Claude Sonnet 4.5", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" });
  const reasoning = { enabled: true, effort: "medium", max_tokens: 2000 };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "enabled", budgetTokens: 2000 });
  expect(anthropic?.effort).toBe("medium");
});
|
|
499
|
+
|
|
500
|
+
// Sonnet 4.5 with effort "max": the test expects "enabled" thinking without a budget and effort "high".
test("claudeReasoningMiddleware > should map max effort to high for Claude Sonnet 4.5", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" });
  const reasoning = { enabled: true, effort: "max" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "enabled" });
  expect(anthropic?.effort).toBe("high");
});
|
|
524
|
+
|
|
525
|
+
// Sonnet 4.5 with effort "xhigh": the test expects "enabled" thinking without a budget and effort "high".
test("claudeReasoningMiddleware > should map xhigh effort to high for Claude Sonnet 4.5", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4.5" });
  const reasoning = { enabled: true, effort: "xhigh" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "enabled" });
  expect(anthropic?.effort).toBe("high");
});
|
|
549
|
+
|
|
550
|
+
// claude-sonnet-4 with effort "xhigh" and no max_tokens: the test expects a thinking budget of 60800.
test("claudeReasoningMiddleware > should keep xhigh as budget for non-4.6 models", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-sonnet-4" });
  const reasoning = { enabled: true, effort: "xhigh" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const thinking = transformed.providerOptions?.anthropic?.thinking;
  expect(thinking?.budgetTokens).toBe(60800);
});
|
|
571
|
+
|
|
572
|
+
// Opus 4.5 with effort "xhigh" and no max_tokens: the test expects "enabled" thinking with no
// budgetTokens key, and effort "high".
test("claudeReasoningMiddleware > should map xhigh effort for Claude Opus 4.5 without default budget", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.5" });
  const reasoning = { enabled: true, effort: "xhigh" };
  const callOptions = { prompt: [], providerOptions: { unknown: { reasoning } } };

  const transformed = await claudeReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const anthropic = transformed.providerOptions?.anthropic;
  expect(anthropic?.thinking).toEqual({ type: "enabled" });
  expect(anthropic?.effort).toBe("high");
});
|
|
@@ -1,21 +1,66 @@
|
|
|
1
1
|
import type { LanguageModelMiddleware } from "ai";
|
|
2
2
|
|
|
3
|
-
import type {
|
|
3
|
+
import type {
|
|
4
|
+
ChatCompletionsReasoningConfig,
|
|
5
|
+
ChatCompletionsReasoningEffort,
|
|
6
|
+
} from "../../endpoints/chat-completions/schema";
|
|
4
7
|
|
|
5
8
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
6
9
|
import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
|
|
7
10
|
|
|
8
|
-
const
|
|
9
|
-
const
|
|
11
|
+
/**
 * Builds a predicate that reports whether a model ID names the given Claude family and version.
 *
 * The version is accepted in dotted form ("4.6") and is also matched in dashed form ("4-6").
 * Matching is a substring test, so a shorter version such as "4" also matches IDs of longer
 * versions that start with it (for example "claude-opus-4.6").
 *
 * @param family - Claude model family
 * @param version - dotted version string, e.g. "4.6"
 * @returns a function taking a model ID and returning true when either spelling occurs in it
 */
const isClaude = (family: "opus" | "sonnet" | "haiku", version: string) => {
  // replaceAll, not replace: a string pattern passed to replace() only swaps the first dot,
  // which would turn a multi-part version like "4.5.1" into "4-5.1".
  const dashed = version.replaceAll(".", "-");
  const dottedName = `claude-${family}-${version}`;
  const dashedName = `claude-${family}-${dashed}`;

  return (modelId: string) => modelId.includes(dottedName) || modelId.includes(dashedName);
};
|
|
18
|
+
|
|
19
|
+
// Model-ID predicates built with isClaude; each one matches the dotted and the dashed spelling
// of its version.
const isOpus46 = isClaude("opus", "4.6");
|
|
20
|
+
const isOpus45 = isClaude("opus", "4.5");
|
|
21
|
+
// Substring match on "claude-opus-4": also true for 4.5 / 4.6 IDs, so callers that need to tell
// them apart must test the more specific predicates first (getMaxOutputTokens does).
const isOpus4 = isClaude("opus", "4");
|
|
22
|
+
const isSonnet46 = isClaude("sonnet", "4.6");
|
|
23
|
+
const isSonnet45 = isClaude("sonnet", "4.5");
|
|
24
|
+
|
|
25
|
+
/**
 * Translates a gateway reasoning effort into the effort value used for Claude.
 *
 * - "none", "minimal" and "low" collapse to "low"; "medium" and "high" map to themselves.
 * - "xhigh" and "max" become "max" when `modelId` matches Opus 4.6, and "high" for any other model.
 *
 * @param effort - requested reasoning effort
 * @param modelId - model identifier, used only to detect Opus 4.6
 * @returns one of "low", "medium", "high" or "max"
 */
export function mapClaudeReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string) {
  switch (effort) {
    case "none":
    case "minimal":
    case "low":
      return "low";
    case "medium":
      return "medium";
    case "high":
      return "high";
    case "xhigh":
    case "max":
      // Only the top two levels differ between Opus 4.6 and everything else.
      return isOpus46(modelId) ? "max" : "high";
  }
}
|
|
18
55
|
|
|
56
|
+
/**
 * Returns the output-token ceiling used for a model ID.
 *
 * @param modelId - model identifier to classify
 * @returns 128000 for Opus 4.6, 64000 for Opus 4.5, 32000 for other Opus 4 IDs, 64000 otherwise
 */
function getMaxOutputTokens(modelId: string): number {
  // Order matters: isOpus4 is a prefix-style match that is also true for 4.5 / 4.6 IDs,
  // so the more specific predicates must be tried first.
  const limits: [(id: string) => boolean, number][] = [
    [isOpus46, 128_000],
    [isOpus45, 64_000],
    [isOpus4, 32_000],
  ];

  const hit = limits.find(([matches]) => matches(modelId));
  return hit ? hit[1] : 64_000;
}
|
|
62
|
+
|
|
63
|
+
// https://platform.claude.com/docs/en/build-with-claude/effort
|
|
19
64
|
export const claudeReasoningMiddleware: LanguageModelMiddleware = {
|
|
20
65
|
specificationVersion: "v3",
|
|
21
66
|
// eslint-disable-next-line require-await
|
|
@@ -27,23 +72,42 @@ export const claudeReasoningMiddleware: LanguageModelMiddleware = {
|
|
|
27
72
|
if (!reasoning) return params;
|
|
28
73
|
|
|
29
74
|
const target = (params.providerOptions!["anthropic"] ??= {});
|
|
75
|
+
const modelId = model.modelId;
|
|
76
|
+
const clampedMaxTokens =
|
|
77
|
+
reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
|
|
30
78
|
|
|
31
79
|
if (!reasoning.enabled) {
|
|
32
80
|
target["thinking"] = { type: "disabled" };
|
|
33
|
-
} else if (reasoning.max_tokens) {
|
|
34
|
-
target["thinking"] = {
|
|
35
|
-
type: "enabled",
|
|
36
|
-
budgetTokens: Math.min(reasoning.max_tokens, getMaxOutputTokens(model.modelId)),
|
|
37
|
-
};
|
|
38
81
|
} else if (reasoning.effort) {
|
|
39
|
-
|
|
82
|
+
if (isOpus46(modelId)) {
|
|
83
|
+
target["thinking"] = clampedMaxTokens
|
|
84
|
+
? { type: "adaptive", budgetTokens: clampedMaxTokens }
|
|
85
|
+
: { type: "adaptive" };
|
|
86
|
+
target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
87
|
+
} else if (isSonnet46(modelId)) {
|
|
88
|
+
target["thinking"] = clampedMaxTokens
|
|
89
|
+
? { type: "enabled", budgetTokens: clampedMaxTokens }
|
|
90
|
+
: { type: "adaptive" };
|
|
91
|
+
target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
92
|
+
} else if (isOpus45(modelId) || isSonnet45(modelId)) {
|
|
93
|
+
target["thinking"] = { type: "enabled" };
|
|
94
|
+
if (clampedMaxTokens) target["thinking"]["budgetTokens"] = clampedMaxTokens;
|
|
95
|
+
target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
96
|
+
} else {
|
|
97
|
+
// FUTURE: warn that reasoning.max_tokens was computed
|
|
98
|
+
target["thinking"] = {
|
|
99
|
+
type: "enabled",
|
|
100
|
+
budgetTokens: calculateReasoningBudgetFromEffort(
|
|
101
|
+
reasoning.effort,
|
|
102
|
+
params.maxOutputTokens ?? getMaxOutputTokens(modelId),
|
|
103
|
+
1024,
|
|
104
|
+
),
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
} else if (clampedMaxTokens) {
|
|
40
108
|
target["thinking"] = {
|
|
41
109
|
type: "enabled",
|
|
42
|
-
budgetTokens:
|
|
43
|
-
reasoning.effort,
|
|
44
|
-
params.maxOutputTokens ?? getMaxOutputTokens(model.modelId),
|
|
45
|
-
1024,
|
|
46
|
-
),
|
|
110
|
+
budgetTokens: clampedMaxTokens,
|
|
47
111
|
};
|
|
48
112
|
} else {
|
|
49
113
|
target["thinking"] = { type: "enabled" };
|
|
@@ -13,6 +13,7 @@ test("geminiReasoningMiddleware > matching patterns", () => {
|
|
|
13
13
|
"google/gemini-2.5-pro",
|
|
14
14
|
"google/gemini-3-flash-preview",
|
|
15
15
|
"google/gemini-3-pro-preview",
|
|
16
|
+
"google/gemini-3.1-pro-preview",
|
|
16
17
|
] satisfies (typeof CANONICAL_MODEL_IDS)[number][];
|
|
17
18
|
|
|
18
19
|
const nonMatching = ["google/gemini-1.5-pro", "google/gemini-1.5-flash"];
|
|
@@ -108,6 +109,36 @@ test("geminiReasoningMiddleware > should map effort for Gemini 3 Pro", async ()
|
|
|
108
109
|
});
|
|
109
110
|
});
|
|
110
111
|
|
|
112
|
+
// Gemini 3.1 Pro with effort "medium": the test expects the reasoning option to be moved into
// google.thinkingConfig with thinkingLevel "medium" and thoughts included.
test("geminiReasoningMiddleware > should map medium effort for Gemini 3.1 Pro", async () => {
  const model = new MockLanguageModelV3({ modelId: "google/gemini-3.1-pro-preview" });
  const callOptions = {
    prompt: [],
    providerOptions: {
      unknown: { reasoning: { enabled: true, effort: "medium" } },
    },
  };

  const transformed = await geminiReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  const expectedGoogle = {
    thinkingConfig: { includeThoughts: true, thinkingLevel: "medium" },
  };
  expect(transformed).toEqual({
    prompt: [],
    providerOptions: { google: expectedGoogle, unknown: {} },
  });
});
|
|
141
|
+
|
|
111
142
|
test("geminiReasoningMiddleware > should use budget for Gemini 2", async () => {
|
|
112
143
|
const params = {
|
|
113
144
|
prompt: [],
|
|
@@ -26,10 +26,26 @@ export const geminiDimensionsMiddleware: EmbeddingModelMiddleware = {
|
|
|
26
26
|
},
|
|
27
27
|
};
|
|
28
28
|
|
|
29
|
+
// https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
|
|
29
30
|
export function mapGeminiReasoningEffort(
|
|
30
31
|
effort: ChatCompletionsReasoningEffort,
|
|
31
32
|
modelId: string,
|
|
32
33
|
): ChatCompletionsReasoningEffort | undefined {
|
|
34
|
+
if (modelId.includes("gemini-3.1-pro")) {
|
|
35
|
+
switch (effort) {
|
|
36
|
+
case "none":
|
|
37
|
+
case "minimal":
|
|
38
|
+
case "low":
|
|
39
|
+
return "low";
|
|
40
|
+
case "medium":
|
|
41
|
+
return "medium";
|
|
42
|
+
case "high":
|
|
43
|
+
case "xhigh":
|
|
44
|
+
case "max":
|
|
45
|
+
return "high";
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
33
49
|
if (modelId.includes("gemini-3-pro")) {
|
|
34
50
|
switch (effort) {
|
|
35
51
|
case "none":
|
|
@@ -39,6 +55,7 @@ export function mapGeminiReasoningEffort(
|
|
|
39
55
|
case "medium":
|
|
40
56
|
case "high":
|
|
41
57
|
case "xhigh":
|
|
58
|
+
case "max":
|
|
42
59
|
return "high";
|
|
43
60
|
}
|
|
44
61
|
}
|
|
@@ -54,6 +71,7 @@ export function mapGeminiReasoningEffort(
|
|
|
54
71
|
return "medium";
|
|
55
72
|
case "high":
|
|
56
73
|
case "xhigh":
|
|
74
|
+
case "max":
|
|
57
75
|
return "high";
|
|
58
76
|
}
|
|
59
77
|
}
|
|
@@ -57,6 +57,16 @@ export const gemini3ProPreview = presetFor<CanonicalModelId, CatalogModel>()(
|
|
|
57
57
|
} satisfies DeepPartial<CatalogModel>,
|
|
58
58
|
);
|
|
59
59
|
|
|
60
|
+
// Catalog preset for the canonical model id "google/gemini-3.1-pro-preview".
// GEMINI_BASE is spread first, so name / created / knowledge below are applied on top of it.
export const gemini31ProPreview = presetFor<CanonicalModelId, CatalogModel>()(
|
|
61
|
+
"google/gemini-3.1-pro-preview" as const,
|
|
62
|
+
{
|
|
63
|
+
...GEMINI_BASE,
|
|
64
|
+
name: "Gemini 3.1 Pro (Preview)",
|
|
65
|
+
created: "2026-02-19",
|
|
66
|
+
knowledge: "2025-01",
|
|
67
|
+
} satisfies DeepPartial<CatalogModel>,
|
|
68
|
+
);
|
|
69
|
+
|
|
60
70
|
export const gemini25FlashLite = presetFor<CanonicalModelId, CatalogModel>()(
|
|
61
71
|
"google/gemini-2.5-flash-lite" as const,
|
|
62
72
|
{
|
|
@@ -90,19 +100,20 @@ export const gemini25Pro = presetFor<CanonicalModelId, CatalogModel>()(
|
|
|
90
100
|
const geminiAtomic = {
|
|
91
101
|
"v2.5": [gemini25FlashLite, gemini25Flash, gemini25Pro],
|
|
92
102
|
"v3-preview": [gemini3FlashPreview, gemini3ProPreview],
|
|
103
|
+
"v3.1-preview": [gemini31ProPreview],
|
|
93
104
|
embeddings: [geminiEmbedding001],
|
|
94
105
|
} as const;
|
|
95
106
|
|
|
96
107
|
const geminiGroups = {
|
|
97
108
|
"v2.x": [...geminiAtomic["v2.5"]],
|
|
98
|
-
"v3.x": [...geminiAtomic["v3-preview"]],
|
|
109
|
+
"v3.x": [...geminiAtomic["v3-preview"], ...geminiAtomic["v3.1-preview"]],
|
|
99
110
|
} as const;
|
|
100
111
|
|
|
101
112
|
export const gemini = {
|
|
102
113
|
...geminiAtomic,
|
|
103
114
|
...geminiGroups,
|
|
104
115
|
latest: [...geminiAtomic["v2.5"]],
|
|
105
|
-
preview: [...geminiAtomic["v3-preview"]],
|
|
116
|
+
preview: [...geminiAtomic["v3-preview"], ...geminiAtomic["v3.1-preview"]],
|
|
106
117
|
embeddings: [...geminiAtomic["embeddings"]],
|
|
107
118
|
all: Object.values(geminiAtomic).flat(),
|
|
108
119
|
} as const;
|
package/src/models/types.ts
CHANGED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { MockLanguageModelV3 } from "ai/test";
|
|
2
|
+
import { expect, test } from "bun:test";
|
|
3
|
+
|
|
4
|
+
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
5
|
+
import { bedrockAnthropicReasoningMiddleware } from "./middleware";
|
|
6
|
+
|
|
7
|
+
// Resolving text middleware for an Anthropic model on the "amazon-bedrock" provider should
// include the Bedrock reasoning middleware.
test("bedrockAnthropicReasoningMiddleware > matching provider", () => {
  expect(
    modelMiddlewareMatcher.resolve({
      kind: "text",
      modelId: "anthropic/claude-opus-4.6",
      providerId: "amazon-bedrock",
    }),
  ).toContain(bedrockAnthropicReasoningMiddleware);
});
|
|
16
|
+
|
|
17
|
+
// For an Anthropic model, the test expects bedrock.thinking and bedrock.effort to be replaced by a
// single reasoningConfig object (effort becomes maxReasoningEffort).
test("bedrockAnthropicReasoningMiddleware > should map thinking/effort into reasoningConfig", async () => {
  const model = new MockLanguageModelV3({ modelId: "anthropic/claude-opus-4.6" });
  const thinking = { type: "adaptive", budgetTokens: 4096 };
  const callOptions = {
    prompt: [],
    providerOptions: { bedrock: { thinking, effort: "max" } },
  };

  const transformed = await bedrockAnthropicReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  expect(transformed.providerOptions?.bedrock).toEqual({
    reasoningConfig: { type: "adaptive", budgetTokens: 4096, maxReasoningEffort: "max" },
  });
});
|
|
45
|
+
|
|
46
|
+
// For a non-Anthropic model ID, the test expects the bedrock options to come back unchanged.
test("bedrockAnthropicReasoningMiddleware > should skip non-anthropic models", async () => {
  const model = new MockLanguageModelV3({ modelId: "openai/gpt-oss-20b" });
  const thinking = { type: "enabled", budgetTokens: 4096 };
  const callOptions = {
    prompt: [],
    providerOptions: { bedrock: { thinking, effort: "high" } },
  };

  const transformed = await bedrockAnthropicReasoningMiddleware.transformParams!({
    type: "generate",
    params: callOptions,
    model,
  });

  expect(transformed.providerOptions?.bedrock).toEqual({
    thinking: { type: "enabled", budgetTokens: 4096 },
    effort: "high",
  });
});
|