@hebo-ai/gateway 0.6.2-rc1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -536,14 +536,14 @@ Normalization rules:
536
536
 
537
537
  - `enabled` -> falls back to the model default if none is provided
538
538
  - `max_tokens`: falls back to the model default if the model supports it
539
- - `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`, `max`
539
+ - `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
540
540
  - Generic `effort` -> budget = percentage of `max_tokens`
541
541
  - `none`: 0%
542
542
  - `minimal`: 10%
543
543
  - `low`: 20%
544
544
  - `medium`: 50% (default)
545
545
  - `high`: 80%
546
- - `xhigh` / `max`: 95%
546
+ - `xhigh`: 95%
547
547
 
548
548
  Reasoning output is surfaced as an extension to the `completion` object.
549
549
 
@@ -665,7 +665,7 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
665
665
 
666
666
  > [!TIP]
667
667
  > To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
668
- > For `/chat/completions`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
668
+ > For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
669
669
 
670
670
  For observability integrations that are not OpenTelemetry-compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
671
671
 
@@ -8,7 +8,12 @@ import { parseDataUrl } from "../../utils/url";
8
8
  export function convertToTextCallOptions(params) {
9
9
  const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
10
10
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
11
- Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, extra_body?.google?.cached_content, cache_control));
11
+ Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control));
12
+ if (extra_body) {
13
+ for (const v of Object.values(extra_body)) {
14
+ Object.assign(rest, v);
15
+ }
16
+ }
12
17
  const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
13
18
  return {
14
19
  messages: convertToModelMessages(messages),
@@ -337,29 +342,25 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
337
342
  }
338
343
  return out;
339
344
  }
340
- function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control) {
345
+ function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
341
346
  const out = {};
342
- const syncedCacheKey = prompt_cache_key ?? cached_content;
343
- const syncedCachedContent = cached_content ?? prompt_cache_key;
344
- let syncedCacheRetention = prompt_cache_retention;
345
- if (!syncedCacheRetention && cache_control?.ttl) {
346
- syncedCacheRetention = cache_control.ttl === "24h" ? "24h" : "in_memory";
347
- }
348
- let syncedCacheControl = cache_control;
349
- if (!syncedCacheControl && syncedCacheRetention) {
350
- syncedCacheControl = {
347
+ let retention = prompt_cache_retention;
348
+ if (!retention && cache_control?.ttl) {
349
+ retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
350
+ }
351
+ let control = cache_control;
352
+ if (!control && retention) {
353
+ control = {
351
354
  type: "ephemeral",
352
- ttl: syncedCacheRetention === "24h" ? "24h" : "5m",
355
+ ttl: retention === "24h" ? "24h" : "5m",
353
356
  };
354
357
  }
355
- if (syncedCacheKey)
356
- out["prompt_cache_key"] = syncedCacheKey;
357
- if (syncedCacheRetention)
358
- out["prompt_cache_retention"] = syncedCacheRetention;
359
- if (syncedCachedContent)
360
- out["cached_content"] = syncedCachedContent;
361
- if (syncedCacheControl)
362
- out["cache_control"] = syncedCacheControl;
358
+ if (prompt_cache_key)
359
+ out["prompt_cache_key"] = prompt_cache_key;
360
+ if (retention)
361
+ out["prompt_cache_retention"] = retention;
362
+ if (control)
363
+ out["cache_control"] = control;
363
364
  return out;
364
365
  }
365
366
  // --- Response Flow ---
@@ -427,7 +427,6 @@ export declare const ChatCompletionsReasoningEffortSchema: z.ZodEnum<{
427
427
  minimal: "minimal";
428
428
  medium: "medium";
429
429
  xhigh: "xhigh";
430
- max: "max";
431
430
  }>;
432
431
  export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
433
432
  export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
@@ -439,7 +438,6 @@ export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
439
438
  minimal: "minimal";
440
439
  medium: "medium";
441
440
  xhigh: "xhigh";
442
- max: "max";
443
441
  }>>;
444
442
  max_tokens: z.ZodOptional<z.ZodNumber>;
445
443
  exclude: z.ZodOptional<z.ZodBoolean>;
@@ -651,18 +649,12 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
651
649
  minimal: "minimal";
652
650
  medium: "medium";
653
651
  xhigh: "xhigh";
654
- max: "max";
655
652
  }>>;
656
653
  prompt_cache_key: z.ZodOptional<z.ZodString>;
657
654
  prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
658
655
  in_memory: "in_memory";
659
656
  "24h": "24h";
660
657
  }>>;
661
- extra_body: z.ZodOptional<z.ZodObject<{
662
- google: z.ZodOptional<z.ZodObject<{
663
- cached_content: z.ZodOptional<z.ZodString>;
664
- }, z.core.$strip>>;
665
- }, z.core.$strip>>;
666
658
  cache_control: z.ZodOptional<z.ZodObject<{
667
659
  type: z.ZodLiteral<"ephemeral">;
668
660
  ttl: z.ZodOptional<z.ZodString>;
@@ -676,11 +668,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
676
668
  minimal: "minimal";
677
669
  medium: "medium";
678
670
  xhigh: "xhigh";
679
- max: "max";
680
671
  }>>;
681
672
  max_tokens: z.ZodOptional<z.ZodNumber>;
682
673
  exclude: z.ZodOptional<z.ZodBoolean>;
683
674
  }, z.core.$strip>>;
675
+ extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
684
676
  }, z.core.$strip>;
685
677
  export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
686
678
  export declare const ChatCompletionsBodySchema: z.ZodObject<{
@@ -863,18 +855,12 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
863
855
  minimal: "minimal";
864
856
  medium: "medium";
865
857
  xhigh: "xhigh";
866
- max: "max";
867
858
  }>>;
868
859
  prompt_cache_key: z.ZodOptional<z.ZodString>;
869
860
  prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
870
861
  in_memory: "in_memory";
871
862
  "24h": "24h";
872
863
  }>>;
873
- extra_body: z.ZodOptional<z.ZodObject<{
874
- google: z.ZodOptional<z.ZodObject<{
875
- cached_content: z.ZodOptional<z.ZodString>;
876
- }, z.core.$strip>>;
877
- }, z.core.$strip>>;
878
864
  cache_control: z.ZodOptional<z.ZodObject<{
879
865
  type: z.ZodLiteral<"ephemeral">;
880
866
  ttl: z.ZodOptional<z.ZodString>;
@@ -888,11 +874,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
888
874
  minimal: "minimal";
889
875
  medium: "medium";
890
876
  xhigh: "xhigh";
891
- max: "max";
892
877
  }>>;
893
878
  max_tokens: z.ZodOptional<z.ZodNumber>;
894
879
  exclude: z.ZodOptional<z.ZodBoolean>;
895
880
  }, z.core.$strip>>;
881
+ extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
896
882
  model: z.ZodString;
897
883
  stream: z.ZodOptional<z.ZodBoolean>;
898
884
  }, z.core.$loose>;
@@ -1029,7 +1015,7 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
1029
1015
  cache_write_tokens: z.ZodOptional<z.ZodInt>;
1030
1016
  }, z.core.$strip>>;
1031
1017
  }, z.core.$strip>>;
1032
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
1018
+ provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1033
1019
  }, z.core.$strip>;
1034
1020
  export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
1035
1021
  export declare const ChatCompletionsToolCallDeltaSchema: z.ZodObject<{
@@ -1196,7 +1182,7 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
1196
1182
  cache_write_tokens: z.ZodOptional<z.ZodInt>;
1197
1183
  }, z.core.$strip>>;
1198
1184
  }, z.core.$strip>>;
1199
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
1185
+ provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1200
1186
  }, z.core.$strip>;
1201
1187
  export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
1202
1188
  export {};
@@ -161,14 +161,11 @@ export const ChatCompletionsToolChoiceSchema = z.union([
161
161
  ]);
162
162
  export const ChatCompletionsReasoningEffortSchema = z.enum([
163
163
  "none",
164
- // Extension origin: Gemini
165
164
  "minimal",
166
165
  "low",
167
166
  "medium",
168
167
  "high",
169
168
  "xhigh",
170
- // Extension origin: Anthropic
171
- "max",
172
169
  ]);
173
170
  export const ChatCompletionsReasoningConfigSchema = z.object({
174
171
  enabled: z.optional(z.boolean()),
@@ -212,22 +209,16 @@ const ChatCompletionsInputsSchema = z.object({
212
209
  reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
213
210
  prompt_cache_key: z.string().optional(),
214
211
  prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
215
- // Extension origin: Gemini explicit cache handle
216
- // FUTURE: generalize extra_body handling
217
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
218
- extra_body: z
219
- .object({
220
- google: z
221
- .object({
222
- cached_content: z.string().optional().meta({ extension: true }),
223
- })
224
- .optional(),
225
- })
226
- .optional(),
227
212
  // Extension origin: OpenRouter/Vercel/Anthropic
228
213
  cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
229
214
  // Extension origin: OpenRouter
230
215
  reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
216
+ // Extension origin: Gemini extra_body
217
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
218
+ extra_body: z
219
+ .record(z.string(), z.record(z.string(), z.unknown()))
220
+ .optional()
221
+ .meta({ extension: true }),
231
222
  });
232
223
  export const ChatCompletionsBodySchema = z.looseObject({
233
224
  model: z.string(),
@@ -274,7 +265,10 @@ export const ChatCompletionsSchema = z.object({
274
265
  choices: z.array(ChatCompletionsChoiceSchema),
275
266
  usage: ChatCompletionsUsageSchema.nullable(),
276
267
  // Extension origin: Vercel AI Gateway
277
- provider_metadata: z.unknown().optional().meta({ extension: true }),
268
+ provider_metadata: z
269
+ .record(z.string(), z.record(z.string(), z.unknown()))
270
+ .optional()
271
+ .meta({ extension: true }),
278
272
  });
279
273
  export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
280
274
  index: z.int().nonnegative(),
@@ -297,5 +291,8 @@ export const ChatCompletionsChunkSchema = z.object({
297
291
  choices: z.array(ChatCompletionsChoiceDeltaSchema),
298
292
  usage: ChatCompletionsUsageSchema.nullable(),
299
293
  // Extension origin: Vercel AI Gateway
300
- provider_metadata: z.unknown().optional().meta({ extension: true }),
294
+ provider_metadata: z
295
+ .record(z.string(), z.record(z.string(), z.unknown()))
296
+ .optional()
297
+ .meta({ extension: true }),
301
298
  });
@@ -7,6 +7,11 @@ export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
7
7
  Object.assign(attrs, {
8
8
  "gen_ai.embeddings.dimension.count": inputs.dimensions,
9
9
  });
10
+ if (inputs.metadata) {
11
+ for (const key in inputs.metadata) {
12
+ attrs[`gen_ai.request.metadata.${key}`] = inputs.metadata[key];
13
+ }
14
+ }
10
15
  }
11
16
  return attrs;
12
17
  };
@@ -1,12 +1,18 @@
1
1
  import * as z from "zod";
2
+ export declare const EmbeddingsDimensionsSchema: z.ZodInt;
3
+ export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
4
+ export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
5
+ export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
2
6
  export declare const EmbeddingsInputsSchema: z.ZodObject<{
3
7
  input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
4
8
  dimensions: z.ZodOptional<z.ZodInt>;
9
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
5
10
  }, z.core.$strip>;
6
11
  export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
7
12
  export declare const EmbeddingsBodySchema: z.ZodObject<{
8
13
  input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
9
14
  dimensions: z.ZodOptional<z.ZodInt>;
15
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
10
16
  model: z.ZodString;
11
17
  }, z.core.$loose>;
12
18
  export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
@@ -1,7 +1,10 @@
1
1
  import * as z from "zod";
2
+ export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
3
+ export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
2
4
  export const EmbeddingsInputsSchema = z.object({
3
5
  input: z.union([z.string(), z.array(z.string())]),
4
- dimensions: z.int().nonnegative().max(65536).optional(),
6
+ dimensions: EmbeddingsDimensionsSchema.optional(),
7
+ metadata: EmbeddingsMetadataSchema.optional(),
5
8
  });
6
9
  export const EmbeddingsBodySchema = z.looseObject({
7
10
  model: z.string(),
@@ -17,7 +17,6 @@ export function calculateReasoningBudgetFromEffort(effort, maxTokens, minTokens
17
17
  percentage = 0.8;
18
18
  break;
19
19
  case "xhigh":
20
- case "max":
21
20
  percentage = 0.95;
22
21
  break;
23
22
  }
@@ -10,7 +10,9 @@ export const novaDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["nova"] ??= {})["embeddingDimension"] = dimensions;
13
+ const target = (params.providerOptions["nova"] ??= {});
14
+ // @ts-expect-error AI SDK does the value checking for us
15
+ target.embeddingDimension = dimensions;
14
16
  delete unknown["dimensions"];
15
17
  return params;
16
18
  },
@@ -26,7 +28,6 @@ function mapNovaEffort(effort) {
26
28
  return "medium";
27
29
  case "high":
28
30
  case "xhigh":
29
- case "max":
30
31
  return "high";
31
32
  }
32
33
  }
@@ -42,18 +43,18 @@ export const novaReasoningMiddleware = {
42
43
  return params;
43
44
  const target = (params.providerOptions["amazon"] ??= {});
44
45
  if (!reasoning.enabled) {
45
- target["reasoningConfig"] = { type: "disabled" };
46
+ target.reasoningConfig = { type: "disabled" };
46
47
  }
47
48
  else if (reasoning.effort) {
48
49
  // FUTURE: warn if mapNovaEffort modified the effort
49
- target["reasoningConfig"] = {
50
+ target.reasoningConfig = {
50
51
  type: "enabled",
51
52
  maxReasoningEffort: mapNovaEffort(reasoning.effort),
52
53
  };
53
54
  }
54
55
  else {
55
56
  // FUTURE: warn if reasoning.max_tokens (unsupported) was ignored
56
- target["reasoningConfig"] = { type: "enabled" };
57
+ target.reasoningConfig = { type: "enabled" };
57
58
  }
58
59
  delete unknown["reasoning"];
59
60
  return params;
@@ -22,7 +22,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
22
22
  case "high":
23
23
  return "high";
24
24
  case "xhigh":
25
- case "max":
26
25
  return "max";
27
26
  }
28
27
  }
@@ -35,7 +34,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
35
34
  return "medium";
36
35
  case "high":
37
36
  case "xhigh":
38
- case "max":
39
37
  return "high";
40
38
  }
41
39
  }
@@ -66,41 +64,42 @@ export const claudeReasoningMiddleware = {
66
64
  const modelId = model.modelId;
67
65
  const clampedMaxTokens = reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
68
66
  if (!reasoning.enabled) {
69
- target["thinking"] = { type: "disabled" };
67
+ target.thinking = { type: "disabled" };
70
68
  }
71
69
  else if (reasoning.effort) {
72
70
  if (isClaude4(modelId)) {
73
- target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
71
+ target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
74
72
  }
75
73
  if (isOpus46(modelId)) {
76
- target["thinking"] = clampedMaxTokens
77
- ? { type: "adaptive", budgetTokens: clampedMaxTokens }
74
+ target.thinking = clampedMaxTokens
75
+ ? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
76
+ { type: "adaptive", budgetTokens: clampedMaxTokens }
78
77
  : { type: "adaptive" };
79
78
  }
80
79
  else if (isSonnet46(modelId)) {
81
- target["thinking"] = clampedMaxTokens
80
+ target.thinking = clampedMaxTokens
82
81
  ? { type: "enabled", budgetTokens: clampedMaxTokens }
83
82
  : { type: "adaptive" };
84
83
  }
85
84
  else {
86
- target["thinking"] = { type: "enabled" };
85
+ target.thinking = { type: "enabled" };
87
86
  if (clampedMaxTokens) {
88
- target["thinking"]["budgetTokens"] = clampedMaxTokens;
87
+ target.thinking.budgetTokens = clampedMaxTokens;
89
88
  }
90
89
  else {
91
90
  // FUTURE: warn that reasoning.max_tokens was computed
92
- target["thinking"]["budgetTokens"] = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
91
+ target.thinking.budgetTokens = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
93
92
  }
94
93
  }
95
94
  }
96
95
  else if (clampedMaxTokens) {
97
- target["thinking"] = {
96
+ target.thinking = {
98
97
  type: "enabled",
99
98
  budgetTokens: clampedMaxTokens,
100
99
  };
101
100
  }
102
101
  else {
103
- target["thinking"] = { type: "enabled" };
102
+ target.thinking = { type: "enabled" };
104
103
  }
105
104
  delete unknown["reasoning"];
106
105
  return params;
@@ -116,7 +115,8 @@ export const claudePromptCachingMiddleware = {
116
115
  return params;
117
116
  const cacheControl = unknown["cache_control"];
118
117
  if (cacheControl) {
119
- (params.providerOptions["anthropic"] ??= {})["cacheControl"] = cacheControl;
118
+ (params.providerOptions["anthropic"] ??= {}).cacheControl =
119
+ cacheControl;
120
120
  }
121
121
  delete unknown["cache_control"];
122
122
  return params;
@@ -17,7 +17,9 @@ export const cohereDimensionsMiddleware = {
17
17
  const dimensions = unknown["dimensions"];
18
18
  if (!dimensions)
19
19
  return params;
20
- (params.providerOptions["cohere"] ??= {})["outputDimension"] = dimensions;
20
+ const target = (params.providerOptions["cohere"] ??= {});
21
+ // @ts-expect-error AI SDK does the value checking for us
22
+ target.outputDimension = dimensions;
21
23
  delete unknown["dimensions"];
22
24
  return params;
23
25
  },
@@ -35,20 +37,20 @@ export const cohereReasoningMiddleware = {
35
37
  return params;
36
38
  const target = (params.providerOptions["cohere"] ??= {});
37
39
  if (!reasoning.enabled) {
38
- target["thinking"] = { type: "disabled" };
40
+ target.thinking = { type: "disabled" };
39
41
  }
40
42
  else if (reasoning.max_tokens) {
41
- target["thinking"] = { type: "enabled", tokenBudget: reasoning.max_tokens };
43
+ target.thinking = { type: "enabled", tokenBudget: reasoning.max_tokens };
42
44
  }
43
45
  else if (reasoning.effort) {
44
46
  // FUTURE: warn that reasoning.max_tokens was computed
45
- target["thinking"] = {
47
+ target.thinking = {
46
48
  type: "enabled",
47
49
  tokenBudget: calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? COHERE_MAX_OUTPUT_TOKENS, 1024),
48
50
  };
49
51
  }
50
52
  else {
51
- target["thinking"] = { type: "enabled" };
53
+ target.thinking = { type: "enabled" };
52
54
  }
53
55
  delete unknown["reasoning"];
54
56
  return params;
@@ -1,7 +1,7 @@
1
1
  import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
2
2
  import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
3
3
  export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
4
- export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): ChatCompletionsReasoningEffort | undefined;
4
+ export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
5
5
  export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
6
6
  export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
7
7
  export declare const geminiReasoningMiddleware: LanguageModelMiddleware;
@@ -11,14 +11,15 @@ export const geminiDimensionsMiddleware = {
11
11
  const dimensions = unknown["dimensions"];
12
12
  if (!dimensions)
13
13
  return params;
14
- (params.providerOptions["google"] ??= {})["outputDimensionality"] = dimensions;
14
+ const target = (params.providerOptions["google"] ??= {});
15
+ target.outputDimensionality = dimensions;
15
16
  delete unknown["dimensions"];
16
17
  return params;
17
18
  },
18
19
  };
19
20
  // https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
20
21
  export function mapGeminiReasoningEffort(effort, modelId) {
21
- if (modelId.includes("gemini-3.1-pro")) {
22
+ if (modelId.includes("pro")) {
22
23
  switch (effort) {
23
24
  case "none":
24
25
  case "minimal":
@@ -28,26 +29,22 @@ export function mapGeminiReasoningEffort(effort, modelId) {
28
29
  return "medium";
29
30
  case "high":
30
31
  case "xhigh":
31
- case "max":
32
32
  return "high";
33
33
  }
34
34
  }
35
- if (modelId.includes("gemini-3-flash") || modelId.includes("gemini-3.1-flash")) {
36
- switch (effort) {
37
- case "none":
38
- case "minimal":
39
- return "minimal";
40
- case "low":
41
- return "low";
42
- case "medium":
43
- return "medium";
44
- case "high":
45
- case "xhigh":
46
- case "max":
47
- return "high";
48
- }
35
+ // Flash
36
+ switch (effort) {
37
+ case "none":
38
+ case "minimal":
39
+ return "minimal";
40
+ case "low":
41
+ return "low";
42
+ case "medium":
43
+ return "medium";
44
+ case "high":
45
+ case "xhigh":
46
+ return "high";
49
47
  }
50
- return effort;
51
48
  }
52
49
  export const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
53
50
  export const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
@@ -58,6 +55,9 @@ export const geminiReasoningMiddleware = {
58
55
  const unknown = params.providerOptions?.["unknown"];
59
56
  if (!unknown)
60
57
  return params;
58
+ // If thinking options exist, just pass through
59
+ if (unknown["thinking_config"])
60
+ return params;
61
61
  const reasoning = unknown["reasoning"];
62
62
  if (!reasoning)
63
63
  return params;
@@ -65,19 +65,19 @@ export const geminiReasoningMiddleware = {
65
65
  const modelId = model.modelId;
66
66
  if (modelId.includes("gemini-2")) {
67
67
  const is25Pro = modelId.includes("gemini-2.5-pro");
68
- target["thinkingConfig"] = {
68
+ target.thinkingConfig = {
69
69
  thinkingBudget: reasoning.max_tokens ??
70
70
  calculateReasoningBudgetFromEffort(reasoning.effort ?? "none", params.maxOutputTokens ?? GEMINI_DEFAULT_MAX_OUTPUT_TOKENS, is25Pro ? GEMINI_2_5_PRO_MIN_THINKING_BUDGET : 0),
71
71
  };
72
72
  }
73
73
  else if (modelId.includes("gemini-3") && reasoning.effort) {
74
- target["thinkingConfig"] = {
74
+ target.thinkingConfig = {
75
75
  thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
76
76
  };
77
77
  // FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
78
78
  }
79
- (target["thinkingConfig"] ??= {})["includeThoughts"] =
80
- reasoning.enabled ? !reasoning.exclude : false;
79
+ const thinkingConfig = (target.thinkingConfig ??= {});
80
+ thinkingConfig.includeThoughts = reasoning.enabled ? !reasoning.exclude : false;
81
81
  delete unknown["reasoning"];
82
82
  return params;
83
83
  },
@@ -91,9 +91,13 @@ export const geminiPromptCachingMiddleware = {
91
91
  const unknown = params.providerOptions?.["unknown"];
92
92
  if (!unknown)
93
93
  return params;
94
- const cachedContent = unknown["cached_content"];
95
- if (cachedContent) {
96
- (params.providerOptions["google"] ??= {})["cachedContent"] = cachedContent;
94
+ // If cached_content options exist, just pass through
95
+ if (unknown["cached_content"])
96
+ return params;
97
+ const promptCacheKey = unknown["prompt_cache_key"];
98
+ if (promptCacheKey) {
99
+ (params.providerOptions["google"] ??= {}).cachedContent =
100
+ promptCacheKey;
97
101
  }
98
102
  delete unknown["cached_content"];
99
103
  return params;
@@ -10,7 +10,8 @@ export const openAIDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["openai"] ??= {})["dimensions"] = dimensions;
13
+ const target = (params.providerOptions["openai"] ??= {});
14
+ target.dimensions = dimensions;
14
15
  delete unknown["dimensions"];
15
16
  return params;
16
17
  },
@@ -27,7 +28,6 @@ function mapGptOssReasoningEffort(effort) {
27
28
  return "medium";
28
29
  case "high":
29
30
  case "xhigh":
30
- case "max":
31
31
  return "high";
32
32
  }
33
33
  }
@@ -45,13 +45,13 @@ export const openAIReasoningMiddleware = {
45
45
  const isGptOss = model.modelId.includes("gpt-oss");
46
46
  if (isGptOss) {
47
47
  // FUTURE: warn that unable to disable reasoning for gpt-oss models
48
- target["reasoningEffort"] = mapGptOssReasoningEffort(reasoning.effort);
48
+ target.reasoningEffort = mapGptOssReasoningEffort(reasoning.effort);
49
49
  }
50
50
  else if (reasoning.enabled === false) {
51
- target["reasoningEffort"] = "none";
51
+ target.reasoningEffort = "none";
52
52
  }
53
53
  else if (reasoning.effort) {
54
- target["reasoningEffort"] = reasoning.effort;
54
+ target.reasoningEffort = reasoning.effort;
55
55
  }
56
56
  // FUTURE: warn that reasoning.max_tokens (not supported) was ignored
57
57
  delete unknown["reasoning"];
@@ -71,9 +71,9 @@ export const openAIPromptCachingMiddleware = {
71
71
  if (key || retention) {
72
72
  const target = (params.providerOptions["openai"] ??= {});
73
73
  if (key)
74
- target["promptCacheKey"] = key;
74
+ target.promptCacheKey = key;
75
75
  if (retention)
76
- target["promptCacheRetention"] = retention;
76
+ target.promptCacheRetention = retention;
77
77
  }
78
78
  delete unknown["prompt_cache_key"];
79
79
  delete unknown["prompt_cache_retention"];
@@ -10,7 +10,8 @@ export const voyageDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["voyage"] ??= {})["outputDimension"] = dimensions;
13
+ const target = (params.providerOptions["voyage"] ??= {});
14
+ target.outputDimension = dimensions;
14
15
  delete unknown["dimensions"];
15
16
  return params;
16
17
  },
@@ -7,14 +7,15 @@ export const bedrockGptReasoningMiddleware = {
7
7
  if (!model.modelId.includes("gpt"))
8
8
  return params;
9
9
  const bedrock = params.providerOptions?.["bedrock"];
10
- if (!bedrock || typeof bedrock !== "object")
10
+ if (!bedrock)
11
11
  return params;
12
- const effort = bedrock["reasoningEffort"];
12
+ const effort = bedrock.reasoningEffort;
13
13
  if (effort === undefined)
14
14
  return params;
15
- const target = (bedrock["reasoningConfig"] ??= {});
16
- target["maxReasoningEffort"] = effort;
17
- delete bedrock["reasoningEffort"];
15
+ const target = (bedrock.reasoningConfig ??= {});
16
+ // @ts-expect-error AI SDK does accept this
17
+ target.maxReasoningEffort = effort;
18
+ delete bedrock.reasoningEffort;
18
19
  return params;
19
20
  },
20
21
  };
@@ -25,28 +26,25 @@ export const bedrockClaudeReasoningMiddleware = {
25
26
  if (!model.modelId.includes("claude"))
26
27
  return params;
27
28
  const bedrock = params.providerOptions?.["bedrock"];
28
- if (!bedrock || typeof bedrock !== "object")
29
+ if (!bedrock)
29
30
  return params;
30
- const thinking = bedrock["thinking"];
31
- const effort = bedrock["effort"];
31
+ const thinking = bedrock.thinking;
32
+ const effort = bedrock.effort;
32
33
  if (!thinking && effort === undefined)
33
34
  return params;
34
- const target = (bedrock["reasoningConfig"] ??= {});
35
+ const target = (bedrock.reasoningConfig ??= {});
35
36
  if (thinking && typeof thinking === "object") {
36
- const thinkingOptions = thinking;
37
- if (thinkingOptions["type"] !== undefined) {
38
- target["type"] = thinkingOptions["type"];
39
- }
40
- if (thinkingOptions["budgetTokens"] !== undefined) {
41
- target["budgetTokens"] = thinkingOptions["budgetTokens"];
37
+ target.type = thinking.type;
38
+ if ("budgetTokens" in thinking && thinking.budgetTokens !== undefined) {
39
+ target.budgetTokens = thinking.budgetTokens;
42
40
  }
43
41
  }
44
42
  // FUTURE: bedrock currently does not support "effort" for other 4.x models
45
43
  if (effort !== undefined && isClaude46(model.modelId)) {
46
- target["maxReasoningEffort"] = effort;
44
+ target.maxReasoningEffort = effort;
47
45
  }
48
- delete bedrock["thinking"];
49
- delete bedrock["effort"];
46
+ delete bedrock.thinking;
47
+ delete bedrock.effort;
50
48
  return params;
51
49
  },
52
50
  };
@@ -79,18 +77,18 @@ export const bedrockPromptCachingMiddleware = {
79
77
  delete entryBedrock["cacheControl"];
80
78
  };
81
79
  for (const message of params.prompt) {
82
- processCacheControl(message["providerOptions"]);
83
- if (!Array.isArray(message["content"]))
80
+ processCacheControl(message.providerOptions);
81
+ if (!Array.isArray(message.content))
84
82
  continue;
85
- for (const part of message["content"]) {
86
- processCacheControl(part["providerOptions"]);
83
+ for (const part of message.content) {
84
+ processCacheControl(part.providerOptions);
87
85
  }
88
86
  lastCacheableBlock = message;
89
87
  }
90
88
  const bedrock = params.providerOptions?.["bedrock"];
91
89
  const cacheControl = bedrock?.["cacheControl"];
92
90
  if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
93
- ((lastCacheableBlock["providerOptions"] ??= {})["bedrock"] ??= {})["cachePoint"] =
91
+ ((lastCacheableBlock.providerOptions ??= {})["bedrock"] ??= {})["cachePoint"] =
94
92
  toBedrockCachePoint(model.modelId, cacheControl);
95
93
  }
96
94
  delete bedrock?.["cacheControl"];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.6.2-rc1",
3
+ "version": "0.6.2",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
5
5
  "keywords": [
6
6
  "ai",
@@ -179,6 +179,7 @@
179
179
  "@ai-sdk/amazon-bedrock": "^4.0.77",
180
180
  "@ai-sdk/anthropic": "^3.0.58",
181
181
  "@ai-sdk/cohere": "^3.0.25",
182
+ "@ai-sdk/google": "^3.0.43",
182
183
  "@ai-sdk/google-vertex": "^4.0.80",
183
184
  "@ai-sdk/groq": "^3.0.29",
184
185
  "@ai-sdk/openai": "^3.0.41",
@@ -196,6 +197,9 @@
196
197
  "@ai-sdk/cohere": {
197
198
  "optional": true
198
199
  },
200
+ "@ai-sdk/google": {
201
+ "optional": true
202
+ },
199
203
  "@ai-sdk/google-vertex": {
200
204
  "optional": true
201
205
  },