@hebo-ai/gateway 0.6.2-rc0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/README.md +3 -3
  2. package/dist/endpoints/chat-completions/converters.js +26 -21
  3. package/dist/endpoints/chat-completions/handler.js +2 -0
  4. package/dist/endpoints/chat-completions/otel.js +1 -1
  5. package/dist/endpoints/chat-completions/schema.d.ts +4 -18
  6. package/dist/endpoints/chat-completions/schema.js +14 -17
  7. package/dist/endpoints/embeddings/handler.js +2 -0
  8. package/dist/endpoints/embeddings/otel.js +5 -0
  9. package/dist/endpoints/embeddings/schema.d.ts +6 -0
  10. package/dist/endpoints/embeddings/schema.js +4 -1
  11. package/dist/endpoints/models/converters.js +3 -3
  12. package/dist/lifecycle.js +2 -2
  13. package/dist/logger/default.js +3 -3
  14. package/dist/logger/index.d.ts +2 -5
  15. package/dist/middleware/common.js +1 -0
  16. package/dist/middleware/utils.js +0 -3
  17. package/dist/models/amazon/middleware.js +8 -5
  18. package/dist/models/anthropic/middleware.js +13 -13
  19. package/dist/models/catalog.js +5 -1
  20. package/dist/models/cohere/middleware.js +7 -5
  21. package/dist/models/google/middleware.d.ts +1 -1
  22. package/dist/models/google/middleware.js +29 -25
  23. package/dist/models/openai/middleware.js +13 -9
  24. package/dist/models/voyage/middleware.js +2 -1
  25. package/dist/providers/bedrock/middleware.js +21 -23
  26. package/dist/providers/registry.js +3 -0
  27. package/dist/telemetry/fetch.js +7 -2
  28. package/dist/telemetry/gen-ai.js +15 -12
  29. package/dist/telemetry/memory.d.ts +1 -1
  30. package/dist/telemetry/memory.js +30 -14
  31. package/dist/telemetry/span.js +1 -1
  32. package/dist/telemetry/stream.js +30 -23
  33. package/dist/utils/env.js +4 -2
  34. package/dist/utils/preset.js +1 -0
  35. package/dist/utils/response.js +3 -1
  36. package/package.json +36 -50
  37. package/src/config.ts +0 -98
  38. package/src/endpoints/chat-completions/converters.test.ts +0 -631
  39. package/src/endpoints/chat-completions/converters.ts +0 -899
  40. package/src/endpoints/chat-completions/handler.test.ts +0 -391
  41. package/src/endpoints/chat-completions/handler.ts +0 -201
  42. package/src/endpoints/chat-completions/index.ts +0 -4
  43. package/src/endpoints/chat-completions/otel.test.ts +0 -315
  44. package/src/endpoints/chat-completions/otel.ts +0 -214
  45. package/src/endpoints/chat-completions/schema.ts +0 -364
  46. package/src/endpoints/embeddings/converters.ts +0 -51
  47. package/src/endpoints/embeddings/handler.test.ts +0 -133
  48. package/src/endpoints/embeddings/handler.ts +0 -137
  49. package/src/endpoints/embeddings/index.ts +0 -4
  50. package/src/endpoints/embeddings/otel.ts +0 -40
  51. package/src/endpoints/embeddings/schema.ts +0 -36
  52. package/src/endpoints/models/converters.ts +0 -56
  53. package/src/endpoints/models/handler.test.ts +0 -122
  54. package/src/endpoints/models/handler.ts +0 -37
  55. package/src/endpoints/models/index.ts +0 -3
  56. package/src/endpoints/models/schema.ts +0 -37
  57. package/src/errors/ai-sdk.ts +0 -99
  58. package/src/errors/gateway.ts +0 -17
  59. package/src/errors/openai.ts +0 -57
  60. package/src/errors/utils.ts +0 -47
  61. package/src/gateway.ts +0 -50
  62. package/src/index.ts +0 -19
  63. package/src/lifecycle.ts +0 -135
  64. package/src/logger/default.ts +0 -105
  65. package/src/logger/index.ts +0 -42
  66. package/src/middleware/common.test.ts +0 -215
  67. package/src/middleware/common.ts +0 -163
  68. package/src/middleware/debug.ts +0 -37
  69. package/src/middleware/matcher.ts +0 -161
  70. package/src/middleware/utils.ts +0 -34
  71. package/src/models/amazon/index.ts +0 -2
  72. package/src/models/amazon/middleware.test.ts +0 -133
  73. package/src/models/amazon/middleware.ts +0 -79
  74. package/src/models/amazon/presets.ts +0 -104
  75. package/src/models/anthropic/index.ts +0 -2
  76. package/src/models/anthropic/middleware.test.ts +0 -643
  77. package/src/models/anthropic/middleware.ts +0 -148
  78. package/src/models/anthropic/presets.ts +0 -191
  79. package/src/models/catalog.ts +0 -13
  80. package/src/models/cohere/index.ts +0 -2
  81. package/src/models/cohere/middleware.test.ts +0 -138
  82. package/src/models/cohere/middleware.ts +0 -76
  83. package/src/models/cohere/presets.ts +0 -186
  84. package/src/models/google/index.ts +0 -2
  85. package/src/models/google/middleware.test.ts +0 -298
  86. package/src/models/google/middleware.ts +0 -137
  87. package/src/models/google/presets.ts +0 -118
  88. package/src/models/meta/index.ts +0 -1
  89. package/src/models/meta/presets.ts +0 -143
  90. package/src/models/openai/index.ts +0 -2
  91. package/src/models/openai/middleware.test.ts +0 -189
  92. package/src/models/openai/middleware.ts +0 -103
  93. package/src/models/openai/presets.ts +0 -280
  94. package/src/models/types.ts +0 -114
  95. package/src/models/voyage/index.ts +0 -2
  96. package/src/models/voyage/middleware.test.ts +0 -28
  97. package/src/models/voyage/middleware.ts +0 -23
  98. package/src/models/voyage/presets.ts +0 -126
  99. package/src/providers/anthropic/canonical.ts +0 -17
  100. package/src/providers/anthropic/index.ts +0 -1
  101. package/src/providers/bedrock/canonical.ts +0 -87
  102. package/src/providers/bedrock/index.ts +0 -2
  103. package/src/providers/bedrock/middleware.test.ts +0 -303
  104. package/src/providers/bedrock/middleware.ts +0 -128
  105. package/src/providers/cohere/canonical.ts +0 -26
  106. package/src/providers/cohere/index.ts +0 -1
  107. package/src/providers/groq/canonical.ts +0 -21
  108. package/src/providers/groq/index.ts +0 -1
  109. package/src/providers/openai/canonical.ts +0 -16
  110. package/src/providers/openai/index.ts +0 -1
  111. package/src/providers/registry.test.ts +0 -44
  112. package/src/providers/registry.ts +0 -165
  113. package/src/providers/types.ts +0 -20
  114. package/src/providers/vertex/canonical.ts +0 -17
  115. package/src/providers/vertex/index.ts +0 -1
  116. package/src/providers/voyage/canonical.ts +0 -16
  117. package/src/providers/voyage/index.ts +0 -1
  118. package/src/telemetry/ai-sdk.ts +0 -46
  119. package/src/telemetry/baggage.ts +0 -27
  120. package/src/telemetry/fetch.ts +0 -62
  121. package/src/telemetry/gen-ai.ts +0 -113
  122. package/src/telemetry/http.ts +0 -62
  123. package/src/telemetry/index.ts +0 -1
  124. package/src/telemetry/memory.ts +0 -36
  125. package/src/telemetry/span.ts +0 -85
  126. package/src/telemetry/stream.ts +0 -64
  127. package/src/types.ts +0 -223
  128. package/src/utils/env.ts +0 -7
  129. package/src/utils/headers.ts +0 -27
  130. package/src/utils/preset.ts +0 -65
  131. package/src/utils/request.test.ts +0 -75
  132. package/src/utils/request.ts +0 -52
  133. package/src/utils/response.ts +0 -84
  134. package/src/utils/url.ts +0 -26
package/README.md CHANGED
@@ -536,14 +536,14 @@ Normalization rules:
536
536
 
537
537
  - `enabled` -> fall-back to model default if none provided
538
538
  - `max_tokens`: fall-back to model default if model supports
539
- - `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`, `max`
539
+ - `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
540
540
  - Generic `effort` -> budget = percentage of `max_tokens`
541
541
  - `none`: 0%
542
542
  - `minimal`: 10%
543
543
  - `low`: 20%
544
544
  - `medium`: 50% (default)
545
545
  - `high`: 80%
546
- - `xhigh` / `max`: 95%
546
+ - `xhigh`: 95%
547
547
 
548
548
  Reasoning output is surfaced as extension to the `completion` object.
549
549
 
@@ -665,7 +665,7 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
665
665
 
666
666
  > [!TIP]
667
667
  > To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
668
- > For `/chat/completions`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
668
+ > For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
669
669
 
670
670
  For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
671
671
 
package/dist/endpoints/chat-completions/converters.js CHANGED
@@ -8,7 +8,12 @@ import { parseDataUrl } from "../../utils/url";
8
8
  export function convertToTextCallOptions(params) {
9
9
  const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
10
10
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
11
- Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, extra_body?.google?.cached_content, cache_control));
11
+ Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control));
12
+ if (extra_body) {
13
+ for (const v of Object.values(extra_body)) {
14
+ Object.assign(rest, v);
15
+ }
16
+ }
12
17
  const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
13
18
  return {
14
19
  messages: convertToModelMessages(messages),
@@ -192,7 +197,7 @@ export function fromChatCompletionsContent(content) {
192
197
  return fromFilePart(part.file.data, part.file.media_type, part.file.filename, part.cache_control);
193
198
  case "input_audio":
194
199
  return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`, undefined, part.cache_control);
195
- default: {
200
+ case "text": {
196
201
  const out = {
197
202
  type: "text",
198
203
  text: part.text,
@@ -204,6 +209,8 @@ export function fromChatCompletionsContent(content) {
204
209
  }
205
210
  return out;
206
211
  }
212
+ default:
213
+ throw new Error(`Unhandled content part type: ${part.type}`);
207
214
  }
208
215
  });
209
216
  }
@@ -305,6 +312,7 @@ function parseToolResult(content) {
305
312
  }
306
313
  function parseJsonOrText(content) {
307
314
  try {
315
+ // oxlint-disable-next-line no-unsafe-assignment
308
316
  return { type: "json", value: JSON.parse(content) };
309
317
  }
310
318
  catch {
@@ -334,29 +342,25 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
334
342
  }
335
343
  return out;
336
344
  }
337
- function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control) {
345
+ function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
338
346
  const out = {};
339
- const syncedCacheKey = prompt_cache_key ?? cached_content;
340
- const syncedCachedContent = cached_content ?? prompt_cache_key;
341
- let syncedCacheRetention = prompt_cache_retention;
342
- if (!syncedCacheRetention && cache_control?.ttl) {
343
- syncedCacheRetention = cache_control.ttl === "24h" ? "24h" : "in_memory";
344
- }
345
- let syncedCacheControl = cache_control;
346
- if (!syncedCacheControl && syncedCacheRetention) {
347
- syncedCacheControl = {
347
+ let retention = prompt_cache_retention;
348
+ if (!retention && cache_control?.ttl) {
349
+ retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
350
+ }
351
+ let control = cache_control;
352
+ if (!control && retention) {
353
+ control = {
348
354
  type: "ephemeral",
349
- ttl: syncedCacheRetention === "24h" ? "24h" : "5m",
355
+ ttl: retention === "24h" ? "24h" : "5m",
350
356
  };
351
357
  }
352
- if (syncedCacheKey)
353
- out["prompt_cache_key"] = syncedCacheKey;
354
- if (syncedCacheRetention)
355
- out["prompt_cache_retention"] = syncedCacheRetention;
356
- if (syncedCachedContent)
357
- out["cached_content"] = syncedCachedContent;
358
- if (syncedCacheControl)
359
- out["cache_control"] = syncedCacheControl;
358
+ if (prompt_cache_key)
359
+ out["prompt_cache_key"] = prompt_cache_key;
360
+ if (retention)
361
+ out["prompt_cache_retention"] = retention;
362
+ if (control)
363
+ out["cache_control"] = control;
360
364
  return out;
361
365
  }
362
366
  // --- Response Flow ---
@@ -414,6 +418,7 @@ export class ChatCompletionsStream extends TransformStream {
414
418
  };
415
419
  super({
416
420
  transform(part, controller) {
421
+ // oxlint-disable-next-line switch-exhaustiveness-check
417
422
  switch (part.type) {
418
423
  case "text-delta": {
419
424
  controller.enqueue(createChunk({ role: "assistant", content: part.text }, part.providerMetadata));
package/dist/endpoints/chat-completions/handler.js CHANGED
@@ -23,6 +23,7 @@ export const chatCompletions = (config) => {
23
23
  }
24
24
  // Parse + validate input.
25
25
  try {
26
+ // oxlint-disable-next-line no-unsafe-assignment
26
27
  ctx.body = await ctx.request.json();
27
28
  }
28
29
  catch {
@@ -66,6 +67,7 @@ export const chatCompletions = (config) => {
66
67
  const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
67
68
  setSpanAttributes(genAiGeneralAttrs);
68
69
  // Convert inputs to AI SDK call options.
70
+ // oxlint-disable-next-line no-unsafe-argument
69
71
  const textOptions = convertToTextCallOptions(inputs);
70
72
  logger.trace({
71
73
  requestId: ctx.requestId,
package/dist/endpoints/chat-completions/otel.js CHANGED
@@ -96,7 +96,7 @@ const toMessageParts = (message) => {
96
96
  case "system":
97
97
  return toTextParts(message.content);
98
98
  default:
99
- return [];
99
+ throw new Error(`Unhandled content part type: ${message.role}`);
100
100
  }
101
101
  };
102
102
  export const getChatRequestAttributes = (inputs, signalLevel) => {
package/dist/endpoints/chat-completions/schema.d.ts CHANGED
@@ -427,7 +427,6 @@ export declare const ChatCompletionsReasoningEffortSchema: z.ZodEnum<{
427
427
  minimal: "minimal";
428
428
  medium: "medium";
429
429
  xhigh: "xhigh";
430
- max: "max";
431
430
  }>;
432
431
  export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
433
432
  export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
@@ -439,7 +438,6 @@ export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
439
438
  minimal: "minimal";
440
439
  medium: "medium";
441
440
  xhigh: "xhigh";
442
- max: "max";
443
441
  }>>;
444
442
  max_tokens: z.ZodOptional<z.ZodNumber>;
445
443
  exclude: z.ZodOptional<z.ZodBoolean>;
@@ -651,18 +649,12 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
651
649
  minimal: "minimal";
652
650
  medium: "medium";
653
651
  xhigh: "xhigh";
654
- max: "max";
655
652
  }>>;
656
653
  prompt_cache_key: z.ZodOptional<z.ZodString>;
657
654
  prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
658
655
  in_memory: "in_memory";
659
656
  "24h": "24h";
660
657
  }>>;
661
- extra_body: z.ZodOptional<z.ZodObject<{
662
- google: z.ZodOptional<z.ZodObject<{
663
- cached_content: z.ZodOptional<z.ZodString>;
664
- }, z.core.$strip>>;
665
- }, z.core.$strip>>;
666
658
  cache_control: z.ZodOptional<z.ZodObject<{
667
659
  type: z.ZodLiteral<"ephemeral">;
668
660
  ttl: z.ZodOptional<z.ZodString>;
@@ -676,11 +668,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
676
668
  minimal: "minimal";
677
669
  medium: "medium";
678
670
  xhigh: "xhigh";
679
- max: "max";
680
671
  }>>;
681
672
  max_tokens: z.ZodOptional<z.ZodNumber>;
682
673
  exclude: z.ZodOptional<z.ZodBoolean>;
683
674
  }, z.core.$strip>>;
675
+ extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
684
676
  }, z.core.$strip>;
685
677
  export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
686
678
  export declare const ChatCompletionsBodySchema: z.ZodObject<{
@@ -863,18 +855,12 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
863
855
  minimal: "minimal";
864
856
  medium: "medium";
865
857
  xhigh: "xhigh";
866
- max: "max";
867
858
  }>>;
868
859
  prompt_cache_key: z.ZodOptional<z.ZodString>;
869
860
  prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
870
861
  in_memory: "in_memory";
871
862
  "24h": "24h";
872
863
  }>>;
873
- extra_body: z.ZodOptional<z.ZodObject<{
874
- google: z.ZodOptional<z.ZodObject<{
875
- cached_content: z.ZodOptional<z.ZodString>;
876
- }, z.core.$strip>>;
877
- }, z.core.$strip>>;
878
864
  cache_control: z.ZodOptional<z.ZodObject<{
879
865
  type: z.ZodLiteral<"ephemeral">;
880
866
  ttl: z.ZodOptional<z.ZodString>;
@@ -888,11 +874,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
888
874
  minimal: "minimal";
889
875
  medium: "medium";
890
876
  xhigh: "xhigh";
891
- max: "max";
892
877
  }>>;
893
878
  max_tokens: z.ZodOptional<z.ZodNumber>;
894
879
  exclude: z.ZodOptional<z.ZodBoolean>;
895
880
  }, z.core.$strip>>;
881
+ extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
896
882
  model: z.ZodString;
897
883
  stream: z.ZodOptional<z.ZodBoolean>;
898
884
  }, z.core.$loose>;
@@ -1029,7 +1015,7 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
1029
1015
  cache_write_tokens: z.ZodOptional<z.ZodInt>;
1030
1016
  }, z.core.$strip>>;
1031
1017
  }, z.core.$strip>>;
1032
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
1018
+ provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1033
1019
  }, z.core.$strip>;
1034
1020
  export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
1035
1021
  export declare const ChatCompletionsToolCallDeltaSchema: z.ZodObject<{
@@ -1196,7 +1182,7 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
1196
1182
  cache_write_tokens: z.ZodOptional<z.ZodInt>;
1197
1183
  }, z.core.$strip>>;
1198
1184
  }, z.core.$strip>>;
1199
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
1185
+ provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1200
1186
  }, z.core.$strip>;
1201
1187
  export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
1202
1188
  export {};
package/dist/endpoints/chat-completions/schema.js CHANGED
@@ -161,14 +161,11 @@ export const ChatCompletionsToolChoiceSchema = z.union([
161
161
  ]);
162
162
  export const ChatCompletionsReasoningEffortSchema = z.enum([
163
163
  "none",
164
- // Extension origin: Gemini
165
164
  "minimal",
166
165
  "low",
167
166
  "medium",
168
167
  "high",
169
168
  "xhigh",
170
- // Extension origin: Anthropic
171
- "max",
172
169
  ]);
173
170
  export const ChatCompletionsReasoningConfigSchema = z.object({
174
171
  enabled: z.optional(z.boolean()),
@@ -212,22 +209,16 @@ const ChatCompletionsInputsSchema = z.object({
212
209
  reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
213
210
  prompt_cache_key: z.string().optional(),
214
211
  prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
215
- // Extension origin: Gemini explicit cache handle
216
- // FUTURE: generalize extra_body handling
217
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
218
- extra_body: z
219
- .object({
220
- google: z
221
- .object({
222
- cached_content: z.string().optional().meta({ extension: true }),
223
- })
224
- .optional(),
225
- })
226
- .optional(),
227
212
  // Extension origin: OpenRouter/Vercel/Anthropic
228
213
  cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
229
214
  // Extension origin: OpenRouter
230
215
  reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
216
+ // Extension origin: Gemini extra_body
217
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
218
+ extra_body: z
219
+ .record(z.string(), z.record(z.string(), z.unknown()))
220
+ .optional()
221
+ .meta({ extension: true }),
231
222
  });
232
223
  export const ChatCompletionsBodySchema = z.looseObject({
233
224
  model: z.string(),
@@ -274,7 +265,10 @@ export const ChatCompletionsSchema = z.object({
274
265
  choices: z.array(ChatCompletionsChoiceSchema),
275
266
  usage: ChatCompletionsUsageSchema.nullable(),
276
267
  // Extension origin: Vercel AI Gateway
277
- provider_metadata: z.unknown().optional().meta({ extension: true }),
268
+ provider_metadata: z
269
+ .record(z.string(), z.record(z.string(), z.unknown()))
270
+ .optional()
271
+ .meta({ extension: true }),
278
272
  });
279
273
  export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
280
274
  index: z.int().nonnegative(),
@@ -297,5 +291,8 @@ export const ChatCompletionsChunkSchema = z.object({
297
291
  choices: z.array(ChatCompletionsChoiceDeltaSchema),
298
292
  usage: ChatCompletionsUsageSchema.nullable(),
299
293
  // Extension origin: Vercel AI Gateway
300
- provider_metadata: z.unknown().optional().meta({ extension: true }),
294
+ provider_metadata: z
295
+ .record(z.string(), z.record(z.string(), z.unknown()))
296
+ .optional()
297
+ .meta({ extension: true }),
301
298
  });
package/dist/endpoints/embeddings/handler.js CHANGED
@@ -23,6 +23,7 @@ export const embeddings = (config) => {
23
23
  }
24
24
  // Parse + validate input.
25
25
  try {
26
+ // oxlint-disable-next-line no-unsafe-assignment
26
27
  ctx.body = await ctx.request.json();
27
28
  }
28
29
  catch {
@@ -65,6 +66,7 @@ export const embeddings = (config) => {
65
66
  const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
66
67
  setSpanAttributes(genAiGeneralAttrs);
67
68
  // Convert inputs to AI SDK call options.
69
+ // oxlint-disable-next-line no-unsafe-argument
68
70
  const embedOptions = convertToEmbedCallOptions(inputs);
69
71
  logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
70
72
  addSpanEvent("hebo.options.prepared");
package/dist/endpoints/embeddings/otel.js CHANGED
@@ -7,6 +7,11 @@ export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
7
7
  Object.assign(attrs, {
8
8
  "gen_ai.embeddings.dimension.count": inputs.dimensions,
9
9
  });
10
+ if (inputs.metadata) {
11
+ for (const key in inputs.metadata) {
12
+ attrs[`gen_ai.request.metadata.${key}`] = inputs.metadata[key];
13
+ }
14
+ }
10
15
  }
11
16
  return attrs;
12
17
  };
package/dist/endpoints/embeddings/schema.d.ts CHANGED
@@ -1,12 +1,18 @@
1
1
  import * as z from "zod";
2
+ export declare const EmbeddingsDimensionsSchema: z.ZodInt;
3
+ export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
4
+ export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
5
+ export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
2
6
  export declare const EmbeddingsInputsSchema: z.ZodObject<{
3
7
  input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
4
8
  dimensions: z.ZodOptional<z.ZodInt>;
9
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
5
10
  }, z.core.$strip>;
6
11
  export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
7
12
  export declare const EmbeddingsBodySchema: z.ZodObject<{
8
13
  input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
9
14
  dimensions: z.ZodOptional<z.ZodInt>;
15
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
10
16
  model: z.ZodString;
11
17
  }, z.core.$loose>;
12
18
  export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
package/dist/endpoints/embeddings/schema.js CHANGED
@@ -1,7 +1,10 @@
1
1
  import * as z from "zod";
2
+ export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
3
+ export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
2
4
  export const EmbeddingsInputsSchema = z.object({
3
5
  input: z.union([z.string(), z.array(z.string())]),
4
- dimensions: z.int().nonnegative().max(65536).optional(),
6
+ dimensions: EmbeddingsDimensionsSchema.optional(),
7
+ metadata: EmbeddingsMetadataSchema.optional(),
5
8
  });
6
9
  export const EmbeddingsBodySchema = z.looseObject({
7
10
  model: z.string(),
package/dist/endpoints/models/converters.js CHANGED
@@ -12,13 +12,13 @@ export function toModel(id, catalogModel) {
12
12
  id,
13
13
  object: "model",
14
14
  created: createdTimestamp,
15
- owned_by: id.split("/")[0] || "system",
15
+ owned_by: id.split("/")[0] ?? "system",
16
16
  architecture: {
17
- input_modalities: modalities?.input || [],
17
+ input_modalities: modalities?.input ?? [],
18
18
  modality: modalities?.input &&
19
19
  modalities?.output &&
20
20
  `${modalities.input?.[0]}->${modalities.output?.[0]}`,
21
- output_modalities: modalities?.output || [],
21
+ output_modalities: modalities?.output ?? [],
22
22
  },
23
23
  endpoints: providers?.map((provider) => ({
24
24
  tag: provider,
package/dist/lifecycle.js CHANGED
@@ -6,7 +6,7 @@ import { getBaggageAttributes } from "./telemetry/baggage";
6
6
  import { instrumentFetch } from "./telemetry/fetch";
7
7
  import { recordRequestDuration } from "./telemetry/gen-ai";
8
8
  import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
9
- import { recordV8jsMemory } from "./telemetry/memory";
9
+ import { observeV8jsMemoryMetrics } from "./telemetry/memory";
10
10
  import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
11
11
  import { wrapStream } from "./telemetry/stream";
12
12
  import { resolveOrCreateRequestId } from "./utils/request";
@@ -17,6 +17,7 @@ export const winterCgHandler = (run, config) => {
17
17
  setSpanTracer(parsedConfig.telemetry?.tracer);
18
18
  setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
19
19
  instrumentFetch(parsedConfig.telemetry?.signals?.hebo);
20
+ observeV8jsMemoryMetrics(parsedConfig.telemetry?.signals?.hebo);
20
21
  }
21
22
  return async (request, state) => {
22
23
  const start = performance.now();
@@ -57,7 +58,6 @@ export const winterCgHandler = (run, config) => {
57
58
  if (ctx.operation === "chat" || ctx.operation === "embeddings") {
58
59
  recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
59
60
  }
60
- recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
61
61
  span.finish();
62
62
  };
63
63
  try {
package/dist/logger/default.js CHANGED
@@ -37,8 +37,6 @@ function serializeError(err, _seen) {
37
37
  return out;
38
38
  }
39
39
  const buildLogObject = (level, args) => {
40
- if (args.length === 0)
41
- return {};
42
40
  const [first, second] = args;
43
41
  let obj;
44
42
  let err;
@@ -70,7 +68,9 @@ const buildLogObject = (level, args) => {
70
68
  ...obj,
71
69
  };
72
70
  };
73
- const makeLogFn = (level, write) => (...args) => write(JSON.stringify(buildLogObject(level, args)));
71
+ const makeLogFn = (level, write) => (...args) => {
72
+ write(JSON.stringify(buildLogObject(level, args)));
73
+ };
74
74
  export const createDefaultLogger = (config) => {
75
75
  if (config.level === "silent" || getDefaultLogLevel() === "silent") {
76
76
  return { trace: noop, debug: noop, info: noop, warn: noop, error: noop };
package/dist/logger/index.d.ts CHANGED
@@ -1,8 +1,5 @@
1
- export type LogFn = {
2
- (msg: string): void;
3
- (obj: Record<string, unknown>, msg?: string): void;
4
- (err: Error, msg?: string): void;
5
- };
1
+ export type LogArgs = [msg: string] | [obj: Record<string, unknown>, msg?: string] | [err: Error, msg?: string];
2
+ export type LogFn = (...args: LogArgs) => void;
6
3
  export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
7
4
  export type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
8
5
  export type LoggerConfig = {
package/dist/middleware/common.js CHANGED
@@ -1,4 +1,5 @@
1
1
  function snakeToCamel(key) {
2
+ // oxlint-disable-next-line prefer-includes
2
3
  if (key.indexOf("_") === -1)
3
4
  return key;
4
5
  let out = "";
package/dist/middleware/utils.js CHANGED
@@ -17,11 +17,8 @@ export function calculateReasoningBudgetFromEffort(effort, maxTokens, minTokens
17
17
  percentage = 0.8;
18
18
  break;
19
19
  case "xhigh":
20
- case "max":
21
20
  percentage = 0.95;
22
21
  break;
23
- default:
24
- return 0;
25
22
  }
26
23
  return Math.max(minTokens, Math.floor(maxTokens * percentage));
27
24
  }
package/dist/models/amazon/middleware.js CHANGED
@@ -10,13 +10,17 @@ export const novaDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["nova"] ??= {})["embeddingDimension"] = dimensions;
13
+ const target = (params.providerOptions["nova"] ??= {});
14
+ // @ts-expect-error AI SDK does the value checking for us
15
+ target.embeddingDimension = dimensions;
14
16
  delete unknown["dimensions"];
15
17
  return params;
16
18
  },
17
19
  };
18
20
  function mapNovaEffort(effort) {
19
21
  switch (effort) {
22
+ case "none":
23
+ return;
20
24
  case "minimal":
21
25
  case "low":
22
26
  return "low";
@@ -24,7 +28,6 @@ function mapNovaEffort(effort) {
24
28
  return "medium";
25
29
  case "high":
26
30
  case "xhigh":
27
- case "max":
28
31
  return "high";
29
32
  }
30
33
  }
@@ -40,18 +43,18 @@ export const novaReasoningMiddleware = {
40
43
  return params;
41
44
  const target = (params.providerOptions["amazon"] ??= {});
42
45
  if (!reasoning.enabled) {
43
- target["reasoningConfig"] = { type: "disabled" };
46
+ target.reasoningConfig = { type: "disabled" };
44
47
  }
45
48
  else if (reasoning.effort) {
46
49
  // FUTURE: warn if mapNovaEffort modified the effort
47
- target["reasoningConfig"] = {
50
+ target.reasoningConfig = {
48
51
  type: "enabled",
49
52
  maxReasoningEffort: mapNovaEffort(reasoning.effort),
50
53
  };
51
54
  }
52
55
  else {
53
56
  // FUTURE: warn if reasoning.max_tokens (unsupported) was ignored
54
- target["reasoningConfig"] = { type: "enabled" };
57
+ target.reasoningConfig = { type: "enabled" };
55
58
  }
56
59
  delete unknown["reasoning"];
57
60
  return params;
package/dist/models/anthropic/middleware.js CHANGED
@@ -22,7 +22,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
22
22
  case "high":
23
23
  return "high";
24
24
  case "xhigh":
25
- case "max":
26
25
  return "max";
27
26
  }
28
27
  }
@@ -35,7 +34,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
35
34
  return "medium";
36
35
  case "high":
37
36
  case "xhigh":
38
- case "max":
39
37
  return "high";
40
38
  }
41
39
  }
@@ -66,41 +64,42 @@ export const claudeReasoningMiddleware = {
66
64
  const modelId = model.modelId;
67
65
  const clampedMaxTokens = reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
68
66
  if (!reasoning.enabled) {
69
- target["thinking"] = { type: "disabled" };
67
+ target.thinking = { type: "disabled" };
70
68
  }
71
69
  else if (reasoning.effort) {
72
70
  if (isClaude4(modelId)) {
73
- target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
71
+ target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
74
72
  }
75
73
  if (isOpus46(modelId)) {
76
- target["thinking"] = clampedMaxTokens
77
- ? { type: "adaptive", budgetTokens: clampedMaxTokens }
74
+ target.thinking = clampedMaxTokens
75
+ ? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
76
+ { type: "adaptive", budgetTokens: clampedMaxTokens }
78
77
  : { type: "adaptive" };
79
78
  }
80
79
  else if (isSonnet46(modelId)) {
81
- target["thinking"] = clampedMaxTokens
80
+ target.thinking = clampedMaxTokens
82
81
  ? { type: "enabled", budgetTokens: clampedMaxTokens }
83
82
  : { type: "adaptive" };
84
83
  }
85
84
  else {
86
- target["thinking"] = { type: "enabled" };
85
+ target.thinking = { type: "enabled" };
87
86
  if (clampedMaxTokens) {
88
- target["thinking"]["budgetTokens"] = clampedMaxTokens;
87
+ target.thinking.budgetTokens = clampedMaxTokens;
89
88
  }
90
89
  else {
91
90
  // FUTURE: warn that reasoning.max_tokens was computed
92
- target["thinking"]["budgetTokens"] = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
91
+ target.thinking.budgetTokens = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
93
92
  }
94
93
  }
95
94
  }
96
95
  else if (clampedMaxTokens) {
97
- target["thinking"] = {
96
+ target.thinking = {
98
97
  type: "enabled",
99
98
  budgetTokens: clampedMaxTokens,
100
99
  };
101
100
  }
102
101
  else {
103
- target["thinking"] = { type: "enabled" };
102
+ target.thinking = { type: "enabled" };
104
103
  }
105
104
  delete unknown["reasoning"];
106
105
  return params;
@@ -116,7 +115,8 @@ export const claudePromptCachingMiddleware = {
116
115
  return params;
117
116
  const cacheControl = unknown["cache_control"];
118
117
  if (cacheControl) {
119
- (params.providerOptions["anthropic"] ??= {})["cacheControl"] = cacheControl;
118
+ (params.providerOptions["anthropic"] ??= {}).cacheControl =
119
+ cacheControl;
120
120
  }
121
121
  delete unknown["cache_control"];
122
122
  return params;
package/dist/models/catalog.js CHANGED
@@ -1,4 +1,8 @@
1
1
  export function defineModelCatalog(...inputs) {
2
2
  const catalogs = inputs.flat().map((input) => (typeof input === "function" ? input() : input));
3
- return Object.assign({}, ...catalogs);
3
+ const out = {};
4
+ for (const catalog of catalogs) {
5
+ Object.assign(out, catalog);
6
+ }
7
+ return out;
4
8
  }
package/dist/models/cohere/middleware.js CHANGED
@@ -17,7 +17,9 @@ export const cohereDimensionsMiddleware = {
17
17
  const dimensions = unknown["dimensions"];
18
18
  if (!dimensions)
19
19
  return params;
20
- (params.providerOptions["cohere"] ??= {})["outputDimension"] = dimensions;
20
+ const target = (params.providerOptions["cohere"] ??= {});
21
+ // @ts-expect-error AI SDK does the value checking for us
22
+ target.outputDimension = dimensions;
21
23
  delete unknown["dimensions"];
22
24
  return params;
23
25
  },
@@ -35,20 +37,20 @@ export const cohereReasoningMiddleware = {
35
37
  return params;
36
38
  const target = (params.providerOptions["cohere"] ??= {});
37
39
  if (!reasoning.enabled) {
38
- target["thinking"] = { type: "disabled" };
40
+ target.thinking = { type: "disabled" };
39
41
  }
40
42
  else if (reasoning.max_tokens) {
41
- target["thinking"] = { type: "enabled", tokenBudget: reasoning.max_tokens };
43
+ target.thinking = { type: "enabled", tokenBudget: reasoning.max_tokens };
42
44
  }
43
45
  else if (reasoning.effort) {
44
46
  // FUTURE: warn that reasoning.max_tokens was computed
45
- target["thinking"] = {
47
+ target.thinking = {
46
48
  type: "enabled",
47
49
  tokenBudget: calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? COHERE_MAX_OUTPUT_TOKENS, 1024),
48
50
  };
49
51
  }
50
52
  else {
51
- target["thinking"] = { type: "enabled" };
53
+ target.thinking = { type: "enabled" };
52
54
  }
53
55
  delete unknown["reasoning"];
54
56
  return params;
package/dist/models/google/middleware.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
2
2
  import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
3
3
  export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
4
- export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): ChatCompletionsReasoningEffort | undefined;
4
+ export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
5
5
  export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
6
6
  export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
7
7
  export declare const geminiReasoningMiddleware: LanguageModelMiddleware;