@hebo-ai/gateway 0.6.2-rc1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +58 -8
  2. package/dist/config.js +28 -1
  3. package/dist/endpoints/chat-completions/converters.d.ts +5 -5
  4. package/dist/endpoints/chat-completions/converters.js +86 -49
  5. package/dist/endpoints/chat-completions/handler.js +4 -4
  6. package/dist/endpoints/chat-completions/otel.d.ts +1 -1
  7. package/dist/endpoints/chat-completions/otel.js +20 -18
  8. package/dist/endpoints/chat-completions/schema.d.ts +47 -23
  9. package/dist/endpoints/chat-completions/schema.js +24 -17
  10. package/dist/endpoints/embeddings/handler.js +2 -2
  11. package/dist/endpoints/embeddings/otel.d.ts +2 -2
  12. package/dist/endpoints/embeddings/otel.js +7 -2
  13. package/dist/endpoints/embeddings/schema.d.ts +6 -0
  14. package/dist/endpoints/embeddings/schema.js +4 -1
  15. package/dist/endpoints/models/handler.js +2 -2
  16. package/dist/errors/openai.d.ts +1 -6
  17. package/dist/lifecycle.d.ts +3 -2
  18. package/dist/lifecycle.js +4 -6
  19. package/dist/middleware/utils.js +0 -1
  20. package/dist/models/amazon/middleware.js +6 -5
  21. package/dist/models/anthropic/middleware.js +13 -13
  22. package/dist/models/cohere/middleware.js +7 -5
  23. package/dist/models/google/middleware.d.ts +1 -1
  24. package/dist/models/google/middleware.js +29 -25
  25. package/dist/models/google/presets.d.ts +28 -0
  26. package/dist/models/google/presets.js +7 -1
  27. package/dist/models/openai/middleware.js +7 -7
  28. package/dist/models/types.d.ts +1 -1
  29. package/dist/models/types.js +1 -0
  30. package/dist/models/voyage/middleware.js +2 -1
  31. package/dist/providers/bedrock/middleware.d.ts +1 -0
  32. package/dist/providers/bedrock/middleware.js +54 -23
  33. package/dist/providers/groq/index.d.ts +1 -0
  34. package/dist/providers/groq/index.js +1 -0
  35. package/dist/providers/groq/middleware.d.ts +2 -0
  36. package/dist/providers/groq/middleware.js +31 -0
  37. package/dist/providers/vertex/index.d.ts +1 -0
  38. package/dist/providers/vertex/index.js +1 -0
  39. package/dist/providers/vertex/middleware.d.ts +2 -0
  40. package/dist/providers/vertex/middleware.js +47 -0
  41. package/dist/types.d.ts +25 -4
  42. package/dist/types.js +1 -0
  43. package/dist/utils/response.d.ts +4 -1
  44. package/dist/utils/response.js +5 -20
  45. package/dist/utils/stream.d.ts +9 -0
  46. package/dist/utils/stream.js +100 -0
  47. package/package.json +5 -1
  48. package/dist/telemetry/stream.d.ts +0 -3
  49. package/dist/telemetry/stream.js +0 -58
@@ -1,4 +1,5 @@
1
1
  import * as z from "zod";
2
+ import type { SseErrorFrame, SseFrame } from "../../utils/stream";
2
3
  export declare const ChatCompletionsCacheControlSchema: z.ZodObject<{
3
4
  type: z.ZodLiteral<"ephemeral">;
4
5
  ttl: z.ZodOptional<z.ZodString>;
@@ -427,7 +428,6 @@ export declare const ChatCompletionsReasoningEffortSchema: z.ZodEnum<{
427
428
  minimal: "minimal";
428
429
  medium: "medium";
429
430
  xhigh: "xhigh";
430
- max: "max";
431
431
  }>;
432
432
  export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
433
433
  export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
@@ -439,7 +439,6 @@ export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
439
439
  minimal: "minimal";
440
440
  medium: "medium";
441
441
  xhigh: "xhigh";
442
- max: "max";
443
442
  }>>;
444
443
  max_tokens: z.ZodOptional<z.ZodNumber>;
445
444
  exclude: z.ZodOptional<z.ZodBoolean>;
@@ -471,6 +470,14 @@ export declare const ChatCompletionsResponseFormatSchema: z.ZodDiscriminatedUnio
471
470
  export type ChatCompletionsResponseFormat = z.infer<typeof ChatCompletionsResponseFormatSchema>;
472
471
  export declare const ChatCompletionsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
473
472
  export type ChatCompletionsMetadata = z.infer<typeof ChatCompletionsMetadataSchema>;
473
+ export declare const ChatCompletionsServiceTierSchema: z.ZodEnum<{
474
+ auto: "auto";
475
+ default: "default";
476
+ flex: "flex";
477
+ scale: "scale";
478
+ priority: "priority";
479
+ }>;
480
+ export type ChatCompletionsServiceTier = z.infer<typeof ChatCompletionsServiceTierSchema>;
474
481
  declare const ChatCompletionsInputsSchema: z.ZodObject<{
475
482
  messages: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
476
483
  role: z.ZodLiteral<"system">;
@@ -651,18 +658,19 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
651
658
  minimal: "minimal";
652
659
  medium: "medium";
653
660
  xhigh: "xhigh";
654
- max: "max";
661
+ }>>;
662
+ service_tier: z.ZodOptional<z.ZodEnum<{
663
+ auto: "auto";
664
+ default: "default";
665
+ flex: "flex";
666
+ scale: "scale";
667
+ priority: "priority";
655
668
  }>>;
656
669
  prompt_cache_key: z.ZodOptional<z.ZodString>;
657
670
  prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
658
671
  in_memory: "in_memory";
659
672
  "24h": "24h";
660
673
  }>>;
661
- extra_body: z.ZodOptional<z.ZodObject<{
662
- google: z.ZodOptional<z.ZodObject<{
663
- cached_content: z.ZodOptional<z.ZodString>;
664
- }, z.core.$strip>>;
665
- }, z.core.$strip>>;
666
674
  cache_control: z.ZodOptional<z.ZodObject<{
667
675
  type: z.ZodLiteral<"ephemeral">;
668
676
  ttl: z.ZodOptional<z.ZodString>;
@@ -676,11 +684,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
676
684
  minimal: "minimal";
677
685
  medium: "medium";
678
686
  xhigh: "xhigh";
679
- max: "max";
680
687
  }>>;
681
688
  max_tokens: z.ZodOptional<z.ZodNumber>;
682
689
  exclude: z.ZodOptional<z.ZodBoolean>;
683
690
  }, z.core.$strip>>;
691
+ extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
684
692
  }, z.core.$strip>;
685
693
  export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
686
694
  export declare const ChatCompletionsBodySchema: z.ZodObject<{
@@ -863,18 +871,19 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
863
871
  minimal: "minimal";
864
872
  medium: "medium";
865
873
  xhigh: "xhigh";
866
- max: "max";
874
+ }>>;
875
+ service_tier: z.ZodOptional<z.ZodEnum<{
876
+ auto: "auto";
877
+ default: "default";
878
+ flex: "flex";
879
+ scale: "scale";
880
+ priority: "priority";
867
881
  }>>;
868
882
  prompt_cache_key: z.ZodOptional<z.ZodString>;
869
883
  prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
870
884
  in_memory: "in_memory";
871
885
  "24h": "24h";
872
886
  }>>;
873
- extra_body: z.ZodOptional<z.ZodObject<{
874
- google: z.ZodOptional<z.ZodObject<{
875
- cached_content: z.ZodOptional<z.ZodString>;
876
- }, z.core.$strip>>;
877
- }, z.core.$strip>>;
878
887
  cache_control: z.ZodOptional<z.ZodObject<{
879
888
  type: z.ZodLiteral<"ephemeral">;
880
889
  ttl: z.ZodOptional<z.ZodString>;
@@ -888,19 +897,19 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
888
897
  minimal: "minimal";
889
898
  medium: "medium";
890
899
  xhigh: "xhigh";
891
- max: "max";
892
900
  }>>;
893
901
  max_tokens: z.ZodOptional<z.ZodNumber>;
894
902
  exclude: z.ZodOptional<z.ZodBoolean>;
895
903
  }, z.core.$strip>>;
904
+ extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
896
905
  model: z.ZodString;
897
906
  stream: z.ZodOptional<z.ZodBoolean>;
898
907
  }, z.core.$loose>;
899
908
  export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
900
909
  export declare const ChatCompletionsFinishReasonSchema: z.ZodEnum<{
901
- tool_calls: "tool_calls";
902
910
  stop: "stop";
903
911
  length: "length";
912
+ tool_calls: "tool_calls";
904
913
  content_filter: "content_filter";
905
914
  }>;
906
915
  export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
@@ -944,9 +953,9 @@ export declare const ChatCompletionsChoiceSchema: z.ZodObject<{
944
953
  }, z.core.$strip>>;
945
954
  }, z.core.$strip>;
946
955
  finish_reason: z.ZodEnum<{
947
- tool_calls: "tool_calls";
948
956
  stop: "stop";
949
957
  length: "length";
958
+ tool_calls: "tool_calls";
950
959
  content_filter: "content_filter";
951
960
  }>;
952
961
  logprobs: z.ZodOptional<z.ZodUnknown>;
@@ -1010,9 +1019,9 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
1010
1019
  }, z.core.$strip>>;
1011
1020
  }, z.core.$strip>;
1012
1021
  finish_reason: z.ZodEnum<{
1013
- tool_calls: "tool_calls";
1014
1022
  stop: "stop";
1015
1023
  length: "length";
1024
+ tool_calls: "tool_calls";
1016
1025
  content_filter: "content_filter";
1017
1026
  }>;
1018
1027
  logprobs: z.ZodOptional<z.ZodUnknown>;
@@ -1029,7 +1038,14 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
1029
1038
  cache_write_tokens: z.ZodOptional<z.ZodInt>;
1030
1039
  }, z.core.$strip>>;
1031
1040
  }, z.core.$strip>>;
1032
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
1041
+ service_tier: z.ZodOptional<z.ZodEnum<{
1042
+ auto: "auto";
1043
+ default: "default";
1044
+ flex: "flex";
1045
+ scale: "scale";
1046
+ priority: "priority";
1047
+ }>>;
1048
+ provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1033
1049
  }, z.core.$strip>;
1034
1050
  export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
1035
1051
  export declare const ChatCompletionsToolCallDeltaSchema: z.ZodObject<{
@@ -1123,9 +1139,9 @@ export declare const ChatCompletionsChoiceDeltaSchema: z.ZodObject<{
1123
1139
  }, z.core.$strip>>>;
1124
1140
  }, z.core.$strip>;
1125
1141
  finish_reason: z.ZodNullable<z.ZodEnum<{
1126
- tool_calls: "tool_calls";
1127
1142
  stop: "stop";
1128
1143
  length: "length";
1144
+ tool_calls: "tool_calls";
1129
1145
  content_filter: "content_filter";
1130
1146
  }>>;
1131
1147
  logprobs: z.ZodOptional<z.ZodUnknown>;
@@ -1177,9 +1193,9 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
1177
1193
  }, z.core.$strip>>>;
1178
1194
  }, z.core.$strip>;
1179
1195
  finish_reason: z.ZodNullable<z.ZodEnum<{
1180
- tool_calls: "tool_calls";
1181
1196
  stop: "stop";
1182
1197
  length: "length";
1198
+ tool_calls: "tool_calls";
1183
1199
  content_filter: "content_filter";
1184
1200
  }>>;
1185
1201
  logprobs: z.ZodOptional<z.ZodUnknown>;
@@ -1196,7 +1212,15 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
1196
1212
  cache_write_tokens: z.ZodOptional<z.ZodInt>;
1197
1213
  }, z.core.$strip>>;
1198
1214
  }, z.core.$strip>>;
1199
- provider_metadata: z.ZodOptional<z.ZodUnknown>;
1215
+ service_tier: z.ZodOptional<z.ZodEnum<{
1216
+ auto: "auto";
1217
+ default: "default";
1218
+ flex: "flex";
1219
+ scale: "scale";
1220
+ priority: "priority";
1221
+ }>>;
1222
+ provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1200
1223
  }, z.core.$strip>;
1201
1224
  export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
1225
+ export type ChatCompletionsStream = ReadableStream<SseFrame<ChatCompletionsChunk> | SseErrorFrame>;
1202
1226
  export {};
@@ -161,14 +161,11 @@ export const ChatCompletionsToolChoiceSchema = z.union([
161
161
  ]);
162
162
  export const ChatCompletionsReasoningEffortSchema = z.enum([
163
163
  "none",
164
- // Extension origin: Gemini
165
164
  "minimal",
166
165
  "low",
167
166
  "medium",
168
167
  "high",
169
168
  "xhigh",
170
- // Extension origin: Anthropic
171
- "max",
172
169
  ]);
173
170
  export const ChatCompletionsReasoningConfigSchema = z.object({
174
171
  enabled: z.optional(z.boolean()),
@@ -195,6 +192,13 @@ export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type",
195
192
  ChatCompletionsResponseFormatTextSchema,
196
193
  ]);
197
194
  export const ChatCompletionsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
195
+ export const ChatCompletionsServiceTierSchema = z.enum([
196
+ "auto",
197
+ "default",
198
+ "flex",
199
+ "scale",
200
+ "priority",
201
+ ]);
198
202
  const ChatCompletionsInputsSchema = z.object({
199
203
  messages: z.array(ChatCompletionsMessageSchema),
200
204
  tools: z.array(ChatCompletionsToolSchema).optional(),
@@ -210,24 +214,19 @@ const ChatCompletionsInputsSchema = z.object({
210
214
  metadata: ChatCompletionsMetadataSchema.optional(),
211
215
  response_format: ChatCompletionsResponseFormatSchema.optional(),
212
216
  reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
217
+ service_tier: ChatCompletionsServiceTierSchema.optional(),
213
218
  prompt_cache_key: z.string().optional(),
214
219
  prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
215
- // Extension origin: Gemini explicit cache handle
216
- // FUTURE: generalize extra_body handling
217
- // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
218
- extra_body: z
219
- .object({
220
- google: z
221
- .object({
222
- cached_content: z.string().optional().meta({ extension: true }),
223
- })
224
- .optional(),
225
- })
226
- .optional(),
227
220
  // Extension origin: OpenRouter/Vercel/Anthropic
228
221
  cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
229
222
  // Extension origin: OpenRouter
230
223
  reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
224
+ // Extension origin: Gemini extra_body
225
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
226
+ extra_body: z
227
+ .record(z.string(), z.record(z.string(), z.unknown()))
228
+ .optional()
229
+ .meta({ extension: true }),
231
230
  });
232
231
  export const ChatCompletionsBodySchema = z.looseObject({
233
232
  model: z.string(),
@@ -273,8 +272,12 @@ export const ChatCompletionsSchema = z.object({
273
272
  model: z.string(),
274
273
  choices: z.array(ChatCompletionsChoiceSchema),
275
274
  usage: ChatCompletionsUsageSchema.nullable(),
275
+ service_tier: ChatCompletionsServiceTierSchema.optional(),
276
276
  // Extension origin: Vercel AI Gateway
277
- provider_metadata: z.unknown().optional().meta({ extension: true }),
277
+ provider_metadata: z
278
+ .record(z.string(), z.record(z.string(), z.unknown()))
279
+ .optional()
280
+ .meta({ extension: true }),
278
281
  });
279
282
  export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
280
283
  index: z.int().nonnegative(),
@@ -296,6 +299,10 @@ export const ChatCompletionsChunkSchema = z.object({
296
299
  model: z.string(),
297
300
  choices: z.array(ChatCompletionsChoiceDeltaSchema),
298
301
  usage: ChatCompletionsUsageSchema.nullable(),
302
+ service_tier: ChatCompletionsServiceTierSchema.optional(),
299
303
  // Extension origin: Vercel AI Gateway
300
- provider_metadata: z.unknown().optional().meta({ extension: true }),
304
+ provider_metadata: z
305
+ .record(z.string(), z.record(z.string(), z.unknown()))
306
+ .optional()
307
+ .meta({ extension: true }),
301
308
  });
@@ -13,7 +13,7 @@ import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from
13
13
  import { EmbeddingsBodySchema } from "./schema";
14
14
  export const embeddings = (config) => {
15
15
  const hooks = config.hooks;
16
- const handler = async (ctx) => {
16
+ const handler = async (ctx, cfg) => {
17
17
  const start = performance.now();
18
18
  ctx.operation = "embeddings";
19
19
  addSpanEvent("hebo.handler.started");
@@ -62,7 +62,7 @@ export const embeddings = (config) => {
62
62
  ctx.resolvedProviderId = embeddingModel.provider;
63
63
  logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
64
64
  addSpanEvent("hebo.provider.resolved");
65
- const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
65
+ const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
66
66
  const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
67
67
  setSpanAttributes(genAiGeneralAttrs);
68
68
  // Convert inputs to AI SDK call options.
@@ -1,5 +1,5 @@
1
1
  import type { Attributes } from "@opentelemetry/api";
2
- import type { Embeddings, EmbeddingsInputs } from "./schema";
2
+ import type { Embeddings, EmbeddingsBody } from "./schema";
3
3
  import { type TelemetrySignalLevel } from "../../types";
4
- export declare const getEmbeddingsRequestAttributes: (inputs: EmbeddingsInputs, signalLevel?: TelemetrySignalLevel) => Attributes;
4
+ export declare const getEmbeddingsRequestAttributes: (body: EmbeddingsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
5
5
  export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
@@ -1,12 +1,17 @@
1
1
  import {} from "../../types";
2
- export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
2
+ export const getEmbeddingsRequestAttributes = (body, signalLevel) => {
3
3
  if (!signalLevel || signalLevel === "off")
4
4
  return {};
5
5
  const attrs = {};
6
6
  if (signalLevel !== "required") {
7
7
  Object.assign(attrs, {
8
- "gen_ai.embeddings.dimension.count": inputs.dimensions,
8
+ "gen_ai.embeddings.dimension.count": body.dimensions,
9
9
  });
10
+ if (body.metadata) {
11
+ for (const key in body.metadata) {
12
+ attrs[`gen_ai.request.metadata.${key}`] = body.metadata[key];
13
+ }
14
+ }
10
15
  }
11
16
  return attrs;
12
17
  };
@@ -1,12 +1,18 @@
1
1
  import * as z from "zod";
2
+ export declare const EmbeddingsDimensionsSchema: z.ZodInt;
3
+ export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
4
+ export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
5
+ export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
2
6
  export declare const EmbeddingsInputsSchema: z.ZodObject<{
3
7
  input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
4
8
  dimensions: z.ZodOptional<z.ZodInt>;
9
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
5
10
  }, z.core.$strip>;
6
11
  export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
7
12
  export declare const EmbeddingsBodySchema: z.ZodObject<{
8
13
  input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
9
14
  dimensions: z.ZodOptional<z.ZodInt>;
15
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
10
16
  model: z.ZodString;
11
17
  }, z.core.$loose>;
12
18
  export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
@@ -1,7 +1,10 @@
1
1
  import * as z from "zod";
2
+ export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
3
+ export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
2
4
  export const EmbeddingsInputsSchema = z.object({
3
5
  input: z.union([z.string(), z.array(z.string())]),
4
- dimensions: z.int().nonnegative().max(65536).optional(),
6
+ dimensions: EmbeddingsDimensionsSchema.optional(),
7
+ metadata: EmbeddingsMetadataSchema.optional(),
5
8
  });
6
9
  export const EmbeddingsBodySchema = z.looseObject({
7
10
  model: z.string(),
@@ -2,8 +2,8 @@ import { GatewayError } from "../../errors/gateway";
2
2
  import { winterCgHandler } from "../../lifecycle";
3
3
  import { toModels, toModel } from "./converters";
4
4
  export const models = (config) => {
5
- // oxlint-disable-next-line require-await
6
- const handler = async (ctx) => {
5
+ // eslint-disable-next-line require-await
6
+ const handler = async (ctx, _cfg) => {
7
7
  ctx.operation = "models";
8
8
  if (!ctx.request || ctx.request.method !== "GET") {
9
9
  throw new GatewayError("Method Not Allowed", 405);
@@ -8,12 +8,7 @@ export declare const OpenAIErrorSchema: z.ZodObject<{
8
8
  }, z.core.$strip>;
9
9
  }, z.core.$strip>;
10
10
  export declare class OpenAIError {
11
- readonly error: {
12
- message: string;
13
- type: string;
14
- code: string | undefined;
15
- param: string;
16
- };
11
+ readonly error: z.infer<typeof OpenAIErrorSchema>["error"];
17
12
  constructor(message: string, type?: string, code?: string, param?: string);
18
13
  }
19
14
  export declare function toOpenAIError(error: unknown): OpenAIError;
@@ -1,2 +1,3 @@
1
- import type { GatewayConfig, GatewayContext } from "./types";
2
- export declare const winterCgHandler: (run: (ctx: GatewayContext) => Promise<object | ReadableStream<object>>, config: GatewayConfig) => (request: Request, state?: Record<string, unknown>) => Promise<Response>;
1
+ import type { GatewayConfig, GatewayConfigParsed, GatewayContext } from "./types";
2
+ import type { SseFrame } from "./utils/stream";
3
+ export declare const winterCgHandler: (run: (ctx: GatewayContext, cfg: GatewayConfigParsed) => Promise<object | ReadableStream<SseFrame>>, config: GatewayConfig) => (request: Request, state?: Record<string, unknown>) => Promise<Response>;
package/dist/lifecycle.js CHANGED
@@ -8,7 +8,6 @@ import { recordRequestDuration } from "./telemetry/gen-ai";
8
8
  import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
9
9
  import { observeV8jsMemoryMetrics } from "./telemetry/memory";
10
10
  import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
11
- import { wrapStream } from "./telemetry/stream";
12
11
  import { resolveOrCreateRequestId } from "./utils/request";
13
12
  import { prepareResponseInit, toResponse } from "./utils/response";
14
13
  export const winterCgHandler = (run, config) => {
@@ -69,11 +68,10 @@ export const winterCgHandler = (run, config) => {
69
68
  }
70
69
  }
71
70
  if (!ctx.response) {
72
- ctx.result = (await span.runWithContext(() => run(ctx)));
73
- if (ctx.result instanceof ReadableStream) {
74
- ctx.result = wrapStream(ctx.result, { onDone: finalize });
75
- }
76
- ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId));
71
+ ctx.result = (await span.runWithContext(() => run(ctx, parsedConfig)));
72
+ ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId), {
73
+ onDone: finalize,
74
+ });
77
75
  }
78
76
  if (parsedConfig.hooks?.onResponse) {
79
77
  const onResponse = await parsedConfig.hooks.onResponse(ctx);
@@ -17,7 +17,6 @@ export function calculateReasoningBudgetFromEffort(effort, maxTokens, minTokens
17
17
  percentage = 0.8;
18
18
  break;
19
19
  case "xhigh":
20
- case "max":
21
20
  percentage = 0.95;
22
21
  break;
23
22
  }
@@ -10,7 +10,9 @@ export const novaDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["nova"] ??= {})["embeddingDimension"] = dimensions;
13
+ const target = (params.providerOptions["nova"] ??= {});
14
+ // @ts-expect-error AI SDK does the value checking for us
15
+ target.embeddingDimension = dimensions;
14
16
  delete unknown["dimensions"];
15
17
  return params;
16
18
  },
@@ -26,7 +28,6 @@ function mapNovaEffort(effort) {
26
28
  return "medium";
27
29
  case "high":
28
30
  case "xhigh":
29
- case "max":
30
31
  return "high";
31
32
  }
32
33
  }
@@ -42,18 +43,18 @@ export const novaReasoningMiddleware = {
42
43
  return params;
43
44
  const target = (params.providerOptions["amazon"] ??= {});
44
45
  if (!reasoning.enabled) {
45
- target["reasoningConfig"] = { type: "disabled" };
46
+ target.reasoningConfig = { type: "disabled" };
46
47
  }
47
48
  else if (reasoning.effort) {
48
49
  // FUTURE: warn if mapNovaEffort modified the effort
49
- target["reasoningConfig"] = {
50
+ target.reasoningConfig = {
50
51
  type: "enabled",
51
52
  maxReasoningEffort: mapNovaEffort(reasoning.effort),
52
53
  };
53
54
  }
54
55
  else {
55
56
  // FUTURE: warn if reasoning.max_tokens (unsupported) was ignored
56
- target["reasoningConfig"] = { type: "enabled" };
57
+ target.reasoningConfig = { type: "enabled" };
57
58
  }
58
59
  delete unknown["reasoning"];
59
60
  return params;
@@ -22,7 +22,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
22
22
  case "high":
23
23
  return "high";
24
24
  case "xhigh":
25
- case "max":
26
25
  return "max";
27
26
  }
28
27
  }
@@ -35,7 +34,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
35
34
  return "medium";
36
35
  case "high":
37
36
  case "xhigh":
38
- case "max":
39
37
  return "high";
40
38
  }
41
39
  }
@@ -66,41 +64,42 @@ export const claudeReasoningMiddleware = {
66
64
  const modelId = model.modelId;
67
65
  const clampedMaxTokens = reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
68
66
  if (!reasoning.enabled) {
69
- target["thinking"] = { type: "disabled" };
67
+ target.thinking = { type: "disabled" };
70
68
  }
71
69
  else if (reasoning.effort) {
72
70
  if (isClaude4(modelId)) {
73
- target["effort"] = mapClaudeReasoningEffort(reasoning.effort, modelId);
71
+ target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
74
72
  }
75
73
  if (isOpus46(modelId)) {
76
- target["thinking"] = clampedMaxTokens
77
- ? { type: "adaptive", budgetTokens: clampedMaxTokens }
74
+ target.thinking = clampedMaxTokens
75
+ ? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
76
+ { type: "adaptive", budgetTokens: clampedMaxTokens }
78
77
  : { type: "adaptive" };
79
78
  }
80
79
  else if (isSonnet46(modelId)) {
81
- target["thinking"] = clampedMaxTokens
80
+ target.thinking = clampedMaxTokens
82
81
  ? { type: "enabled", budgetTokens: clampedMaxTokens }
83
82
  : { type: "adaptive" };
84
83
  }
85
84
  else {
86
- target["thinking"] = { type: "enabled" };
85
+ target.thinking = { type: "enabled" };
87
86
  if (clampedMaxTokens) {
88
- target["thinking"]["budgetTokens"] = clampedMaxTokens;
87
+ target.thinking.budgetTokens = clampedMaxTokens;
89
88
  }
90
89
  else {
91
90
  // FUTURE: warn that reasoning.max_tokens was computed
92
- target["thinking"]["budgetTokens"] = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
91
+ target.thinking.budgetTokens = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
93
92
  }
94
93
  }
95
94
  }
96
95
  else if (clampedMaxTokens) {
97
- target["thinking"] = {
96
+ target.thinking = {
98
97
  type: "enabled",
99
98
  budgetTokens: clampedMaxTokens,
100
99
  };
101
100
  }
102
101
  else {
103
- target["thinking"] = { type: "enabled" };
102
+ target.thinking = { type: "enabled" };
104
103
  }
105
104
  delete unknown["reasoning"];
106
105
  return params;
@@ -116,7 +115,8 @@ export const claudePromptCachingMiddleware = {
116
115
  return params;
117
116
  const cacheControl = unknown["cache_control"];
118
117
  if (cacheControl) {
119
- (params.providerOptions["anthropic"] ??= {})["cacheControl"] = cacheControl;
118
+ (params.providerOptions["anthropic"] ??= {}).cacheControl =
119
+ cacheControl;
120
120
  }
121
121
  delete unknown["cache_control"];
122
122
  return params;
@@ -17,7 +17,9 @@ export const cohereDimensionsMiddleware = {
17
17
  const dimensions = unknown["dimensions"];
18
18
  if (!dimensions)
19
19
  return params;
20
- (params.providerOptions["cohere"] ??= {})["outputDimension"] = dimensions;
20
+ const target = (params.providerOptions["cohere"] ??= {});
21
+ // @ts-expect-error AI SDK does the value checking for us
22
+ target.outputDimension = dimensions;
21
23
  delete unknown["dimensions"];
22
24
  return params;
23
25
  },
@@ -35,20 +37,20 @@ export const cohereReasoningMiddleware = {
35
37
  return params;
36
38
  const target = (params.providerOptions["cohere"] ??= {});
37
39
  if (!reasoning.enabled) {
38
- target["thinking"] = { type: "disabled" };
40
+ target.thinking = { type: "disabled" };
39
41
  }
40
42
  else if (reasoning.max_tokens) {
41
- target["thinking"] = { type: "enabled", tokenBudget: reasoning.max_tokens };
43
+ target.thinking = { type: "enabled", tokenBudget: reasoning.max_tokens };
42
44
  }
43
45
  else if (reasoning.effort) {
44
46
  // FUTURE: warn that reasoning.max_tokens was computed
45
- target["thinking"] = {
47
+ target.thinking = {
46
48
  type: "enabled",
47
49
  tokenBudget: calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? COHERE_MAX_OUTPUT_TOKENS, 1024),
48
50
  };
49
51
  }
50
52
  else {
51
- target["thinking"] = { type: "enabled" };
53
+ target.thinking = { type: "enabled" };
52
54
  }
53
55
  delete unknown["reasoning"];
54
56
  return params;
@@ -1,7 +1,7 @@
1
1
  import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
2
2
  import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
3
3
  export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
4
- export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): ChatCompletionsReasoningEffort | undefined;
4
+ export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
5
5
  export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
6
6
  export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
7
7
  export declare const geminiReasoningMiddleware: LanguageModelMiddleware;