@hebo-ai/gateway 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +55 -5
  2. package/dist/config.js +28 -1
  3. package/dist/endpoints/chat-completions/converters.d.ts +5 -5
  4. package/dist/endpoints/chat-completions/converters.js +65 -29
  5. package/dist/endpoints/chat-completions/handler.js +4 -4
  6. package/dist/endpoints/chat-completions/otel.d.ts +1 -1
  7. package/dist/endpoints/chat-completions/otel.js +20 -18
  8. package/dist/endpoints/chat-completions/schema.d.ts +43 -5
  9. package/dist/endpoints/chat-completions/schema.js +10 -0
  10. package/dist/endpoints/embeddings/handler.js +2 -2
  11. package/dist/endpoints/embeddings/otel.d.ts +2 -2
  12. package/dist/endpoints/embeddings/otel.js +5 -5
  13. package/dist/endpoints/models/handler.js +2 -2
  14. package/dist/errors/openai.d.ts +1 -6
  15. package/dist/lifecycle.d.ts +3 -2
  16. package/dist/lifecycle.js +4 -6
  17. package/dist/models/google/presets.d.ts +28 -0
  18. package/dist/models/google/presets.js +7 -1
  19. package/dist/models/types.d.ts +1 -1
  20. package/dist/models/types.js +1 -0
  21. package/dist/providers/bedrock/middleware.d.ts +1 -0
  22. package/dist/providers/bedrock/middleware.js +33 -0
  23. package/dist/providers/groq/index.d.ts +1 -0
  24. package/dist/providers/groq/index.js +1 -0
  25. package/dist/providers/groq/middleware.d.ts +2 -0
  26. package/dist/providers/groq/middleware.js +31 -0
  27. package/dist/providers/vertex/index.d.ts +1 -0
  28. package/dist/providers/vertex/index.js +1 -0
  29. package/dist/providers/vertex/middleware.d.ts +2 -0
  30. package/dist/providers/vertex/middleware.js +47 -0
  31. package/dist/types.d.ts +25 -4
  32. package/dist/types.js +1 -0
  33. package/dist/utils/response.d.ts +4 -1
  34. package/dist/utils/response.js +5 -20
  35. package/dist/utils/stream.d.ts +9 -0
  36. package/dist/utils/stream.js +100 -0
  37. package/package.json +1 -1
  38. package/dist/telemetry/stream.d.ts +0 -3
  39. package/dist/telemetry/stream.js +0 -58
package/dist/errors/openai.d.ts CHANGED
@@ -8,12 +8,7 @@ export declare const OpenAIErrorSchema: z.ZodObject<{
  }, z.core.$strip>;
  }, z.core.$strip>;
  export declare class OpenAIError {
- readonly error: {
- message: string;
- type: string;
- code: string | undefined;
- param: string;
- };
+ readonly error: z.infer<typeof OpenAIErrorSchema>["error"];
  constructor(message: string, type?: string, code?: string, param?: string);
  }
  export declare function toOpenAIError(error: unknown): OpenAIError;
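
The hand-written field type is replaced by one derived from the schema, so the class can no longer drift from what OpenAIErrorSchema validates. A minimal sketch of the pattern (the schema shape mirrors the removed fields; the constructor defaults are illustrative):

    import { z } from "zod";

    // Derive the field type from the schema so the class and the validator
    // cannot drift apart.
    const OpenAIErrorSchema = z.object({
      error: z.object({
        message: z.string(),
        type: z.string(),
        code: z.string().optional(),
        param: z.string(),
      }),
    });

    class OpenAIError {
      readonly error: z.infer<typeof OpenAIErrorSchema>["error"];
      constructor(message: string, type = "invalid_request_error", code?: string, param = "") {
        this.error = { message, type, code, param };
      }
    }
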
package/dist/lifecycle.d.ts CHANGED
@@ -1,2 +1,3 @@
- import type { GatewayConfig, GatewayContext } from "./types";
- export declare const winterCgHandler: (run: (ctx: GatewayContext) => Promise<object | ReadableStream<object>>, config: GatewayConfig) => (request: Request, state?: Record<string, unknown>) => Promise<Response>;
+ import type { GatewayConfig, GatewayConfigParsed, GatewayContext } from "./types";
+ import type { SseFrame } from "./utils/stream";
+ export declare const winterCgHandler: (run: (ctx: GatewayContext, cfg: GatewayConfigParsed) => Promise<object | ReadableStream<SseFrame>>, config: GatewayConfig) => (request: Request, state?: Record<string, unknown>) => Promise<Response>;
package/dist/lifecycle.js CHANGED
@@ -8,7 +8,6 @@ import { recordRequestDuration } from "./telemetry/gen-ai";
  import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
  import { observeV8jsMemoryMetrics } from "./telemetry/memory";
  import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
- import { wrapStream } from "./telemetry/stream";
  import { resolveOrCreateRequestId } from "./utils/request";
  import { prepareResponseInit, toResponse } from "./utils/response";
  export const winterCgHandler = (run, config) => {
@@ -69,11 +68,10 @@ export const winterCgHandler = (run, config) => {
  }
  }
  if (!ctx.response) {
- ctx.result = (await span.runWithContext(() => run(ctx)));
- if (ctx.result instanceof ReadableStream) {
- ctx.result = wrapStream(ctx.result, { onDone: finalize });
- }
- ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId));
+ ctx.result = (await span.runWithContext(() => run(ctx, parsedConfig)));
+ ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId), {
+ onDone: finalize,
+ });
  }
  if (parsedConfig.hooks?.onResponse) {
  const onResponse = await parsedConfig.hooks.onResponse(ctx);
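
The separate wrapStream telemetry pass is gone: the handler result now flows straight into toResponse, which owns stream finalization through the onDone callback. A hedged sketch of the new single exit path (toSseStream is declared here as a stand-in for the util added in dist/utils/stream.js):

    // Hedged sketch; `toSseStream` stands in for the real util in this diff.
    type SseFrame = { data: unknown; event?: string };
    declare function toSseStream(
      src: ReadableStream<SseFrame>,
      options?: { onDone?: (status: number, reason?: unknown) => void },
    ): ReadableStream<Uint8Array>;

    function toResponseSketch(
      result: object | ReadableStream<SseFrame>,
      init?: ResponseInit,
      streamOptions?: { onDone?: (status: number, reason?: unknown) => void },
    ): Response {
      if (result instanceof ReadableStream) {
        // onDone fires exactly once: EOF (200), error frame (422/502),
        // or client cancellation (499).
        return new Response(toSseStream(result, streamOptions), init);
      }
      return Response.json(result, init);
    }
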
package/dist/models/google/presets.d.ts CHANGED
@@ -9,6 +9,16 @@ export declare const geminiEmbedding001: import("../../utils/preset").Preset<"go
  };
  providers: readonly ["vertex"];
  }>;
+ export declare const geminiEmbedding2Preview: import("../../utils/preset").Preset<"google/gemini-embedding-2-preview", CatalogModel, {
+ name: string;
+ created: string;
+ context: number;
+ modalities: {
+ input: readonly ["text"];
+ output: readonly ["embedding"];
+ };
+ providers: readonly ["vertex"];
+ }>;
  export declare const gemini3FlashPreview: import("../../utils/preset").Preset<"google/gemini-3-flash-preview", CatalogModel, {
  name: string;
  created: string;
@@ -159,6 +169,15 @@ export declare const gemini: {
  output: readonly ["embedding"];
  };
  providers: readonly ["vertex"];
+ }>, import("../../utils/preset").Preset<"google/gemini-embedding-2-preview", CatalogModel, {
+ name: string;
+ created: string;
+ context: number;
+ modalities: {
+ input: readonly ["text"];
+ output: readonly ["embedding"];
+ };
+ providers: readonly ["vertex"];
  }>];
  readonly all: (import("../../utils/preset").Preset<"google/embedding-001", CatalogModel, {
  name: string;
@@ -169,6 +188,15 @@ export declare const gemini: {
  output: readonly ["embedding"];
  };
  providers: readonly ["vertex"];
+ }> | import("../../utils/preset").Preset<"google/gemini-embedding-2-preview", CatalogModel, {
+ name: string;
+ created: string;
+ context: number;
+ modalities: {
+ input: readonly ["text"];
+ output: readonly ["embedding"];
+ };
+ providers: readonly ["vertex"];
  }> | import("../../utils/preset").Preset<"google/gemini-3-flash-preview", CatalogModel, {
  name: string;
  created: string;
package/dist/models/google/presets.js CHANGED
@@ -27,6 +27,12 @@ export const geminiEmbedding001 = presetFor()("google/embedding-001", {
  created: "2025-05-20",
  context: 8192,
  });
+ export const geminiEmbedding2Preview = presetFor()("google/gemini-embedding-2-preview", {
+ ...GEMINI_EMBEDDINGS_BASE,
+ name: "Gemini Embedding 2 (Preview)",
+ created: "2026-03-10",
+ context: 8192,
+ });
  export const gemini3FlashPreview = presetFor()("google/gemini-3-flash-preview", {
  ...GEMINI_BASE,
  name: "Gemini 3 Flash (Preview)",
@@ -66,7 +72,7 @@ export const gemini25Pro = presetFor()("google/gemini-2.5-pro", {
  const geminiAtomic = {
  "v2.5": [gemini25FlashLite, gemini25Flash, gemini25Pro],
  "v3-preview": [gemini3FlashPreview, gemini31FlashLitePreview, gemini31ProPreview],
- embeddings: [geminiEmbedding001],
+ embeddings: [geminiEmbedding001, geminiEmbedding2Preview],
  };
  const geminiGroups = {
  "v2.x": [...geminiAtomic["v2.5"]],
package/dist/models/types.d.ts CHANGED
@@ -1,5 +1,5 @@
  import type { ProviderId } from "../providers/types";
- export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/embedding-001", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large"];
+ export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2-preview", "google/embedding-001", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large"];
  export type CanonicalModelId = (typeof CANONICAL_MODEL_IDS)[number];
  export type ModelId = CanonicalModelId | (string & {});
  export type CatalogModel = {
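
The ModelId union relies on the (string & {}) trick: any string is accepted, but editors keep suggesting the canonical IDs as completions. The same idea in isolation:

    // The `(string & {})` trick: accepts arbitrary strings while preserving
    // autocomplete for the canonical literals.
    const CANONICAL = ["google/gemini-2.5-pro", "openai/gpt-5"] as const;
    type CanonicalModelId = (typeof CANONICAL)[number];
    type ModelId = CanonicalModelId | (string & {});

    const a: ModelId = "google/gemini-2.5-pro"; // completed by the editor
    const b: ModelId = "my-org/custom-model";   // still allowed
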
package/dist/models/types.js CHANGED
@@ -45,6 +45,7 @@ export const CANONICAL_MODEL_IDS = [
  "google/gemini-3-flash-preview",
  "google/gemini-3.1-flash-lite-preview",
  "google/gemini-3.1-pro-preview",
+ "google/gemini-embedding-2-preview",
  "google/embedding-001",
  // Meta
  "meta/llama-3.1-8b",
package/dist/providers/bedrock/middleware.d.ts CHANGED
@@ -1,4 +1,5 @@
  import type { LanguageModelMiddleware } from "ai";
+ export declare const bedrockServiceTierMiddleware: LanguageModelMiddleware;
  export declare const bedrockGptReasoningMiddleware: LanguageModelMiddleware;
  export declare const bedrockClaudeReasoningMiddleware: LanguageModelMiddleware;
  export declare const bedrockPromptCachingMiddleware: LanguageModelMiddleware;
package/dist/providers/bedrock/middleware.js CHANGED
@@ -1,5 +1,37 @@
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
  const isClaude46 = (modelId) => modelId.includes("-4-6");
+ // https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+ export const bedrockServiceTierMiddleware = {
+ specificationVersion: "v3",
+ // eslint-disable-next-line require-await
+ transformParams: async ({ params }) => {
+ const bedrock = params.providerOptions?.["bedrock"];
+ if (!bedrock || typeof bedrock !== "object")
+ return params;
+ // UPSTREAM: https://github.com/vercel/ai/issues/13241
+ // @ts-expect-error AI SDK missing serviceTier, need to open PR
+ const tier = bedrock["serviceTier"];
+ switch (tier) {
+ case undefined:
+ return params;
+ case "auto":
+ // Bedrock uses its default tier when omitted.
+ // @ts-expect-error AI SDK missing serviceTier, need to open PR
+ delete bedrock.serviceTier;
+ return params;
+ case "scale":
+ // @ts-expect-error AI SDK missing serviceTier, need to open PR
+ bedrock.serviceTier = { type: "reserved" };
+ return params;
+ case "default":
+ case "flex":
+ case "priority":
+ // @ts-expect-error AI SDK missing serviceTier, need to open PR
+ bedrock.serviceTier = { type: tier };
+ return params;
+ }
+ },
+ };
  export const bedrockGptReasoningMiddleware = {
  specificationVersion: "v3",
  // oxlint-disable-next-line require-await
@@ -97,6 +129,7 @@ export const bedrockPromptCachingMiddleware = {
  };
  modelMiddlewareMatcher.useForProvider("amazon-bedrock", {
  language: [
+ bedrockServiceTierMiddleware,
  bedrockGptReasoningMiddleware,
  bedrockClaudeReasoningMiddleware,
  bedrockPromptCachingMiddleware,
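
From a caller's perspective the tier is requested through provider options and rewritten in flight: "scale" becomes Bedrock's { type: "reserved" }, "auto" deletes the field so Bedrock applies its default, and the remaining tiers pass through as { type: tier }. An illustrative AI SDK call (the model value is a placeholder, and serviceTier is the gateway's extension, not an official AI SDK field):

    import { generateText, type LanguageModel } from "ai";

    declare const model: LanguageModel; // placeholder: a Bedrock model resolved by the gateway

    // The middleware rewrites serviceTier before the request reaches Bedrock.
    const { text } = await generateText({
      model,
      prompt: "Hello",
      providerOptions: {
        bedrock: { serviceTier: "flex" }, // -> { type: "flex" } on the wire
      },
    });
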
package/dist/providers/groq/index.d.ts CHANGED
@@ -1 +1,2 @@
  export * from "./canonical";
+ export * from "./middleware";
package/dist/providers/groq/index.js CHANGED
@@ -1 +1,2 @@
  export * from "./canonical";
+ export * from "./middleware";
package/dist/providers/groq/middleware.d.ts ADDED
@@ -0,0 +1,2 @@
+ import type { LanguageModelMiddleware } from "ai";
+ export declare const groqServiceTierMiddleware: LanguageModelMiddleware;
package/dist/providers/groq/middleware.js ADDED
@@ -0,0 +1,31 @@
+ import { modelMiddlewareMatcher } from "../../middleware/matcher";
+ // https://console.groq.com/docs/service-tiers
+ export const groqServiceTierMiddleware = {
+ specificationVersion: "v3",
+ // eslint-disable-next-line require-await
+ transformParams: async ({ params }) => {
+ const groq = params.providerOptions?.["groq"];
+ if (!groq || typeof groq !== "object")
+ return params;
+ const tier = groq.serviceTier;
+ switch (tier) {
+ case undefined:
+ return params;
+ case "auto":
+ case "flex":
+ return params;
+ case "default":
+ groq.serviceTier = "on_demand";
+ return params;
+ case "scale":
+ case "priority":
+ // UPSTREAM: https://github.com/vercel/ai/issues/13235
+ // @ts-expect-error AI SDK missing "performance", need to open PR
+ groq.serviceTier = "performance";
+ return params;
+ }
+ },
+ };
+ modelMiddlewareMatcher.useForProvider("groq.*", {
+ language: [groqServiceTierMiddleware],
+ });
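
Groq's tier names differ from the gateway's, so the middleware normalizes them: "default" becomes "on_demand", while "scale" and "priority" both map to Groq's "performance" tier (not yet present in the AI SDK's types, per the linked upstream issue). The same mapping restated as a standalone function:

    type GatewayServiceTier = "auto" | "default" | "flex" | "scale" | "priority";

    // Standalone restatement of the mapping in groqServiceTierMiddleware.
    function toGroqServiceTier(tier: GatewayServiceTier): string {
      switch (tier) {
        case "default":
          return "on_demand";
        case "scale":
        case "priority":
          return "performance"; // flagged @ts-expect-error in the real middleware
        default:
          return tier; // "auto" and "flex" pass through unchanged
      }
    }
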
package/dist/providers/vertex/index.d.ts CHANGED
@@ -1 +1,2 @@
  export * from "./canonical";
+ export * from "./middleware";
package/dist/providers/vertex/index.js CHANGED
@@ -1 +1,2 @@
  export * from "./canonical";
+ export * from "./middleware";
package/dist/providers/vertex/middleware.d.ts ADDED
@@ -0,0 +1,2 @@
+ import type { LanguageModelMiddleware } from "ai";
+ export declare const vertexServiceTierMiddleware: LanguageModelMiddleware;
package/dist/providers/vertex/middleware.js ADDED
@@ -0,0 +1,47 @@
+ import { modelMiddlewareMatcher } from "../../middleware/matcher";
+ const VERTEX_REQUEST_TYPE_HEADER = "x-vertex-ai-llm-request-type";
+ const VERTEX_SHARED_REQUEST_TYPE_HEADER = "x-vertex-ai-llm-shared-request-type";
+ function setHeaderIfMissing(headers, key, value) {
+ headers[key] ??= value;
+ }
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/standard-paygo
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/priority-paygo
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/flex-paygo
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/provisioned-throughput/use-provisioned-throughput
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/reference/rest/v1/GenerateContentResponse#TrafficType
+ export const vertexServiceTierMiddleware = {
+ specificationVersion: "v3",
+ // eslint-disable-next-line require-await
+ transformParams: async ({ params }) => {
+ const vertex = params.providerOptions?.["vertex"];
+ if (!vertex || typeof vertex !== "object")
+ return params;
+ const tier = vertex["serviceTier"];
+ const headers = (params.headers ??= {});
+ switch (tier) {
+ case undefined:
+ return params;
+ case "flex":
+ setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "shared");
+ setHeaderIfMissing(headers, VERTEX_SHARED_REQUEST_TYPE_HEADER, "flex");
+ break;
+ case "priority":
+ setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "shared");
+ setHeaderIfMissing(headers, VERTEX_SHARED_REQUEST_TYPE_HEADER, "priority");
+ break;
+ case "scale":
+ setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "dedicated");
+ break;
+ case "default":
+ setHeaderIfMissing(headers, VERTEX_REQUEST_TYPE_HEADER, "shared");
+ break;
+ case "auto":
+ break;
+ }
+ delete vertex["serviceTier"];
+ return params;
+ },
+ };
+ modelMiddlewareMatcher.useForProvider(["google.vertex.*"], {
+ language: [vertexServiceTierMiddleware],
+ });
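
Unlike Bedrock and Groq, Vertex expresses tiers through request headers rather than a body field, and the serviceTier option is stripped after translation. The header mapping restated in isolation (the real middleware only sets headers the caller has not already set):

    type GatewayServiceTier = "auto" | "default" | "flex" | "scale" | "priority";

    // Standalone restatement of vertexServiceTierMiddleware's header mapping.
    function vertexTierHeaders(tier: GatewayServiceTier): Record<string, string> {
      switch (tier) {
        case "flex":
          return {
            "x-vertex-ai-llm-request-type": "shared",
            "x-vertex-ai-llm-shared-request-type": "flex",
          };
        case "priority":
          return {
            "x-vertex-ai-llm-request-type": "shared",
            "x-vertex-ai-llm-shared-request-type": "priority",
          };
        case "scale":
          return { "x-vertex-ai-llm-request-type": "dedicated" }; // provisioned throughput
        case "default":
          return { "x-vertex-ai-llm-request-type": "shared" };
        case "auto":
          return {}; // let Vertex route the request
      }
    }
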
package/dist/types.d.ts CHANGED
@@ -1,6 +1,6 @@
  import type { ProviderV3 } from "@ai-sdk/provider";
  import type { Tracer } from "@opentelemetry/api";
- import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
+ import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsStream } from "./endpoints/chat-completions/schema";
  import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
  import type { Model, ModelList } from "./endpoints/models";
  import type { Logger, LoggerConfig } from "./logger";
@@ -57,7 +57,7 @@ export type GatewayContext = {
  /**
  * Result returned by the handler (pre-response).
  */
- result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Model | ModelList;
+ result?: ChatCompletions | ChatCompletionsStream | Embeddings | Model | ModelList;
  /**
  * Response object returned by the handler.
  */
@@ -104,7 +104,7 @@ export type GatewayHooks = {
  * Runs after the endpoint handler.
  * @returns Result to replace, or undefined to keep original.
  */
- after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings>;
+ after?: (ctx: AfterHookContext) => void | ChatCompletions | ChatCompletionsStream | Embeddings | Promise<void | ChatCompletions | ChatCompletionsStream | Embeddings>;
  /**
  * Runs after the lifecycle has produced the final Response.
  * @returns Replacement Response, or undefined to keep original.
@@ -112,6 +112,18 @@ export type GatewayHooks = {
  onResponse?: (ctx: OnResponseHookContext) => void | Response | Promise<void | Response>;
  };
  export type TelemetrySignalLevel = "off" | "required" | "recommended" | "full";
+ export declare const DEFAULT_CHAT_TIMEOUT_MS: number;
+ export type GatewayTimeout = number | null | {
+ /**
+ * Default timeout used.
+ */
+ normal?: number | null;
+ /**
+ * Timeout used when `service_tier=flex`.
+ * Defaults to 3x `normal` when omitted.
+ */
+ flex?: number | null;
+ };
  /**
  * Main configuration object for the gateway.
  */
@@ -162,9 +174,18 @@ export type GatewayConfig = {
  hebo?: TelemetrySignalLevel;
  };
  };
+ /**
+ * Optional timeout for server responses.
+ * Supports a number in milliseconds, or tiered config.
+ */
+ timeouts?: GatewayTimeout;
  };
  export declare const kParsed: unique symbol;
- export type GatewayConfigParsed = GatewayConfig & {
+ export type GatewayConfigParsed = Omit<GatewayConfig, "timeouts"> & {
+ timeouts: {
+ normal?: number;
+ flex?: number;
+ };
  [kParsed]: true;
  };
  export interface Endpoint {
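
The new timeouts option accepts either a flat number of milliseconds or a tiered object; per the doc comments, flex applies to service_tier=flex requests and defaults to 3x normal when omitted. A hedged configuration sketch (assuming GatewayConfig is re-exported from the package root):

    import type { GatewayConfig } from "@hebo-ai/gateway";

    // Only the new field is shown; the rest of the config is elided.
    const config = {
      // timeouts: 120_000,       // flat form: one limit in milliseconds
      timeouts: {
        normal: 5 * 60 * 1000,   // matches DEFAULT_CHAT_TIMEOUT_MS
        flex: 15 * 60 * 1000,    // explicit here; defaults to 3x normal if omitted
      },
    } satisfies Pick<GatewayConfig, "timeouts">;
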
package/dist/types.js CHANGED
@@ -1 +1,2 @@
+ export const DEFAULT_CHAT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
  export const kParsed = Symbol("hebo.gateway.parsed");
package/dist/utils/response.d.ts CHANGED
@@ -1,3 +1,6 @@
+ import type { SseFrame } from "./stream";
  export declare const prepareResponseInit: (requestId: string) => ResponseInit;
  export declare const mergeResponseInit: (defaultHeaders: HeadersInit, responseInit?: ResponseInit) => ResponseInit;
- export declare const toResponse: (result: ReadableStream | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit) => Response;
+ export declare const toResponse: (result: ReadableStream<SseFrame> | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit, streamOptions?: {
+ onDone?: (status: number, reason?: unknown) => void;
+ }) => Response;
package/dist/utils/response.js CHANGED
@@ -1,17 +1,6 @@
  import { REQUEST_ID_HEADER } from "./headers";
+ import { toSseStream } from "./stream";
  const TEXT_ENCODER = new TextEncoder();
- class JsonToSseTransformStream extends TransformStream {
- constructor() {
- super({
- transform(part, controller) {
- controller.enqueue(`data: ${JSON.stringify(part)}\n\n`);
- },
- flush(controller) {
- controller.enqueue("data: [DONE]\n\n");
- },
- });
- }
- }
  export const prepareResponseInit = (requestId) => ({
  headers: { [REQUEST_ID_HEADER]: requestId },
  });
@@ -31,11 +20,11 @@ export const mergeResponseInit = (defaultHeaders, responseInit) => {
  headers,
  };
  };
- export const toResponse = (result, responseInit) => {
+ export const toResponse = (result, responseInit, streamOptions) => {
  let body;
  const isStream = result instanceof ReadableStream;
  if (isStream) {
- body = result.pipeThrough(new JsonToSseTransformStream()).pipeThrough(new TextEncoderStream());
+ body = toSseStream(result, streamOptions);
  }
  else if (result instanceof Uint8Array) {
  body = result;
@@ -43,16 +32,12 @@ export const toResponse = (result, responseInit) => {
  else if (typeof result === "string") {
  body = TEXT_ENCODER.encode(result);
  }
- else if (result instanceof Error) {
- body = TEXT_ENCODER.encode(JSON.stringify({ message: result.message }));
- }
  else {
  body = TEXT_ENCODER.encode(JSON.stringify(result));
  }
  if (!responseInit?.statusText) {
- const isError = result instanceof Error;
- const status = responseInit?.status ?? (isError ? 500 : 200);
- const statusText = isError ? "REQUEST_FAILED" : "OK";
+ const status = responseInit?.status ?? 200;
+ const statusText = "OK";
  const headers = responseInit?.headers;
  responseInit = headers ? { status, statusText, headers } : { status, statusText };
  }
package/dist/utils/stream.d.ts ADDED
@@ -0,0 +1,9 @@
+ export type SseFrame<T = unknown, E extends string | undefined = string | undefined> = {
+ data: T;
+ event?: E;
+ };
+ export type SseErrorFrame = SseFrame<Error, "error" | undefined>;
+ export declare function toSseStream(src: ReadableStream<SseFrame>, options?: {
+ onDone?: (status: number, reason?: unknown) => void;
+ keepAliveMs?: number;
+ }): ReadableStream<Uint8Array>;
package/dist/utils/stream.js ADDED
@@ -0,0 +1,100 @@
+ import { toOpenAIError } from "../errors/openai";
+ const TEXT_ENCODER = new TextEncoder();
+ const SSE_DONE_CHUNK = TEXT_ENCODER.encode("data: [DONE]\n\n");
+ const SSE_KEEP_ALIVE_CHUNK = TEXT_ENCODER.encode(": keep-alive\n\n");
+ const SSE_DEFAULT_KEEP_ALIVE_MS = 20_000;
+ export function toSseStream(src, options = {}) {
+ const keepAliveMs = options.keepAliveMs ?? SSE_DEFAULT_KEEP_ALIVE_MS;
+ let reader;
+ let timer;
+ let finished = false;
+ const done = (controller, status, reason) => {
+ if (finished)
+ return;
+ finished = true;
+ if (timer)
+ clearTimeout(timer);
+ try {
+ options.onDone?.(status, reason);
+ }
+ catch { }
+ try {
+ controller.enqueue(SSE_DONE_CHUNK);
+ }
+ catch { }
+ try {
+ controller.close();
+ }
+ catch { }
+ };
+ const heartbeat = (controller) => {
+ if (timer)
+ clearTimeout(timer);
+ if (!keepAliveMs || keepAliveMs <= 0 || finished)
+ return;
+ timer = setTimeout(() => {
+ if (finished)
+ return;
+ try {
+ controller.enqueue(SSE_KEEP_ALIVE_CHUNK);
+ heartbeat(controller);
+ }
+ catch { }
+ }, keepAliveMs);
+ };
+ return new ReadableStream({
+ start(controller) {
+ reader = src.getReader();
+ heartbeat(controller);
+ },
+ async pull(controller) {
+ if (finished)
+ return;
+ try {
+ // oxlint-disable-next-line no-await-in-loop
+ const result = await reader.read();
+ if (result.done) {
+ done(controller, 200);
+ return;
+ }
+ const value = result.value;
+ if (value.event === "error" || value.data instanceof Error) {
+ const error = toOpenAIError(value.data);
+ controller.enqueue(TEXT_ENCODER.encode(serializeSseFrame({ event: value.event, data: error })));
+ done(controller, error.error.type === "invalid_request_error" ? 422 : 502, value.data);
+ reader.cancel(value.data).catch(() => { });
+ return;
+ }
+ controller.enqueue(TEXT_ENCODER.encode(serializeSseFrame(value)));
+ heartbeat(controller);
+ }
+ catch (error) {
+ try {
+ controller.enqueue(TEXT_ENCODER.encode(serializeSseFrame({
+ event: "error",
+ data: toOpenAIError(error),
+ })));
+ }
+ catch { }
+ done(controller, 502, error);
+ }
+ },
+ cancel(reason) {
+ if (finished)
+ return;
+ finished = true;
+ if (timer)
+ clearTimeout(timer);
+ options.onDone?.(499, reason);
+ return reader?.cancel(reason).catch(() => { });
+ },
+ });
+ }
+ function serializeSseFrame(frame) {
+ let out = "";
+ if (frame.event) {
+ out += `event: ${frame.event}\n`;
+ }
+ out += `data: ${JSON.stringify(frame.data)}\n\n`;
+ return out;
+ }
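
Each frame serializes to an optional event: line plus a data: line, error frames are converted to OpenAI-style payloads before the stream terminates, idle gaps are filled with ": keep-alive" comments (every 20 s by default), and every stream ends with data: [DONE]. A minimal consumption sketch (the deep import path assumes the built layout shown in this diff):

    import { toSseStream } from "@hebo-ai/gateway/dist/utils/stream.js";

    const frames: ReadableStream<{ data: unknown; event?: string }> = new ReadableStream({
      start(controller) {
        controller.enqueue({ data: { delta: "Hel" } });
        controller.enqueue({ data: { delta: "lo" }, event: "message" });
        controller.close();
      },
    });

    const text = await new Response(toSseStream(frames)).text();
    console.log(text);
    // data: {"delta":"Hel"}
    //
    // event: message
    // data: {"delta":"lo"}
    //
    // data: [DONE]
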
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@hebo-ai/gateway",
- "version": "0.6.2",
+ "version": "0.7.0",
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
  "keywords": [
  "ai",
package/dist/telemetry/stream.d.ts DELETED
@@ -1,3 +0,0 @@
- export declare const wrapStream: (src: ReadableStream, hooks: {
- onDone?: (status: number, reason: unknown) => void;
- }) => ReadableStream;
package/dist/telemetry/stream.js DELETED
@@ -1,58 +0,0 @@
- import { toOpenAIError } from "../errors/openai";
- const isErrorChunk = (v) => v instanceof Error || (typeof v === "object" && v !== null && "error" in v);
- export const wrapStream = (src, hooks) => {
- let finished = false;
- let reader;
- const done = (controller, status, reason) => {
- if (finished)
- return;
- finished = true;
- hooks.onDone?.(status, reason);
- if (status !== 200) {
- reader?.cancel(reason).catch(() => { });
- }
- try {
- controller.close();
- }
- catch { }
- };
- return new ReadableStream({
- async start(controller) {
- reader = src.getReader();
- try {
- for (;;) {
- // oxlint-disable-next-line no-await-in-loop, no-unsafe-assignment
- const { value, done: eof } = await reader.read();
- if (eof)
- break;
- controller.enqueue(value);
- if (isErrorChunk(value)) {
- done(controller, toOpenAIError(value).error.type === "invalid_request_error" ? 422 : 502, value);
- return;
- }
- }
- done(controller, 200);
- }
- catch (err) {
- try {
- controller.enqueue(toOpenAIError(err));
- }
- catch { }
- done(controller, 502, err);
- }
- finally {
- try {
- reader?.releaseLock();
- }
- catch { }
- }
- },
- cancel(reason) {
- if (finished)
- return;
- finished = true;
- hooks.onDone?.(499, reason);
- reader?.cancel(reason).catch(() => { });
- },
- });
- };