@hebo-ai/gateway 0.10.6 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +97 -84
  2. package/dist/config.js +21 -4
  3. package/dist/endpoints/chat-completions/handler.js +9 -5
  4. package/dist/endpoints/conversations/handler.js +3 -3
  5. package/dist/endpoints/embeddings/handler.js +2 -2
  6. package/dist/endpoints/messages/converters.js +17 -21
  7. package/dist/endpoints/messages/handler.js +5 -5
  8. package/dist/endpoints/responses/handler.js +9 -5
  9. package/dist/index.d.ts +1 -0
  10. package/dist/index.js +1 -0
  11. package/dist/lifecycle.js +1 -2
  12. package/dist/models/alibaba/index.d.ts +2 -0
  13. package/dist/models/alibaba/index.js +2 -0
  14. package/dist/models/alibaba/middleware.d.ts +2 -0
  15. package/dist/models/alibaba/middleware.js +31 -0
  16. package/dist/models/alibaba/presets.d.ts +900 -0
  17. package/dist/models/alibaba/presets.js +252 -0
  18. package/dist/models/amazon/presets.d.ts +31 -31
  19. package/dist/models/anthropic/presets.d.ts +68 -68
  20. package/dist/models/cohere/presets.d.ts +38 -38
  21. package/dist/models/deepseek/index.d.ts +2 -0
  22. package/dist/models/deepseek/index.js +2 -0
  23. package/dist/models/deepseek/middleware.d.ts +2 -0
  24. package/dist/models/deepseek/middleware.js +25 -0
  25. package/dist/models/deepseek/presets.d.ts +51 -0
  26. package/dist/models/deepseek/presets.js +33 -0
  27. package/dist/models/google/presets.d.ts +94 -94
  28. package/dist/models/google/presets.js +3 -1
  29. package/dist/models/meta/presets.d.ts +84 -84
  30. package/dist/models/meta/presets.js +11 -12
  31. package/dist/models/minimax/index.d.ts +2 -0
  32. package/dist/models/minimax/index.js +2 -0
  33. package/dist/models/minimax/middleware.d.ts +2 -0
  34. package/dist/models/minimax/middleware.js +43 -0
  35. package/dist/models/minimax/presets.d.ts +99 -0
  36. package/dist/models/minimax/presets.js +47 -0
  37. package/dist/models/moonshot/index.d.ts +2 -0
  38. package/dist/models/moonshot/index.js +2 -0
  39. package/dist/models/moonshot/middleware.d.ts +2 -0
  40. package/dist/models/moonshot/middleware.js +32 -0
  41. package/dist/models/moonshot/presets.d.ts +104 -0
  42. package/dist/models/moonshot/presets.js +36 -0
  43. package/dist/models/openai/presets.d.ts +128 -128
  44. package/dist/models/openai/presets.js +1 -1
  45. package/dist/models/types.d.ts +1 -1
  46. package/dist/models/types.js +39 -0
  47. package/dist/models/voyage/presets.d.ts +46 -46
  48. package/dist/models/xai/index.d.ts +2 -0
  49. package/dist/models/xai/index.js +2 -0
  50. package/dist/models/xai/middleware.d.ts +2 -0
  51. package/dist/models/xai/middleware.js +37 -0
  52. package/dist/models/xai/presets.d.ts +210 -0
  53. package/dist/models/xai/presets.js +55 -0
  54. package/dist/models/zai/index.d.ts +2 -0
  55. package/dist/models/zai/index.js +2 -0
  56. package/dist/models/zai/middleware.d.ts +2 -0
  57. package/dist/models/zai/middleware.js +25 -0
  58. package/dist/models/zai/presets.d.ts +141 -0
  59. package/dist/models/zai/presets.js +41 -0
  60. package/dist/providers/alibaba/canonical.d.ts +3 -0
  61. package/dist/providers/alibaba/canonical.js +13 -0
  62. package/dist/providers/alibaba/index.d.ts +1 -0
  63. package/dist/providers/alibaba/index.js +1 -0
  64. package/dist/providers/bedrock/canonical.js +3 -0
  65. package/dist/providers/chutes/canonical.d.ts +3 -0
  66. package/dist/providers/chutes/canonical.js +14 -0
  67. package/dist/providers/chutes/index.d.ts +1 -0
  68. package/dist/providers/chutes/index.js +1 -0
  69. package/dist/providers/deepinfra/canonical.d.ts +3 -0
  70. package/dist/providers/deepinfra/canonical.js +40 -0
  71. package/dist/providers/deepinfra/index.d.ts +1 -0
  72. package/dist/providers/deepinfra/index.js +1 -0
  73. package/dist/providers/deepseek/canonical.d.ts +3 -0
  74. package/dist/providers/deepseek/canonical.js +9 -0
  75. package/dist/providers/deepseek/index.d.ts +1 -0
  76. package/dist/providers/deepseek/index.js +1 -0
  77. package/dist/providers/fireworks/canonical.d.ts +3 -0
  78. package/dist/providers/fireworks/canonical.js +27 -0
  79. package/dist/providers/fireworks/index.d.ts +2 -0
  80. package/dist/providers/fireworks/index.js +2 -0
  81. package/dist/providers/fireworks/middleware.d.ts +2 -0
  82. package/dist/providers/fireworks/middleware.js +35 -0
  83. package/dist/providers/groq/canonical.js +1 -1
  84. package/dist/providers/minimax/canonical.d.ts +3 -0
  85. package/dist/providers/minimax/canonical.js +9 -0
  86. package/dist/providers/minimax/index.d.ts +1 -0
  87. package/dist/providers/minimax/index.js +1 -0
  88. package/dist/providers/moonshot/canonical.d.ts +3 -0
  89. package/dist/providers/moonshot/canonical.js +6 -0
  90. package/dist/providers/moonshot/index.d.ts +1 -0
  91. package/dist/providers/moonshot/index.js +1 -0
  92. package/dist/providers/togetherai/canonical.d.ts +3 -0
  93. package/dist/providers/togetherai/canonical.js +24 -0
  94. package/dist/providers/togetherai/index.d.ts +1 -0
  95. package/dist/providers/togetherai/index.js +1 -0
  96. package/dist/providers/types.d.ts +1 -1
  97. package/dist/providers/types.js +10 -0
  98. package/dist/providers/vertex/canonical.js +5 -1
  99. package/dist/providers/xai/canonical.d.ts +3 -0
  100. package/dist/providers/xai/canonical.js +12 -0
  101. package/dist/providers/xai/index.d.ts +1 -0
  102. package/dist/providers/xai/index.js +1 -0
  103. package/dist/providers/zai/canonical.d.ts +3 -0
  104. package/dist/providers/zai/canonical.js +10 -0
  105. package/dist/providers/zai/index.d.ts +1 -0
  106. package/dist/providers/zai/index.js +1 -0
  107. package/dist/types.d.ts +30 -16
  108. package/dist/utils/index.d.ts +8 -0
  109. package/dist/utils/index.js +8 -0
  110. package/dist/utils/request.d.ts +2 -1
  111. package/dist/utils/request.js +35 -7
  112. package/package.json +133 -9
package/README.md CHANGED
@@ -16,12 +16,12 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
16
16
  - 💬 Open Responses `/responses` endpoint (stateless), including /conversations.
17
17
  - 🗨️ Anthropic-compatible `/messages` endpoint.
18
18
  - 🔌 Integrate into your existing Hono, Elysia, Next.js & TanStack apps.
19
- - 🧩 Provider registry compatible with Vercel AI SDK providers.
19
+ - 🧩 Provider registry compatible with any Vercel AI SDK provider.
20
20
  - 🧭 Canonical model IDs and parameter naming across providers.
21
21
  - 🗂️ Model catalog with extensible metadata capabilities.
22
22
  - 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
23
- - 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
24
23
  - 👁️ Observability via OTel GenAI semantic conventions (Langfuse-compatible).
24
+ - 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
25
25
 
26
26
  ## 📦 Installation
27
27
 
@@ -34,7 +34,7 @@ bun install @hebo-ai/gateway
34
34
  - Quickstart
35
35
  - [Setup A Gateway Instance](#setup-a-gateway-instance) | [Mount Route Handlers](#mount-route-handlers) | [Call the Gateway](#call-the-gateway)
36
36
  - Configuration Reference
37
- - [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Storage](#storage) | [Logger](#logger-settings) | [Observability](#observability) | [Timeouts](#timeout-settings)
37
+ - [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Storage](#storage) | [Logger](#logger-settings) | [Observability](#observability) | [Advanced](#advanced-settings)
38
38
  - Framework Support
39
39
  - [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
40
40
  - Runtime Support
@@ -71,21 +71,14 @@ export const gw = gateway({
71
71
 
72
72
  // MODEL CATALOG
73
73
  models: defineModelCatalog(
74
- // Choose a pre-configured preset for common SOTA models
74
+ // Choose a pre-configured preset
75
75
  gptOss20b,
76
- // Or add a whole model family with your own provider list
77
- gptOss["all"].map((preset) =>
78
- preset({
79
- providers: ["groq"],
80
- }),
81
- ),
76
+ // Or add a whole model family
77
+ gptOss["all"],
82
78
  ),
83
79
  });
84
80
  ```
85
81
 
86
- > [!NOTE]
87
- > Don't forget to install the Groq provider package too: `@ai-sdk/groq`.
88
-
89
82
  > [!TIP]
90
83
 > Why `withCanonicalIdsForX`? In most cases you want your gateway to route using model IDs that are consistent across providers (e.g. `openai/gpt-oss-20b` rather than `openai.gpt-oss-20b-v1:0`). We call that `Canonical IDs` - they are what enable routing, fallbacks, and policy rules. Without this wrapper, providers only understand their native IDs, which would make cross-provider routing impossible.
91
84
 
@@ -110,11 +103,13 @@ const app = new Elysia().mount("/v1/gateway/", gw.handler).listen(3000);
110
103
  console.log(`🐒 Hebo Gateway is running with Elysia at ${app.server?.url}`);
111
104
  ```
112
105
 
106
+ See [Framework Support](#-framework-support) for all supported framework examples.
107
+
113
108
  ### Call the Gateway
114
109
 
115
- Since Hebo Gateway exposes OpenAI-compatible endpoints, it can be used with a broad set of common AI SDKs like **Vercel AI SDK**, **TanStack AI**, **LangChain**, the official **OpenAI SDK** and others.
110
+ Since Hebo Gateway exposes OpenAI-compatible and Anthropic-compatible endpoints, it can be used with a broad set of common AI SDKs like **Vercel AI SDK**, **TanStack AI**, **LangChain**, the official **OpenAI SDK**, the official **Anthropic SDK**, and others.
116
111
 
117
- Here is a quick example using the Vercel AI SDK:
112
+ Here is a quick example using the Vercel AI SDK against the OpenAI-compatible surface:
118
113
 
119
114
  ```ts
120
115
  import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
@@ -139,20 +134,22 @@ console.log(text);
139
134
 
140
135
  ### Providers
141
136
 
142
- Hebo Gateway’s provider registry accepts any **Vercel AI SDK Provider**. For Hebo to be able to route a model across different providers, the names need to be canonicalized to a common form, for example 'openai/gpt-4.1-mini' instead of 'gpt-4.1-mini'.
137
+ For most setups, start with one of the built-in canonical provider adapters. They wrap a provider SDK and let the gateway route using stable canonical model IDs like `openai/gpt-4.1-mini` instead of provider-native IDs.
138
+
139
+ Built-in adapters are available for `Alibaba`, `Anthropic`, `Bedrock`, `Chutes`, `Cohere`, `DeepInfra`, `DeepSeek`, `Fireworks`, `Groq`, `MiniMax`, `Moonshot`, `OpenAI`, `Together AI`, `Vertex`, `Voyage`, `xAI`, and `Z.ai`.
143
140
 
144
- We currently provide out-of-the-box canonical providers for: `Bedrock`, `Anthropic`, `Cohere`, `Vertex`, `Groq`, `OpenAI`, and `Voyage`. Import the helper from the matching package path:
141
+ Import the helper from the matching package path:
145
142
 
146
143
  ```ts
147
144
  // pattern: @hebo-ai/gateway/providers/<provider>
148
145
  import { withCanonicalIdsForGroq } from "@hebo-ai/gateway/providers/groq";
149
146
  ```
150
147
 
151
- If an adapter is not yet provided, you can create your own by wrapping the provider instance with the `withCanonicalIds` helper and define your custom canonicalization mapping & rules.
148
+ If you need a provider that is not on that list, Hebo Gateway’s provider registry also accepts any **Vercel AI SDK Provider**.
152
149
 
153
150
  For Azure, use `createAzure` from `@ai-sdk/azure` directly. Name each [Azure AI Foundry](https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/endpoints) deployment after its Hebo canonical ID (e.g. `anthropic/claude-sonnet-4.5`).
154
151
 
155
- For other providers, use `withCanonicalIds` with an explicit `mapping`:
152
+ For custom provider setups, wrap the provider instance with `withCanonicalIds` and define your own canonicalization mapping and rules:
156
153
 
157
154
  ```ts
158
155
  import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
@@ -184,11 +181,13 @@ const gw = gateway({
184
181
 
185
182
  ### Models
186
183
 
187
- Register models to tell the gateway what's available, under which canonical ID and what capabilities each one has.
184
+ Start with the built-in model presets when possible. They give you ready-to-use catalog entries with canonical IDs, metadata, and default provider lists.
185
+
186
+ Built-in preset families are available for `Alibaba Qwen`, `Amazon Nova`, `Anthropic Claude`, `Cohere Command/Embed`, `DeepSeek`, `Google Gemini`, `Meta Llama`, `MiniMax`, `Moonshot Kimi`, `OpenAI GPT/GPT-OSS`, `Voyage`, `xAI Grok`, and `Z.ai GLM`.
188
187
 
189
188
  #### Model Presets
190
189
 
191
- To simplify the registration, Hebo Gateway ships a set of model presets under `@hebo-ai/gateway/models`. Use these when you want ready-to-use catalog entries with sane defaults for common SOTA models.
190
+ Hebo Gateway ships a set of model presets under `@hebo-ai/gateway/models`. Use these when you want ready-to-use catalog entries with sane defaults for common SOTA models.
192
191
 
193
192
  Presets come in two forms:
194
193
 
@@ -214,6 +213,9 @@ const modelsFromFamily = defineModelCatalog(
214
213
 
215
214
  Out-of-the-box model presets:
216
215
 
216
+ - **Alibaba** — `@hebo-ai/gateway/models/alibaba`
217
+ Qwen: `qwen` (`v3`, `v3.5`, `v3.6`, `v3.x`, `coder`, `vl`, `embedding`, `embeddings`, `latest`, `all`)
218
+
217
219
  - **Amazon** — `@hebo-ai/gateway/models/amazon`
218
220
  Nova: `nova` (`v1`, `v2`, `v1.x`, `v2.x`, `latest`, `embeddings`, `all`)
219
221
 
@@ -224,23 +226,39 @@ Out-of-the-box model presets:
224
226
  Command: `command` (`A`, `R`, `latest`, `all`)
225
227
  Embed: `embed` (`v4`, `v3`, `latest`, `all`)
226
228
 
229
+ - **DeepSeek** — `@hebo-ai/gateway/models/deepseek`
230
+ DeepSeek: `deepseek` (`v3.2`, `latest`, `all`)
231
+
227
232
  - **Google** — `@hebo-ai/gateway/models/google`
228
233
  Gemini: `gemini` (`v2.5`, `v3-preview`, `v2.x`, `v3.x`, `embeddings`, `latest`, `preview`, `all`)
234
+ Gemma: `gemma` (`v3`, `v4`, `v3.x`, `v4.x`, `latest`, `all`)
229
235
 
230
236
  - **Meta** — `@hebo-ai/gateway/models/meta`
231
237
  Llama: `llama` (`v3.1`, `v3.2`, `v3.3`, `v4`, `v3.x`, `v4.x`, `latest`, `all`)
232
238
 
239
+ - **MiniMax** — `@hebo-ai/gateway/models/minimax`
240
+ MiniMax: `minimax` (`v2`, `v2.x`, `latest`, `all`)
241
+
242
+ - **Moonshot** — `@hebo-ai/gateway/models/moonshot`
243
+ Kimi: `kimi` (`k2.5`, `k2.6`, `k2.x`, `latest`, `all`)
244
+
233
245
  - **OpenAI** — `@hebo-ai/gateway/models/openai`
234
- GPT: `gpt` (`v5`, `v5.1`, `v5.2`, `v5.3`, `v5.x`, `chat`, `codex`, `pro`, `latest`, `all`)
246
+ GPT: `gpt` (`v5`, `v5.1`, `v5.2`, `v5.3`, `v5.4`, `v5.x`, `chat`, `codex`, `pro`, `latest`, `all`)
235
247
  GPT-OSS: `gptOss` (`v1`, `v1.x`, `latest`, `all`)
236
248
  Embeddings: `textEmbeddings` (`v3`, `v3.x`, `latest`, `all`)
237
249
 
238
250
  - **Voyage** — `@hebo-ai/gateway/models/voyage`
239
251
  Voyage: `voyage` (`v2`, `v3`, `v3.5`, `v4`, `v2.x`, `v3.x`, `v4.x`, `latest`, `all`)
240
252
 
253
+ - **xAI** — `@hebo-ai/gateway/models/xai`
254
+ Grok: `grok` (`v4.1`, `v4.2`, `latest`, `all`)
255
+
256
+ - **Z.ai** — `@hebo-ai/gateway/models/zai`
257
+ GLM: `glm` (`v5`, `v5.1`, `v5.x`, `latest`, `all`)
258
+
241
259
  #### User-defined Models
242
260
 
243
- As the ecosystem is moving faster than anyone can keep-up with, you can always register your own model entries by following the `CatalogModel` type.
261
+ If a built-in preset does not exist yet, you can always register your own model entries by following the `CatalogModel` type.
244
262
 
245
263
  ```ts
246
264
  const gw = gateway({
@@ -808,32 +826,7 @@ Provider behavior:
808
826
 
809
827
  ### Compressed Requests
810
828
 
811
- The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum _decompressed_ body size for these compressed requests, protecting against gzip bombs and oversized payloads.
812
-
813
- ```ts
814
- import { gateway } from "@hebo-ai/gateway";
815
-
816
- const gw = gateway({
817
- // ...
818
- // Maximum decompressed body size in bytes (default: 10 MB).
819
- // Set to 0 to disable the decompressed size limit.
820
- maxBodySize: 10 * 1024 * 1024,
821
- });
822
- ```
823
-
824
- Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
825
-
826
- > [!IMPORTANT]
827
- > **Plain (uncompressed) request body size limits** are _not_ enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
828
- >
829
- > Framework-level configuration examples:
830
- >
831
- > - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
832
- > - **Elysia** — inherits from Bun's `maxRequestBodySize`
833
- > - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
834
- > - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
835
- > - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
836
- > - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
829
+ The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The [`advanced.maxBodySize`](#max-body-size) option controls the maximum _decompressed_ body size for these compressed requests, protecting against gzip bombs and oversized payloads. See [Advanced Settings](#advanced-settings) for configuration details.
837
830
 
838
831
  ## 🧪 Advanced Usage
839
832
 
@@ -1034,25 +1027,38 @@ const gw = gateway({
1034
1027
 
1035
1028
  Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
1036
1029
 
1037
- ### Timeout Settings
1030
+ ### Advanced Settings
1038
1031
 
1039
- You can configure request timeouts via the `timeouts` field:
1032
+ The `advanced` field groups optional settings for timeouts, body size limits, and header forwarding.
1040
1033
 
1041
1034
  ```ts
1042
1035
  import { gateway } from "@hebo-ai/gateway";
1043
1036
 
1044
1037
  const gw = gateway({
1045
1038
  // ...
1046
- // default timeout is 300_000 (5 minutes).
1047
- // You can set one timeout for all tiers...
1039
+ advanced: {
1040
+ timeouts: { normal: 60_000, flex: 180_000 },
1041
+ maxBodySize: 10 * 1024 * 1024,
1042
+ forwardHeaders: ["x-my-custom-trace-id", "x-internal-team"],
1043
+ },
1044
+ });
1045
+ ```
1046
+
1047
+ #### Timeouts
1048
+
1049
+ Controls upstream request timeouts. Accepts a number (milliseconds), `null` (disabled), or a tiered object. Default is `300_000` (5 minutes).
1050
+
1051
+ ```ts
1052
+ advanced: {
1053
+ // Single timeout for all tiers
1048
1054
  timeouts: 60_000,
1049
- // ...disable timeouts completely:
1055
+ // ...or disable completely:
1050
1056
  // timeouts: null,
1051
1057
  // ...or split by service tier:
1052
1058
  // - normal: all non-flex tiers (set null to disable)
1053
1059
  // - flex: defaults to 3x normal when omitted (set null to disable)
1054
1060
  // timeouts: { normal: 30_000, flex: null },
1055
- });
1061
+ }
1056
1062
  ```
1057
1063
 
1058
1064
  > [!NOTE]
@@ -1065,6 +1071,42 @@ const gw = gateway({
1065
1071
  > **Provider/service timeout limits**
1066
1072
  > Serverless platforms (e.g. Cloudflare Workers, Vercel Edge/Serverless, AWS Lambda) also enforce platform time limits (roughly ~25-100s on edge paths, ~300s for streaming, and up to ~900s configurable for some).
1067
1073
 
1074
+ #### Max Body Size
1075
+
1076
+ Maximum _decompressed_ request body size in bytes for gzip/deflate-encoded requests. Protects against gzip bombs and oversized payloads. Default is `10_485_760` (10 MB). Set to `0` to disable.
1077
+
1078
+ Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
1079
+
1080
+ > [!IMPORTANT]
1081
+ > **Plain (uncompressed) request body size limits** are _not_ enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
1082
+ >
1083
+ > Framework-level configuration examples:
1084
+ >
1085
+ > - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
1086
+ > - **Elysia** — inherits from Bun's `maxRequestBodySize`
1087
+ > - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
1088
+ > - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
1089
+ > - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
1090
+ > - **Node.js `http`** — no built-in request-body size option; enforce a limit while reading the request stream, or use a reverse proxy like nginx (`client_max_body_size 10m`)
1091
+
1092
+ #### Forward Headers
1093
+
1094
+ Additional headers to forward to upstream providers, merged with the built-in allowlist at startup. Header names are matched case-insensitively. The merge is computed once at config parse time, not per-request.
1095
+
1096
+ > [!CAUTION]
1097
+ > Only add non-sensitive headers. Any header listed in `advanced.forwardHeaders` is forwarded to upstream providers when present on the incoming request — avoid credentials, cookies, user tokens, or raw PII.
1098
+
1099
+ The gateway ships a built-in allowlist covering common provider, agent, and SDK headers (OpenAI, Anthropic, Bedrock, Vertex, OpenRouter, Cohere, Stainless, Google, Kilo Code, Cline, Roo Code, Goose, Claude Code). Use `forwardHeaders` to extend it with your own headers without modifying the gateway source.
1100
+
1101
+ ```ts
1102
+ advanced: {
1103
+ forwardHeaders: [
1104
+ "x-my-custom-trace-id",
1105
+ "x-internal-team",
1106
+ ],
1107
+ }
1108
+ ```
1109
+
1068
1110
  ### Passing Framework State to Hooks
1069
1111
 
1070
1112
  You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
@@ -1170,32 +1212,3 @@ Non-streaming versions are available via `toChatCompletionsResponse`. Equivalent
1170
1212
 
1171
1213
  > [!TIP]
1172
1214
  > Since Zod v4.3 you can generate a JSON Schema from any zod object by calling `z.toJSONSchema(...)`. This is useful for producing OpenAPI documentation from the same source of truth.
1173
-
1174
- ### Request Body Size
1175
-
1176
- The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum _decompressed_ body size for these compressed requests, protecting against gzip bombs and oversized payloads.
1177
-
1178
- ```ts
1179
- import { gateway } from "@hebo-ai/gateway";
1180
-
1181
- const gw = gateway({
1182
- // ...
1183
- // Maximum decompressed body size in bytes (default: 10 MB).
1184
- // Set to 0 to disable the decompressed size limit.
1185
- maxBodySize: 10 * 1024 * 1024,
1186
- });
1187
- ```
1188
-
1189
- Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
1190
-
1191
- > [!IMPORTANT]
1192
- > **Plain (uncompressed) request body size limits** are _not_ enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
1193
- >
1194
- > Framework-level configuration examples:
1195
- >
1196
- > - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
1197
- > - **Elysia** — inherits from Bun's `maxRequestBodySize`
1198
- > - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
1199
- > - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
1200
- > - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
1201
- > - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
package/dist/config.js CHANGED
@@ -4,6 +4,7 @@ import { createDefaultLogger } from "./logger/default";
4
4
  import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
5
5
  import { DEFAULT_CHAT_TIMEOUT_MS, kParsed, } from "./types";
6
6
  import { DEFAULT_MAX_BODY_SIZE } from "./utils/body";
7
+ import { FORWARD_HEADER_ALLOWLIST } from "./utils/request";
7
8
  export const parseConfig = (config) => {
8
9
  // If it has been parsed before, just return.
9
10
  if (kParsed in config)
@@ -72,7 +73,7 @@ export const parseConfig = (config) => {
72
73
  // Default timeouts
73
74
  let normal;
74
75
  let flex;
75
- const t = config.timeouts;
76
+ const t = config.advanced?.timeouts;
76
77
  if (t === null) {
77
78
  normal = flex = undefined;
78
79
  }
@@ -96,7 +97,7 @@ export const parseConfig = (config) => {
96
97
  }
97
98
  const parsedTimeouts = { normal, flex };
98
99
  // Body size limit
99
- const rawMax = config.maxBodySize;
100
+ const rawMax = config.advanced?.maxBodySize;
100
101
  let maxBodySize;
101
102
  if (typeof rawMax === "number" && Number.isFinite(rawMax) && rawMax >= 0) {
102
103
  maxBodySize = rawMax;
@@ -107,11 +108,27 @@ export const parseConfig = (config) => {
107
108
  logger.warn(`[config] invalid maxBodySize (${rawMax}), using default ${DEFAULT_MAX_BODY_SIZE}`);
108
109
  }
109
110
  }
111
+ // Merge forward header allowlist once.
112
+ const customHeaders = config.advanced?.forwardHeaders ?? [];
113
+ const forwardHeaders = new Set(FORWARD_HEADER_ALLOWLIST);
114
+ for (const header of customHeaders) {
115
+ try {
116
+ void new Headers([[header, ""]]);
117
+ }
118
+ catch {
119
+ logger.warn(`[config] invalid advanced.forwardHeaders entry ignored: ${JSON.stringify(header)}`);
120
+ continue;
121
+ }
122
+ forwardHeaders.add(header.trim().toLowerCase());
123
+ }
110
124
  // Return parsed config.
111
125
  return {
112
126
  ...config,
113
- timeouts: parsedTimeouts,
114
- maxBodySize,
127
+ advanced: {
128
+ timeouts: parsedTimeouts,
129
+ maxBodySize,
130
+ forwardHeaders: [...forwardHeaders],
131
+ },
115
132
  telemetry: {
116
133
  ...config.telemetry,
117
134
  enabled: telemetryEnabled,
@@ -24,7 +24,7 @@ export const chatCompletions = (config) => {
24
24
  throw new GatewayError("Method Not Allowed", 405);
25
25
  }
26
26
  // Parse + validate input (handles Content-Encoding decompression + body size limits).
27
- ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
27
+ ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
28
28
  logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
29
29
  addSpanEvent("hebo.request.deserialized");
30
30
  const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
@@ -81,10 +81,12 @@ export const chatCompletions = (config) => {
81
81
  let ttft = 0;
82
82
  const result = streamText({
83
83
  model: languageModelWithMiddleware,
84
- headers: prepareForwardHeaders(ctx.request),
84
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
85
85
  abortSignal: ctx.request.signal,
86
86
  timeout: {
87
- totalMs: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
87
+ totalMs: ctx.body.service_tier === "flex"
88
+ ? cfg.advanced.timeouts.flex
89
+ : cfg.advanced.timeouts.normal,
88
90
  },
89
91
  onAbort: () => {
90
92
  throw new DOMException("The operation was aborted.", "AbortError");
@@ -122,9 +124,11 @@ export const chatCompletions = (config) => {
122
124
  addSpanEvent("hebo.ai-sdk.started");
123
125
  const result = await generateText({
124
126
  model: languageModelWithMiddleware,
125
- headers: prepareForwardHeaders(ctx.request),
127
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
126
128
  abortSignal: ctx.request.signal,
127
- timeout: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
129
+ timeout: ctx.body.service_tier === "flex"
130
+ ? cfg.advanced.timeouts.flex
131
+ : cfg.advanced.timeouts.normal,
128
132
  experimental_include: {
129
133
  requestBody: false,
130
134
  responseBody: false,
@@ -43,7 +43,7 @@ export const conversations = (config) => {
43
43
  };
44
44
  }
45
45
  async function create(ctx) {
46
- const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
46
+ const body = await parseRequestBody(ctx.request, parsedConfig.advanced.maxBodySize);
47
47
  addSpanEvent("hebo.request.deserialized");
48
48
  const parsed = ConversationCreateParamsSchema.safeParse(body);
49
49
  if (!parsed.success) {
@@ -67,7 +67,7 @@ export const conversations = (config) => {
67
67
  return toConversation(entity);
68
68
  }
69
69
  async function update(ctx, conversationId) {
70
- const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
70
+ const body = await parseRequestBody(ctx.request, parsedConfig.advanced.maxBodySize);
71
71
  addSpanEvent("hebo.request.deserialized");
72
72
  const parsed = ConversationUpdateBodySchema.safeParse(body);
73
73
  if (!parsed.success) {
@@ -139,7 +139,7 @@ export const conversations = (config) => {
139
139
  };
140
140
  }
141
141
  async function addItems(ctx, conversationId) {
142
- const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
142
+ const body = await parseRequestBody(ctx.request, parsedConfig.advanced.maxBodySize);
143
143
  addSpanEvent("hebo.request.deserialized");
144
144
  const parsed = ConversationItemsAddBodySchema.safeParse(body);
145
145
  if (!parsed.success) {
@@ -24,7 +24,7 @@ export const embeddings = (config) => {
24
24
  throw new GatewayError("Method Not Allowed", 405);
25
25
  }
26
26
  // Parse + validate input (handles Content-Encoding decompression + body size limits).
27
- ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
27
+ ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
28
28
  logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[chat] EmbeddingsBody");
29
29
  addSpanEvent("hebo.request.deserialized");
30
30
  const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
@@ -75,7 +75,7 @@ export const embeddings = (config) => {
75
75
  addSpanEvent("hebo.ai-sdk.started");
76
76
  const result = await embedMany({
77
77
  model: embeddingModelWithMiddleware,
78
- headers: prepareForwardHeaders(ctx.request),
78
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
79
79
  abortSignal: ctx.request.signal,
80
80
  ...embedOptions,
81
81
  });
@@ -133,34 +133,30 @@ export function convertToModelMessages(messages, system) {
133
133
  return modelMessages;
134
134
  }
135
135
  function fromUserMessage(message, toolNameMap) {
136
- const result = [];
137
136
  if (typeof message.content === "string") {
138
- result.push({ role: "user", content: message.content });
139
- return result;
137
+ return [{ role: "user", content: message.content }];
140
138
  }
141
- const userParts = [];
142
- const toolResultParts = [];
139
+ const result = [];
140
+ let currentParts = [];
141
+ let currentRole;
143
142
  for (const block of message.content) {
144
- if (block.type === "tool_result") {
145
- toolResultParts.push(fromToolResultBlock(block, toolNameMap));
143
+ const isToolResult = block.type === "tool_result";
144
+ const role = isToolResult ? "tool" : "user";
145
+ const part = isToolResult
146
+ ? fromToolResultBlock(block, toolNameMap)
147
+ : fromUserContentBlock(block);
148
+ if (!part)
149
+ continue;
150
+ if (role === currentRole) {
151
+ currentParts.push(part);
146
152
  }
147
153
  else {
148
- const part = fromUserContentBlock(block);
149
- if (part)
150
- userParts.push(part);
154
+ currentParts = [part];
155
+ currentRole = role;
156
+ result.push({ role, content: currentParts });
151
157
  }
152
158
  }
153
- if (userParts.length > 0) {
154
- result.push({ role: "user", content: userParts });
155
- }
156
- if (toolResultParts.length > 0) {
157
- result.push({ role: "tool", content: toolResultParts });
158
- }
159
- // If only tool results and no user parts, still valid
160
- if (userParts.length === 0 && toolResultParts.length === 0) {
161
- result.push({ role: "user", content: "" });
162
- }
163
- return result;
159
+ return result.length > 0 ? result : [{ role: "user", content: "" }];
164
160
  }
165
161
  function fromUserContentBlock(block) {
166
162
  // tool_result blocks are handled separately in fromUserMessage
@@ -23,7 +23,7 @@ export const messages = (config) => {
23
23
  throw new GatewayError("Method Not Allowed", 405);
24
24
  }
25
25
  // Parse + validate input (handles Content-Encoding decompression + body size limits).
26
- ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
26
+ ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
27
27
  logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[messages] MessagesBody");
28
28
  addSpanEvent("hebo.request.deserialized");
29
29
  const parsed = MessagesBodySchema.safeParse(ctx.body);
@@ -72,10 +72,10 @@ export const messages = (config) => {
72
72
  let ttft = 0;
73
73
  const result = streamText({
74
74
  model: languageModelWithMiddleware,
75
- headers: prepareForwardHeaders(ctx.request),
75
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
76
76
  abortSignal: ctx.request.signal,
77
77
  timeout: {
78
- totalMs: cfg.timeouts.normal,
78
+ totalMs: cfg.advanced.timeouts.normal,
79
79
  },
80
80
  onAbort: () => {
81
81
  throw new DOMException("The operation was aborted.", "AbortError");
@@ -113,9 +113,9 @@ export const messages = (config) => {
113
113
  addSpanEvent("hebo.ai-sdk.started");
114
114
  const result = await generateText({
115
115
  model: languageModelWithMiddleware,
116
- headers: prepareForwardHeaders(ctx.request),
116
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
117
117
  abortSignal: ctx.request.signal,
118
- timeout: cfg.timeouts.normal,
118
+ timeout: cfg.advanced.timeouts.normal,
119
119
  experimental_include: {
120
120
  requestBody: false,
121
121
  responseBody: false,
@@ -23,7 +23,7 @@ export const responses = (config) => {
23
23
  throw new GatewayError("Method Not Allowed", 405);
24
24
  }
25
25
  // Parse + validate input (handles Content-Encoding decompression + body size limits).
26
- ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
26
+ ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
27
27
  logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[responses] ResponsesBody");
28
28
  addSpanEvent("hebo.request.deserialized");
29
29
  const parsed = ResponsesBodySchema.safeParse(ctx.body);
@@ -71,10 +71,12 @@ export const responses = (config) => {
71
71
  let ttft = 0;
72
72
  const result = streamText({
73
73
  model: languageModelWithMiddleware,
74
- headers: prepareForwardHeaders(ctx.request),
74
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
75
75
  abortSignal: ctx.request.signal,
76
76
  timeout: {
77
- totalMs: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
77
+ totalMs: ctx.body.service_tier === "flex"
78
+ ? cfg.advanced.timeouts.flex
79
+ : cfg.advanced.timeouts.normal,
78
80
  },
79
81
  onAbort: () => {
80
82
  throw new DOMException("The operation was aborted.", "AbortError");
@@ -112,9 +114,11 @@ export const responses = (config) => {
112
114
  addSpanEvent("hebo.ai-sdk.started");
113
115
  const result = await generateText({
114
116
  model: languageModelWithMiddleware,
115
- headers: prepareForwardHeaders(ctx.request),
117
+ headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
116
118
  abortSignal: ctx.request.signal,
117
- timeout: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
119
+ timeout: ctx.body.service_tier === "flex"
120
+ ? cfg.advanced.timeouts.flex
121
+ : cfg.advanced.timeouts.normal,
118
122
  experimental_include: {
119
123
  requestBody: false,
120
124
  responseBody: false,
package/dist/index.d.ts CHANGED
@@ -10,3 +10,4 @@ export * from "./models/catalog";
10
10
  export * from "./models/types";
11
11
  export * from "./providers/registry";
12
12
  export * from "./providers/types";
13
+ export { FORWARD_HEADER_ALLOWLIST } from "./utils";
package/dist/index.js CHANGED
@@ -9,3 +9,4 @@ export * from "./models/catalog";
9
9
  export * from "./models/types";
10
10
  export * from "./providers/registry";
11
11
  export * from "./providers/types";
12
+ export { FORWARD_HEADER_ALLOWLIST } from "./utils";
package/dist/lifecycle.js CHANGED
@@ -54,8 +54,7 @@ export const winterCgHandler = (run, config) => {
54
54
  requestId: ctx.requestId,
55
55
  err: reason ?? ctx.request.signal.reason,
56
56
  });
57
- const isUpstreamError = reason instanceof GatewayError && reason.statusText.startsWith("UPSTREAM_");
58
- span.recordError(reason, realStatus >= 500 || isUpstreamError);
57
+ span.recordError(reason, true);
59
58
  }
60
59
  span.setAttributes({ "http.response.status_code_effective": realStatus });
61
60
  if (ctx.operation === "chat" ||
@@ -0,0 +1,2 @@
1
+ export * from "./presets";
2
+ export * from "./middleware";
@@ -0,0 +1,2 @@
1
+ export * from "./presets";
2
+ export * from "./middleware";
@@ -0,0 +1,2 @@
1
+ import type { LanguageModelMiddleware } from "ai";
2
+ export declare const qwenReasoningMiddleware: LanguageModelMiddleware;
@@ -0,0 +1,31 @@
1
+ import { modelMiddlewareMatcher } from "../../middleware/matcher";
2
+ import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
3
+ const QWEN_DEFAULT_MAX_OUTPUT_TOKENS = 16384;
4
+ export const qwenReasoningMiddleware = {
5
+ specificationVersion: "v3",
6
+ // oxlint-disable-next-line require-await
7
+ transformParams: async ({ params }) => {
8
+ const unknown = params.providerOptions?.["unknown"];
9
+ if (!unknown)
10
+ return params;
11
+ const reasoning = unknown["reasoning"];
12
+ if (!reasoning)
13
+ return params;
14
+ const target = (params.providerOptions["alibaba"] ??= {});
15
+ if (!reasoning.enabled || reasoning.effort === "none") {
16
+ target.enableThinking = false;
17
+ delete target.thinkingBudget;
18
+ }
19
+ else {
20
+ target.enableThinking = true;
21
+ target.thinkingBudget =
22
+ reasoning.max_tokens ??
23
+ calculateReasoningBudgetFromEffort(reasoning.effort ?? "medium", params.maxOutputTokens ?? QWEN_DEFAULT_MAX_OUTPUT_TOKENS);
24
+ }
25
+ delete unknown["reasoning"];
26
+ return params;
27
+ },
28
+ };
29
+ modelMiddlewareMatcher.useForModel("alibaba/qwen*", {
30
+ language: [qwenReasoningMiddleware],
31
+ });