@hebo-ai/gateway 0.10.6 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -84
- package/dist/config.js +21 -4
- package/dist/endpoints/chat-completions/handler.js +9 -5
- package/dist/endpoints/conversations/handler.js +3 -3
- package/dist/endpoints/embeddings/handler.js +2 -2
- package/dist/endpoints/messages/converters.js +17 -21
- package/dist/endpoints/messages/handler.js +5 -5
- package/dist/endpoints/responses/handler.js +9 -5
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/lifecycle.js +1 -2
- package/dist/models/alibaba/index.d.ts +2 -0
- package/dist/models/alibaba/index.js +2 -0
- package/dist/models/alibaba/middleware.d.ts +2 -0
- package/dist/models/alibaba/middleware.js +31 -0
- package/dist/models/alibaba/presets.d.ts +900 -0
- package/dist/models/alibaba/presets.js +252 -0
- package/dist/models/amazon/presets.d.ts +31 -31
- package/dist/models/anthropic/presets.d.ts +68 -68
- package/dist/models/cohere/presets.d.ts +38 -38
- package/dist/models/deepseek/index.d.ts +2 -0
- package/dist/models/deepseek/index.js +2 -0
- package/dist/models/deepseek/middleware.d.ts +2 -0
- package/dist/models/deepseek/middleware.js +25 -0
- package/dist/models/deepseek/presets.d.ts +51 -0
- package/dist/models/deepseek/presets.js +33 -0
- package/dist/models/google/presets.d.ts +94 -94
- package/dist/models/google/presets.js +3 -1
- package/dist/models/meta/presets.d.ts +84 -84
- package/dist/models/meta/presets.js +11 -12
- package/dist/models/minimax/index.d.ts +2 -0
- package/dist/models/minimax/index.js +2 -0
- package/dist/models/minimax/middleware.d.ts +2 -0
- package/dist/models/minimax/middleware.js +43 -0
- package/dist/models/minimax/presets.d.ts +99 -0
- package/dist/models/minimax/presets.js +47 -0
- package/dist/models/moonshot/index.d.ts +2 -0
- package/dist/models/moonshot/index.js +2 -0
- package/dist/models/moonshot/middleware.d.ts +2 -0
- package/dist/models/moonshot/middleware.js +32 -0
- package/dist/models/moonshot/presets.d.ts +104 -0
- package/dist/models/moonshot/presets.js +36 -0
- package/dist/models/openai/presets.d.ts +128 -128
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +39 -0
- package/dist/models/voyage/presets.d.ts +46 -46
- package/dist/models/xai/index.d.ts +2 -0
- package/dist/models/xai/index.js +2 -0
- package/dist/models/xai/middleware.d.ts +2 -0
- package/dist/models/xai/middleware.js +37 -0
- package/dist/models/xai/presets.d.ts +210 -0
- package/dist/models/xai/presets.js +55 -0
- package/dist/models/zai/index.d.ts +2 -0
- package/dist/models/zai/index.js +2 -0
- package/dist/models/zai/middleware.d.ts +2 -0
- package/dist/models/zai/middleware.js +25 -0
- package/dist/models/zai/presets.d.ts +141 -0
- package/dist/models/zai/presets.js +41 -0
- package/dist/providers/alibaba/canonical.d.ts +3 -0
- package/dist/providers/alibaba/canonical.js +13 -0
- package/dist/providers/alibaba/index.d.ts +1 -0
- package/dist/providers/alibaba/index.js +1 -0
- package/dist/providers/bedrock/canonical.js +3 -0
- package/dist/providers/chutes/canonical.d.ts +3 -0
- package/dist/providers/chutes/canonical.js +14 -0
- package/dist/providers/chutes/index.d.ts +1 -0
- package/dist/providers/chutes/index.js +1 -0
- package/dist/providers/deepinfra/canonical.d.ts +3 -0
- package/dist/providers/deepinfra/canonical.js +40 -0
- package/dist/providers/deepinfra/index.d.ts +1 -0
- package/dist/providers/deepinfra/index.js +1 -0
- package/dist/providers/deepseek/canonical.d.ts +3 -0
- package/dist/providers/deepseek/canonical.js +9 -0
- package/dist/providers/deepseek/index.d.ts +1 -0
- package/dist/providers/deepseek/index.js +1 -0
- package/dist/providers/fireworks/canonical.d.ts +3 -0
- package/dist/providers/fireworks/canonical.js +27 -0
- package/dist/providers/fireworks/index.d.ts +2 -0
- package/dist/providers/fireworks/index.js +2 -0
- package/dist/providers/fireworks/middleware.d.ts +2 -0
- package/dist/providers/fireworks/middleware.js +35 -0
- package/dist/providers/groq/canonical.js +1 -1
- package/dist/providers/minimax/canonical.d.ts +3 -0
- package/dist/providers/minimax/canonical.js +9 -0
- package/dist/providers/minimax/index.d.ts +1 -0
- package/dist/providers/minimax/index.js +1 -0
- package/dist/providers/moonshot/canonical.d.ts +3 -0
- package/dist/providers/moonshot/canonical.js +6 -0
- package/dist/providers/moonshot/index.d.ts +1 -0
- package/dist/providers/moonshot/index.js +1 -0
- package/dist/providers/togetherai/canonical.d.ts +3 -0
- package/dist/providers/togetherai/canonical.js +24 -0
- package/dist/providers/togetherai/index.d.ts +1 -0
- package/dist/providers/togetherai/index.js +1 -0
- package/dist/providers/types.d.ts +1 -1
- package/dist/providers/types.js +10 -0
- package/dist/providers/vertex/canonical.js +5 -1
- package/dist/providers/xai/canonical.d.ts +3 -0
- package/dist/providers/xai/canonical.js +12 -0
- package/dist/providers/xai/index.d.ts +1 -0
- package/dist/providers/xai/index.js +1 -0
- package/dist/providers/zai/canonical.d.ts +3 -0
- package/dist/providers/zai/canonical.js +10 -0
- package/dist/providers/zai/index.d.ts +1 -0
- package/dist/providers/zai/index.js +1 -0
- package/dist/types.d.ts +30 -16
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/request.d.ts +2 -1
- package/dist/utils/request.js +35 -7
- package/package.json +133 -9
package/README.md
CHANGED
|
@@ -16,12 +16,12 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
|
|
|
16
16
|
- 💬 Open Responses `/responses` endpoint (stateless), including /conversations.
|
|
17
17
|
- 🗨️ Anthropic-compatible `/messages` endpoint.
|
|
18
18
|
- 🔌 Integrate into your existing Hono, Elysia, Next.js & TanStack apps.
|
|
19
|
-
- 🧩 Provider registry compatible with Vercel AI SDK providers.
|
|
19
|
+
- 🧩 Provider registry compatible with any Vercel AI SDK provider.
|
|
20
20
|
- 🧭 Canonical model IDs and parameter naming across providers.
|
|
21
21
|
- 🗂️ Model catalog with extensible metadata capabilities.
|
|
22
22
|
- 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
|
|
23
|
-
- 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
|
|
24
23
|
- 👁️ Observability via OTel GenAI semantic conventions (Langfuse-compatible).
|
|
24
|
+
- 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
|
|
25
25
|
|
|
26
26
|
## 📦 Installation
|
|
27
27
|
|
|
@@ -34,7 +34,7 @@ bun install @hebo-ai/gateway
|
|
|
34
34
|
- Quickstart
|
|
35
35
|
- [Setup A Gateway Instance](#setup-a-gateway-instance) | [Mount Route Handlers](#mount-route-handlers) | [Call the Gateway](#call-the-gateway)
|
|
36
36
|
- Configuration Reference
|
|
37
|
-
- [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Storage](#storage) | [Logger](#logger-settings) | [Observability](#observability) | [
|
|
37
|
+
- [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Storage](#storage) | [Logger](#logger-settings) | [Observability](#observability) | [Advanced](#advanced-settings)
|
|
38
38
|
- Framework Support
|
|
39
39
|
- [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
|
|
40
40
|
- Runtime Support
|
|
@@ -71,21 +71,14 @@ export const gw = gateway({
|
|
|
71
71
|
|
|
72
72
|
// MODEL CATALOG
|
|
73
73
|
models: defineModelCatalog(
|
|
74
|
-
// Choose a pre-configured preset
|
|
74
|
+
// Choose a pre-configured preset
|
|
75
75
|
gptOss20b,
|
|
76
|
-
// Or add a whole model family
|
|
77
|
-
gptOss["all"]
|
|
78
|
-
preset({
|
|
79
|
-
providers: ["groq"],
|
|
80
|
-
}),
|
|
81
|
-
),
|
|
76
|
+
// Or add a whole model family
|
|
77
|
+
gptOss["all"],
|
|
82
78
|
),
|
|
83
79
|
});
|
|
84
80
|
```
|
|
85
81
|
|
|
86
|
-
> [!NOTE]
|
|
87
|
-
> Don't forget to install the Groq provider package too: `@ai-sdk/groq`.
|
|
88
|
-
|
|
89
82
|
> [!TIP]
|
|
90
83
|
> Why `withCanonicalIdsForX`? In most cases you want your gateway to route using model IDs that are consistent across providers (e.g. `openai/gpt-oss-20b` rather than `openai.gpt-oss-20b-v1:0`). We call these `Canonical IDs` - they are what enable routing, fallbacks, and policy rules. Without this wrapper, providers only understand their native IDs, which would make cross-provider routing impossible.
|
|
91
84
|
|
|
@@ -110,11 +103,13 @@ const app = new Elysia().mount("/v1/gateway/", gw.handler).listen(3000);
|
|
|
110
103
|
console.log(`🐒 Hebo Gateway is running with Elysia at ${app.server?.url}`);
|
|
111
104
|
```
|
|
112
105
|
|
|
106
|
+
See [Framework Support](#-framework-support) for all supported framework examples.
|
|
107
|
+
|
|
113
108
|
### Call the Gateway
|
|
114
109
|
|
|
115
|
-
Since Hebo Gateway exposes OpenAI-compatible endpoints, it can be used with a broad set of common AI SDKs like **Vercel AI SDK**, **TanStack AI**, **LangChain**, the official **OpenAI SDK** and others.
|
|
110
|
+
Since Hebo Gateway exposes OpenAI-compatible and Anthropic-compatible endpoints, it can be used with a broad set of common AI SDKs like **Vercel AI SDK**, **TanStack AI**, **LangChain**, the official **OpenAI SDK**, the official **Anthropic SDK**, and others.
|
|
116
111
|
|
|
117
|
-
Here is a quick example using the Vercel AI SDK:
|
|
112
|
+
Here is a quick example using the Vercel AI SDK against the OpenAI-compatible surface:
|
|
118
113
|
|
|
119
114
|
```ts
|
|
120
115
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
@@ -139,20 +134,22 @@ console.log(text);
|
|
|
139
134
|
|
|
140
135
|
### Providers
|
|
141
136
|
|
|
142
|
-
|
|
137
|
+
For most setups, start with one of the built-in canonical provider adapters. They wrap a provider SDK and let the gateway route using stable canonical model IDs like `openai/gpt-4.1-mini` instead of provider-native IDs.
|
|
138
|
+
|
|
139
|
+
Built-in adapters are available for `Alibaba`, `Anthropic`, `Bedrock`, `Chutes`, `Cohere`, `DeepInfra`, `DeepSeek`, `Fireworks`, `Groq`, `MiniMax`, `Moonshot`, `OpenAI`, `Together AI`, `Vertex`, `Voyage`, `xAI`, and `Z.ai`.
|
|
143
140
|
|
|
144
|
-
|
|
141
|
+
Import the helper from the matching package path:
|
|
145
142
|
|
|
146
143
|
```ts
|
|
147
144
|
// pattern: @hebo-ai/gateway/providers/<provider>
|
|
148
145
|
import { withCanonicalIdsForGroq } from "@hebo-ai/gateway/providers/groq";
|
|
149
146
|
```
|
|
150
147
|
|
|
151
|
-
If
|
|
148
|
+
If you need a provider that is not on that list, Hebo Gateway’s provider registry also accepts any **Vercel AI SDK Provider**.
|
|
152
149
|
|
|
153
150
|
For Azure, use `createAzure` from `@ai-sdk/azure` directly. Name each [Azure AI Foundry](https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/endpoints) deployment after its Hebo canonical ID (e.g. `anthropic/claude-sonnet-4.5`).
|
|
154
151
|
|
|
155
|
-
For
|
|
152
|
+
For custom provider setups, wrap the provider instance with `withCanonicalIds` and define your own canonicalization mapping and rules:
|
|
156
153
|
|
|
157
154
|
```ts
|
|
158
155
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
@@ -184,11 +181,13 @@ const gw = gateway({
|
|
|
184
181
|
|
|
185
182
|
### Models
|
|
186
183
|
|
|
187
|
-
|
|
184
|
+
Start with the built-in model presets when possible. They give you ready-to-use catalog entries with canonical IDs, metadata, and default provider lists.
|
|
185
|
+
|
|
186
|
+
Built-in preset families are available for `Alibaba Qwen`, `Amazon Nova`, `Anthropic Claude`, `Cohere Command/Embed`, `DeepSeek`, `Google Gemini`, `Meta Llama`, `MiniMax`, `Moonshot Kimi`, `OpenAI GPT/GPT-OSS`, `Voyage`, `xAI Grok`, and `Z.ai GLM`.
|
|
188
187
|
|
|
189
188
|
#### Model Presets
|
|
190
189
|
|
|
191
|
-
|
|
190
|
+
Hebo Gateway ships a set of model presets under `@hebo-ai/gateway/models`. Use these when you want ready-to-use catalog entries with sane defaults for common SOTA models.
|
|
192
191
|
|
|
193
192
|
Presets come in two forms:
|
|
194
193
|
|
|
@@ -214,6 +213,9 @@ const modelsFromFamily = defineModelCatalog(
|
|
|
214
213
|
|
|
215
214
|
Out-of-the-box model presets:
|
|
216
215
|
|
|
216
|
+
- **Alibaba** — `@hebo-ai/gateway/models/alibaba`
|
|
217
|
+
Qwen: `qwen` (`v3`, `v3.5`, `v3.6`, `v3.x`, `coder`, `vl`, `embedding`, `embeddings`, `latest`, `all`)
|
|
218
|
+
|
|
217
219
|
- **Amazon** — `@hebo-ai/gateway/models/amazon`
|
|
218
220
|
Nova: `nova` (`v1`, `v2`, `v1.x`, `v2.x`, `latest`, `embeddings`, `all`)
|
|
219
221
|
|
|
@@ -224,23 +226,39 @@ Out-of-the-box model presets:
|
|
|
224
226
|
Command: `command` (`A`, `R`, `latest`, `all`)
|
|
225
227
|
Embed: `embed` (`v4`, `v3`, `latest`, `all`)
|
|
226
228
|
|
|
229
|
+
- **DeepSeek** — `@hebo-ai/gateway/models/deepseek`
|
|
230
|
+
DeepSeek: `deepseek` (`v3.2`, `latest`, `all`)
|
|
231
|
+
|
|
227
232
|
- **Google** — `@hebo-ai/gateway/models/google`
|
|
228
233
|
Gemini: `gemini` (`v2.5`, `v3-preview`, `v2.x`, `v3.x`, `embeddings`, `latest`, `preview`, `all`)
|
|
234
|
+
Gemma: `gemma` (`v3`, `v4`, `v3.x`, `v4.x`, `latest`, `all`)
|
|
229
235
|
|
|
230
236
|
- **Meta** — `@hebo-ai/gateway/models/meta`
|
|
231
237
|
Llama: `llama` (`v3.1`, `v3.2`, `v3.3`, `v4`, `v3.x`, `v4.x`, `latest`, `all`)
|
|
232
238
|
|
|
239
|
+
- **MiniMax** — `@hebo-ai/gateway/models/minimax`
|
|
240
|
+
MiniMax: `minimax` (`v2`, `v2.x`, `latest`, `all`)
|
|
241
|
+
|
|
242
|
+
- **Moonshot** — `@hebo-ai/gateway/models/moonshot`
|
|
243
|
+
Kimi: `kimi` (`k2.5`, `k2.6`, `k2.x`, `latest`, `all`)
|
|
244
|
+
|
|
233
245
|
- **OpenAI** — `@hebo-ai/gateway/models/openai`
|
|
234
|
-
GPT: `gpt` (`v5`, `v5.1`, `v5.2`, `v5.3`, `v5.x`, `chat`, `codex`, `pro`, `latest`, `all`)
|
|
246
|
+
GPT: `gpt` (`v5`, `v5.1`, `v5.2`, `v5.3`, `v5.4`, `v5.x`, `chat`, `codex`, `pro`, `latest`, `all`)
|
|
235
247
|
GPT-OSS: `gptOss` (`v1`, `v1.x`, `latest`, `all`)
|
|
236
248
|
Embeddings: `textEmbeddings` (`v3`, `v3.x`, `latest`, `all`)
|
|
237
249
|
|
|
238
250
|
- **Voyage** — `@hebo-ai/gateway/models/voyage`
|
|
239
251
|
Voyage: `voyage` (`v2`, `v3`, `v3.5`, `v4`, `v2.x`, `v3.x`, `v4.x`, `latest`, `all`)
|
|
240
252
|
|
|
253
|
+
- **xAI** — `@hebo-ai/gateway/models/xai`
|
|
254
|
+
Grok: `grok` (`v4.1`, `v4.2`, `latest`, `all`)
|
|
255
|
+
|
|
256
|
+
- **Z.ai** — `@hebo-ai/gateway/models/zai`
|
|
257
|
+
GLM: `glm` (`v5`, `v5.1`, `v5.x`, `latest`, `all`)
|
|
258
|
+
|
|
241
259
|
#### User-defined Models
|
|
242
260
|
|
|
243
|
-
|
|
261
|
+
If a built-in preset does not exist yet, you can always register your own model entries by following the `CatalogModel` type.
|
|
244
262
|
|
|
245
263
|
```ts
|
|
246
264
|
const gw = gateway({
|
|
@@ -808,32 +826,7 @@ Provider behavior:
|
|
|
808
826
|
|
|
809
827
|
### Compressed Requests
|
|
810
828
|
|
|
811
|
-
The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum _decompressed_ body size for these compressed requests, protecting against gzip bombs and oversized payloads.
|
|
812
|
-
|
|
813
|
-
```ts
|
|
814
|
-
import { gateway } from "@hebo-ai/gateway";
|
|
815
|
-
|
|
816
|
-
const gw = gateway({
|
|
817
|
-
// ...
|
|
818
|
-
// Maximum decompressed body size in bytes (default: 10 MB).
|
|
819
|
-
// Set to 0 to disable the decompressed size limit.
|
|
820
|
-
maxBodySize: 10 * 1024 * 1024,
|
|
821
|
-
});
|
|
822
|
-
```
|
|
823
|
-
|
|
824
|
-
Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
|
|
825
|
-
|
|
826
|
-
> [!IMPORTANT]
|
|
827
|
-
> **Plain (uncompressed) request body size limits** are _not_ enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
|
|
828
|
-
>
|
|
829
|
-
> Framework-level configuration examples:
|
|
830
|
-
>
|
|
831
|
-
> - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
|
|
832
|
-
> - **Elysia** — inherits from Bun's `maxRequestBodySize`
|
|
833
|
-
> - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
|
|
834
|
-
> - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
|
|
835
|
-
> - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
|
|
836
|
-
> - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
|
|
829
|
+
The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The [`advanced.maxBodySize`](#max-body-size) option controls the maximum _decompressed_ body size for these compressed requests, protecting against gzip bombs and oversized payloads. See [Advanced Settings](#advanced-settings) for configuration details.
|
|
837
830
|
|
|
838
831
|
## 🧪 Advanced Usage
|
|
839
832
|
|
|
@@ -1034,25 +1027,38 @@ const gw = gateway({
|
|
|
1034
1027
|
|
|
1035
1028
|
Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
|
|
1036
1029
|
|
|
1037
|
-
###
|
|
1030
|
+
### Advanced Settings
|
|
1038
1031
|
|
|
1039
|
-
|
|
1032
|
+
The `advanced` field groups optional settings for timeouts, body size limits, and header forwarding.
|
|
1040
1033
|
|
|
1041
1034
|
```ts
|
|
1042
1035
|
import { gateway } from "@hebo-ai/gateway";
|
|
1043
1036
|
|
|
1044
1037
|
const gw = gateway({
|
|
1045
1038
|
// ...
|
|
1046
|
-
|
|
1047
|
-
|
|
1039
|
+
advanced: {
|
|
1040
|
+
timeouts: { normal: 60_000, flex: 180_000 },
|
|
1041
|
+
maxBodySize: 10 * 1024 * 1024,
|
|
1042
|
+
forwardHeaders: ["x-my-custom-trace-id", "x-internal-team"],
|
|
1043
|
+
},
|
|
1044
|
+
});
|
|
1045
|
+
```
|
|
1046
|
+
|
|
1047
|
+
#### Timeouts
|
|
1048
|
+
|
|
1049
|
+
Controls upstream request timeouts. Accepts a number (milliseconds), `null` (disabled), or a tiered object. Default is `300_000` (5 minutes).
|
|
1050
|
+
|
|
1051
|
+
```ts
|
|
1052
|
+
advanced: {
|
|
1053
|
+
// Single timeout for all tiers
|
|
1048
1054
|
timeouts: 60_000,
|
|
1049
|
-
// ...disable
|
|
1055
|
+
// ...or disable completely:
|
|
1050
1056
|
// timeouts: null,
|
|
1051
1057
|
// ...or split by service tier:
|
|
1052
1058
|
// - normal: all non-flex tiers (set null to disable)
|
|
1053
1059
|
// - flex: defaults to 3x normal when omitted (set null to disable)
|
|
1054
1060
|
// timeouts: { normal: 30_000, flex: null },
|
|
1055
|
-
}
|
|
1061
|
+
}
|
|
1056
1062
|
```
|
|
1057
1063
|
|
|
1058
1064
|
> [!NOTE]
|
|
@@ -1065,6 +1071,42 @@ const gw = gateway({
|
|
|
1065
1071
|
> **Provider/service timeout limits**
|
|
1066
1072
|
> Serverless platforms (e.g. Cloudflare Workers, Vercel Edge/Serverless, AWS Lambda) also enforce platform time limits (roughly 25-100s on edge paths, about 300s for streaming, and up to about 900s where configurable).
|
|
1067
1073
|
|
|
1074
|
+
#### Max Body Size
|
|
1075
|
+
|
|
1076
|
+
Maximum _decompressed_ request body size in bytes for gzip/deflate-encoded requests. Protects against gzip bombs and oversized payloads. Default is `10_485_760` (10 MB). Set to `0` to disable.
|
|
1077
|
+
|
|
1078
|
+
Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
|
|
1079
|
+
|
|
1080
|
+
> [!IMPORTANT]
|
|
1081
|
+
> **Plain (uncompressed) request body size limits** are _not_ enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
|
|
1082
|
+
>
|
|
1083
|
+
> Framework-level configuration examples:
|
|
1084
|
+
>
|
|
1085
|
+
> - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
|
|
1086
|
+
> - **Elysia** — inherits from Bun's `maxRequestBodySize`
|
|
1087
|
+
> - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
|
|
1088
|
+
> - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
|
|
1089
|
+
> - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
|
|
1090
|
+
> - **Node.js `http`** — no built-in request-body size option; enforce a limit while reading the request stream, or use a reverse proxy like nginx (`client_max_body_size 10m`)
|
|
1091
|
+
|
|
1092
|
+
#### Forward Headers
|
|
1093
|
+
|
|
1094
|
+
Additional headers to forward to upstream providers, merged with the built-in allowlist at startup. Header names are matched case-insensitively. The merge is computed once at config parse time, not per-request.
|
|
1095
|
+
|
|
1096
|
+
> [!CAUTION]
|
|
1097
|
+
> Only add non-sensitive headers. Any header listed in `advanced.forwardHeaders` is forwarded to upstream providers when present on the incoming request — avoid credentials, cookies, user tokens, or raw PII.
|
|
1098
|
+
|
|
1099
|
+
The gateway ships a built-in allowlist covering common provider, agent, and SDK headers (OpenAI, Anthropic, Bedrock, Vertex, OpenRouter, Cohere, Stainless, Google, Kilo Code, Cline, Roo Code, Goose, Claude Code). Use `forwardHeaders` to extend it with your own headers without modifying the gateway source.
|
|
1100
|
+
|
|
1101
|
+
```ts
|
|
1102
|
+
advanced: {
|
|
1103
|
+
forwardHeaders: [
|
|
1104
|
+
"x-my-custom-trace-id",
|
|
1105
|
+
"x-internal-team",
|
|
1106
|
+
],
|
|
1107
|
+
}
|
|
1108
|
+
```
|
|
1109
|
+
|
|
1068
1110
|
### Passing Framework State to Hooks
|
|
1069
1111
|
|
|
1070
1112
|
You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
|
|
@@ -1170,32 +1212,3 @@ Non-streaming versions are available via `toChatCompletionsResponse`. Equivalent
|
|
|
1170
1212
|
|
|
1171
1213
|
> [!TIP]
|
|
1172
1214
|
> Since Zod v4.3 you can generate a JSON Schema from any zod object by calling `z.toJSONSchema(...)`. This is useful for producing OpenAPI documentation from the same source of truth.
|
|
1173
|
-
|
|
1174
|
-
### Request Body Size
|
|
1175
|
-
|
|
1176
|
-
The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum _decompressed_ body size for these compressed requests, protecting against gzip bombs and oversized payloads.
|
|
1177
|
-
|
|
1178
|
-
```ts
|
|
1179
|
-
import { gateway } from "@hebo-ai/gateway";
|
|
1180
|
-
|
|
1181
|
-
const gw = gateway({
|
|
1182
|
-
// ...
|
|
1183
|
-
// Maximum decompressed body size in bytes (default: 10 MB).
|
|
1184
|
-
// Set to 0 to disable the decompressed size limit.
|
|
1185
|
-
maxBodySize: 10 * 1024 * 1024,
|
|
1186
|
-
});
|
|
1187
|
-
```
|
|
1188
|
-
|
|
1189
|
-
Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
|
|
1190
|
-
|
|
1191
|
-
> [!IMPORTANT]
|
|
1192
|
-
> **Plain (uncompressed) request body size limits** are _not_ enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
|
|
1193
|
-
>
|
|
1194
|
-
> Framework-level configuration examples:
|
|
1195
|
-
>
|
|
1196
|
-
> - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
|
|
1197
|
-
> - **Elysia** — inherits from Bun's `maxRequestBodySize`
|
|
1198
|
-
> - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
|
|
1199
|
-
> - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
|
|
1200
|
-
> - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
|
|
1201
|
-
> - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
|
package/dist/config.js
CHANGED
|
@@ -4,6 +4,7 @@ import { createDefaultLogger } from "./logger/default";
|
|
|
4
4
|
import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
|
|
5
5
|
import { DEFAULT_CHAT_TIMEOUT_MS, kParsed, } from "./types";
|
|
6
6
|
import { DEFAULT_MAX_BODY_SIZE } from "./utils/body";
|
|
7
|
+
import { FORWARD_HEADER_ALLOWLIST } from "./utils/request";
|
|
7
8
|
export const parseConfig = (config) => {
|
|
8
9
|
// If it has been parsed before, just return.
|
|
9
10
|
if (kParsed in config)
|
|
@@ -72,7 +73,7 @@ export const parseConfig = (config) => {
|
|
|
72
73
|
// Default timeouts
|
|
73
74
|
let normal;
|
|
74
75
|
let flex;
|
|
75
|
-
const t = config.timeouts;
|
|
76
|
+
const t = config.advanced?.timeouts;
|
|
76
77
|
if (t === null) {
|
|
77
78
|
normal = flex = undefined;
|
|
78
79
|
}
|
|
@@ -96,7 +97,7 @@ export const parseConfig = (config) => {
|
|
|
96
97
|
}
|
|
97
98
|
const parsedTimeouts = { normal, flex };
|
|
98
99
|
// Body size limit
|
|
99
|
-
const rawMax = config.maxBodySize;
|
|
100
|
+
const rawMax = config.advanced?.maxBodySize;
|
|
100
101
|
let maxBodySize;
|
|
101
102
|
if (typeof rawMax === "number" && Number.isFinite(rawMax) && rawMax >= 0) {
|
|
102
103
|
maxBodySize = rawMax;
|
|
@@ -107,11 +108,27 @@ export const parseConfig = (config) => {
|
|
|
107
108
|
logger.warn(`[config] invalid maxBodySize (${rawMax}), using default ${DEFAULT_MAX_BODY_SIZE}`);
|
|
108
109
|
}
|
|
109
110
|
}
|
|
111
|
+
// Merge forward header allowlist once.
|
|
112
|
+
const customHeaders = config.advanced?.forwardHeaders ?? [];
|
|
113
|
+
const forwardHeaders = new Set(FORWARD_HEADER_ALLOWLIST);
|
|
114
|
+
for (const header of customHeaders) {
|
|
115
|
+
try {
|
|
116
|
+
void new Headers([[header, ""]]);
|
|
117
|
+
}
|
|
118
|
+
catch {
|
|
119
|
+
logger.warn(`[config] invalid advanced.forwardHeaders entry ignored: ${JSON.stringify(header)}`);
|
|
120
|
+
continue;
|
|
121
|
+
}
|
|
122
|
+
forwardHeaders.add(header.trim().toLowerCase());
|
|
123
|
+
}
|
|
110
124
|
// Return parsed config.
|
|
111
125
|
return {
|
|
112
126
|
...config,
|
|
113
|
-
|
|
114
|
-
|
|
127
|
+
advanced: {
|
|
128
|
+
timeouts: parsedTimeouts,
|
|
129
|
+
maxBodySize,
|
|
130
|
+
forwardHeaders: [...forwardHeaders],
|
|
131
|
+
},
|
|
115
132
|
telemetry: {
|
|
116
133
|
...config.telemetry,
|
|
117
134
|
enabled: telemetryEnabled,
|
|
@@ -24,7 +24,7 @@ export const chatCompletions = (config) => {
|
|
|
24
24
|
throw new GatewayError("Method Not Allowed", 405);
|
|
25
25
|
}
|
|
26
26
|
// Parse + validate input (handles Content-Encoding decompression + body size limits).
|
|
27
|
-
ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
|
|
27
|
+
ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
|
|
28
28
|
logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
|
|
29
29
|
addSpanEvent("hebo.request.deserialized");
|
|
30
30
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
@@ -81,10 +81,12 @@ export const chatCompletions = (config) => {
|
|
|
81
81
|
let ttft = 0;
|
|
82
82
|
const result = streamText({
|
|
83
83
|
model: languageModelWithMiddleware,
|
|
84
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
84
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
85
85
|
abortSignal: ctx.request.signal,
|
|
86
86
|
timeout: {
|
|
87
|
-
totalMs: ctx.body.service_tier === "flex"
|
|
87
|
+
totalMs: ctx.body.service_tier === "flex"
|
|
88
|
+
? cfg.advanced.timeouts.flex
|
|
89
|
+
: cfg.advanced.timeouts.normal,
|
|
88
90
|
},
|
|
89
91
|
onAbort: () => {
|
|
90
92
|
throw new DOMException("The operation was aborted.", "AbortError");
|
|
@@ -122,9 +124,11 @@ export const chatCompletions = (config) => {
|
|
|
122
124
|
addSpanEvent("hebo.ai-sdk.started");
|
|
123
125
|
const result = await generateText({
|
|
124
126
|
model: languageModelWithMiddleware,
|
|
125
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
127
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
126
128
|
abortSignal: ctx.request.signal,
|
|
127
|
-
timeout: ctx.body.service_tier === "flex"
|
|
129
|
+
timeout: ctx.body.service_tier === "flex"
|
|
130
|
+
? cfg.advanced.timeouts.flex
|
|
131
|
+
: cfg.advanced.timeouts.normal,
|
|
128
132
|
experimental_include: {
|
|
129
133
|
requestBody: false,
|
|
130
134
|
responseBody: false,
|
|
@@ -43,7 +43,7 @@ export const conversations = (config) => {
|
|
|
43
43
|
};
|
|
44
44
|
}
|
|
45
45
|
async function create(ctx) {
|
|
46
|
-
const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
|
|
46
|
+
const body = await parseRequestBody(ctx.request, parsedConfig.advanced.maxBodySize);
|
|
47
47
|
addSpanEvent("hebo.request.deserialized");
|
|
48
48
|
const parsed = ConversationCreateParamsSchema.safeParse(body);
|
|
49
49
|
if (!parsed.success) {
|
|
@@ -67,7 +67,7 @@ export const conversations = (config) => {
|
|
|
67
67
|
return toConversation(entity);
|
|
68
68
|
}
|
|
69
69
|
async function update(ctx, conversationId) {
|
|
70
|
-
const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
|
|
70
|
+
const body = await parseRequestBody(ctx.request, parsedConfig.advanced.maxBodySize);
|
|
71
71
|
addSpanEvent("hebo.request.deserialized");
|
|
72
72
|
const parsed = ConversationUpdateBodySchema.safeParse(body);
|
|
73
73
|
if (!parsed.success) {
|
|
@@ -139,7 +139,7 @@ export const conversations = (config) => {
|
|
|
139
139
|
};
|
|
140
140
|
}
|
|
141
141
|
async function addItems(ctx, conversationId) {
|
|
142
|
-
const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
|
|
142
|
+
const body = await parseRequestBody(ctx.request, parsedConfig.advanced.maxBodySize);
|
|
143
143
|
addSpanEvent("hebo.request.deserialized");
|
|
144
144
|
const parsed = ConversationItemsAddBodySchema.safeParse(body);
|
|
145
145
|
if (!parsed.success) {
|
|
@@ -24,7 +24,7 @@ export const embeddings = (config) => {
|
|
|
24
24
|
throw new GatewayError("Method Not Allowed", 405);
|
|
25
25
|
}
|
|
26
26
|
// Parse + validate input (handles Content-Encoding decompression + body size limits).
|
|
27
|
-
ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
|
|
27
|
+
ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
|
|
28
28
|
logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[chat] EmbeddingsBody");
|
|
29
29
|
addSpanEvent("hebo.request.deserialized");
|
|
30
30
|
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
@@ -75,7 +75,7 @@ export const embeddings = (config) => {
|
|
|
75
75
|
addSpanEvent("hebo.ai-sdk.started");
|
|
76
76
|
const result = await embedMany({
|
|
77
77
|
model: embeddingModelWithMiddleware,
|
|
78
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
78
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
79
79
|
abortSignal: ctx.request.signal,
|
|
80
80
|
...embedOptions,
|
|
81
81
|
});
|
|
@@ -133,34 +133,30 @@ export function convertToModelMessages(messages, system) {
|
|
|
133
133
|
return modelMessages;
|
|
134
134
|
}
|
|
135
135
|
/**
 * Converts one incoming user message into a list of model messages.
 *
 * This hunk interleaves two revisions; the notes below describe the lines
 * marked `+` (the new revision). Lines marked `-` are the removed revision.
 *
 * - String content becomes a single `{ role: "user" }` message.
 * - Array content is walked in order. Each block is assigned the role "tool"
 *   when `block.type === "tool_result"` and "user" otherwise; consecutive
 *   blocks with the same role are grouped into one message, so the relative
 *   order of user content and tool results is kept.
 * - If no message was produced, a single user message with empty string
 *   content is returned.
 *
 * @param message - Message whose `content` is either a string or an iterable of blocks.
 * @param toolNameMap - Passed through unchanged to `fromToolResultBlock`.
 * @returns A non-empty array of `{ role, content }` messages.
 */
function fromUserMessage(message, toolNameMap) {
|
|
136
|
-
const result = [];
|
|
137
136
|
if (typeof message.content === "string") {
|
|
138
|
-
|
|
139
|
-
return result;
|
|
137
|
+
return [{ role: "user", content: message.content }];
|
|
140
138
|
}
|
|
141
|
-
const
|
|
142
|
-
|
|
139
|
+
const result = [];
|
|
140
|
+
// Parts of the message most recently pushed to `result`, and that message's role.
let currentParts = [];
|
|
141
|
+
let currentRole;
|
|
143
142
|
for (const block of message.content) {
|
|
144
|
-
|
|
145
|
-
|
|
143
|
+
const isToolResult = block.type === "tool_result";
|
|
144
|
+
const role = isToolResult ? "tool" : "user";
|
|
145
|
+
const part = isToolResult
|
|
146
|
+
? fromToolResultBlock(block, toolNameMap)
|
|
147
|
+
: fromUserContentBlock(block);
|
|
148
|
+
// Blocks whose converter returns a falsy value are dropped. Because this
// happens before the role comparison, a dropped block does not end the
// current run of same-role parts.
if (!part)
|
|
149
|
+
continue;
|
|
150
|
+
// `currentParts` is the same array already stored in the last pushed
// message, so pushing here extends that message in place.
if (role === currentRole) {
|
|
151
|
+
currentParts.push(part);
|
|
146
152
|
}
|
|
147
153
|
else {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
154
|
+
// Role changed (or first kept block): start a new message.
currentParts = [part];
|
|
155
|
+
currentRole = role;
|
|
156
|
+
result.push({ role, content: currentParts });
|
|
151
157
|
}
|
|
152
158
|
}
|
|
153
|
-
|
|
154
|
-
result.push({ role: "user", content: userParts });
|
|
155
|
-
}
|
|
156
|
-
if (toolResultParts.length > 0) {
|
|
157
|
-
result.push({ role: "tool", content: toolResultParts });
|
|
158
|
-
}
|
|
159
|
-
// If only tool results and no user parts, still valid
|
|
160
|
-
if (userParts.length === 0 && toolResultParts.length === 0) {
|
|
161
|
-
result.push({ role: "user", content: "" });
|
|
162
|
-
}
|
|
163
|
-
return result;
|
|
159
|
+
// Never return an empty list: fall back to one empty user message.
return result.length > 0 ? result : [{ role: "user", content: "" }];
|
|
164
160
|
}
|
|
165
161
|
function fromUserContentBlock(block) {
|
|
166
162
|
// tool_result blocks are handled separately in fromUserMessage
|
|
@@ -23,7 +23,7 @@ export const messages = (config) => {
|
|
|
23
23
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
24
|
}
|
|
25
25
|
// Parse + validate input (handles Content-Encoding decompression + body size limits).
|
|
26
|
-
ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
|
|
26
|
+
ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
|
|
27
27
|
logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[messages] MessagesBody");
|
|
28
28
|
addSpanEvent("hebo.request.deserialized");
|
|
29
29
|
const parsed = MessagesBodySchema.safeParse(ctx.body);
|
|
@@ -72,10 +72,10 @@ export const messages = (config) => {
|
|
|
72
72
|
let ttft = 0;
|
|
73
73
|
const result = streamText({
|
|
74
74
|
model: languageModelWithMiddleware,
|
|
75
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
75
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
76
76
|
abortSignal: ctx.request.signal,
|
|
77
77
|
timeout: {
|
|
78
|
-
totalMs: cfg.timeouts.normal,
|
|
78
|
+
totalMs: cfg.advanced.timeouts.normal,
|
|
79
79
|
},
|
|
80
80
|
onAbort: () => {
|
|
81
81
|
throw new DOMException("The operation was aborted.", "AbortError");
|
|
@@ -113,9 +113,9 @@ export const messages = (config) => {
|
|
|
113
113
|
addSpanEvent("hebo.ai-sdk.started");
|
|
114
114
|
const result = await generateText({
|
|
115
115
|
model: languageModelWithMiddleware,
|
|
116
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
116
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
117
117
|
abortSignal: ctx.request.signal,
|
|
118
|
-
timeout: cfg.timeouts.normal,
|
|
118
|
+
timeout: cfg.advanced.timeouts.normal,
|
|
119
119
|
experimental_include: {
|
|
120
120
|
requestBody: false,
|
|
121
121
|
responseBody: false,
|
|
@@ -23,7 +23,7 @@ export const responses = (config) => {
|
|
|
23
23
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
24
|
}
|
|
25
25
|
// Parse + validate input (handles Content-Encoding decompression + body size limits).
|
|
26
|
-
ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
|
|
26
|
+
ctx.body = (await parseRequestBody(ctx.request, cfg.advanced.maxBodySize));
|
|
27
27
|
logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[responses] ResponsesBody");
|
|
28
28
|
addSpanEvent("hebo.request.deserialized");
|
|
29
29
|
const parsed = ResponsesBodySchema.safeParse(ctx.body);
|
|
@@ -71,10 +71,12 @@ export const responses = (config) => {
|
|
|
71
71
|
let ttft = 0;
|
|
72
72
|
const result = streamText({
|
|
73
73
|
model: languageModelWithMiddleware,
|
|
74
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
74
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
75
75
|
abortSignal: ctx.request.signal,
|
|
76
76
|
timeout: {
|
|
77
|
-
totalMs: ctx.body.service_tier === "flex"
|
|
77
|
+
totalMs: ctx.body.service_tier === "flex"
|
|
78
|
+
? cfg.advanced.timeouts.flex
|
|
79
|
+
: cfg.advanced.timeouts.normal,
|
|
78
80
|
},
|
|
79
81
|
onAbort: () => {
|
|
80
82
|
throw new DOMException("The operation was aborted.", "AbortError");
|
|
@@ -112,9 +114,11 @@ export const responses = (config) => {
|
|
|
112
114
|
addSpanEvent("hebo.ai-sdk.started");
|
|
113
115
|
const result = await generateText({
|
|
114
116
|
model: languageModelWithMiddleware,
|
|
115
|
-
headers: prepareForwardHeaders(ctx.request),
|
|
117
|
+
headers: prepareForwardHeaders(ctx.request, cfg.advanced.forwardHeaders),
|
|
116
118
|
abortSignal: ctx.request.signal,
|
|
117
|
-
timeout: ctx.body.service_tier === "flex"
|
|
119
|
+
timeout: ctx.body.service_tier === "flex"
|
|
120
|
+
? cfg.advanced.timeouts.flex
|
|
121
|
+
: cfg.advanced.timeouts.normal,
|
|
118
122
|
experimental_include: {
|
|
119
123
|
requestBody: false,
|
|
120
124
|
responseBody: false,
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/lifecycle.js
CHANGED
|
@@ -54,8 +54,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
54
54
|
requestId: ctx.requestId,
|
|
55
55
|
err: reason ?? ctx.request.signal.reason,
|
|
56
56
|
});
|
|
57
|
-
|
|
58
|
-
span.recordError(reason, realStatus >= 500 || isUpstreamError);
|
|
57
|
+
span.recordError(reason, true);
|
|
59
58
|
}
|
|
60
59
|
span.setAttributes({ "http.response.status_code_effective": realStatus });
|
|
61
60
|
if (ctx.operation === "chat" ||
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
2
|
+
import { calculateReasoningBudgetFromEffort } from "../../middleware/utils";
|
|
3
|
+
// Stand-in for `params.maxOutputTokens` when the request does not set it and a
// thinking budget has to be derived from a reasoning effort level.
const QWEN_DEFAULT_MAX_OUTPUT_TOKENS = 16384;
|
|
4
|
+
/**
 * Language-model middleware (specificationVersion "v3") that rewrites a generic
 * `reasoning` option into Alibaba-specific thinking options.
 *
 * `transformParams` reads `params.providerOptions.unknown.reasoning`. When
 * `unknown` or `reasoning` is absent, `params` is returned untouched.
 * Otherwise it sets `enableThinking` (and, when enabled, `thinkingBudget`) on
 * `params.providerOptions.alibaba`, deletes the consumed `reasoning` entry,
 * and returns the same `params` object, mutated in place.
 *
 * NOTE(review): the "unknown" bucket presumably carries provider-agnostic
 * options set upstream by the gateway — confirm against the caller.
 */
export const qwenReasoningMiddleware = {
|
|
5
|
+
specificationVersion: "v3",
|
|
6
|
+
// oxlint-disable-next-line require-await
|
|
7
|
+
transformParams: async ({ params }) => {
|
|
8
|
+
// Nothing to translate unless a `reasoning` option was supplied.
const unknown = params.providerOptions?.["unknown"];
|
|
9
|
+
if (!unknown)
|
|
10
|
+
return params;
|
|
11
|
+
const reasoning = unknown["reasoning"];
|
|
12
|
+
if (!reasoning)
|
|
13
|
+
return params;
|
|
14
|
+
// Reuse existing Alibaba options if present; otherwise create the bucket.
const target = (params.providerOptions["alibaba"] ??= {});
|
|
15
|
+
// A falsy `enabled` (including a missing one) or effort "none" turns
// thinking off, regardless of any `max_tokens` on the reasoning object.
if (!reasoning.enabled || reasoning.effort === "none") {
|
|
16
|
+
target.enableThinking = false;
|
|
17
|
+
delete target.thinkingBudget;
|
|
18
|
+
}
|
|
19
|
+
else {
|
|
20
|
+
target.enableThinking = true;
|
|
21
|
+
// An explicit `max_tokens` wins; otherwise the budget is derived from the
// effort (default "medium") and the output-token limit (default constant).
target.thinkingBudget =
|
|
22
|
+
reasoning.max_tokens ??
|
|
23
|
+
calculateReasoningBudgetFromEffort(reasoning.effort ?? "medium", params.maxOutputTokens ?? QWEN_DEFAULT_MAX_OUTPUT_TOKENS);
|
|
24
|
+
}
|
|
25
|
+
// Remove the generic option now that it has been translated.
delete unknown["reasoning"];
|
|
26
|
+
return params;
|
|
27
|
+
},
|
|
28
|
+
};
|
|
29
|
+
// Module-level side effect: registers the Qwen reasoning middleware under the
// `language` key for the "alibaba/qwen*" pattern.
// NOTE(review): the pattern is presumably a glob over model ids — confirm
// against the matcher implementation.
modelMiddlewareMatcher.useForModel("alibaba/qwen*", {
|
|
30
|
+
language: [qwenReasoningMiddleware],
|
|
31
|
+
});
|