@hebo-ai/gateway 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -67
- package/dist/config.js +2 -12
- package/dist/endpoints/chat-completions/converters.d.ts +28 -24
- package/dist/endpoints/chat-completions/converters.js +99 -73
- package/dist/endpoints/chat-completions/handler.js +36 -30
- package/dist/endpoints/chat-completions/schema.d.ts +394 -272
- package/dist/endpoints/chat-completions/schema.js +124 -57
- package/dist/endpoints/embeddings/converters.d.ts +4 -4
- package/dist/endpoints/embeddings/converters.js +8 -9
- package/dist/endpoints/embeddings/handler.js +32 -26
- package/dist/endpoints/embeddings/schema.d.ts +28 -38
- package/dist/endpoints/embeddings/schema.js +10 -10
- package/dist/endpoints/models/converters.d.ts +2 -2
- package/dist/endpoints/models/converters.js +9 -12
- package/dist/endpoints/models/handler.js +8 -9
- package/dist/endpoints/models/schema.d.ts +37 -31
- package/dist/endpoints/models/schema.js +23 -12
- package/dist/gateway.d.ts +8 -9
- package/dist/gateway.js +7 -10
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/lifecycle.d.ts +2 -0
- package/dist/{utils/hooks.js → lifecycle.js} +16 -8
- package/dist/middleware/common.d.ts +4 -0
- package/dist/middleware/common.js +44 -0
- package/dist/middleware/matcher.d.ts +18 -0
- package/dist/middleware/matcher.js +83 -0
- package/dist/models/amazon/index.d.ts +2 -0
- package/dist/models/amazon/index.js +2 -0
- package/dist/models/amazon/middleware.d.ts +2 -0
- package/dist/models/amazon/middleware.js +20 -0
- package/dist/models/amazon/presets.d.ts +2390 -0
- package/dist/models/amazon/presets.js +80 -0
- package/dist/models/anthropic/index.d.ts +2 -0
- package/dist/models/anthropic/index.js +2 -0
- package/dist/models/anthropic/middleware.d.ts +5 -0
- package/dist/models/anthropic/middleware.js +67 -0
- package/dist/models/anthropic/presets.d.ts +4106 -0
- package/dist/models/anthropic/presets.js +113 -0
- package/dist/models/catalog.d.ts +3 -1
- package/dist/models/catalog.js +3 -2
- package/dist/models/cohere/index.d.ts +2 -0
- package/dist/models/cohere/index.js +2 -0
- package/dist/models/cohere/middleware.d.ts +2 -0
- package/dist/models/cohere/middleware.js +18 -0
- package/dist/models/cohere/presets.d.ts +2918 -0
- package/dist/models/cohere/presets.js +129 -0
- package/dist/models/google/index.d.ts +2 -0
- package/dist/models/google/index.js +2 -0
- package/dist/models/google/middleware.d.ts +2 -0
- package/dist/models/google/middleware.js +20 -0
- package/dist/models/{presets/gemini.d.ts → google/presets.d.ts} +400 -174
- package/dist/models/{presets/gemini.js → google/presets.js} +20 -5
- package/dist/models/meta/index.d.ts +1 -0
- package/dist/models/meta/index.js +1 -0
- package/dist/models/meta/presets.d.ts +3254 -0
- package/dist/models/{presets/llama.js → meta/presets.js} +44 -7
- package/dist/models/openai/index.d.ts +2 -0
- package/dist/models/openai/index.js +2 -0
- package/dist/models/openai/middleware.d.ts +2 -0
- package/dist/models/openai/middleware.js +20 -0
- package/dist/models/openai/presets.d.ts +6252 -0
- package/dist/models/openai/presets.js +206 -0
- package/dist/models/types.d.ts +3 -3
- package/dist/models/types.js +27 -0
- package/dist/models/voyage/index.d.ts +2 -0
- package/dist/models/voyage/index.js +2 -0
- package/dist/models/voyage/middleware.d.ts +2 -0
- package/dist/models/voyage/middleware.js +18 -0
- package/dist/models/{presets/voyage.d.ts → voyage/presets.d.ts} +322 -323
- package/dist/providers/anthropic/canonical.d.ts +3 -0
- package/dist/providers/anthropic/canonical.js +9 -0
- package/dist/providers/anthropic/index.d.ts +1 -0
- package/dist/providers/anthropic/index.js +1 -0
- package/dist/providers/bedrock/canonical.d.ts +15 -0
- package/dist/providers/{canonical/bedrock.js → bedrock/canonical.js} +13 -15
- package/dist/providers/bedrock/index.d.ts +1 -0
- package/dist/providers/bedrock/index.js +1 -0
- package/dist/providers/cohere/canonical.d.ts +3 -0
- package/dist/providers/{canonical/cohere.js → cohere/canonical.js} +6 -6
- package/dist/providers/cohere/index.d.ts +1 -0
- package/dist/providers/cohere/index.js +1 -0
- package/dist/providers/groq/canonical.d.ts +3 -0
- package/dist/providers/groq/canonical.js +12 -0
- package/dist/providers/groq/index.d.ts +1 -0
- package/dist/providers/groq/index.js +1 -0
- package/dist/providers/openai/canonical.d.ts +3 -0
- package/dist/providers/openai/canonical.js +8 -0
- package/dist/providers/openai/index.d.ts +1 -0
- package/dist/providers/openai/index.js +1 -0
- package/dist/providers/registry.d.ts +16 -26
- package/dist/providers/registry.js +19 -26
- package/dist/providers/types.d.ts +1 -1
- package/dist/providers/types.js +1 -0
- package/dist/providers/vertex/canonical.d.ts +3 -0
- package/dist/providers/vertex/canonical.js +8 -0
- package/dist/providers/vertex/index.d.ts +1 -0
- package/dist/providers/vertex/index.js +1 -0
- package/dist/providers/voyage/canonical.d.ts +3 -0
- package/dist/providers/voyage/canonical.js +7 -0
- package/dist/providers/voyage/index.d.ts +1 -0
- package/dist/providers/voyage/index.js +1 -0
- package/dist/types.d.ts +60 -30
- package/dist/utils/errors.js +2 -0
- package/dist/utils/preset.d.ts +1 -7
- package/dist/utils/preset.js +1 -1
- package/dist/utils/response.d.ts +1 -0
- package/dist/utils/response.js +10 -0
- package/package.json +79 -70
- package/src/config.ts +2 -18
- package/src/endpoints/chat-completions/converters.test.ts +39 -0
- package/src/endpoints/chat-completions/converters.ts +191 -112
- package/src/endpoints/chat-completions/handler.test.ts +47 -18
- package/src/endpoints/chat-completions/handler.ts +40 -34
- package/src/endpoints/chat-completions/schema.ts +161 -88
- package/src/endpoints/embeddings/converters.ts +15 -11
- package/src/endpoints/embeddings/handler.test.ts +27 -30
- package/src/endpoints/embeddings/handler.ts +34 -28
- package/src/endpoints/embeddings/schema.ts +10 -10
- package/src/endpoints/models/converters.ts +22 -14
- package/src/endpoints/models/handler.test.ts +26 -29
- package/src/endpoints/models/handler.ts +10 -12
- package/src/endpoints/models/schema.ts +26 -20
- package/src/gateway.ts +10 -24
- package/src/index.ts +3 -0
- package/src/{utils/hooks.ts → lifecycle.ts} +21 -11
- package/src/middleware/common.ts +68 -0
- package/src/middleware/matcher.ts +117 -0
- package/src/models/amazon/index.ts +2 -0
- package/src/models/amazon/middleware.ts +25 -0
- package/src/models/amazon/presets.ts +104 -0
- package/src/models/anthropic/index.ts +2 -0
- package/src/models/anthropic/middleware.test.ts +184 -0
- package/src/models/anthropic/middleware.ts +75 -0
- package/src/models/anthropic/presets.ts +161 -0
- package/src/models/catalog.ts +10 -2
- package/src/models/cohere/index.ts +2 -0
- package/src/models/cohere/middleware.ts +23 -0
- package/src/models/cohere/presets.ts +181 -0
- package/src/models/google/index.ts +2 -0
- package/src/models/google/middleware.ts +25 -0
- package/src/models/{presets/gemini.ts → google/presets.ts} +25 -5
- package/src/models/meta/index.ts +1 -0
- package/src/models/{presets/llama.ts → meta/presets.ts} +68 -7
- package/src/models/openai/index.ts +2 -0
- package/src/models/openai/middleware.ts +25 -0
- package/src/models/openai/presets.ts +269 -0
- package/src/models/types.ts +29 -2
- package/src/models/voyage/index.ts +2 -0
- package/src/models/voyage/middleware.ts +23 -0
- package/src/providers/anthropic/canonical.ts +17 -0
- package/src/providers/anthropic/index.ts +1 -0
- package/src/providers/{canonical/bedrock.ts → bedrock/canonical.ts} +22 -32
- package/src/providers/bedrock/index.ts +1 -0
- package/src/providers/cohere/canonical.ts +26 -0
- package/src/providers/cohere/index.ts +1 -0
- package/src/providers/groq/canonical.ts +21 -0
- package/src/providers/groq/index.ts +1 -0
- package/src/providers/openai/canonical.ts +16 -0
- package/src/providers/openai/index.ts +1 -0
- package/src/providers/registry.test.ts +12 -10
- package/src/providers/registry.ts +43 -43
- package/src/providers/types.ts +1 -0
- package/src/providers/vertex/canonical.ts +17 -0
- package/src/providers/vertex/index.ts +1 -0
- package/src/providers/voyage/canonical.ts +16 -0
- package/src/providers/voyage/index.ts +1 -0
- package/src/types.ts +64 -28
- package/src/utils/errors.ts +2 -0
- package/src/utils/preset.ts +2 -6
- package/src/utils/response.ts +15 -0
- package/dist/models/presets/claude.d.ts +0 -1165
- package/dist/models/presets/claude.js +0 -40
- package/dist/models/presets/cohere.d.ts +0 -383
- package/dist/models/presets/cohere.js +0 -26
- package/dist/models/presets/gpt-oss.d.ts +0 -779
- package/dist/models/presets/gpt-oss.js +0 -40
- package/dist/models/presets/llama.d.ts +0 -1400
- package/dist/providers/canonical/anthropic.d.ts +0 -25
- package/dist/providers/canonical/anthropic.js +0 -14
- package/dist/providers/canonical/bedrock.d.ts +0 -26
- package/dist/providers/canonical/cohere.d.ts +0 -17
- package/dist/providers/canonical/groq.d.ts +0 -17
- package/dist/providers/canonical/groq.js +0 -10
- package/dist/providers/canonical/openai.d.ts +0 -17
- package/dist/providers/canonical/openai.js +0 -8
- package/dist/providers/canonical/vertex.d.ts +0 -17
- package/dist/providers/canonical/vertex.js +0 -10
- package/dist/providers/canonical/voyage.d.ts +0 -17
- package/dist/providers/canonical/voyage.js +0 -8
- package/dist/utils/hooks.d.ts +0 -2
- package/src/models/presets/claude.ts +0 -59
- package/src/models/presets/cohere.ts +0 -37
- package/src/models/presets/gpt-oss.ts +0 -55
- package/src/providers/canonical/anthropic.ts +0 -32
- package/src/providers/canonical/cohere.ts +0 -36
- package/src/providers/canonical/groq.ts +0 -25
- package/src/providers/canonical/openai.ts +0 -16
- package/src/providers/canonical/vertex.ts +0 -18
- package/src/providers/canonical/voyage.ts +0 -16
- package/dist/models/{presets/voyage.js → voyage/presets.js} +10 -10
- package/src/models/{presets/voyage.ts → voyage/presets.ts} +10 -10
package/README.md
CHANGED
|
@@ -21,7 +21,7 @@ Hebo Gateway is an open-source, embeddable AI gateway framework built to live in
|
|
|
21
21
|
## Installation
|
|
22
22
|
|
|
23
23
|
```bash
|
|
24
|
-
bun
|
|
24
|
+
bun install @hebo-ai/gateway
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
## Quickstart
|
|
@@ -31,33 +31,39 @@ bun add @hebo-ai/gateway ai @ai-sdk/groq
|
|
|
31
31
|
Start by creating a gateway instance with at least one provider and a few models.
|
|
32
32
|
|
|
33
33
|
```ts
|
|
34
|
-
import {
|
|
35
|
-
import {
|
|
36
|
-
import {
|
|
34
|
+
import { createGroq } from "@ai-sdk/groq";
|
|
35
|
+
import { gateway, defineModelCatalog } from "@hebo-ai/gateway";
|
|
36
|
+
import { withCanonicalIdsForGroq } from "@hebo-ai/gateway/providers/groq";
|
|
37
|
+
import { gptOss20b, gptOss } from "@hebo-ai/gateway/models/openai";
|
|
37
38
|
|
|
38
39
|
export const gw = gateway({
|
|
39
40
|
// PROVIDER REGISTRY
|
|
40
41
|
providers: {
|
|
41
|
-
// Any Vercel AI SDK provider +
|
|
42
|
-
groq:
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
// Any Vercel AI SDK provider + withCanonicalIdsForX helper
|
|
43
|
+
groq: withCanonicalIdsForGroq(
|
|
44
|
+
createGroq({
|
|
45
|
+
apiKey: process.env.GROQ_API_KEY,
|
|
46
|
+
}),
|
|
47
|
+
),
|
|
45
48
|
},
|
|
46
49
|
|
|
47
50
|
// MODEL CATALOG
|
|
48
|
-
models:
|
|
49
|
-
// Choose a preset for common SOTA models
|
|
50
|
-
gptOss20b
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
51
|
+
models: defineModelCatalog(
|
|
52
|
+
// Choose a pre-configured preset for common SOTA models
|
|
53
|
+
gptOss20b,
|
|
54
|
+
// Or add a whole model family with your own provider list
|
|
55
|
+
gptOss["all"].map(
|
|
56
|
+
preset => preset({
|
|
57
|
+
providers: ["groq"],
|
|
58
|
+
})
|
|
56
59
|
),
|
|
57
60
|
),
|
|
58
61
|
});
|
|
59
62
|
```
|
|
60
63
|
|
|
64
|
+
> [!NOTE]
|
|
65
|
+
> Don't forget to install the Groq provider package too: `@ai-sdk/groq`.
|
|
66
|
+
|
|
61
67
|
### Mount Route Handlers
|
|
62
68
|
|
|
63
69
|
Hebo Gateway plugs into your favorite web framework. Simply mount the gateway’s `handler` under a prefix, and keep using your existing lifecycle hooks for authentication, logging, observability, and more.
|
|
@@ -194,39 +200,41 @@ Hebo Gateway’s provider registry accepts any **Vercel AI SDK Provider**. For H
|
|
|
194
200
|
|
|
195
201
|
Out-of-the-box canonical providers:
|
|
196
202
|
|
|
197
|
-
- Amazon Bedrock (`
|
|
198
|
-
- Anthropic (`
|
|
199
|
-
- Cohere (`
|
|
200
|
-
- Google Vertex AI (`
|
|
201
|
-
- Groq (`
|
|
202
|
-
- OpenAI (`
|
|
203
|
-
- Voyage (`
|
|
203
|
+
- Amazon Bedrock (`withCanonicalIdsForBedrock`): `@hebo-ai/gateway/providers/bedrock`
|
|
204
|
+
- Anthropic (`withCanonicalIdsForAnthropic`): `@hebo-ai/gateway/providers/anthropic`
|
|
205
|
+
- Cohere (`withCanonicalIdsForCohere`): `@hebo-ai/gateway/providers/cohere`
|
|
206
|
+
- Google Vertex AI (`withCanonicalIdsForVertex`): `@hebo-ai/gateway/providers/vertex`
|
|
207
|
+
- Groq (`withCanonicalIdsForGroq`): `@hebo-ai/gateway/providers/groq`
|
|
208
|
+
- OpenAI (`withCanonicalIdsForOpenAI`): `@hebo-ai/gateway/providers/openai`
|
|
209
|
+
- Voyage (`withCanonicalIdsForVoyage`): `@hebo-ai/gateway/providers/voyage`
|
|
204
210
|
|
|
205
211
|
If an adapter is not yet provided, you can create your own by wrapping the provider instance with the `withCanonicalIds` helper and define your custom canonicalization mapping & rules.
|
|
206
212
|
|
|
207
213
|
```ts
|
|
208
|
-
import {
|
|
214
|
+
import { createAzure } from "@ai-sdk/openai";
|
|
209
215
|
import {
|
|
210
216
|
gateway,
|
|
211
|
-
createModelCatalog,
|
|
212
217
|
withCanonicalIds,
|
|
213
218
|
} from "@hebo-ai/gateway";
|
|
214
219
|
|
|
215
|
-
const
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
220
|
+
const azure = withCanonicalIds(
|
|
221
|
+
createAzure({
|
|
222
|
+
resourceName: process.env["AZURE_RESOURCE_NAME"],
|
|
223
|
+
apiKey: process.env["AZURE_API_KEY"]
|
|
224
|
+
}), {
|
|
225
|
+
mapping: {
|
|
226
|
+
"openai/gpt-4.1-mini": "your-gpt-4.1-mini-deployment-name",
|
|
227
|
+
"openai/text-embedding-3-small": "your-embeddings-3-small-deployment-name",
|
|
228
|
+
}},
|
|
221
229
|
);
|
|
222
230
|
|
|
223
231
|
const gw = gateway({
|
|
224
232
|
providers: {
|
|
225
|
-
|
|
233
|
+
azure,
|
|
226
234
|
},
|
|
227
|
-
models:
|
|
235
|
+
models: {
|
|
228
236
|
// ...your models pointing at canonical IDs above
|
|
229
|
-
}
|
|
237
|
+
},
|
|
230
238
|
});
|
|
231
239
|
```
|
|
232
240
|
|
|
@@ -236,46 +244,53 @@ Registering models tells Hebo Gateway which models are available, under which ca
|
|
|
236
244
|
|
|
237
245
|
#### Model Presets
|
|
238
246
|
|
|
239
|
-
To simplify the registration, Hebo Gateway ships a set of model presets under `@hebo-ai/gateway/models`. Use these when you want ready-to-use catalog entries with sane defaults for common SOTA models.
|
|
247
|
+
To simplify the registration, Hebo Gateway ships a set of model presets under `@hebo-ai/gateway/models`. Use these when you want ready-to-use catalog entries with sane defaults for common SOTA models.
|
|
240
248
|
|
|
241
249
|
Presets come in two forms:
|
|
250
|
+
|
|
242
251
|
- Individual presets (e.g. `gptOss20b`, `claudeSonnet45`) for a single model.
|
|
243
252
|
- Family presets (e.g. `claude`, `gemini`, `llama`) which group multiple models and expose helpers like `latest`, `all`, and versioned arrays (for example `claude["v4.5"]`).
|
|
244
253
|
|
|
245
254
|
Out-of-the-box model presets:
|
|
246
255
|
|
|
247
|
-
- **
|
|
248
|
-
|
|
256
|
+
- **Amazon** — `@hebo-ai/gateway/models/amazon`
|
|
257
|
+
Nova: `nova` (`v1`, `v2`, `v1.x`, `v2.x`, `latest`, `embeddings`, `all`)
|
|
249
258
|
|
|
250
|
-
- **
|
|
251
|
-
|
|
259
|
+
- **Anthropic** — `@hebo-ai/gateway/models/anthropic`
|
|
260
|
+
Claude: `claude` (`v4.5`, `v4.1`, `v4`, `v3.7`, `v3.5`, `v3`, `v4.x`, `v3.x`, `haiku`, `sonnet`, `opus`, `latest`, `all`)
|
|
252
261
|
|
|
253
|
-
- **
|
|
254
|
-
|
|
262
|
+
- **Cohere** — `@hebo-ai/gateway/models/cohere`
|
|
263
|
+
Command: `command` (`A`, `R`, `latest`, `all`)
|
|
264
|
+
Embed: `embed` (`v4`, `v3`, `latest`, `all`)
|
|
255
265
|
|
|
256
|
-
- **
|
|
257
|
-
|
|
266
|
+
- **Google** — `@hebo-ai/gateway/models/google`
|
|
267
|
+
Gemini: `gemini` (`v2.5`, `v3-preview`, `v2.x`, `v3.x`, `embeddings`, `latest`, `preview`, `all`)
|
|
258
268
|
|
|
259
|
-
- **
|
|
260
|
-
|
|
269
|
+
- **Meta** — `@hebo-ai/gateway/models/meta`
|
|
270
|
+
Llama: `llama` (`v3.1`, `v3.2`, `v3.3`, `v4`, `v3.x`, `v4.x`, `latest`, `all`)
|
|
271
|
+
|
|
272
|
+
- **OpenAI** — `@hebo-ai/gateway/models/openai`
|
|
273
|
+
GPT: `gpt` (`v5`, `v5.1`, `v5.2`, `v5.x`, `chat`, `codex`, `pro`, `latest`, `all`)
|
|
274
|
+
GPT-OSS: `gptOss` (`v1`, `v1.x`, `latest`, `all`)
|
|
275
|
+
Embeddings: `textEmbeddings` (`v3`, `v3.x`, `latest`, `all`)
|
|
261
276
|
|
|
262
277
|
- **Voyage** — `@hebo-ai/gateway/models/voyage`
|
|
263
|
-
|
|
278
|
+
Voyage: `voyage` (`v2`, `v3`, `v3.5`, `v4`, `v2.x`, `v3.x`, `v4.x`, `latest`, `all`)
|
|
264
279
|
|
|
265
280
|
```ts
|
|
266
|
-
import {
|
|
267
|
-
import { gptOss20b } from "@hebo-ai/gateway/models/
|
|
268
|
-
import { claudeSonnet45, claude } from "@hebo-ai/gateway/models/
|
|
281
|
+
import { defineModelCatalog } from "@hebo-ai/gateway";
|
|
282
|
+
import { gptOss20b } from "@hebo-ai/gateway/models/openai";
|
|
283
|
+
import { claudeSonnet45, claude } from "@hebo-ai/gateway/models/anthropic";
|
|
269
284
|
|
|
270
285
|
// Individual preset
|
|
271
|
-
const models =
|
|
286
|
+
const models = defineModelCatalog(
|
|
272
287
|
gptOss20b({ providers: ["groq"] }),
|
|
273
288
|
claudeSonnet45({ providers: ["bedrock"] }),
|
|
274
289
|
);
|
|
275
290
|
|
|
276
291
|
// Family preset (pick a group and apply the same override to each)
|
|
277
|
-
const modelsFromFamily =
|
|
278
|
-
|
|
292
|
+
const modelsFromFamily = defineModelCatalog(
|
|
293
|
+
claude["latest"].map((preset) => preset({ providers: ["anthropic"] })),
|
|
279
294
|
);
|
|
280
295
|
```
|
|
281
296
|
|
|
@@ -288,7 +303,7 @@ const gw = gateway({
|
|
|
288
303
|
providers: {
|
|
289
304
|
// ...
|
|
290
305
|
},
|
|
291
|
-
models:
|
|
306
|
+
models: {
|
|
292
307
|
"openai/gpt-5.2": {
|
|
293
308
|
name: "GPT 5.2",
|
|
294
309
|
created: "2025-12-11",
|
|
@@ -312,10 +327,12 @@ const gw = gateway({
|
|
|
312
327
|
}
|
|
313
328
|
},
|
|
314
329
|
// ...
|
|
315
|
-
}
|
|
330
|
+
},
|
|
316
331
|
});
|
|
317
332
|
```
|
|
318
333
|
|
|
334
|
+
Note: the only mandatory property is the `providers` array, everything else is optional metadata.
|
|
335
|
+
|
|
319
336
|
### Hooks
|
|
320
337
|
|
|
321
338
|
Hooks allow you to plug into the lifecycle of the gateway and enrich it with additional functionality. All hooks are available as async and non-async.
|
|
@@ -345,26 +362,32 @@ const gw = gateway({
|
|
|
345
362
|
},
|
|
346
363
|
/**
|
|
347
364
|
* Maps a user-provided model ID or alias to a canonical ID.
|
|
365
|
+
* @param ctx.body The parsed body object with all call parameters.
|
|
348
366
|
* @param ctx.modelId Incoming model ID.
|
|
349
367
|
* @returns Canonical model ID or undefined to keep original.
|
|
350
368
|
*/
|
|
351
|
-
resolveModelId
|
|
369
|
+
resolveModelId?: (ctx: {
|
|
370
|
+
body: ChatCompletionsBody | EmbeddingsBody;
|
|
371
|
+
modelId: ModelId;
|
|
372
|
+
}) => ModelId | void | Promise<ModelId | void> {
|
|
352
373
|
// Example Use Cases:
|
|
353
374
|
// - Resolve modelAlias to modelId
|
|
354
375
|
return undefined;
|
|
355
376
|
},
|
|
356
377
|
/**
|
|
357
378
|
* Picks a provider instance for the request.
|
|
358
|
-
* @param ctx.providers
|
|
379
|
+
* @param ctx.providers ProviderRegistry from config.
|
|
359
380
|
* @param ctx.models ModelCatalog from config.
|
|
381
|
+
* @param ctx.body The parsed body object with all call parameters.
|
|
360
382
|
* @param ctx.modelId Resolved model ID.
|
|
361
383
|
* @param ctx.operation Operation type ("text" | "embeddings").
|
|
362
384
|
* @returns ProviderV3 to override, or undefined to use default.
|
|
363
385
|
*/
|
|
364
386
|
resolveProvider: async (ctx: {
|
|
365
|
-
providers:
|
|
387
|
+
providers: ProviderRegistry;
|
|
366
388
|
models: ModelCatalog;
|
|
367
389
|
modelId: ModelId;
|
|
390
|
+
body: ChatCompletionsBody | EmbeddingsBody;
|
|
368
391
|
operation: "text" | "embeddings";
|
|
369
392
|
}): Promise<ProviderV3 | void> => {
|
|
370
393
|
// Example Use Cases:
|
|
@@ -387,8 +410,86 @@ const gw = gateway({
|
|
|
387
410
|
});
|
|
388
411
|
```
|
|
389
412
|
|
|
413
|
+
Hook contexts are **readonly for core fields**. Use return values to override request / response and return modelId / provider.
|
|
414
|
+
|
|
415
|
+
To pass data between hooks, use `ctx.state`. It’s a per-request mutable bag in which you can stash things like auth info, routing decisions, timers, or trace IDs and read them later again in any of the other hooks.
|
|
416
|
+
|
|
417
|
+
## OpenAI Extensions
|
|
418
|
+
|
|
419
|
+
### Reasoning
|
|
420
|
+
|
|
421
|
+
In addition to the official `reasoning_effort` parameter, the chat completions endpoint accepts a `reasoning` object for more fine-grained control of the budget. It's treated as provider-agnostic input and normalized before hitting the upstream model.
|
|
422
|
+
|
|
423
|
+
```json
|
|
424
|
+
{
|
|
425
|
+
"model": "anthropic/claude-4-sonnet",
|
|
426
|
+
"messages": [{ "role": "user", "content": "Explain the tradeoffs." }],
|
|
427
|
+
"reasoning": { "effort": "medium" }
|
|
428
|
+
}
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
Normalization rules:
|
|
432
|
+
|
|
433
|
+
- `enabled` -> fall back to model default if none provided
|
|
434
|
+
- `max_tokens`: fall back to model default if model supports
|
|
435
|
+
- `effort` -> budget = percentage of `max_tokens`
|
|
436
|
+
- `none`: 0%
|
|
437
|
+
- `minimal`: 10%
|
|
438
|
+
- `low`: 20%
|
|
439
|
+
- `medium`: 50% (default)
|
|
440
|
+
- `high`: 80%
|
|
441
|
+
- `xhigh`: 95%
|
|
442
|
+
|
|
443
|
+
Reasoning output is surfaced as an extension to the `completion` object.
|
|
444
|
+
|
|
445
|
+
- When present, it is returned on the assistant message as `reasoning_content`. Reasoning token counts (when available) are returned on `usage.completion_tokens_details.reasoning_tokens`.
|
|
446
|
+
- For stream responses, reasoning text is sent incrementally as `reasoning_content` part (separate from normal text `content` deltas). Token counts land in the final `usage` object on the terminating chunk.
|
|
447
|
+
|
|
448
|
+
Most SDKs handle these fields out-of-the-box.
|
|
449
|
+
|
|
390
450
|
## Advanced Usage
|
|
391
451
|
|
|
452
|
+
### Passing Framework State to Hooks
|
|
453
|
+
|
|
454
|
+
You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
|
|
455
|
+
|
|
456
|
+
```ts
|
|
457
|
+
import { Elysia } from "elysia";
|
|
458
|
+
import { gateway } from "@hebo-ai/gateway";
|
|
459
|
+
|
|
460
|
+
const basePath = "/v1/gateway";
|
|
461
|
+
|
|
462
|
+
const gw = gateway({
|
|
463
|
+
basePath,
|
|
464
|
+
providers: {
|
|
465
|
+
// ...
|
|
466
|
+
},
|
|
467
|
+
models: {
|
|
468
|
+
// ...
|
|
469
|
+
},
|
|
470
|
+
hooks: {
|
|
471
|
+
resolveProvider: async (ctx) => {
|
|
472
|
+
// Select provider based on userId
|
|
473
|
+
const user = ctx.state.auth.userId;
|
|
474
|
+
if (user.startsWith("vip:")) {
|
|
475
|
+
return ctx.providers["openai"];
|
|
476
|
+
} else {
|
|
477
|
+
return ctx.providers["groq"];
|
|
478
|
+
}
|
|
479
|
+
},
|
|
480
|
+
},
|
|
481
|
+
});
|
|
482
|
+
|
|
483
|
+
const app = new Elysia()
|
|
484
|
+
.derive(({ headers }) => ({
|
|
485
|
+
auth: {
|
|
486
|
+
userId: headers["x-user-id"],
|
|
487
|
+
},
|
|
488
|
+
}))
|
|
489
|
+
.all(`${basePath}`, ({ request, auth }) => gw.handler(request, { auth }))
|
|
490
|
+
.listen(3000);
|
|
491
|
+
```
|
|
492
|
+
|
|
392
493
|
### Selective Route Mounting
|
|
393
494
|
|
|
394
495
|
If you want to have more flexibility, for example for custom rate limit checks per route, you can also choose to only mount individual routes from the gateway's `routes` property.
|
|
@@ -410,14 +511,15 @@ console.log(`🐒 /chat/completions mounted to ${app.server?.url}/chat`);
|
|
|
410
511
|
We also provide full schemas, helper functions and types to convert between **OpenAI <> Vercel AI SDK** for advanced use cases like creating your own endpoint. They are available via deep-imports and completely tree-shakeable.
|
|
411
512
|
|
|
412
513
|
```ts
|
|
413
|
-
import { streamText } from "ai";
|
|
514
|
+
import { streamText, wrapLanguageModel } from "ai";
|
|
414
515
|
import { createGroq } from "@ai-sdk/groq";
|
|
415
516
|
import * as z from "zod";
|
|
416
517
|
import {
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
518
|
+
ChatCompletionsBodySchema,
|
|
519
|
+
convertToTextCallOptions,
|
|
520
|
+
toChatCompletionsStreamResponse,
|
|
420
521
|
} from "@hebo-ai/gateway/endpoints/chat-completions";
|
|
522
|
+
import { forwardParamsMiddleware } from "@hebo-ai/gateway/middleware/common";
|
|
421
523
|
|
|
422
524
|
const groq = createGroq({ apiKey: process.env.GROQ_API_KEY });
|
|
423
525
|
|
|
@@ -425,24 +527,27 @@ export async function handler(req: Request): Promise<Response> {
|
|
|
425
527
|
|
|
426
528
|
const body = await req.json();
|
|
427
529
|
|
|
428
|
-
const parsed =
|
|
530
|
+
const parsed = ChatCompletionsBodySchema.safeParse(body);
|
|
429
531
|
if (!parsed.success) {
|
|
430
532
|
return new Response(z.prettifyError(parsed.error), { status: 422 });
|
|
431
533
|
}
|
|
432
534
|
|
|
433
535
|
const { model, ...inputs } = parsed.data;
|
|
434
536
|
|
|
435
|
-
const textOptions =
|
|
537
|
+
const textOptions = convertToTextCallOptions(inputs);
|
|
436
538
|
|
|
437
539
|
const result = await streamText({
|
|
438
|
-
model:
|
|
540
|
+
model: wrapLanguageModel({
|
|
541
|
+
model: groq(model),
|
|
542
|
+
middleware: forwardParamsMiddleware("groq"),
|
|
543
|
+
}),
|
|
439
544
|
...textOptions
|
|
440
545
|
});
|
|
441
546
|
|
|
442
|
-
return
|
|
547
|
+
return toChatCompletionsStreamResponse(result, model);
|
|
443
548
|
}
|
|
444
549
|
```
|
|
445
550
|
|
|
446
|
-
Non-streaming versions are available via `
|
|
551
|
+
Non-streaming versions are available via `createChatCompletionsResponse`. Equivalent schemas and helpers are available in the `embeddings` and `models` endpoints.
|
|
447
552
|
|
|
448
|
-
Since Zod v4.3 you can also generate a JSON Schema from any zod object by calling the
|
|
553
|
+
Since Zod v4.3 you can also generate a JSON Schema from any zod object by calling the `z.toJSONSchema(...)` function. This can be useful, for example, to create OpenAPI documentation.
|
package/dist/config.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { createProviderRegistry } from "ai";
|
|
2
1
|
import { kParsed } from "./types";
|
|
3
2
|
export const parseConfig = (config) => {
|
|
4
3
|
// If it has been parsed before, just return
|
|
@@ -9,22 +8,13 @@ export const parseConfig = (config) => {
|
|
|
9
8
|
if (Object.keys(providers).length === 0) {
|
|
10
9
|
throw new Error("Gateway config error: no providers configured (config.providers is empty).");
|
|
11
10
|
}
|
|
12
|
-
// Initialize ProviderRegistry (if nessecary)
|
|
13
|
-
let registry;
|
|
14
|
-
if ("languageModel" in providers) {
|
|
15
|
-
registry = providers;
|
|
16
|
-
}
|
|
17
|
-
else {
|
|
18
|
-
registry = createProviderRegistry(providers);
|
|
19
|
-
}
|
|
20
11
|
// Strip out providers from models that are not configured
|
|
21
|
-
const providerKeys = Object.keys(registry.providers);
|
|
22
12
|
const parsedModels = {};
|
|
23
13
|
for (const id in models) {
|
|
24
14
|
const model = models[id];
|
|
25
15
|
const kept = [];
|
|
26
16
|
for (const p of model.providers) {
|
|
27
|
-
if (
|
|
17
|
+
if (p in providers)
|
|
28
18
|
kept.push(p);
|
|
29
19
|
else
|
|
30
20
|
console.warn(`[models] ${id}: provider "${p}" removed (not configured)`);
|
|
@@ -35,5 +25,5 @@ export const parseConfig = (config) => {
|
|
|
35
25
|
if (Object.keys(parsedModels).length === 0) {
|
|
36
26
|
throw new Error("Gateway config error: no models configured (config.models is empty).");
|
|
37
27
|
}
|
|
38
|
-
return { ...config, providers
|
|
28
|
+
return { ...config, providers, models: parsedModels, [kParsed]: true };
|
|
39
29
|
};
|
|
@@ -1,32 +1,36 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output } from "ai";
|
|
3
|
-
import type {
|
|
1
|
+
import type { SharedV3ProviderOptions } from "@ai-sdk/provider";
|
|
2
|
+
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
|
+
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk } from "./schema";
|
|
4
|
+
import { OpenAIError } from "../../utils/errors";
|
|
4
5
|
export type TextCallOptions = {
|
|
5
6
|
messages: ModelMessage[];
|
|
6
7
|
tools?: ToolSet;
|
|
7
8
|
toolChoice?: ToolChoice<ToolSet>;
|
|
8
9
|
temperature?: number;
|
|
9
|
-
|
|
10
|
+
maxOutputTokens?: number;
|
|
11
|
+
frequencyPenalty?: number;
|
|
12
|
+
presencePenalty?: number;
|
|
13
|
+
seed?: number;
|
|
14
|
+
stopSequences?: string[];
|
|
15
|
+
topP?: number;
|
|
16
|
+
providerOptions: SharedV3ProviderOptions;
|
|
10
17
|
};
|
|
11
|
-
export declare function
|
|
12
|
-
export declare function
|
|
13
|
-
export declare function
|
|
14
|
-
export declare function
|
|
15
|
-
export declare function
|
|
16
|
-
export declare function
|
|
17
|
-
export declare const
|
|
18
|
-
export declare const
|
|
19
|
-
export declare function
|
|
20
|
-
export declare function
|
|
21
|
-
export declare function
|
|
22
|
-
export declare function
|
|
23
|
-
export declare class
|
|
18
|
+
export declare function convertToTextCallOptions(params: ChatCompletionsInputs): TextCallOptions;
|
|
19
|
+
export declare function convertToModelMessages(messages: ChatCompletionsMessage[]): ModelMessage[];
|
|
20
|
+
export declare function fromChatCompletionsUserMessage(message: ChatCompletionsUserMessage): UserModelMessage;
|
|
21
|
+
export declare function fromChatCompletionsAssistantMessage(message: ChatCompletionsAssistantMessage): AssistantModelMessage;
|
|
22
|
+
export declare function fromChatCompletionsToolResultMessage(message: ChatCompletionsAssistantMessage, toolById: Map<string, ChatCompletionsToolMessage>): ToolModelMessage | undefined;
|
|
23
|
+
export declare function fromChatCompletionsContent(content: ChatCompletionsContentPart[]): UserContent;
|
|
24
|
+
export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined) => ToolSet | undefined;
|
|
25
|
+
export declare const convertToToolChoice: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoice<ToolSet> | undefined;
|
|
26
|
+
export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
|
|
27
|
+
export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
28
|
+
export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ReadableStream<Uint8Array>;
|
|
29
|
+
export declare function toChatCompletionsStreamResponse(result: StreamTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
30
|
+
export declare class ChatCompletionsStream extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | OpenAIError> {
|
|
24
31
|
constructor(model: string);
|
|
25
32
|
}
|
|
26
|
-
export declare
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
export declare const
|
|
30
|
-
export declare function toCompletionsUsage(usage: LanguageModelUsage | undefined): CompletionsUsage | undefined;
|
|
31
|
-
export declare function toCompletionsToolCall(id: string, name: string, args: unknown): CompletionsToolCall;
|
|
32
|
-
export declare const toCompletionsFinishReason: (finishReason: FinishReason) => CompletionsFinishReason;
|
|
33
|
+
export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
|
|
34
|
+
export declare function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletionsUsage;
|
|
35
|
+
export declare function toChatCompletionsToolCall(id: string, name: string, args: unknown): ChatCompletionsToolCall;
|
|
36
|
+
export declare const toChatCompletionsFinishReason: (finishReason: FinishReason) => ChatCompletionsFinishReason;
|