@hebo-ai/gateway 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -11
- package/dist/endpoints/chat-completions/handler.js +2 -4
- package/dist/endpoints/embeddings/handler.js +2 -4
- package/dist/lifecycle.js +7 -12
- package/dist/models/anthropic/presets.d.ts +463 -0
- package/dist/models/anthropic/presets.js +10 -2
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/canonical.js +1 -0
- package/dist/telemetry/http.js +0 -3
- package/dist/telemetry/stream.js +1 -1
- package/dist/types.d.ts +10 -20
- package/dist/utils/request.d.ts +1 -3
- package/dist/utils/request.js +3 -26
- package/dist/utils/response.d.ts +1 -1
- package/dist/utils/response.js +3 -3
- package/package.json +1 -1
- package/src/endpoints/chat-completions/handler.ts +2 -5
- package/src/endpoints/embeddings/handler.ts +5 -5
- package/src/lifecycle.ts +7 -11
- package/src/models/anthropic/presets.ts +14 -2
- package/src/models/types.ts +1 -0
- package/src/providers/bedrock/canonical.ts +1 -0
- package/src/telemetry/http.ts +0 -3
- package/src/telemetry/stream.ts +1 -1
- package/src/types.ts +19 -23
- package/src/utils/request.ts +5 -33
- package/src/utils/response.ts +3 -3
package/README.md
CHANGED
|
@@ -19,7 +19,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
|
|
|
19
19
|
- 🗂️ Model catalog with extensible metadata capabilities.
|
|
20
20
|
- 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
|
|
21
21
|
- 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
|
|
22
|
-
- 👁️
|
|
22
|
+
- 👁️ Observability via OTel GenAI semantic conventions (Langfuse-compatible).
|
|
23
23
|
|
|
24
24
|
## 📦 Installation
|
|
25
25
|
|
|
@@ -272,7 +272,7 @@ const gw = gateway({
|
|
|
272
272
|
|
|
273
273
|
### Hooks
|
|
274
274
|
|
|
275
|
-
Hooks allow you to plug
|
|
275
|
+
Hooks allow you to plug into the lifecycle of the gateway and enrich it with additional functionality, like your actual routing logic. All hooks are available as async and non-async.
|
|
276
276
|
|
|
277
277
|
```ts
|
|
278
278
|
const gw = gateway({
|
|
@@ -286,10 +286,9 @@ const gw = gateway({
|
|
|
286
286
|
/**
|
|
287
287
|
* Runs before any endpoint handler logic.
|
|
288
288
|
* @param ctx.request Incoming request.
|
|
289
|
-
* @returns Optional
|
|
290
|
-
* Returning a Response stops execution of the endpoint.
|
|
289
|
+
* @returns Optional Response to short-circuit the request.
|
|
291
290
|
*/
|
|
292
|
-
onRequest: async (ctx: { request: Request }): Promise<
|
|
291
|
+
onRequest: async (ctx: { request: Request }): Promise<Response | void> => {
|
|
293
292
|
// Example Use Cases:
|
|
294
293
|
// - Verify authentication
|
|
295
294
|
// - Enforce rate limits
|
|
@@ -315,10 +314,10 @@ const gw = gateway({
|
|
|
315
314
|
* @param ctx.modelId Incoming model ID.
|
|
316
315
|
* @returns Canonical model ID or undefined to keep original.
|
|
317
316
|
*/
|
|
318
|
-
resolveModelId
|
|
317
|
+
resolveModelId: async (ctx: {
|
|
319
318
|
body: ChatCompletionsBody | EmbeddingsBody;
|
|
320
319
|
modelId: ModelId;
|
|
321
|
-
})
|
|
320
|
+
}): Promise<ModelId | void> => {
|
|
322
321
|
// Example Use Cases:
|
|
323
322
|
// - Resolve modelAlias to modelId
|
|
324
323
|
return undefined;
|
|
@@ -328,7 +327,7 @@ const gw = gateway({
|
|
|
328
327
|
* @param ctx.providers ProviderRegistry from config.
|
|
329
328
|
* @param ctx.models ModelCatalog from config.
|
|
330
329
|
* @param ctx.body The parsed body object with all call parameters.
|
|
331
|
-
* @param ctx.
|
|
330
|
+
* @param ctx.resolvedModelId Resolved model ID.
|
|
332
331
|
* @param ctx.operation Operation type ("chat" | "embeddings").
|
|
333
332
|
* @returns ProviderV3 to override, or undefined to use default.
|
|
334
333
|
*/
|
|
@@ -336,7 +335,7 @@ const gw = gateway({
|
|
|
336
335
|
providers: ProviderRegistry;
|
|
337
336
|
models: ModelCatalog;
|
|
338
337
|
body: ChatCompletionsBody | EmbeddingsBody;
|
|
339
|
-
|
|
338
|
+
resolvedModelId: ModelId;
|
|
340
339
|
operation: "chat" | "embeddings";
|
|
341
340
|
}): Promise<ProviderV3 | void> => {
|
|
342
341
|
// Example Use Cases:
|
|
@@ -350,8 +349,8 @@ const gw = gateway({
|
|
|
350
349
|
* @returns Modified result, or undefined to keep original.
|
|
351
350
|
*/
|
|
352
351
|
after: async (ctx: {
|
|
353
|
-
result: ChatCompletions
|
|
354
|
-
}): Promise<ChatCompletions
|
|
352
|
+
result: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings;
|
|
353
|
+
}): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | void> => {
|
|
355
354
|
// Example Use Cases:
|
|
356
355
|
// - Transform result
|
|
357
356
|
// - Result logging
|
|
package/dist/endpoints/chat-completions/handler.js
CHANGED
|
@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
8
|
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
-
import { resolveRequestId } from "../../utils/headers";
|
|
11
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
12
11
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
13
12
|
import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
|
|
@@ -22,7 +21,6 @@ export const chatCompletions = (config) => {
|
|
|
22
21
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
23
22
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
23
|
}
|
|
25
|
-
const requestId = resolveRequestId(ctx.request);
|
|
26
24
|
// Parse + validate input.
|
|
27
25
|
try {
|
|
28
26
|
ctx.body = await ctx.request.json();
|
|
@@ -68,7 +66,7 @@ export const chatCompletions = (config) => {
|
|
|
68
66
|
// Convert inputs to AI SDK call options.
|
|
69
67
|
const textOptions = convertToTextCallOptions(inputs);
|
|
70
68
|
logger.trace({
|
|
71
|
-
requestId,
|
|
69
|
+
requestId: ctx.requestId,
|
|
72
70
|
options: textOptions,
|
|
73
71
|
}, "[chat] AI SDK options");
|
|
74
72
|
addSpanEvent("hebo.options.prepared");
|
|
@@ -127,7 +125,7 @@ export const chatCompletions = (config) => {
|
|
|
127
125
|
},
|
|
128
126
|
...textOptions,
|
|
129
127
|
});
|
|
130
|
-
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
128
|
+
logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
|
|
131
129
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
132
130
|
// Transform result.
|
|
133
131
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
package/dist/endpoints/embeddings/handler.js
CHANGED
|
@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
8
|
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
-
import { resolveRequestId } from "../../utils/headers";
|
|
11
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
12
11
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
13
12
|
import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
|
|
@@ -22,7 +21,6 @@ export const embeddings = (config) => {
|
|
|
22
21
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
23
22
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
23
|
}
|
|
25
|
-
const requestId = resolveRequestId(ctx.request);
|
|
26
24
|
// Parse + validate input.
|
|
27
25
|
try {
|
|
28
26
|
ctx.body = await ctx.request.json();
|
|
@@ -67,7 +65,7 @@ export const embeddings = (config) => {
|
|
|
67
65
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
66
|
// Convert inputs to AI SDK call options.
|
|
69
67
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
70
|
-
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
68
|
+
logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
71
69
|
addSpanEvent("hebo.options.prepared");
|
|
72
70
|
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
73
71
|
// Build middleware chain (model -> forward params -> provider).
|
|
@@ -83,7 +81,7 @@ export const embeddings = (config) => {
|
|
|
83
81
|
abortSignal: ctx.request.signal,
|
|
84
82
|
...embedOptions,
|
|
85
83
|
});
|
|
86
|
-
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
84
|
+
logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
|
|
87
85
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
88
86
|
// Transform result.
|
|
89
87
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
package/dist/lifecycle.js
CHANGED
|
@@ -8,8 +8,7 @@ import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
|
8
8
|
import { recordV8jsMemory } from "./telemetry/memory";
|
|
9
9
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
10
10
|
import { wrapStream } from "./telemetry/stream";
|
|
11
|
-
import {
|
|
12
|
-
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
11
|
+
import { resolveOrCreateRequestId } from "./utils/request";
|
|
13
12
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
14
13
|
export const winterCgHandler = (run, config) => {
|
|
15
14
|
const parsedConfig = parseConfig(config);
|
|
@@ -24,14 +23,13 @@ export const winterCgHandler = (run, config) => {
|
|
|
24
23
|
state: state ?? {},
|
|
25
24
|
providers: parsedConfig.providers,
|
|
26
25
|
models: parsedConfig.models,
|
|
26
|
+
requestId: resolveOrCreateRequestId(request),
|
|
27
27
|
};
|
|
28
|
-
const headers = prepareRequestHeaders(ctx.request);
|
|
29
|
-
if (headers)
|
|
30
|
-
ctx.request = new Request(ctx.request, { headers });
|
|
31
28
|
const span = startSpan(ctx.request.url);
|
|
32
29
|
span.setAttributes(getBaggageAttributes(ctx.request));
|
|
33
30
|
if (!span.isExisting) {
|
|
34
31
|
span.setAttributes(getRequestAttributes(ctx.request, parsedConfig.telemetry?.signals?.http));
|
|
32
|
+
span.setAttributes({ "http.request.id": ctx.requestId });
|
|
35
33
|
}
|
|
36
34
|
const finalize = (status, reason) => {
|
|
37
35
|
if (ctx.operation) {
|
|
@@ -47,8 +45,8 @@ export const winterCgHandler = (run, config) => {
|
|
|
47
45
|
else if (status === 200 && ctx.response?.status)
|
|
48
46
|
realStatus = ctx.response.status;
|
|
49
47
|
if (realStatus !== 200) {
|
|
50
|
-
|
|
51
|
-
requestId:
|
|
48
|
+
logger[realStatus >= 500 ? "error" : "warn"]({
|
|
49
|
+
requestId: ctx.requestId,
|
|
52
50
|
err: reason ?? ctx.request.signal.reason,
|
|
53
51
|
});
|
|
54
52
|
if (realStatus >= 500)
|
|
@@ -65,16 +63,13 @@ export const winterCgHandler = (run, config) => {
|
|
|
65
63
|
if (onRequest instanceof Response) {
|
|
66
64
|
ctx.response = onRequest;
|
|
67
65
|
}
|
|
68
|
-
else if (onRequest) {
|
|
69
|
-
ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
|
|
70
|
-
}
|
|
71
66
|
}
|
|
72
67
|
if (!ctx.response) {
|
|
73
68
|
ctx.result = (await span.runWithContext(() => run(ctx)));
|
|
74
69
|
if (ctx.result instanceof ReadableStream) {
|
|
75
70
|
ctx.result = wrapStream(ctx.result, { onDone: finalize });
|
|
76
71
|
}
|
|
77
|
-
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.
|
|
72
|
+
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId));
|
|
78
73
|
}
|
|
79
74
|
if (parsedConfig.hooks?.onResponse) {
|
|
80
75
|
const onResponse = await parsedConfig.hooks.onResponse(ctx);
|
|
@@ -91,7 +86,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
91
86
|
catch (error) {
|
|
92
87
|
ctx.response = toOpenAIErrorResponse(ctx.request.signal.aborted
|
|
93
88
|
? new GatewayError(error ?? ctx.request.signal.reason, 499)
|
|
94
|
-
: error, prepareResponseInit(ctx.
|
|
89
|
+
: error, prepareResponseInit(ctx.requestId));
|
|
95
90
|
finalize(ctx.response.status, error);
|
|
96
91
|
}
|
|
97
92
|
return ctx.response ?? new Response("Internal Server Error", { status: 500 });
|