npm - @hebo-ai/gateway - Versions diffs - 0.4.0 → 0.4.2 - Mend

@hebo-ai/gateway 0.4.0 → 0.4.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +10 -11
package/dist/endpoints/chat-completions/handler.js +2 -4
package/dist/endpoints/embeddings/handler.js +2 -4
package/dist/lifecycle.js +7 -12
package/dist/models/anthropic/presets.d.ts +463 -0
package/dist/models/anthropic/presets.js +10 -2
package/dist/models/types.d.ts +1 -1
package/dist/models/types.js +1 -0
package/dist/providers/bedrock/canonical.js +1 -0
package/dist/telemetry/http.js +0 -3
package/dist/telemetry/stream.js +1 -1
package/dist/types.d.ts +10 -20
package/dist/utils/request.d.ts +1 -3
package/dist/utils/request.js +3 -26
package/dist/utils/response.d.ts +1 -1
package/dist/utils/response.js +3 -3
package/package.json +1 -1
package/src/endpoints/chat-completions/handler.ts +2 -5
package/src/endpoints/embeddings/handler.ts +5 -5
package/src/lifecycle.ts +7 -11
package/src/models/anthropic/presets.ts +14 -2
package/src/models/types.ts +1 -0
package/src/providers/bedrock/canonical.ts +1 -0
package/src/telemetry/http.ts +0 -3
package/src/telemetry/stream.ts +1 -1
package/src/types.ts +19 -23
package/src/utils/request.ts +5 -33
package/src/utils/response.ts +3 -3

package/README.md CHANGED Viewed

@@ -19,7 +19,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
 - 🗂️ Model catalog with extensible metadata capabilities.
 - 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
 - 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
-- 👁️ OpenTelemetry support for GenAI semantic conventions (Langfuse-compatible).
+- 👁️ Observability via OTel GenAI semantic conventions (Langfuse-compatible).
 ## 📦 Installation
@@ -272,7 +272,7 @@ const gw = gateway({
 ### Hooks
-Hooks allow you to plug-into the lifecycle of the gateway and enrich it with additional functionality, like your actual routing logic. All hooks are available as async and non-async.
+Hooks allow you to plug into the lifecycle of the gateway and enrich it with additional functionality, like your actual routing logic. All hooks are available as async and non-async.
 ```ts
 const gw = gateway({
@@ -286,10 +286,9 @@ const gw = gateway({
     /**
      * Runs before any endpoint handler logic.
      * @param ctx.request Incoming request.
-     * @returns Optional RequestPatch to merge into headers / override body.
-     * Returning a Response stops execution of the endpoint.
+     * @returns Optional Response to short-circuit the request.
      */
-    onRequest: async (ctx: { request: Request }): Promise<RequestPatch | Response | void> => {
+    onRequest: async (ctx: { request: Request }): Promise<Response | void> => {
       // Example Use Cases:
       // - Verify authentication
       // - Enforce rate limits
@@ -315,10 +314,10 @@ const gw = gateway({
      * @param ctx.modelId Incoming model ID.
      * @returns Canonical model ID or undefined to keep original.
      */
-    resolveModelId?: (ctx: {
+    resolveModelId: async (ctx: {
       body: ChatCompletionsBody | EmbeddingsBody;
       modelId: ModelId;
-    }) => ModelId | void | Promise<ModelId | void> {
+    }): Promise<ModelId | void> => {
       // Example Use Cases:
       // - Resolve modelAlias to modelId
       return undefined;
@@ -328,7 +327,7 @@ const gw = gateway({
      * @param ctx.providers ProviderRegistry from config.
      * @param ctx.models ModelCatalog from config.
      * @param ctx.body The parsed body object with all call parameters.
-     * @param ctx.modelId Resolved model ID.
+     * @param ctx.resolvedModelId Resolved model ID.
      * @param ctx.operation Operation type ("chat" | "embeddings").
      * @returns ProviderV3 to override, or undefined to use default.
      */
@@ -336,7 +335,7 @@ const gw = gateway({
       providers: ProviderRegistry;
       models: ModelCatalog;
       body: ChatCompletionsBody | EmbeddingsBody;
-      modelId: ModelId;
+      resolvedModelId: ModelId;
       operation: "chat" | "embeddings";
     }): Promise<ProviderV3 | void> => {
       // Example Use Cases:
@@ -350,8 +349,8 @@ const gw = gateway({
      * @returns Modified result, or undefined to keep original.
      */
     after: async (ctx: {
-      result: ChatCompletions  | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings
-    }): Promise<ChatCompletions  | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | void> => {
+      result: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings;
+    }): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | void> => {
       // Example Use Cases:
       // - Transform result
       // - Result logging

package/dist/endpoints/chat-completions/handler.js CHANGED Viewed

@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
 import { resolveProvider } from "../../providers/registry";
 import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
 import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
-import { resolveRequestId } from "../../utils/headers";
 import { prepareForwardHeaders } from "../../utils/request";
 import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
 import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
@@ -22,7 +21,6 @@ export const chatCompletions = (config) => {
         if (!ctx.request || ctx.request.method !== "POST") {
             throw new GatewayError("Method Not Allowed", 405);
         }
-        const requestId = resolveRequestId(ctx.request);
         // Parse + validate input.
         try {
             ctx.body = await ctx.request.json();
@@ -68,7 +66,7 @@ export const chatCompletions = (config) => {
         // Convert inputs to AI SDK call options.
         const textOptions = convertToTextCallOptions(inputs);
         logger.trace({
-            requestId,
+            requestId: ctx.requestId,
             options: textOptions,
         }, "[chat] AI SDK options");
         addSpanEvent("hebo.options.prepared");
@@ -127,7 +125,7 @@ export const chatCompletions = (config) => {
             },
             ...textOptions,
         });
-        logger.trace({ requestId, result }, "[chat] AI SDK result");
+        logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
         addSpanEvent("hebo.ai-sdk.completed");
         // Transform result.
         ctx.result = toChatCompletions(result, ctx.resolvedModelId);

package/dist/endpoints/embeddings/handler.js CHANGED Viewed

@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
 import { resolveProvider } from "../../providers/registry";
 import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
 import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
-import { resolveRequestId } from "../../utils/headers";
 import { prepareForwardHeaders } from "../../utils/request";
 import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
 import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
@@ -22,7 +21,6 @@ export const embeddings = (config) => {
         if (!ctx.request || ctx.request.method !== "POST") {
             throw new GatewayError("Method Not Allowed", 405);
         }
-        const requestId = resolveRequestId(ctx.request);
         // Parse + validate input.
         try {
             ctx.body = await ctx.request.json();
@@ -67,7 +65,7 @@ export const embeddings = (config) => {
         setSpanAttributes(genAiGeneralAttrs);
         // Convert inputs to AI SDK call options.
         const embedOptions = convertToEmbedCallOptions(inputs);
-        logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
+        logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
         addSpanEvent("hebo.options.prepared");
         setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
         // Build middleware chain (model -> forward params -> provider).
@@ -83,7 +81,7 @@ export const embeddings = (config) => {
             abortSignal: ctx.request.signal,
             ...embedOptions,
         });
-        logger.trace({ requestId, result }, "[embeddings] AI SDK result");
+        logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
         addSpanEvent("hebo.ai-sdk.completed");
         // Transform result.
         ctx.result = toEmbeddings(result, ctx.modelId);

package/dist/lifecycle.js CHANGED Viewed

@@ -8,8 +8,7 @@ import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
 import { recordV8jsMemory } from "./telemetry/memory";
 import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
 import { wrapStream } from "./telemetry/stream";
-import { resolveRequestId } from "./utils/headers";
-import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
+import { resolveOrCreateRequestId } from "./utils/request";
 import { prepareResponseInit, toResponse } from "./utils/response";
 export const winterCgHandler = (run, config) => {
     const parsedConfig = parseConfig(config);
@@ -24,14 +23,13 @@ export const winterCgHandler = (run, config) => {
             state: state ?? {},
             providers: parsedConfig.providers,
             models: parsedConfig.models,
+            requestId: resolveOrCreateRequestId(request),
         };
-        const headers = prepareRequestHeaders(ctx.request);
-        if (headers)
-            ctx.request = new Request(ctx.request, { headers });
         const span = startSpan(ctx.request.url);
         span.setAttributes(getBaggageAttributes(ctx.request));
         if (!span.isExisting) {
             span.setAttributes(getRequestAttributes(ctx.request, parsedConfig.telemetry?.signals?.http));
+            span.setAttributes({ "http.request.id": ctx.requestId });
         }
         const finalize = (status, reason) => {
             if (ctx.operation) {
@@ -47,8 +45,8 @@ export const winterCgHandler = (run, config) => {
             else if (status === 200 && ctx.response?.status)
                 realStatus = ctx.response.status;
             if (realStatus !== 200) {
-                (realStatus >= 500 ? logger.error : logger.warn)({
-                    requestId: resolveRequestId(ctx.request),
+                logger[realStatus >= 500 ? "error" : "warn"]({
+                    requestId: ctx.requestId,
                     err: reason ?? ctx.request.signal.reason,
                 });
                 if (realStatus >= 500)
@@ -65,16 +63,13 @@ export const winterCgHandler = (run, config) => {
                 if (onRequest instanceof Response) {
                     ctx.response = onRequest;
                 }
-                else if (onRequest) {
-                    ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
-                }
             }
             if (!ctx.response) {
                 ctx.result = (await span.runWithContext(() => run(ctx)));
                 if (ctx.result instanceof ReadableStream) {
                     ctx.result = wrapStream(ctx.result, { onDone: finalize });
                 }
-                ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
+                ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId));
             }
             if (parsedConfig.hooks?.onResponse) {
                 const onResponse = await parsedConfig.hooks.onResponse(ctx);
@@ -91,7 +86,7 @@ export const winterCgHandler = (run, config) => {
         catch (error) {
             ctx.response = toOpenAIErrorResponse(ctx.request.signal.aborted
                 ? new GatewayError(error ?? ctx.request.signal.reason, 499)
-                : error, prepareResponseInit(ctx.request));
+                : error, prepareResponseInit(ctx.requestId));
             finalize(ctx.response.status, error);
         }
         return ctx.response ?? new Response("Internal Server Error", { status: 500 });