npm - @hebo-ai/gateway - Versions diffs - 0.8.2 → 0.9.1 - Mend

@hebo-ai/gateway 0.8.2 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/README.md +131 -32
package/dist/endpoints/chat-completions/converters.d.ts +4 -21
package/dist/endpoints/chat-completions/converters.js +23 -160
package/dist/endpoints/chat-completions/handler.js +2 -2
package/dist/endpoints/chat-completions/schema.d.ts +45 -101
package/dist/endpoints/chat-completions/schema.js +13 -69
package/dist/endpoints/conversations/converters.js +2 -3
package/dist/endpoints/conversations/schema.d.ts +506 -644
package/dist/endpoints/conversations/schema.js +8 -159
package/dist/endpoints/conversations/storage/dialects/greptime.js +20 -6
package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -1
package/dist/endpoints/conversations/storage/dialects/postgres.js +6 -3
package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -1
package/dist/endpoints/conversations/storage/sql.js +11 -6
package/dist/endpoints/embeddings/handler.js +1 -1
package/dist/endpoints/responses/converters.d.ts +17 -0
package/dist/endpoints/responses/converters.js +1034 -0
package/dist/endpoints/responses/handler.d.ts +2 -0
package/dist/endpoints/responses/handler.js +137 -0
package/dist/endpoints/responses/index.d.ts +4 -0
package/dist/endpoints/responses/index.js +4 -0
package/dist/endpoints/responses/otel.d.ts +6 -0
package/dist/endpoints/responses/otel.js +221 -0
package/dist/endpoints/responses/schema.d.ts +2109 -0
package/dist/endpoints/responses/schema.js +314 -0
package/dist/endpoints/shared/converters.d.ts +56 -0
package/dist/endpoints/shared/converters.js +179 -0
package/dist/endpoints/shared/schema.d.ts +70 -0
package/dist/endpoints/shared/schema.js +46 -0
package/dist/gateway.d.ts +1 -0
package/dist/gateway.js +2 -0
package/dist/index.d.ts +0 -4
package/dist/index.js +0 -4
package/dist/lifecycle.js +46 -29
package/dist/models/anthropic/middleware.d.ts +1 -1
package/dist/models/anthropic/presets.js +6 -1
package/dist/models/google/middleware.d.ts +1 -1
package/dist/models/google/middleware.js +9 -3
package/dist/models/meta/presets.js +12 -2
package/dist/providers/registry.d.ts +1 -1
package/dist/types.d.ts +18 -6
package/dist/utils/env.js +1 -1
package/dist/utils/preset.js +0 -1
package/package.json +8 -4

package/README.md CHANGED Viewed

@@ -13,6 +13,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
 ## 🍌 Features
 - 🌐 OpenAI-compatible /chat/completions, /embeddings & /models endpoints.
+- 🔄 /responses endpoint implementing the Open Responses API (stateless).
 - 💬 /conversations endpoint built on top of the Responses API.
 - 🔌 Integrate into your existing Hono, Elysia, Next.js & TanStack apps.
 - 🧩 Provider registry compatible with Vercel AI SDK providers.
@@ -38,6 +39,8 @@ bun install @hebo-ai/gateway
   - [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
 - Runtime Support
   - [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
+- Endpoints
+  - [/chat/completions](#chatcompletions) | [/embeddings](#embeddings) | [/models](#models) | [/responses](#responses) | [/conversations](#conversations)
 - OpenAI Extensions
   - [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching)
 - Advanced Usage
@@ -366,6 +369,17 @@ const gw = gateway({
       // - Replace or redact response payload
       return undefined;
     },
+    /**
+     * Runs when the lifecycle catches an error.
+     * @param ctx.error The thrown error.
+     * @returns Replacement error response, or undefined to use the default OpenAI-compatible error response.
+     */
+    onError: async (ctx: { error: unknown }): Promise<Response | void> => {
+      // Example Use Cases:
+      // - Map internal errors to custom API responses
+      // - Add app-specific logging or alerting
+      return undefined;
+    },
   },
 });
 ```
@@ -568,6 +582,123 @@ export const handler = awsLambdaEventHandler({
 });
 ```
+## 🚀 Endpoints
+Hebo Gateway provides several OpenAI-compatible and standards-based endpoints.
+### `/chat/completions`
+The primary endpoint for generating chat completions.
+Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/chat/subresources/completions/methods/create)
+It supports:
+- Streaming responses (Server-Sent Events).
+- Tool calling / Function calling.
+- Advanced extensions like [Reasoning](#reasoning), [Service Tier](#service-tier), and [Prompt Caching](#prompt-caching).
+- Usage tracking and metadata.
+> [!IMPORTANT]
+> **Compatibility & Roadmap:**
+> We are actively working to expand support for the full OpenAI spec. The following are still on the roadmap:
+- **`logprobs` / `top_logprobs`**: Token-level logprobs.
+- **`logit_bias`**: Logit bias in the request body.
+- **`n` > 1**: Multi-choice completions.
+### `/embeddings`
+Generates vector representations for text inputs, compatible with OpenAI's embeddings API.
+Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/embeddings/methods/create)
+It supports:
+- Text and token array inputs.
+- Custom dimensions (for `v3` models).
+- Standard `float` and `base64` encoding formats.
+> [!IMPORTANT]
+> **Compatibility & Roadmap:**
+- **`encoding_format`**: `base64` results.
+### `/models`
+Lists all available models in your [Model Catalog](#models), including their capabilities and metadata.
+Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/models/methods/list)
+It supports:
+- Comprehensive model metadata (capabilities, context limits, knowledge cutoffs).
+- Canonical model ID resolution.
+- Provider-specific availability filtering.
+### `/responses`
+Hebo Gateway provides a `/responses` endpoint implementing the [Open Responses API](https://www.openresponses.org/reference).
+Official documentation: [Open Responses API Reference](https://www.openresponses.org/reference)
+It supports:
+- The same models, providers, hooks, and extensions as `/chat/completions`.
+- Responses API request/response format.
+- Tool calling and multimodal inputs.
+- Normalized reasoning and thought signatures.
+> [!IMPORTANT]
+> **Compatibility & Roadmap:**
+> We are working towards full Open Responses parity. The following are still on the roadmap:
+- **Persistence**: Server-side response storage (`store`), background orchestration (`background`), and chaining via `previous_response_id`.
+- **`conversation`**: Directly passing conversation IDs for automatic context management.
+- **`context_management`**: Support for automatic compaction strategies.
+- **`prompt`**: Reusable prompt templates with variables.
+- **`phase`**: Support for `commentary` vs `final_answer` reasoning phases.
+- **`safety_identifier`**: Custom safety and moderation policies.
+- **`truncation`**: Context window management strategies.
+- **`text.verbosity`**: Control over response detail (low/medium/high).
+- **`logprobs` / `top_logprobs`**: Token-level logprobs.
+- **`include`**: Selective response fields (e.g., `logprobs`, `reasoning.encrypted_content`, and tool-specific outputs).
+- **`stream_options.include_obfuscation`**: Normalizing payload sizes to mitigate side-channel attacks.
+### `/conversations`
+Hebo Gateway provides a dedicated `/conversations` endpoint for managing persistent conversation state. It is designed as an extension of the [OpenAI Conversations API](https://developers.openai.com/api/reference/resources/conversations/methods/create) and supports standard CRUD operations alongside advanced listing with metadata filtering.
+Official documentation: [OpenAI Conversations API](https://developers.openai.com/api/reference/resources/conversations/methods/create)
+#### List & Filter Conversations (Hebo Extension)
+Since standard OpenAI APIs (like Threads) do not support global listing of conversations, Hebo Gateway provides this capability as an extension. You can list all conversations using cursor-based pagination and filter by any metadata key using the `metadata.KEY=VALUE` pattern.
+```bash
+# List conversations for a specific user using metadata filtering
+curl "https://api.gateway.com/conversations?limit=10&metadata.user_id=123"
+```
+The response follows the standard OpenAI list object:
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "conv_abc123",
+      "object": "conversation",
+      "created_at": 1678531200,
+      "metadata": { "user_id": "123" }
+    }
+  ],
+  "first_id": "conv_abc123",
+  "last_id": "conv_abc123",
+  "has_more": false
+}
+```
 ## 🧠 OpenAI Extensions
 ### Reasoning
@@ -630,38 +761,6 @@ Provider-specific mapping:
 When available, the resolved value is echoed back on response as `service_tier`.
-### Conversations
-Hebo Gateway provides a dedicated `/conversations` endpoint for managing persistent conversation state. It is designed as an extension of the [OpenAI Conversations API](https://developers.openai.com/api/reference/typescript/resources/conversations) and supports standard CRUD operations alongside advanced listing with metadata filtering.
-#### List & Filter Conversations
-You can list conversations with standard cursor-based pagination and filter by any metadata key using the `metadata.KEY=VALUE` pattern.
-```bash
-# List conversations for a specific user
-curl "https://api.gateway.com/conversations?limit=10&metadata.user_id=123"
-```
-The response follows the standard OpenAI list object:
-```json
-{
-  "object": "list",
-  "data": [
-    {
-      "id": "conv_abc123",
-      "object": "conversation",
-      "created_at": 1678531200,
-      "metadata": { "user_id": "123" }
-    }
-  ],
-  "first_id": "conv_abc123",
-  "last_id": "conv_abc123",
-  "has_more": false
-}
-```
 ### Prompt Caching
 The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.

package/dist/endpoints/chat-completions/converters.d.ts CHANGED Viewed

@@ -1,23 +1,9 @@
-import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
-import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
+import type { SharedV3ProviderMetadata } from "@ai-sdk/provider";
+import type { GenerateTextResult, StreamTextResult, FinishReason, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
 import { Output } from "ai";
 import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
 import type { SseErrorFrame, SseFrame } from "../../utils/stream";
-export type TextCallOptions = {
-    messages: ModelMessage[];
-    tools?: ToolSet;
-    toolChoice?: ToolChoice<ToolSet>;
-    activeTools?: Array<keyof ToolSet>;
-    output?: Output.Output;
-    temperature?: number;
-    maxOutputTokens?: number;
-    frequencyPenalty?: number;
-    presencePenalty?: number;
-    seed?: number;
-    stopSequences?: string[];
-    topP?: number;
-    providerOptions: SharedV3ProviderOptions;
-};
+import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
 export declare function convertToTextCallOptions(params: ChatCompletionsInputs): TextCallOptions;
 export declare function convertToModelMessages(messages: ChatCompletionsMessage[]): ModelMessage[];
 export declare function fromChatCompletionsUserMessage(message: ChatCompletionsUserMessage): UserModelMessage;
@@ -25,10 +11,7 @@ export declare function fromChatCompletionsAssistantMessage(message: ChatComplet
 export declare function fromChatCompletionsToolResultMessage(message: ChatCompletionsAssistantMessage, toolById: Map<string, ChatCompletionsToolMessage>): ToolModelMessage | undefined;
 export declare function fromChatCompletionsContent(content: ChatCompletionsContentPart[]): UserContent;
 export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined) => ToolSet | undefined;
-export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToolChoice | undefined) => {
-    toolChoice?: ToolChoice<ToolSet>;
-    activeTools?: Array<keyof ToolSet>;
-};
+export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoiceOptions;
 export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
 export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
 export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ChatCompletionsStream;

package/dist/endpoints/chat-completions/converters.js CHANGED Viewed

@@ -1,8 +1,6 @@
 import { Output, jsonSchema, tool } from "ai";
-import { z } from "zod";
-import { GatewayError } from "../../errors/gateway";
 import { toResponse } from "../../utils/response";
-import { parseDataUrl } from "../../utils/url";
+import { parseJsonOrText, parseReasoningOptions, parsePromptCachingOptions, resolveResponseServiceTier, normalizeToolName, stripEmptyKeys, parseBase64, parseImageInput, extractReasoningMetadata, } from "../shared/converters";
 // --- Request Flow ---
 export function convertToTextCallOptions(params) {
     const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
@@ -165,7 +163,7 @@ export function fromChatCompletionsAssistantMessage(message) {
         out.providerOptions = extra_content;
     }
     if (cache_control) {
-        ((out.providerOptions ??= { unknown: {} })["unknown"] ??= {})["cache_control"] = cache_control;
+        (out.providerOptions ??= {})["unknown"] = { cache_control };
     }
     return out;
 }
@@ -214,29 +212,27 @@ export function fromChatCompletionsContent(content) {
     });
 }
 function fromImageUrlPart(url, cacheControl) {
-    if (url.startsWith("data:")) {
-        const { mimeType, dataStart } = parseDataUrl(url);
-        if (!mimeType || dataStart <= "data:".length || dataStart >= url.length) {
-            throw new GatewayError("Invalid data URL", 400);
-        }
-        return fromFilePart(url.slice(dataStart), mimeType, undefined, cacheControl);
-    }
-    const out = {
-        type: "image",
-        image: new URL(url),
-    };
-    if (cacheControl) {
-        out.providerOptions = {
-            unknown: { cache_control: cacheControl },
+    const { image, mediaType } = parseImageInput(url);
+    if (image instanceof URL) {
+        const out = {
+            type: "image",
+            image,
         };
+        if (cacheControl) {
+            out.providerOptions = {
+                unknown: { cache_control: cacheControl },
+            };
+        }
+        return out;
     }
-    return out;
+    return fromFilePart(image, mediaType ?? "image/jpeg", undefined, cacheControl);
 }
 function fromFilePart(base64Data, mediaType, filename, cacheControl) {
+    const data = parseBase64(base64Data);
     if (mediaType.startsWith("image/")) {
         const out = {
             type: "image",
-            image: z.util.base64ToUint8Array(base64Data),
+            image: data,
             mediaType,
         };
         if (cacheControl) {
@@ -248,7 +244,7 @@ function fromFilePart(base64Data, mediaType, filename, cacheControl) {
     }
     const out = {
         type: "file",
-        data: z.util.base64ToUint8Array(base64Data),
+        data: data,
         filename,
         mediaType,
     };
@@ -280,7 +276,9 @@ export const convertToToolChoiceOptions = (toolChoice) => {
     if (toolChoice === "none" || toolChoice === "auto" || toolChoice === "required") {
         return { toolChoice };
     }
-    // FUTURE: this is right now google specific, which is not supported by AI SDK, until then, we temporarily map it to auto for now https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
+    // FUTURE: this is right now google specific, which is not supported by AI SDK, until then,
+    // we temporarily map it to auto for now
+    // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
     if (toolChoice === "validated") {
         return { toolChoice: "auto" };
     }
@@ -309,59 +307,6 @@ function parseToolResult(content) {
     }
     return parseJsonOrText(content);
 }
-function parseJsonOrText(content) {
-    try {
-        // oxlint-disable-next-line no-unsafe-assignment
-        return { type: "json", value: JSON.parse(content) };
-    }
-    catch {
-        return { type: "text", value: content };
-    }
-}
-function parseReasoningOptions(reasoning_effort, reasoning) {
-    const effort = reasoning?.effort ?? reasoning_effort;
-    const max_tokens = reasoning?.max_tokens;
-    if (reasoning?.enabled === false || effort === "none") {
-        return { reasoning: { enabled: false }, reasoning_effort: "none" };
-    }
-    if (!reasoning && effort === undefined)
-        return {};
-    const out = { reasoning: {} };
-    if (effort) {
-        out.reasoning.enabled = true;
-        out.reasoning.effort = effort;
-        out.reasoning_effort = effort;
-    }
-    if (max_tokens) {
-        out.reasoning.enabled = true;
-        out.reasoning.max_tokens = max_tokens;
-    }
-    if (out.reasoning.enabled) {
-        out.reasoning.exclude = reasoning?.exclude;
-    }
-    return out;
-}
-function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
-    const out = {};
-    let retention = prompt_cache_retention;
-    if (!retention && cache_control?.ttl) {
-        retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
-    }
-    let control = cache_control;
-    if (!control && retention) {
-        control = {
-            type: "ephemeral",
-            ttl: retention === "24h" ? "24h" : "5m",
-        };
-    }
-    if (prompt_cache_key)
-        out["prompt_cache_key"] = prompt_cache_key;
-    if (retention)
-        out["prompt_cache_retention"] = retention;
-    if (control)
-        out["cache_control"] = control;
-    return out;
-}
 // --- Response Flow ---
 export function toChatCompletions(result, model) {
     return {
@@ -421,6 +366,8 @@ export class ChatCompletionsTransformStream extends TransformStream {
         };
         super({
             transform(part, controller) {
+                // Omit lifecycle (start/end) and intermediate events; /chat/completions
+                // is a stateless stream of deltas. Tool calls are emitted once fully-formed.
                 // oxlint-disable-next-line switch-exhaustiveness-check
                 switch (part.type) {
                     case "text-delta": {
@@ -471,47 +418,6 @@ export class ChatCompletionsTransformStream extends TransformStream {
         });
     }
 }
-function resolveResponseServiceTier(providerMetadata) {
-    if (!providerMetadata)
-        return;
-    for (const metadata of Object.values(providerMetadata)) {
-        const tier = parseReturnedServiceTier(metadata["service_tier"] ??
-            metadata["usage_metadata"]?.["traffic_type"]);
-        if (tier)
-            return tier;
-    }
-}
-function parseReturnedServiceTier(value) {
-    if (typeof value !== "string")
-        return undefined;
-    const n = value.toLowerCase();
-    switch (n) {
-        case "traffic_type_unspecified":
-        case "auto":
-            return "auto";
-        case "default":
-        case "on_demand":
-        case "on-demand":
-        case "shared":
-            return "default";
-        case "on_demand_flex":
-        case "flex":
-            return "flex";
-        case "on_demand_priority":
-        case "priority":
-        case "performance":
-            return "priority";
-        case "provisioned_throughput":
-        case "scale":
-        case "reserved":
-        case "dedicated":
-        case "provisioned":
-        case "throughput":
-            return "scale";
-        default:
-            return undefined;
-    }
-}
 export const toChatCompletionsAssistantMessage = (result) => {
     const message = {
         role: "assistant",
@@ -550,19 +456,7 @@ export const toChatCompletionsAssistantMessage = (result) => {
     return message;
 };
 export function toReasoningDetail(reasoning, id, index) {
-    const providerMetadata = reasoning.providerMetadata ?? {};
-    let redactedData;
-    let signature;
-    for (const metadata of Object.values(providerMetadata)) {
-        if (metadata && typeof metadata === "object") {
-            if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
-                redactedData = metadata["redactedData"];
-            }
-            if ("signature" in metadata && typeof metadata["signature"] === "string") {
-                signature = metadata["signature"];
-            }
-        }
-    }
+    const { redactedData, signature } = extractReasoningMetadata(reasoning.providerMetadata);
     if (redactedData) {
         return {
             id,
@@ -622,37 +516,6 @@ export function toChatCompletionsToolCall(id, name, args, providerMetadata) {
     }
     return out;
 }
-function normalizeToolName(name) {
-    // some models hallucinate invalid characters
-    // normalize to valid characters [^A-Za-z0-9_-.] (non regex for perf)
-    // https://modelcontextprotocol.io/specification/draft/server/tools#tool-names
-    let out = "";
-    for (let i = 0; i < name.length; i++) {
-        if (out.length === 128)
-            break;
-        // oxlint-disable-next-line unicorn/prefer-code-point
-        const c = name.charCodeAt(i);
-        if ((c >= 48 && c <= 57) ||
-            (c >= 65 && c <= 90) ||
-            (c >= 97 && c <= 122) ||
-            c === 95 ||
-            c === 45 ||
-            c === 46) {
-            out += name[i];
-        }
-        else {
-            out += "_";
-        }
-    }
-    return out;
-}
-function stripEmptyKeys(obj) {
-    if (!obj || typeof obj !== "object" || Array.isArray(obj))
-        return obj;
-    // some models hallucinate empty parameters
-    delete obj[""];
-    return obj;
-}
 export const toChatCompletionsFinishReason = (finishReason) => {
     if (finishReason === "error" || finishReason === "other") {
         return "stop";

package/dist/endpoints/chat-completions/handler.js CHANGED Viewed

@@ -10,12 +10,13 @@ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
 import { prepareForwardHeaders } from "../../utils/request";
 import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
 import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
-import { ChatCompletionsBodySchema } from "./schema";
+import { ChatCompletionsBodySchema, } from "./schema";
 export const chatCompletions = (config) => {
     const hooks = config.hooks;
     const handler = async (ctx, cfg) => {
         const start = performance.now();
         ctx.operation = "chat";
+        setSpanAttributes({ "gen_ai.operation.name": ctx.operation });
         addSpanEvent("hebo.handler.started");
         // Guard: enforce HTTP method early.
         if (!ctx.request || ctx.request.method !== "POST") {
@@ -67,7 +68,6 @@ export const chatCompletions = (config) => {
         setSpanAttributes(genAiGeneralAttrs);
         // Convert inputs to AI SDK call options.
         const { model: _model, stream, ...inputs } = ctx.body;
-        // oxlint-disable-next-line no-unsafe-argument
         const textOptions = convertToTextCallOptions(inputs);
         logger.trace({
             requestId: ctx.requestId,