@hebo-ai/gateway 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -40
- package/dist/endpoints/chat-completions/converters.d.ts +4 -21
- package/dist/endpoints/chat-completions/converters.js +23 -160
- package/dist/endpoints/chat-completions/handler.js +2 -2
- package/dist/endpoints/chat-completions/otel.js +3 -1
- package/dist/endpoints/chat-completions/schema.d.ts +45 -101
- package/dist/endpoints/chat-completions/schema.js +13 -69
- package/dist/endpoints/conversations/converters.js +2 -3
- package/dist/endpoints/conversations/schema.d.ts +506 -644
- package/dist/endpoints/conversations/schema.js +8 -159
- package/dist/endpoints/conversations/storage/dialects/greptime.js +4 -2
- package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -1
- package/dist/endpoints/conversations/storage/dialects/postgres.js +6 -3
- package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -1
- package/dist/endpoints/conversations/storage/sql.js +11 -6
- package/dist/endpoints/embeddings/handler.js +1 -1
- package/dist/endpoints/responses/converters.d.ts +17 -0
- package/dist/endpoints/responses/converters.js +1034 -0
- package/dist/endpoints/responses/handler.d.ts +2 -0
- package/dist/endpoints/responses/handler.js +137 -0
- package/dist/endpoints/responses/index.d.ts +4 -0
- package/dist/endpoints/responses/index.js +4 -0
- package/dist/endpoints/responses/otel.d.ts +6 -0
- package/dist/endpoints/responses/otel.js +221 -0
- package/dist/endpoints/responses/schema.d.ts +2109 -0
- package/dist/endpoints/responses/schema.js +314 -0
- package/dist/endpoints/shared/converters.d.ts +55 -0
- package/dist/endpoints/shared/converters.js +179 -0
- package/dist/endpoints/shared/schema.d.ts +70 -0
- package/dist/endpoints/shared/schema.js +46 -0
- package/dist/gateway.d.ts +1 -0
- package/dist/gateway.js +2 -0
- package/dist/index.d.ts +0 -4
- package/dist/index.js +0 -4
- package/dist/lifecycle.js +46 -29
- package/dist/models/anthropic/middleware.d.ts +1 -1
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/providers/registry.d.ts +1 -1
- package/dist/types.d.ts +18 -6
- package/dist/utils/preset.js +0 -1
- package/package.json +5 -1
package/README.md
CHANGED
|
@@ -13,6 +13,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
|
|
|
13
13
|
## 🍌 Features
|
|
14
14
|
|
|
15
15
|
- 🌐 OpenAI-compatible /chat/completions, /embeddings & /models endpoints.
|
|
16
|
+
- 🔄 /responses endpoint implementing the Open Responses API (stateless).
|
|
16
17
|
- 💬 /conversations endpoint built on top of the Responses API.
|
|
17
18
|
- 🔌 Integrate into your existing Hono, Elysia, Next.js & TanStack apps.
|
|
18
19
|
- 🧩 Provider registry compatible with Vercel AI SDK providers.
|
|
@@ -38,6 +39,8 @@ bun install @hebo-ai/gateway
|
|
|
38
39
|
- [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
|
|
39
40
|
- Runtime Support
|
|
40
41
|
- [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
|
|
42
|
+
- Endpoints
|
|
43
|
+
- [/chat/completions](#chatcompletions) | [/embeddings](#embeddings) | [/models](#models) | [/responses](#responses) | [/conversations](#conversations)
|
|
41
44
|
- OpenAI Extensions
|
|
42
45
|
- [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching)
|
|
43
46
|
- Advanced Usage
|
|
@@ -147,26 +150,31 @@ import { withCanonicalIdsForGroq } from "@hebo-ai/gateway/providers/groq";
|
|
|
147
150
|
|
|
148
151
|
If an adapter is not yet provided, you can create your own by wrapping the provider instance with the `withCanonicalIds` helper and defining your custom canonicalization mapping & rules.
|
|
149
152
|
|
|
153
|
+
For Azure, use `createAzure` from `@ai-sdk/azure` directly. Name each [Azure AI Foundry](https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/endpoints) deployment after its Hebo canonical ID (e.g. `anthropic/claude-sonnet-4.5`).
|
|
154
|
+
|
|
155
|
+
For other providers, use `withCanonicalIds` with an explicit `mapping`:
|
|
156
|
+
|
|
150
157
|
```ts
|
|
151
|
-
import {
|
|
158
|
+
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
152
159
|
import { gateway, withCanonicalIds } from "@hebo-ai/gateway";
|
|
153
160
|
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
161
|
+
const myProvider = withCanonicalIds(
|
|
162
|
+
createOpenAICompatible({
|
|
163
|
+
name: "my-provider",
|
|
164
|
+
baseURL: "https://api.my-provider.com/v1",
|
|
165
|
+
apiKey: process.env["MY_PROVIDER_API_KEY"],
|
|
158
166
|
}),
|
|
159
167
|
{
|
|
160
168
|
mapping: {
|
|
161
|
-
"openai/gpt-4.1-mini": "
|
|
162
|
-
"
|
|
169
|
+
"openai/gpt-4.1-mini": "gpt-4.1-mini-custom",
|
|
170
|
+
"anthropic/claude-sonnet-4.5": "claude-sonnet-4-5",
|
|
163
171
|
},
|
|
164
172
|
},
|
|
165
173
|
);
|
|
166
174
|
|
|
167
175
|
const gw = gateway({
|
|
168
176
|
providers: {
|
|
169
|
-
|
|
177
|
+
myProvider,
|
|
170
178
|
},
|
|
171
179
|
models: {
|
|
172
180
|
// ...your models pointing at canonical IDs above
|
|
@@ -361,6 +369,17 @@ const gw = gateway({
|
|
|
361
369
|
// - Replace or redact response payload
|
|
362
370
|
return undefined;
|
|
363
371
|
},
|
|
372
|
+
/**
|
|
373
|
+
* Runs when the lifecycle catches an error.
|
|
374
|
+
* @param ctx.error The thrown error.
|
|
375
|
+
* @returns Replacement error response, or undefined to use the default OpenAI-compatible error response.
|
|
376
|
+
*/
|
|
377
|
+
onError: async (ctx: { error: unknown }): Promise<Response | void> => {
|
|
378
|
+
// Example Use Cases:
|
|
379
|
+
// - Map internal errors to custom API responses
|
|
380
|
+
// - Add app-specific logging or alerting
|
|
381
|
+
return undefined;
|
|
382
|
+
},
|
|
364
383
|
},
|
|
365
384
|
});
|
|
366
385
|
```
|
|
@@ -563,6 +582,123 @@ export const handler = awsLambdaEventHandler({
|
|
|
563
582
|
});
|
|
564
583
|
```
|
|
565
584
|
|
|
585
|
+
## 🚀 Endpoints
|
|
586
|
+
|
|
587
|
+
Hebo Gateway provides several OpenAI-compatible and standards-based endpoints.
|
|
588
|
+
|
|
589
|
+
### `/chat/completions`
|
|
590
|
+
|
|
591
|
+
The primary endpoint for generating chat completions.
|
|
592
|
+
|
|
593
|
+
Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/chat/subresources/completions/methods/create)
|
|
594
|
+
|
|
595
|
+
It supports:
|
|
596
|
+
|
|
597
|
+
- Streaming responses (Server-Sent Events).
|
|
598
|
+
- Tool calling / Function calling.
|
|
599
|
+
- Advanced extensions like [Reasoning](#reasoning), [Service Tier](#service-tier), and [Prompt Caching](#prompt-caching).
|
|
600
|
+
- Usage tracking and metadata.
|
|
601
|
+
|
|
602
|
+
> [!IMPORTANT]
|
|
603
|
+
> **Compatibility & Roadmap:**
|
|
604
|
+
> We are actively working to expand support for the full OpenAI spec:
|
|
605
|
+
|
|
606
|
+
- **`logprobs` / `top_logprobs`**: Token-level logprobs.
|
|
607
|
+
- **`logit_bias`**: Logit bias in the request body.
|
|
608
|
+
- **`n` > 1**: Multi-choice completions.
|
|
609
|
+
|
|
610
|
+
### `/embeddings`
|
|
611
|
+
|
|
612
|
+
Generates vector representations for text inputs, compatible with OpenAI's embeddings API.
|
|
613
|
+
|
|
614
|
+
Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/embeddings/methods/create)
|
|
615
|
+
|
|
616
|
+
It supports:
|
|
617
|
+
|
|
618
|
+
- Text and token array inputs.
|
|
619
|
+
- Custom dimensions (for `v3` models).
|
|
620
|
+
- Standard `float` and `base64` encoding formats.
|
|
621
|
+
|
|
622
|
+
> [!IMPORTANT]
|
|
623
|
+
> **Compatibility & Roadmap:**
|
|
624
|
+
|
|
625
|
+
- **`encoding_format`**: `base64` results.
|
|
626
|
+
|
|
627
|
+
### `/models`
|
|
628
|
+
|
|
629
|
+
Lists all available models in your [Model Catalog](#models), including their capabilities and metadata.
|
|
630
|
+
|
|
631
|
+
Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/models/methods/list)
|
|
632
|
+
|
|
633
|
+
It supports:
|
|
634
|
+
|
|
635
|
+
- Comprehensive model metadata (capabilities, context limits, knowledge cutoffs).
|
|
636
|
+
- Canonical model ID resolution.
|
|
637
|
+
- Provider-specific availability filtering.
|
|
638
|
+
|
|
639
|
+
### `/responses`
|
|
640
|
+
|
|
641
|
+
Hebo Gateway provides a `/responses` endpoint implementing the [Open Responses API](https://www.openresponses.org/reference).
|
|
642
|
+
|
|
643
|
+
Official documentation: [Open Responses API Reference](https://www.openresponses.org/reference)
|
|
644
|
+
|
|
645
|
+
It supports:
|
|
646
|
+
|
|
647
|
+
- The same models, providers, hooks, and extensions as `/chat/completions`.
|
|
648
|
+
- Responses API request/response format.
|
|
649
|
+
- Tool calling and multimodal inputs.
|
|
650
|
+
- Normalized reasoning and thought signatures.
|
|
651
|
+
|
|
652
|
+
> [!IMPORTANT]
|
|
653
|
+
> **Compatibility & Roadmap:**
|
|
654
|
+
> We are working towards full Open Responses parity:
|
|
655
|
+
|
|
656
|
+
- **Persistence**: Server-side response storage (`store`), background orchestration (`background`), and chaining via `previous_response_id`.
|
|
657
|
+
- **`conversation`**: Directly passing conversation IDs for automatic context management.
|
|
658
|
+
- **`context_management`**: Support for automatic compaction strategies.
|
|
659
|
+
- **`prompt`**: Reusable prompt templates with variables.
|
|
660
|
+
- **`phase`**: Support for `commentary` vs `final_answer` reasoning phases.
|
|
661
|
+
- **`safety_identifier`**: Custom safety and moderation policies.
|
|
662
|
+
- **`truncation`**: Context window management strategies.
|
|
663
|
+
- **`text.verbosity`**: Control over response detail (low/medium/high).
|
|
664
|
+
- **`logprobs` / `top_logprobs`**: Token-level logprobs.
|
|
665
|
+
- **`include`**: Selective response fields (e.g., `logprobs`, `reasoning.encrypted_content`, and tool-specific outputs).
|
|
666
|
+
- **`stream_options.include_obfuscation`**: Normalizing payload sizes to mitigate side-channel attacks.
|
|
667
|
+
|
|
668
|
+
### `/conversations`
|
|
669
|
+
|
|
670
|
+
Hebo Gateway provides a dedicated `/conversations` endpoint for managing persistent conversation state. It is designed as an extension of the [OpenAI Conversations API](https://developers.openai.com/api/reference/resources/conversations/methods/create) and supports standard CRUD operations alongside advanced listing with metadata filtering.
|
|
671
|
+
|
|
672
|
+
Official documentation: [OpenAI Conversations API](https://developers.openai.com/api/reference/resources/conversations/methods/create)
|
|
673
|
+
|
|
674
|
+
#### List & Filter Conversations (Hebo Extension)
|
|
675
|
+
|
|
676
|
+
Since standard OpenAI APIs (like Threads) do not support global listing of conversations, Hebo Gateway provides this capability as an extension. You can list all conversations using cursor-based pagination and filter by any metadata key using the `metadata.KEY=VALUE` pattern.
|
|
677
|
+
|
|
678
|
+
```bash
|
|
679
|
+
# List conversations for a specific user using metadata filtering
|
|
680
|
+
curl "https://api.gateway.com/conversations?limit=10&metadata.user_id=123"
|
|
681
|
+
```
|
|
682
|
+
|
|
683
|
+
The response follows the standard OpenAI list object:
|
|
684
|
+
|
|
685
|
+
```json
|
|
686
|
+
{
|
|
687
|
+
"object": "list",
|
|
688
|
+
"data": [
|
|
689
|
+
{
|
|
690
|
+
"id": "conv_abc123",
|
|
691
|
+
"object": "conversation",
|
|
692
|
+
"created_at": 1678531200,
|
|
693
|
+
"metadata": { "user_id": "123" }
|
|
694
|
+
}
|
|
695
|
+
],
|
|
696
|
+
"first_id": "conv_abc123",
|
|
697
|
+
"last_id": "conv_abc123",
|
|
698
|
+
"has_more": false
|
|
699
|
+
}
|
|
700
|
+
```
|
|
701
|
+
|
|
566
702
|
## 🧠 OpenAI Extensions
|
|
567
703
|
|
|
568
704
|
### Reasoning
|
|
@@ -625,38 +761,6 @@ Provider-specific mapping:
|
|
|
625
761
|
|
|
626
762
|
When available, the resolved value is echoed back on response as `service_tier`.
|
|
627
763
|
|
|
628
|
-
### Conversations
|
|
629
|
-
|
|
630
|
-
Hebo Gateway provides a dedicated `/conversations` endpoint for managing persistent conversation state. It is designed as an extension of the [OpenAI Conversations API](https://developers.openai.com/api/reference/typescript/resources/conversations) and supports standard CRUD operations alongside advanced listing with metadata filtering.
|
|
631
|
-
|
|
632
|
-
#### List & Filter Conversations
|
|
633
|
-
|
|
634
|
-
You can list conversations with standard cursor-based pagination and filter by any metadata key using the `metadata.KEY=VALUE` pattern.
|
|
635
|
-
|
|
636
|
-
```bash
|
|
637
|
-
# List conversations for a specific user
|
|
638
|
-
curl "https://api.gateway.com/conversations?limit=10&metadata.user_id=123"
|
|
639
|
-
```
|
|
640
|
-
|
|
641
|
-
The response follows the standard OpenAI list object:
|
|
642
|
-
|
|
643
|
-
```json
|
|
644
|
-
{
|
|
645
|
-
"object": "list",
|
|
646
|
-
"data": [
|
|
647
|
-
{
|
|
648
|
-
"id": "conv_abc123",
|
|
649
|
-
"object": "conversation",
|
|
650
|
-
"created_at": 1678531200,
|
|
651
|
-
"metadata": { "user_id": "123" }
|
|
652
|
-
}
|
|
653
|
-
],
|
|
654
|
-
"first_id": "conv_abc123",
|
|
655
|
-
"last_id": "conv_abc123",
|
|
656
|
-
"has_more": false
|
|
657
|
-
}
|
|
658
|
-
```
|
|
659
|
-
|
|
660
764
|
### Prompt Caching
|
|
661
765
|
|
|
662
766
|
The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.
|
|
@@ -1,23 +1,9 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { GenerateTextResult, StreamTextResult, FinishReason,
|
|
1
|
+
import type { SharedV3ProviderMetadata } from "@ai-sdk/provider";
|
|
2
|
+
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
3
|
import { Output } from "ai";
|
|
4
4
|
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
5
5
|
import type { SseErrorFrame, SseFrame } from "../../utils/stream";
|
|
6
|
-
|
|
7
|
-
messages: ModelMessage[];
|
|
8
|
-
tools?: ToolSet;
|
|
9
|
-
toolChoice?: ToolChoice<ToolSet>;
|
|
10
|
-
activeTools?: Array<keyof ToolSet>;
|
|
11
|
-
output?: Output.Output;
|
|
12
|
-
temperature?: number;
|
|
13
|
-
maxOutputTokens?: number;
|
|
14
|
-
frequencyPenalty?: number;
|
|
15
|
-
presencePenalty?: number;
|
|
16
|
-
seed?: number;
|
|
17
|
-
stopSequences?: string[];
|
|
18
|
-
topP?: number;
|
|
19
|
-
providerOptions: SharedV3ProviderOptions;
|
|
20
|
-
};
|
|
6
|
+
import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
|
|
21
7
|
export declare function convertToTextCallOptions(params: ChatCompletionsInputs): TextCallOptions;
|
|
22
8
|
export declare function convertToModelMessages(messages: ChatCompletionsMessage[]): ModelMessage[];
|
|
23
9
|
export declare function fromChatCompletionsUserMessage(message: ChatCompletionsUserMessage): UserModelMessage;
|
|
@@ -25,10 +11,7 @@ export declare function fromChatCompletionsAssistantMessage(message: ChatComplet
|
|
|
25
11
|
export declare function fromChatCompletionsToolResultMessage(message: ChatCompletionsAssistantMessage, toolById: Map<string, ChatCompletionsToolMessage>): ToolModelMessage | undefined;
|
|
26
12
|
export declare function fromChatCompletionsContent(content: ChatCompletionsContentPart[]): UserContent;
|
|
27
13
|
export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined) => ToolSet | undefined;
|
|
28
|
-
export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToolChoice | undefined) =>
|
|
29
|
-
toolChoice?: ToolChoice<ToolSet>;
|
|
30
|
-
activeTools?: Array<keyof ToolSet>;
|
|
31
|
-
};
|
|
14
|
+
export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoiceOptions;
|
|
32
15
|
export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
|
|
33
16
|
export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
34
17
|
export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ChatCompletionsStream;
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
import { Output, jsonSchema, tool } from "ai";
|
|
2
|
-
import { z } from "zod";
|
|
3
|
-
import { GatewayError } from "../../errors/gateway";
|
|
4
2
|
import { toResponse } from "../../utils/response";
|
|
5
|
-
import {
|
|
3
|
+
import { parseJsonOrText, parseReasoningOptions, parsePromptCachingOptions, resolveResponseServiceTier, normalizeToolName, stripEmptyKeys, parseBase64, parseImageInput, extractReasoningMetadata, } from "../shared/converters";
|
|
6
4
|
// --- Request Flow ---
|
|
7
5
|
export function convertToTextCallOptions(params) {
|
|
8
6
|
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
@@ -165,7 +163,7 @@ export function fromChatCompletionsAssistantMessage(message) {
|
|
|
165
163
|
out.providerOptions = extra_content;
|
|
166
164
|
}
|
|
167
165
|
if (cache_control) {
|
|
168
|
-
(
|
|
166
|
+
(out.providerOptions ??= {})["unknown"] = { cache_control };
|
|
169
167
|
}
|
|
170
168
|
return out;
|
|
171
169
|
}
|
|
@@ -214,29 +212,27 @@ export function fromChatCompletionsContent(content) {
|
|
|
214
212
|
});
|
|
215
213
|
}
|
|
216
214
|
function fromImageUrlPart(url, cacheControl) {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
return fromFilePart(url.slice(dataStart), mimeType, undefined, cacheControl);
|
|
223
|
-
}
|
|
224
|
-
const out = {
|
|
225
|
-
type: "image",
|
|
226
|
-
image: new URL(url),
|
|
227
|
-
};
|
|
228
|
-
if (cacheControl) {
|
|
229
|
-
out.providerOptions = {
|
|
230
|
-
unknown: { cache_control: cacheControl },
|
|
215
|
+
const { image, mediaType } = parseImageInput(url);
|
|
216
|
+
if (image instanceof URL) {
|
|
217
|
+
const out = {
|
|
218
|
+
type: "image",
|
|
219
|
+
image,
|
|
231
220
|
};
|
|
221
|
+
if (cacheControl) {
|
|
222
|
+
out.providerOptions = {
|
|
223
|
+
unknown: { cache_control: cacheControl },
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
return out;
|
|
232
227
|
}
|
|
233
|
-
return
|
|
228
|
+
return fromFilePart(image, mediaType ?? "image/jpeg", undefined, cacheControl);
|
|
234
229
|
}
|
|
235
230
|
function fromFilePart(base64Data, mediaType, filename, cacheControl) {
|
|
231
|
+
const data = parseBase64(base64Data);
|
|
236
232
|
if (mediaType.startsWith("image/")) {
|
|
237
233
|
const out = {
|
|
238
234
|
type: "image",
|
|
239
|
-
image:
|
|
235
|
+
image: data,
|
|
240
236
|
mediaType,
|
|
241
237
|
};
|
|
242
238
|
if (cacheControl) {
|
|
@@ -248,7 +244,7 @@ function fromFilePart(base64Data, mediaType, filename, cacheControl) {
|
|
|
248
244
|
}
|
|
249
245
|
const out = {
|
|
250
246
|
type: "file",
|
|
251
|
-
data:
|
|
247
|
+
data: data,
|
|
252
248
|
filename,
|
|
253
249
|
mediaType,
|
|
254
250
|
};
|
|
@@ -280,7 +276,9 @@ export const convertToToolChoiceOptions = (toolChoice) => {
|
|
|
280
276
|
if (toolChoice === "none" || toolChoice === "auto" || toolChoice === "required") {
|
|
281
277
|
return { toolChoice };
|
|
282
278
|
}
|
|
283
|
-
// FUTURE: this is right now google specific, which is not supported by AI SDK, until then,
|
|
279
|
+
// FUTURE: this is right now google specific, which is not supported by AI SDK, until then,
|
|
280
|
+
// we temporarily map it to auto for now
|
|
281
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
284
282
|
if (toolChoice === "validated") {
|
|
285
283
|
return { toolChoice: "auto" };
|
|
286
284
|
}
|
|
@@ -309,59 +307,6 @@ function parseToolResult(content) {
|
|
|
309
307
|
}
|
|
310
308
|
return parseJsonOrText(content);
|
|
311
309
|
}
|
|
312
|
-
function parseJsonOrText(content) {
|
|
313
|
-
try {
|
|
314
|
-
// oxlint-disable-next-line no-unsafe-assignment
|
|
315
|
-
return { type: "json", value: JSON.parse(content) };
|
|
316
|
-
}
|
|
317
|
-
catch {
|
|
318
|
-
return { type: "text", value: content };
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
function parseReasoningOptions(reasoning_effort, reasoning) {
|
|
322
|
-
const effort = reasoning?.effort ?? reasoning_effort;
|
|
323
|
-
const max_tokens = reasoning?.max_tokens;
|
|
324
|
-
if (reasoning?.enabled === false || effort === "none") {
|
|
325
|
-
return { reasoning: { enabled: false }, reasoning_effort: "none" };
|
|
326
|
-
}
|
|
327
|
-
if (!reasoning && effort === undefined)
|
|
328
|
-
return {};
|
|
329
|
-
const out = { reasoning: {} };
|
|
330
|
-
if (effort) {
|
|
331
|
-
out.reasoning.enabled = true;
|
|
332
|
-
out.reasoning.effort = effort;
|
|
333
|
-
out.reasoning_effort = effort;
|
|
334
|
-
}
|
|
335
|
-
if (max_tokens) {
|
|
336
|
-
out.reasoning.enabled = true;
|
|
337
|
-
out.reasoning.max_tokens = max_tokens;
|
|
338
|
-
}
|
|
339
|
-
if (out.reasoning.enabled) {
|
|
340
|
-
out.reasoning.exclude = reasoning?.exclude;
|
|
341
|
-
}
|
|
342
|
-
return out;
|
|
343
|
-
}
|
|
344
|
-
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
|
|
345
|
-
const out = {};
|
|
346
|
-
let retention = prompt_cache_retention;
|
|
347
|
-
if (!retention && cache_control?.ttl) {
|
|
348
|
-
retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
|
|
349
|
-
}
|
|
350
|
-
let control = cache_control;
|
|
351
|
-
if (!control && retention) {
|
|
352
|
-
control = {
|
|
353
|
-
type: "ephemeral",
|
|
354
|
-
ttl: retention === "24h" ? "24h" : "5m",
|
|
355
|
-
};
|
|
356
|
-
}
|
|
357
|
-
if (prompt_cache_key)
|
|
358
|
-
out["prompt_cache_key"] = prompt_cache_key;
|
|
359
|
-
if (retention)
|
|
360
|
-
out["prompt_cache_retention"] = retention;
|
|
361
|
-
if (control)
|
|
362
|
-
out["cache_control"] = control;
|
|
363
|
-
return out;
|
|
364
|
-
}
|
|
365
310
|
// --- Response Flow ---
|
|
366
311
|
export function toChatCompletions(result, model) {
|
|
367
312
|
return {
|
|
@@ -421,6 +366,8 @@ export class ChatCompletionsTransformStream extends TransformStream {
|
|
|
421
366
|
};
|
|
422
367
|
super({
|
|
423
368
|
transform(part, controller) {
|
|
369
|
+
// Omit lifecycle (start/end) and intermediate events; /chat/completions
|
|
370
|
+
// is a stateless stream of deltas. Tool calls are emitted once fully-formed.
|
|
424
371
|
// oxlint-disable-next-line switch-exhaustiveness-check
|
|
425
372
|
switch (part.type) {
|
|
426
373
|
case "text-delta": {
|
|
@@ -471,47 +418,6 @@ export class ChatCompletionsTransformStream extends TransformStream {
|
|
|
471
418
|
});
|
|
472
419
|
}
|
|
473
420
|
}
|
|
474
|
-
function resolveResponseServiceTier(providerMetadata) {
|
|
475
|
-
if (!providerMetadata)
|
|
476
|
-
return;
|
|
477
|
-
for (const metadata of Object.values(providerMetadata)) {
|
|
478
|
-
const tier = parseReturnedServiceTier(metadata["service_tier"] ??
|
|
479
|
-
metadata["usage_metadata"]?.["traffic_type"]);
|
|
480
|
-
if (tier)
|
|
481
|
-
return tier;
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
function parseReturnedServiceTier(value) {
|
|
485
|
-
if (typeof value !== "string")
|
|
486
|
-
return undefined;
|
|
487
|
-
const n = value.toLowerCase();
|
|
488
|
-
switch (n) {
|
|
489
|
-
case "traffic_type_unspecified":
|
|
490
|
-
case "auto":
|
|
491
|
-
return "auto";
|
|
492
|
-
case "default":
|
|
493
|
-
case "on_demand":
|
|
494
|
-
case "on-demand":
|
|
495
|
-
case "shared":
|
|
496
|
-
return "default";
|
|
497
|
-
case "on_demand_flex":
|
|
498
|
-
case "flex":
|
|
499
|
-
return "flex";
|
|
500
|
-
case "on_demand_priority":
|
|
501
|
-
case "priority":
|
|
502
|
-
case "performance":
|
|
503
|
-
return "priority";
|
|
504
|
-
case "provisioned_throughput":
|
|
505
|
-
case "scale":
|
|
506
|
-
case "reserved":
|
|
507
|
-
case "dedicated":
|
|
508
|
-
case "provisioned":
|
|
509
|
-
case "throughput":
|
|
510
|
-
return "scale";
|
|
511
|
-
default:
|
|
512
|
-
return undefined;
|
|
513
|
-
}
|
|
514
|
-
}
|
|
515
421
|
export const toChatCompletionsAssistantMessage = (result) => {
|
|
516
422
|
const message = {
|
|
517
423
|
role: "assistant",
|
|
@@ -550,19 +456,7 @@ export const toChatCompletionsAssistantMessage = (result) => {
|
|
|
550
456
|
return message;
|
|
551
457
|
};
|
|
552
458
|
export function toReasoningDetail(reasoning, id, index) {
|
|
553
|
-
const
|
|
554
|
-
let redactedData;
|
|
555
|
-
let signature;
|
|
556
|
-
for (const metadata of Object.values(providerMetadata)) {
|
|
557
|
-
if (metadata && typeof metadata === "object") {
|
|
558
|
-
if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
|
|
559
|
-
redactedData = metadata["redactedData"];
|
|
560
|
-
}
|
|
561
|
-
if ("signature" in metadata && typeof metadata["signature"] === "string") {
|
|
562
|
-
signature = metadata["signature"];
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
}
|
|
459
|
+
const { redactedData, signature } = extractReasoningMetadata(reasoning.providerMetadata);
|
|
566
460
|
if (redactedData) {
|
|
567
461
|
return {
|
|
568
462
|
id,
|
|
@@ -622,37 +516,6 @@ export function toChatCompletionsToolCall(id, name, args, providerMetadata) {
|
|
|
622
516
|
}
|
|
623
517
|
return out;
|
|
624
518
|
}
|
|
625
|
-
function normalizeToolName(name) {
|
|
626
|
-
// some models hallucinate invalid characters
|
|
627
|
-
// normalize to valid characters [^A-Za-z0-9_-.] (non regex for perf)
|
|
628
|
-
// https://modelcontextprotocol.io/specification/draft/server/tools#tool-names
|
|
629
|
-
let out = "";
|
|
630
|
-
for (let i = 0; i < name.length; i++) {
|
|
631
|
-
if (out.length === 128)
|
|
632
|
-
break;
|
|
633
|
-
// oxlint-disable-next-line unicorn/prefer-code-point
|
|
634
|
-
const c = name.charCodeAt(i);
|
|
635
|
-
if ((c >= 48 && c <= 57) ||
|
|
636
|
-
(c >= 65 && c <= 90) ||
|
|
637
|
-
(c >= 97 && c <= 122) ||
|
|
638
|
-
c === 95 ||
|
|
639
|
-
c === 45 ||
|
|
640
|
-
c === 46) {
|
|
641
|
-
out += name[i];
|
|
642
|
-
}
|
|
643
|
-
else {
|
|
644
|
-
out += "_";
|
|
645
|
-
}
|
|
646
|
-
}
|
|
647
|
-
return out;
|
|
648
|
-
}
|
|
649
|
-
function stripEmptyKeys(obj) {
|
|
650
|
-
if (!obj || typeof obj !== "object" || Array.isArray(obj))
|
|
651
|
-
return obj;
|
|
652
|
-
// some models hallucinate empty parameters
|
|
653
|
-
delete obj[""];
|
|
654
|
-
return obj;
|
|
655
|
-
}
|
|
656
519
|
export const toChatCompletionsFinishReason = (finishReason) => {
|
|
657
520
|
if (finishReason === "error" || finishReason === "other") {
|
|
658
521
|
return "stop";
|
|
@@ -10,12 +10,13 @@ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
|
10
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
11
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
12
12
|
import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
|
|
13
|
-
import { ChatCompletionsBodySchema } from "./schema";
|
|
13
|
+
import { ChatCompletionsBodySchema, } from "./schema";
|
|
14
14
|
export const chatCompletions = (config) => {
|
|
15
15
|
const hooks = config.hooks;
|
|
16
16
|
const handler = async (ctx, cfg) => {
|
|
17
17
|
const start = performance.now();
|
|
18
18
|
ctx.operation = "chat";
|
|
19
|
+
setSpanAttributes({ "gen_ai.operation.name": ctx.operation });
|
|
19
20
|
addSpanEvent("hebo.handler.started");
|
|
20
21
|
// Guard: enforce HTTP method early.
|
|
21
22
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
@@ -67,7 +68,6 @@ export const chatCompletions = (config) => {
|
|
|
67
68
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
69
|
// Convert inputs to AI SDK call options.
|
|
69
70
|
const { model: _model, stream, ...inputs } = ctx.body;
|
|
70
|
-
// oxlint-disable-next-line no-unsafe-argument
|
|
71
71
|
const textOptions = convertToTextCallOptions(inputs);
|
|
72
72
|
logger.trace({
|
|
73
73
|
requestId: ctx.requestId,
|
|
@@ -108,7 +108,9 @@ export const getChatRequestAttributes = (body, signalLevel) => {
|
|
|
108
108
|
}
|
|
109
109
|
if (signalLevel !== "required") {
|
|
110
110
|
Object.assign(attrs, {
|
|
111
|
-
|
|
111
|
+
"gen_ai.request.reasoning.enabled": body.reasoning?.enabled,
|
|
112
|
+
"gen_ai.request.reasoning.effort": body.reasoning?.effort,
|
|
113
|
+
"gen_ai.request.reasoning.max_tokens": body.reasoning?.max_tokens,
|
|
112
114
|
"gen_ai.request.stream": body.stream,
|
|
113
115
|
"gen_ai.request.service_tier": body.service_tier,
|
|
114
116
|
"gen_ai.request.frequency_penalty": body.frequency_penalty,
|