@hebo-ai/gateway 0.8.2 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -32
- package/dist/endpoints/chat-completions/converters.d.ts +4 -21
- package/dist/endpoints/chat-completions/converters.js +23 -160
- package/dist/endpoints/chat-completions/handler.js +2 -2
- package/dist/endpoints/chat-completions/schema.d.ts +45 -101
- package/dist/endpoints/chat-completions/schema.js +13 -69
- package/dist/endpoints/conversations/converters.js +2 -3
- package/dist/endpoints/conversations/schema.d.ts +506 -644
- package/dist/endpoints/conversations/schema.js +8 -159
- package/dist/endpoints/conversations/storage/dialects/greptime.js +20 -6
- package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -1
- package/dist/endpoints/conversations/storage/dialects/postgres.js +6 -3
- package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -1
- package/dist/endpoints/conversations/storage/sql.js +11 -6
- package/dist/endpoints/embeddings/handler.js +1 -1
- package/dist/endpoints/responses/converters.d.ts +17 -0
- package/dist/endpoints/responses/converters.js +1034 -0
- package/dist/endpoints/responses/handler.d.ts +2 -0
- package/dist/endpoints/responses/handler.js +137 -0
- package/dist/endpoints/responses/index.d.ts +4 -0
- package/dist/endpoints/responses/index.js +4 -0
- package/dist/endpoints/responses/otel.d.ts +6 -0
- package/dist/endpoints/responses/otel.js +221 -0
- package/dist/endpoints/responses/schema.d.ts +2109 -0
- package/dist/endpoints/responses/schema.js +314 -0
- package/dist/endpoints/shared/converters.d.ts +56 -0
- package/dist/endpoints/shared/converters.js +179 -0
- package/dist/endpoints/shared/schema.d.ts +70 -0
- package/dist/endpoints/shared/schema.js +46 -0
- package/dist/gateway.d.ts +1 -0
- package/dist/gateway.js +2 -0
- package/dist/index.d.ts +0 -4
- package/dist/index.js +0 -4
- package/dist/lifecycle.js +46 -29
- package/dist/models/anthropic/middleware.d.ts +1 -1
- package/dist/models/anthropic/presets.js +6 -1
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +9 -3
- package/dist/models/meta/presets.js +12 -2
- package/dist/providers/registry.d.ts +1 -1
- package/dist/types.d.ts +18 -6
- package/dist/utils/env.js +1 -1
- package/dist/utils/preset.js +0 -1
- package/package.json +8 -4
package/README.md
CHANGED
|
@@ -13,6 +13,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
|
|
|
13
13
|
## 🍌 Features
|
|
14
14
|
|
|
15
15
|
- 🌐 OpenAI-compatible /chat/completions, /embeddings & /models endpoints.
|
|
16
|
+
- 🔄 /responses endpoint implementing the Open Responses API (stateless).
|
|
16
17
|
- 💬 /conversations endpoint built on top of the Responses API.
|
|
17
18
|
- 🔌 Integrate into your existing Hono, Elysia, Next.js & TanStack apps.
|
|
18
19
|
- 🧩 Provider registry compatible with Vercel AI SDK providers.
|
|
@@ -38,6 +39,8 @@ bun install @hebo-ai/gateway
|
|
|
38
39
|
- [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
|
|
39
40
|
- Runtime Support
|
|
40
41
|
- [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
|
|
42
|
+
- Endpoints
|
|
43
|
+
- [/chat/completions](#chatcompletions) | [/embeddings](#embeddings) | [/models](#models) | [/responses](#responses) | [/conversations](#conversations)
|
|
41
44
|
- OpenAI Extensions
|
|
42
45
|
- [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching)
|
|
43
46
|
- Advanced Usage
|
|
@@ -366,6 +369,17 @@ const gw = gateway({
|
|
|
366
369
|
// - Replace or redact response payload
|
|
367
370
|
return undefined;
|
|
368
371
|
},
|
|
372
|
+
/**
|
|
373
|
+
* Runs when the lifecycle catches an error.
|
|
374
|
+
* @param ctx.error The thrown error.
|
|
375
|
+
* @returns Replacement error response, or undefined to use the default OpenAI-compatible error response.
|
|
376
|
+
*/
|
|
377
|
+
onError: async (ctx: { error: unknown }): Promise<Response | void> => {
|
|
378
|
+
// Example Use Cases:
|
|
379
|
+
// - Map internal errors to custom API responses
|
|
380
|
+
// - Add app-specific logging or alerting
|
|
381
|
+
return undefined;
|
|
382
|
+
},
|
|
369
383
|
},
|
|
370
384
|
});
|
|
371
385
|
```
|
|
@@ -568,6 +582,123 @@ export const handler = awsLambdaEventHandler({
|
|
|
568
582
|
});
|
|
569
583
|
```
|
|
570
584
|
|
|
585
|
+
## 🚀 Endpoints
|
|
586
|
+
|
|
587
|
+
Hebo Gateway provides several OpenAI-compatible and standards-based endpoints.
|
|
588
|
+
|
|
589
|
+
### `/chat/completions`
|
|
590
|
+
|
|
591
|
+
The primary endpoint for generating chat completions.
|
|
592
|
+
|
|
593
|
+
Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/chat/subresources/completions/methods/create)
|
|
594
|
+
|
|
595
|
+
It supports:
|
|
596
|
+
|
|
597
|
+
- Streaming responses (Server-Sent Events).
|
|
598
|
+
- Tool calling / Function calling.
|
|
599
|
+
- Advanced extensions like [Reasoning](#reasoning), [Service Tier](#service-tier), and [Prompt Caching](#prompt-caching).
|
|
600
|
+
- Usage tracking and metadata.
|
|
601
|
+
|
|
602
|
+
> [!IMPORTANT]
|
|
603
|
+
> **Compatibility & Roadmap:**
|
|
604
|
+
> We are actively working to expand support for the full OpenAI spec:
|
|
605
|
+
|
|
606
|
+
- **`logprobs` / `top_logprobs`**: Token-level logprobs.
|
|
607
|
+
- **`logit_bias`**: Logit bias in the request body.
|
|
608
|
+
- **`n` > 1**: Multi-choice completions.
|
|
609
|
+
|
|
610
|
+
### `/embeddings`
|
|
611
|
+
|
|
612
|
+
Generates vector representations for text inputs, compatible with OpenAI's embeddings API.
|
|
613
|
+
|
|
614
|
+
Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/embeddings/methods/create)
|
|
615
|
+
|
|
616
|
+
It supports:
|
|
617
|
+
|
|
618
|
+
- Text and token array inputs.
|
|
619
|
+
- Custom dimensions (for `v3` models).
|
|
620
|
+
- Standard `float` and `base64` encoding formats.
|
|
621
|
+
|
|
622
|
+
> [!IMPORTANT]
|
|
623
|
+
> **Compatibility & Roadmap:**
|
|
624
|
+
|
|
625
|
+
- **`encoding_format`**: `base64` results.
|
|
626
|
+
|
|
627
|
+
### `/models`
|
|
628
|
+
|
|
629
|
+
Lists all available models in your [Model Catalog](#models), including their capabilities and metadata.
|
|
630
|
+
|
|
631
|
+
Official documentation: [OpenAI API Reference](https://developers.openai.com/api/reference/resources/models/methods/list)
|
|
632
|
+
|
|
633
|
+
It supports:
|
|
634
|
+
|
|
635
|
+
- Comprehensive model metadata (capabilities, context limits, knowledge cutoffs).
|
|
636
|
+
- Canonical model ID resolution.
|
|
637
|
+
- Provider-specific availability filtering.
|
|
638
|
+
|
|
639
|
+
### `/responses`
|
|
640
|
+
|
|
641
|
+
Hebo Gateway provides a `/responses` endpoint implementing the [Open Responses API](https://www.openresponses.org/reference).
|
|
642
|
+
|
|
643
|
+
Official documentation: [Open Responses API Reference](https://www.openresponses.org/reference)
|
|
644
|
+
|
|
645
|
+
It supports:
|
|
646
|
+
|
|
647
|
+
- The same models, providers, hooks, and extensions as `/chat/completions`.
|
|
648
|
+
- Responses API request/response format.
|
|
649
|
+
- Tool calling and multimodal inputs.
|
|
650
|
+
- Normalized reasoning and thought signatures.
|
|
651
|
+
|
|
652
|
+
> [!IMPORTANT]
|
|
653
|
+
> **Compatibility & Roadmap:**
|
|
654
|
+
> We are working towards full Open Responses parity:
|
|
655
|
+
|
|
656
|
+
- **Persistence**: Server-side response storage (`store`), background orchestration (`background`), and chaining via `previous_response_id`.
|
|
657
|
+
- **`conversation`**: Directly passing conversation IDs for automatic context management.
|
|
658
|
+
- **`context_management`**: Support for automatic compaction strategies.
|
|
659
|
+
- **`prompt`**: Reusable prompt templates with variables.
|
|
660
|
+
- **`phase`**: Support for `commentary` vs `final_answer` reasoning phases.
|
|
661
|
+
- **`safety_identifier`**: Custom safety and moderation policies.
|
|
662
|
+
- **`truncation`**: Context window management strategies.
|
|
663
|
+
- **`text.verbosity`**: Control over response detail (low/medium/high).
|
|
664
|
+
- **`logprobs` / `top_logprobs`**: Token-level logprobs.
|
|
665
|
+
- **`include`**: Selective response fields (e.g., `logprobs`, `reasoning.encrypted_content`, and tool-specific outputs).
|
|
666
|
+
- **`stream_options.include_obfuscation`**: Normalizing payload sizes to mitigate side-channel attacks.
|
|
667
|
+
|
|
668
|
+
### `/conversations`
|
|
669
|
+
|
|
670
|
+
Hebo Gateway provides a dedicated `/conversations` endpoint for managing persistent conversation state. It is designed as an extension of the [OpenAI Conversations API](https://developers.openai.com/api/reference/resources/conversations/methods/create) and supports standard CRUD operations alongside advanced listing with metadata filtering.
|
|
671
|
+
|
|
672
|
+
Official documentation: [OpenAI Conversations API](https://developers.openai.com/api/reference/resources/conversations/methods/create)
|
|
673
|
+
|
|
674
|
+
#### List & Filter Conversations (Hebo Extension)
|
|
675
|
+
|
|
676
|
+
Since standard OpenAI APIs (like Threads) do not support global listing of conversations, Hebo Gateway provides this capability as an extension. You can list all conversations using cursor-based pagination and filter by any metadata key using the `metadata.KEY=VALUE` pattern.
|
|
677
|
+
|
|
678
|
+
```bash
|
|
679
|
+
# List conversations for a specific user using metadata filtering
|
|
680
|
+
curl "https://api.gateway.com/conversations?limit=10&metadata.user_id=123"
|
|
681
|
+
```
|
|
682
|
+
|
|
683
|
+
The response follows the standard OpenAI list object:
|
|
684
|
+
|
|
685
|
+
```json
|
|
686
|
+
{
|
|
687
|
+
"object": "list",
|
|
688
|
+
"data": [
|
|
689
|
+
{
|
|
690
|
+
"id": "conv_abc123",
|
|
691
|
+
"object": "conversation",
|
|
692
|
+
"created_at": 1678531200,
|
|
693
|
+
"metadata": { "user_id": "123" }
|
|
694
|
+
}
|
|
695
|
+
],
|
|
696
|
+
"first_id": "conv_abc123",
|
|
697
|
+
"last_id": "conv_abc123",
|
|
698
|
+
"has_more": false
|
|
699
|
+
}
|
|
700
|
+
```
|
|
701
|
+
|
|
571
702
|
## 🧠 OpenAI Extensions
|
|
572
703
|
|
|
573
704
|
### Reasoning
|
|
@@ -630,38 +761,6 @@ Provider-specific mapping:
|
|
|
630
761
|
|
|
631
762
|
When available, the resolved value is echoed back on response as `service_tier`.
|
|
632
763
|
|
|
633
|
-
### Conversations
|
|
634
|
-
|
|
635
|
-
Hebo Gateway provides a dedicated `/conversations` endpoint for managing persistent conversation state. It is designed as an extension of the [OpenAI Conversations API](https://developers.openai.com/api/reference/typescript/resources/conversations) and supports standard CRUD operations alongside advanced listing with metadata filtering.
|
|
636
|
-
|
|
637
|
-
#### List & Filter Conversations
|
|
638
|
-
|
|
639
|
-
You can list conversations with standard cursor-based pagination and filter by any metadata key using the `metadata.KEY=VALUE` pattern.
|
|
640
|
-
|
|
641
|
-
```bash
|
|
642
|
-
# List conversations for a specific user
|
|
643
|
-
curl "https://api.gateway.com/conversations?limit=10&metadata.user_id=123"
|
|
644
|
-
```
|
|
645
|
-
|
|
646
|
-
The response follows the standard OpenAI list object:
|
|
647
|
-
|
|
648
|
-
```json
|
|
649
|
-
{
|
|
650
|
-
"object": "list",
|
|
651
|
-
"data": [
|
|
652
|
-
{
|
|
653
|
-
"id": "conv_abc123",
|
|
654
|
-
"object": "conversation",
|
|
655
|
-
"created_at": 1678531200,
|
|
656
|
-
"metadata": { "user_id": "123" }
|
|
657
|
-
}
|
|
658
|
-
],
|
|
659
|
-
"first_id": "conv_abc123",
|
|
660
|
-
"last_id": "conv_abc123",
|
|
661
|
-
"has_more": false
|
|
662
|
-
}
|
|
663
|
-
```
|
|
664
|
-
|
|
665
764
|
### Prompt Caching
|
|
666
765
|
|
|
667
766
|
The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.
|
|
@@ -1,23 +1,9 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { GenerateTextResult, StreamTextResult, FinishReason,
|
|
1
|
+
import type { SharedV3ProviderMetadata } from "@ai-sdk/provider";
|
|
2
|
+
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
3
|
import { Output } from "ai";
|
|
4
4
|
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
5
5
|
import type { SseErrorFrame, SseFrame } from "../../utils/stream";
|
|
6
|
-
|
|
7
|
-
messages: ModelMessage[];
|
|
8
|
-
tools?: ToolSet;
|
|
9
|
-
toolChoice?: ToolChoice<ToolSet>;
|
|
10
|
-
activeTools?: Array<keyof ToolSet>;
|
|
11
|
-
output?: Output.Output;
|
|
12
|
-
temperature?: number;
|
|
13
|
-
maxOutputTokens?: number;
|
|
14
|
-
frequencyPenalty?: number;
|
|
15
|
-
presencePenalty?: number;
|
|
16
|
-
seed?: number;
|
|
17
|
-
stopSequences?: string[];
|
|
18
|
-
topP?: number;
|
|
19
|
-
providerOptions: SharedV3ProviderOptions;
|
|
20
|
-
};
|
|
6
|
+
import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
|
|
21
7
|
export declare function convertToTextCallOptions(params: ChatCompletionsInputs): TextCallOptions;
|
|
22
8
|
export declare function convertToModelMessages(messages: ChatCompletionsMessage[]): ModelMessage[];
|
|
23
9
|
export declare function fromChatCompletionsUserMessage(message: ChatCompletionsUserMessage): UserModelMessage;
|
|
@@ -25,10 +11,7 @@ export declare function fromChatCompletionsAssistantMessage(message: ChatComplet
|
|
|
25
11
|
export declare function fromChatCompletionsToolResultMessage(message: ChatCompletionsAssistantMessage, toolById: Map<string, ChatCompletionsToolMessage>): ToolModelMessage | undefined;
|
|
26
12
|
export declare function fromChatCompletionsContent(content: ChatCompletionsContentPart[]): UserContent;
|
|
27
13
|
export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined) => ToolSet | undefined;
|
|
28
|
-
export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToolChoice | undefined) =>
|
|
29
|
-
toolChoice?: ToolChoice<ToolSet>;
|
|
30
|
-
activeTools?: Array<keyof ToolSet>;
|
|
31
|
-
};
|
|
14
|
+
export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoiceOptions;
|
|
32
15
|
export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
|
|
33
16
|
export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
34
17
|
export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ChatCompletionsStream;
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
import { Output, jsonSchema, tool } from "ai";
|
|
2
|
-
import { z } from "zod";
|
|
3
|
-
import { GatewayError } from "../../errors/gateway";
|
|
4
2
|
import { toResponse } from "../../utils/response";
|
|
5
|
-
import {
|
|
3
|
+
import { parseJsonOrText, parseReasoningOptions, parsePromptCachingOptions, resolveResponseServiceTier, normalizeToolName, stripEmptyKeys, parseBase64, parseImageInput, extractReasoningMetadata, } from "../shared/converters";
|
|
6
4
|
// --- Request Flow ---
|
|
7
5
|
export function convertToTextCallOptions(params) {
|
|
8
6
|
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
@@ -165,7 +163,7 @@ export function fromChatCompletionsAssistantMessage(message) {
|
|
|
165
163
|
out.providerOptions = extra_content;
|
|
166
164
|
}
|
|
167
165
|
if (cache_control) {
|
|
168
|
-
(
|
|
166
|
+
(out.providerOptions ??= {})["unknown"] = { cache_control };
|
|
169
167
|
}
|
|
170
168
|
return out;
|
|
171
169
|
}
|
|
@@ -214,29 +212,27 @@ export function fromChatCompletionsContent(content) {
|
|
|
214
212
|
});
|
|
215
213
|
}
|
|
216
214
|
function fromImageUrlPart(url, cacheControl) {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
return fromFilePart(url.slice(dataStart), mimeType, undefined, cacheControl);
|
|
223
|
-
}
|
|
224
|
-
const out = {
|
|
225
|
-
type: "image",
|
|
226
|
-
image: new URL(url),
|
|
227
|
-
};
|
|
228
|
-
if (cacheControl) {
|
|
229
|
-
out.providerOptions = {
|
|
230
|
-
unknown: { cache_control: cacheControl },
|
|
215
|
+
const { image, mediaType } = parseImageInput(url);
|
|
216
|
+
if (image instanceof URL) {
|
|
217
|
+
const out = {
|
|
218
|
+
type: "image",
|
|
219
|
+
image,
|
|
231
220
|
};
|
|
221
|
+
if (cacheControl) {
|
|
222
|
+
out.providerOptions = {
|
|
223
|
+
unknown: { cache_control: cacheControl },
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
return out;
|
|
232
227
|
}
|
|
233
|
-
return
|
|
228
|
+
return fromFilePart(image, mediaType ?? "image/jpeg", undefined, cacheControl);
|
|
234
229
|
}
|
|
235
230
|
function fromFilePart(base64Data, mediaType, filename, cacheControl) {
|
|
231
|
+
const data = parseBase64(base64Data);
|
|
236
232
|
if (mediaType.startsWith("image/")) {
|
|
237
233
|
const out = {
|
|
238
234
|
type: "image",
|
|
239
|
-
image:
|
|
235
|
+
image: data,
|
|
240
236
|
mediaType,
|
|
241
237
|
};
|
|
242
238
|
if (cacheControl) {
|
|
@@ -248,7 +244,7 @@ function fromFilePart(base64Data, mediaType, filename, cacheControl) {
|
|
|
248
244
|
}
|
|
249
245
|
const out = {
|
|
250
246
|
type: "file",
|
|
251
|
-
data:
|
|
247
|
+
data: data,
|
|
252
248
|
filename,
|
|
253
249
|
mediaType,
|
|
254
250
|
};
|
|
@@ -280,7 +276,9 @@ export const convertToToolChoiceOptions = (toolChoice) => {
|
|
|
280
276
|
if (toolChoice === "none" || toolChoice === "auto" || toolChoice === "required") {
|
|
281
277
|
return { toolChoice };
|
|
282
278
|
}
|
|
283
|
-
// FUTURE: this is right now google specific, which is not supported by AI SDK, until then,
|
|
279
|
+
// FUTURE: this is right now google specific, which is not supported by AI SDK, until then,
|
|
280
|
+
// we map it to "auto".
|
|
281
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
284
282
|
if (toolChoice === "validated") {
|
|
285
283
|
return { toolChoice: "auto" };
|
|
286
284
|
}
|
|
@@ -309,59 +307,6 @@ function parseToolResult(content) {
|
|
|
309
307
|
}
|
|
310
308
|
return parseJsonOrText(content);
|
|
311
309
|
}
|
|
312
|
-
function parseJsonOrText(content) {
|
|
313
|
-
try {
|
|
314
|
-
// oxlint-disable-next-line no-unsafe-assignment
|
|
315
|
-
return { type: "json", value: JSON.parse(content) };
|
|
316
|
-
}
|
|
317
|
-
catch {
|
|
318
|
-
return { type: "text", value: content };
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
function parseReasoningOptions(reasoning_effort, reasoning) {
|
|
322
|
-
const effort = reasoning?.effort ?? reasoning_effort;
|
|
323
|
-
const max_tokens = reasoning?.max_tokens;
|
|
324
|
-
if (reasoning?.enabled === false || effort === "none") {
|
|
325
|
-
return { reasoning: { enabled: false }, reasoning_effort: "none" };
|
|
326
|
-
}
|
|
327
|
-
if (!reasoning && effort === undefined)
|
|
328
|
-
return {};
|
|
329
|
-
const out = { reasoning: {} };
|
|
330
|
-
if (effort) {
|
|
331
|
-
out.reasoning.enabled = true;
|
|
332
|
-
out.reasoning.effort = effort;
|
|
333
|
-
out.reasoning_effort = effort;
|
|
334
|
-
}
|
|
335
|
-
if (max_tokens) {
|
|
336
|
-
out.reasoning.enabled = true;
|
|
337
|
-
out.reasoning.max_tokens = max_tokens;
|
|
338
|
-
}
|
|
339
|
-
if (out.reasoning.enabled) {
|
|
340
|
-
out.reasoning.exclude = reasoning?.exclude;
|
|
341
|
-
}
|
|
342
|
-
return out;
|
|
343
|
-
}
|
|
344
|
-
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
|
|
345
|
-
const out = {};
|
|
346
|
-
let retention = prompt_cache_retention;
|
|
347
|
-
if (!retention && cache_control?.ttl) {
|
|
348
|
-
retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
|
|
349
|
-
}
|
|
350
|
-
let control = cache_control;
|
|
351
|
-
if (!control && retention) {
|
|
352
|
-
control = {
|
|
353
|
-
type: "ephemeral",
|
|
354
|
-
ttl: retention === "24h" ? "24h" : "5m",
|
|
355
|
-
};
|
|
356
|
-
}
|
|
357
|
-
if (prompt_cache_key)
|
|
358
|
-
out["prompt_cache_key"] = prompt_cache_key;
|
|
359
|
-
if (retention)
|
|
360
|
-
out["prompt_cache_retention"] = retention;
|
|
361
|
-
if (control)
|
|
362
|
-
out["cache_control"] = control;
|
|
363
|
-
return out;
|
|
364
|
-
}
|
|
365
310
|
// --- Response Flow ---
|
|
366
311
|
export function toChatCompletions(result, model) {
|
|
367
312
|
return {
|
|
@@ -421,6 +366,8 @@ export class ChatCompletionsTransformStream extends TransformStream {
|
|
|
421
366
|
};
|
|
422
367
|
super({
|
|
423
368
|
transform(part, controller) {
|
|
369
|
+
// Omit lifecycle (start/end) and intermediate events; /chat/completions
|
|
370
|
+
// is a stateless stream of deltas. Tool calls are emitted once fully-formed.
|
|
424
371
|
// oxlint-disable-next-line switch-exhaustiveness-check
|
|
425
372
|
switch (part.type) {
|
|
426
373
|
case "text-delta": {
|
|
@@ -471,47 +418,6 @@ export class ChatCompletionsTransformStream extends TransformStream {
|
|
|
471
418
|
});
|
|
472
419
|
}
|
|
473
420
|
}
|
|
474
|
-
function resolveResponseServiceTier(providerMetadata) {
|
|
475
|
-
if (!providerMetadata)
|
|
476
|
-
return;
|
|
477
|
-
for (const metadata of Object.values(providerMetadata)) {
|
|
478
|
-
const tier = parseReturnedServiceTier(metadata["service_tier"] ??
|
|
479
|
-
metadata["usage_metadata"]?.["traffic_type"]);
|
|
480
|
-
if (tier)
|
|
481
|
-
return tier;
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
function parseReturnedServiceTier(value) {
|
|
485
|
-
if (typeof value !== "string")
|
|
486
|
-
return undefined;
|
|
487
|
-
const n = value.toLowerCase();
|
|
488
|
-
switch (n) {
|
|
489
|
-
case "traffic_type_unspecified":
|
|
490
|
-
case "auto":
|
|
491
|
-
return "auto";
|
|
492
|
-
case "default":
|
|
493
|
-
case "on_demand":
|
|
494
|
-
case "on-demand":
|
|
495
|
-
case "shared":
|
|
496
|
-
return "default";
|
|
497
|
-
case "on_demand_flex":
|
|
498
|
-
case "flex":
|
|
499
|
-
return "flex";
|
|
500
|
-
case "on_demand_priority":
|
|
501
|
-
case "priority":
|
|
502
|
-
case "performance":
|
|
503
|
-
return "priority";
|
|
504
|
-
case "provisioned_throughput":
|
|
505
|
-
case "scale":
|
|
506
|
-
case "reserved":
|
|
507
|
-
case "dedicated":
|
|
508
|
-
case "provisioned":
|
|
509
|
-
case "throughput":
|
|
510
|
-
return "scale";
|
|
511
|
-
default:
|
|
512
|
-
return undefined;
|
|
513
|
-
}
|
|
514
|
-
}
|
|
515
421
|
export const toChatCompletionsAssistantMessage = (result) => {
|
|
516
422
|
const message = {
|
|
517
423
|
role: "assistant",
|
|
@@ -550,19 +456,7 @@ export const toChatCompletionsAssistantMessage = (result) => {
|
|
|
550
456
|
return message;
|
|
551
457
|
};
|
|
552
458
|
export function toReasoningDetail(reasoning, id, index) {
|
|
553
|
-
const
|
|
554
|
-
let redactedData;
|
|
555
|
-
let signature;
|
|
556
|
-
for (const metadata of Object.values(providerMetadata)) {
|
|
557
|
-
if (metadata && typeof metadata === "object") {
|
|
558
|
-
if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
|
|
559
|
-
redactedData = metadata["redactedData"];
|
|
560
|
-
}
|
|
561
|
-
if ("signature" in metadata && typeof metadata["signature"] === "string") {
|
|
562
|
-
signature = metadata["signature"];
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
}
|
|
459
|
+
const { redactedData, signature } = extractReasoningMetadata(reasoning.providerMetadata);
|
|
566
460
|
if (redactedData) {
|
|
567
461
|
return {
|
|
568
462
|
id,
|
|
@@ -622,37 +516,6 @@ export function toChatCompletionsToolCall(id, name, args, providerMetadata) {
|
|
|
622
516
|
}
|
|
623
517
|
return out;
|
|
624
518
|
}
|
|
625
|
-
function normalizeToolName(name) {
|
|
626
|
-
// some models hallucinate invalid characters
|
|
627
|
-
// normalize to valid characters [^A-Za-z0-9_-.] (non regex for perf)
|
|
628
|
-
// https://modelcontextprotocol.io/specification/draft/server/tools#tool-names
|
|
629
|
-
let out = "";
|
|
630
|
-
for (let i = 0; i < name.length; i++) {
|
|
631
|
-
if (out.length === 128)
|
|
632
|
-
break;
|
|
633
|
-
// oxlint-disable-next-line unicorn/prefer-code-point
|
|
634
|
-
const c = name.charCodeAt(i);
|
|
635
|
-
if ((c >= 48 && c <= 57) ||
|
|
636
|
-
(c >= 65 && c <= 90) ||
|
|
637
|
-
(c >= 97 && c <= 122) ||
|
|
638
|
-
c === 95 ||
|
|
639
|
-
c === 45 ||
|
|
640
|
-
c === 46) {
|
|
641
|
-
out += name[i];
|
|
642
|
-
}
|
|
643
|
-
else {
|
|
644
|
-
out += "_";
|
|
645
|
-
}
|
|
646
|
-
}
|
|
647
|
-
return out;
|
|
648
|
-
}
|
|
649
|
-
function stripEmptyKeys(obj) {
|
|
650
|
-
if (!obj || typeof obj !== "object" || Array.isArray(obj))
|
|
651
|
-
return obj;
|
|
652
|
-
// some models hallucinate empty parameters
|
|
653
|
-
delete obj[""];
|
|
654
|
-
return obj;
|
|
655
|
-
}
|
|
656
519
|
export const toChatCompletionsFinishReason = (finishReason) => {
|
|
657
520
|
if (finishReason === "error" || finishReason === "other") {
|
|
658
521
|
return "stop";
|
|
@@ -10,12 +10,13 @@ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
|
10
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
11
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
12
12
|
import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
|
|
13
|
-
import { ChatCompletionsBodySchema } from "./schema";
|
|
13
|
+
import { ChatCompletionsBodySchema, } from "./schema";
|
|
14
14
|
export const chatCompletions = (config) => {
|
|
15
15
|
const hooks = config.hooks;
|
|
16
16
|
const handler = async (ctx, cfg) => {
|
|
17
17
|
const start = performance.now();
|
|
18
18
|
ctx.operation = "chat";
|
|
19
|
+
setSpanAttributes({ "gen_ai.operation.name": ctx.operation });
|
|
19
20
|
addSpanEvent("hebo.handler.started");
|
|
20
21
|
// Guard: enforce HTTP method early.
|
|
21
22
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
@@ -67,7 +68,6 @@ export const chatCompletions = (config) => {
|
|
|
67
68
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
69
|
// Convert inputs to AI SDK call options.
|
|
69
70
|
const { model: _model, stream, ...inputs } = ctx.body;
|
|
70
|
-
// oxlint-disable-next-line no-unsafe-argument
|
|
71
71
|
const textOptions = convertToTextCallOptions(inputs);
|
|
72
72
|
logger.trace({
|
|
73
73
|
requestId: ctx.requestId,
|