@hebo-ai/gateway 0.4.0-alpha.2 → 0.4.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -3
- package/dist/endpoints/chat-completions/converters.d.ts +3 -2
- package/dist/endpoints/chat-completions/converters.js +120 -28
- package/dist/endpoints/chat-completions/schema.d.ts +101 -0
- package/dist/endpoints/chat-completions/schema.js +14 -0
- package/dist/types.d.ts +2 -1
- package/dist/utils/request.js +1 -4
- package/package.json +1 -1
- package/src/config.ts +3 -3
- package/src/endpoints/chat-completions/converters.test.ts +151 -1
- package/src/endpoints/chat-completions/converters.ts +152 -28
- package/src/endpoints/chat-completions/schema.ts +16 -0
- package/src/lifecycle.ts +1 -1
- package/src/types.ts +3 -1
- package/src/utils/request.ts +1 -4
package/README.md
CHANGED
|
@@ -349,8 +349,8 @@ const gw = gateway({
|
|
|
349
349
|
* @returns Modified result, or undefined to keep original.
|
|
350
350
|
*/
|
|
351
351
|
after: async (ctx: {
|
|
352
|
-
result: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings
|
|
353
|
-
}): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings |
|
|
352
|
+
result: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings
|
|
353
|
+
}): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | void> => {
|
|
354
354
|
// Example Use Cases:
|
|
355
355
|
// - Transform result
|
|
356
356
|
// - Result logging
|
|
@@ -555,6 +555,15 @@ Reasoning output is surfaced as extension to the `completion` object.
|
|
|
555
555
|
|
|
556
556
|
Most SDKs handle these fields out-of-the-box.
|
|
557
557
|
|
|
558
|
+
#### Thinking Blocks & Context Preservation
|
|
559
|
+
|
|
560
|
+
Advanced models (like Anthropic Claude 3.7 or Gemini 3) surface structured reasoning steps and signatures that act as a "save state" for the model's internal reasoning process. To maintain this context across multi-turn conversations and tool-calling workflows, you should pass back the following extensions in subsequent messages:
|
|
561
|
+
|
|
562
|
+
- **reasoning_details**: Standardized array of reasoning steps and generic signatures.
|
|
563
|
+
- **extra_content**: Provider-specific extensions, such as **Google's thought signatures** on Vertex AI.
|
|
564
|
+
|
|
565
|
+
For **Gemini 3** models, returning the thought signature via `extra_content` is mandatory to resume the chain-of-thought; failing to do so may result in errors or degraded performance.
|
|
566
|
+
|
|
558
567
|
## 🧪 Advanced Usage
|
|
559
568
|
|
|
560
569
|
### Logger Settings
|
|
@@ -573,7 +582,6 @@ const gw = gateway({
|
|
|
573
582
|
```
|
|
574
583
|
|
|
575
584
|
If you provide a custom logger, it must implement `trace`, `debug`, `info`, `warn`, and `error` methods.
|
|
576
|
-
For production workloads, we recommend `pino` for better logging performance and lower overhead.
|
|
577
585
|
|
|
578
586
|
Example with **pino**:
|
|
579
587
|
|
|
@@ -591,6 +599,9 @@ const gw = gateway({
|
|
|
591
599
|
});
|
|
592
600
|
```
|
|
593
601
|
|
|
602
|
+
> [!TIP]
|
|
603
|
+
> For production workloads, we recommend `pino` for better logging performance and lower overhead.
|
|
604
|
+
|
|
594
605
|
### Telemetry Settings
|
|
595
606
|
|
|
596
607
|
Hebo Gateway can forward telemetry settings via the `telemetry` config field.
|
|
@@ -610,6 +621,9 @@ const gw = gateway({
|
|
|
610
621
|
});
|
|
611
622
|
```
|
|
612
623
|
|
|
624
|
+
> [!TIP]
|
|
625
|
+
> For observability integrations that are not OpenTelemetry-compliant (for example, Langfuse), you can disable the built-in telemetry and instrument requests manually in the `before` / `after` hooks.
|
|
626
|
+
|
|
613
627
|
### Passing Framework State to Hooks
|
|
614
628
|
|
|
615
629
|
You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
|
|
2
|
-
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
|
-
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk } from "./schema";
|
|
2
|
+
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
|
+
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
4
4
|
import { OpenAIError } from "../../errors/openai";
|
|
5
5
|
export type TextCallOptions = {
|
|
6
6
|
messages: ModelMessage[];
|
|
@@ -31,6 +31,7 @@ export declare class ChatCompletionsStream extends TransformStream<TextStreamPar
|
|
|
31
31
|
constructor(model: string);
|
|
32
32
|
}
|
|
33
33
|
export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
|
|
34
|
+
export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
|
|
34
35
|
export declare function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletionsUsage;
|
|
35
36
|
export declare function toChatCompletionsToolCall(id: string, name: string, args: unknown, providerMetadata?: SharedV3ProviderMetadata): ChatCompletionsToolCall;
|
|
36
37
|
export declare const toChatCompletionsFinishReason: (finishReason: FinishReason) => ChatCompletionsFinishReason;
|
|
@@ -61,33 +61,67 @@ export function fromChatCompletionsUserMessage(message) {
|
|
|
61
61
|
};
|
|
62
62
|
}
|
|
63
63
|
export function fromChatCompletionsAssistantMessage(message) {
|
|
64
|
-
const { tool_calls, role, content, extra_content } = message;
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
64
|
+
const { tool_calls, role, content, extra_content, reasoning_details } = message;
|
|
65
|
+
const parts = [];
|
|
66
|
+
if (Array.isArray(parts)) {
|
|
67
|
+
if (reasoning_details?.length) {
|
|
68
|
+
for (const detail of reasoning_details) {
|
|
69
|
+
if (detail.text && detail.type === "reasoning.text") {
|
|
70
|
+
parts.push({
|
|
71
|
+
type: "reasoning",
|
|
72
|
+
text: detail.text,
|
|
73
|
+
providerOptions: detail.signature
|
|
74
|
+
? {
|
|
75
|
+
unknown: {
|
|
76
|
+
signature: detail.signature,
|
|
77
|
+
},
|
|
78
|
+
}
|
|
79
|
+
: undefined,
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
else if (detail.type === "reasoning.encrypted" && detail.data) {
|
|
83
|
+
parts.push({
|
|
84
|
+
type: "reasoning",
|
|
85
|
+
text: "",
|
|
86
|
+
providerOptions: {
|
|
87
|
+
unknown: {
|
|
88
|
+
redactedData: detail.data,
|
|
89
|
+
},
|
|
90
|
+
},
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
if (tool_calls?.length) {
|
|
96
|
+
for (const tc of tool_calls) {
|
|
97
|
+
const { id, function: fn, extra_content } = tc;
|
|
98
|
+
const out = {
|
|
99
|
+
type: "tool-call",
|
|
100
|
+
toolCallId: id,
|
|
101
|
+
toolName: fn.name,
|
|
102
|
+
input: parseToolOutput(fn.arguments).value,
|
|
103
|
+
};
|
|
104
|
+
if (extra_content) {
|
|
105
|
+
out.providerOptions = extra_content;
|
|
106
|
+
}
|
|
107
|
+
parts.push(out);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
else if (content !== undefined && content !== null) {
|
|
111
|
+
parts.push({
|
|
112
|
+
type: "text",
|
|
113
|
+
text: content,
|
|
114
|
+
});
|
|
72
115
|
}
|
|
73
|
-
return out;
|
|
74
116
|
}
|
|
75
|
-
|
|
117
|
+
const out = {
|
|
76
118
|
role: role,
|
|
77
|
-
content:
|
|
78
|
-
const { id, function: fn, extra_content } = tc;
|
|
79
|
-
const out = {
|
|
80
|
-
type: "tool-call",
|
|
81
|
-
toolCallId: id,
|
|
82
|
-
toolName: fn.name,
|
|
83
|
-
input: parseToolOutput(fn.arguments).value,
|
|
84
|
-
};
|
|
85
|
-
if (extra_content) {
|
|
86
|
-
out.providerOptions = extra_content;
|
|
87
|
-
}
|
|
88
|
-
return out;
|
|
89
|
-
}),
|
|
119
|
+
content: Array.isArray(parts) && parts.length > 0 ? parts : (content ?? ""),
|
|
90
120
|
};
|
|
121
|
+
if (extra_content) {
|
|
122
|
+
out.providerOptions = extra_content;
|
|
123
|
+
}
|
|
124
|
+
return out;
|
|
91
125
|
}
|
|
92
126
|
export function fromChatCompletionsToolResultMessage(message, toolById) {
|
|
93
127
|
const toolCalls = message.tool_calls ?? [];
|
|
@@ -251,6 +285,7 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
251
285
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
252
286
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
253
287
|
let toolCallIndexCounter = 0;
|
|
288
|
+
const reasoningIdToIndex = new Map();
|
|
254
289
|
const createChunk = (delta, provider_metadata, finish_reason, usage) => {
|
|
255
290
|
if (provider_metadata) {
|
|
256
291
|
delta.extra_content = provider_metadata;
|
|
@@ -278,7 +313,21 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
278
313
|
break;
|
|
279
314
|
}
|
|
280
315
|
case "reasoning-delta": {
|
|
281
|
-
|
|
316
|
+
let index = reasoningIdToIndex.get(part.id);
|
|
317
|
+
if (index === undefined) {
|
|
318
|
+
index = reasoningIdToIndex.size;
|
|
319
|
+
reasoningIdToIndex.set(part.id, index);
|
|
320
|
+
}
|
|
321
|
+
controller.enqueue(createChunk({
|
|
322
|
+
reasoning_content: part.text,
|
|
323
|
+
reasoning_details: [
|
|
324
|
+
toReasoningDetail({
|
|
325
|
+
type: "reasoning",
|
|
326
|
+
text: part.text,
|
|
327
|
+
providerMetadata: part.providerMetadata,
|
|
328
|
+
}, part.id, index),
|
|
329
|
+
],
|
|
330
|
+
}, part.providerMetadata));
|
|
282
331
|
break;
|
|
283
332
|
}
|
|
284
333
|
case "tool-call": {
|
|
@@ -316,20 +365,63 @@ export const toChatCompletionsAssistantMessage = (result) => {
|
|
|
316
365
|
if (result.toolCalls && result.toolCalls.length > 0) {
|
|
317
366
|
message.tool_calls = result.toolCalls.map((toolCall) => toChatCompletionsToolCall(toolCall.toolCallId, toolCall.toolName, toolCall.input, toolCall.providerMetadata));
|
|
318
367
|
}
|
|
368
|
+
const reasoningDetails = [];
|
|
319
369
|
for (const part of result.content) {
|
|
320
370
|
if (part.type === "text") {
|
|
321
|
-
message.content
|
|
322
|
-
|
|
323
|
-
|
|
371
|
+
if (message.content === null) {
|
|
372
|
+
message.content = part.text;
|
|
373
|
+
if (part.providerMetadata) {
|
|
374
|
+
message.extra_content = part.providerMetadata;
|
|
375
|
+
}
|
|
324
376
|
}
|
|
325
|
-
|
|
377
|
+
}
|
|
378
|
+
else if (part.type === "reasoning") {
|
|
379
|
+
reasoningDetails.push(toReasoningDetail(part, `reasoning-${crypto.randomUUID()}`, reasoningDetails.length));
|
|
326
380
|
}
|
|
327
381
|
}
|
|
328
382
|
if (result.reasoningText) {
|
|
329
383
|
message.reasoning_content = result.reasoningText;
|
|
384
|
+
if (reasoningDetails.length === 0) {
|
|
385
|
+
reasoningDetails.push(toReasoningDetail({ type: "reasoning", text: result.reasoningText }, `reasoning-${crypto.randomUUID()}`, 0));
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
if (reasoningDetails.length > 0) {
|
|
389
|
+
message.reasoning_details = reasoningDetails;
|
|
330
390
|
}
|
|
331
391
|
return message;
|
|
332
392
|
};
|
|
393
|
+
export function toReasoningDetail(reasoning, id, index) {
|
|
394
|
+
const providerMetadata = reasoning.providerMetadata ?? {};
|
|
395
|
+
let redactedData;
|
|
396
|
+
let signature;
|
|
397
|
+
for (const metadata of Object.values(providerMetadata)) {
|
|
398
|
+
if (metadata && typeof metadata === "object") {
|
|
399
|
+
if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
|
|
400
|
+
redactedData = metadata["redactedData"];
|
|
401
|
+
}
|
|
402
|
+
if ("signature" in metadata && typeof metadata["signature"] === "string") {
|
|
403
|
+
signature = metadata["signature"];
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
if (redactedData) {
|
|
408
|
+
return {
|
|
409
|
+
id,
|
|
410
|
+
index,
|
|
411
|
+
type: "reasoning.encrypted",
|
|
412
|
+
data: redactedData,
|
|
413
|
+
format: "unknown",
|
|
414
|
+
};
|
|
415
|
+
}
|
|
416
|
+
return {
|
|
417
|
+
id,
|
|
418
|
+
index,
|
|
419
|
+
type: "reasoning.text",
|
|
420
|
+
text: reasoning.text,
|
|
421
|
+
signature,
|
|
422
|
+
format: "unknown",
|
|
423
|
+
};
|
|
424
|
+
}
|
|
333
425
|
export function toChatCompletionsUsage(usage) {
|
|
334
426
|
const out = {};
|
|
335
427
|
const prompt = usage.inputTokens;
|
|
@@ -57,6 +57,17 @@ export declare const ChatCompletionsUserMessageSchema: z.ZodObject<{
|
|
|
57
57
|
name: z.ZodOptional<z.ZodString>;
|
|
58
58
|
}, z.core.$strip>;
|
|
59
59
|
export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
|
|
60
|
+
export declare const ChatCompletionsReasoningDetailSchema: z.ZodObject<{
|
|
61
|
+
id: z.ZodOptional<z.ZodString>;
|
|
62
|
+
index: z.ZodInt;
|
|
63
|
+
type: z.ZodString;
|
|
64
|
+
text: z.ZodOptional<z.ZodString>;
|
|
65
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
66
|
+
data: z.ZodOptional<z.ZodString>;
|
|
67
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
68
|
+
format: z.ZodOptional<z.ZodString>;
|
|
69
|
+
}, z.core.$strip>;
|
|
70
|
+
export type ChatCompletionsReasoningDetail = z.infer<typeof ChatCompletionsReasoningDetailSchema>;
|
|
60
71
|
export declare const ChatCompletionsAssistantMessageSchema: z.ZodObject<{
|
|
61
72
|
role: z.ZodLiteral<"assistant">;
|
|
62
73
|
content: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>;
|
|
@@ -71,6 +82,16 @@ export declare const ChatCompletionsAssistantMessageSchema: z.ZodObject<{
|
|
|
71
82
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
72
83
|
}, z.core.$strip>>>;
|
|
73
84
|
reasoning_content: z.ZodOptional<z.ZodString>;
|
|
85
|
+
reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
86
|
+
id: z.ZodOptional<z.ZodString>;
|
|
87
|
+
index: z.ZodInt;
|
|
88
|
+
type: z.ZodString;
|
|
89
|
+
text: z.ZodOptional<z.ZodString>;
|
|
90
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
91
|
+
data: z.ZodOptional<z.ZodString>;
|
|
92
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
93
|
+
format: z.ZodOptional<z.ZodString>;
|
|
94
|
+
}, z.core.$strip>>>;
|
|
74
95
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
75
96
|
}, z.core.$strip>;
|
|
76
97
|
export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
|
|
@@ -118,6 +139,16 @@ export declare const ChatCompletionsMessageSchema: z.ZodUnion<readonly [z.ZodObj
|
|
|
118
139
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
119
140
|
}, z.core.$strip>>>;
|
|
120
141
|
reasoning_content: z.ZodOptional<z.ZodString>;
|
|
142
|
+
reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
143
|
+
id: z.ZodOptional<z.ZodString>;
|
|
144
|
+
index: z.ZodInt;
|
|
145
|
+
type: z.ZodString;
|
|
146
|
+
text: z.ZodOptional<z.ZodString>;
|
|
147
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
148
|
+
data: z.ZodOptional<z.ZodString>;
|
|
149
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
150
|
+
format: z.ZodOptional<z.ZodString>;
|
|
151
|
+
}, z.core.$strip>>>;
|
|
121
152
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
122
153
|
}, z.core.$strip>, z.ZodObject<{
|
|
123
154
|
role: z.ZodLiteral<"tool">;
|
|
@@ -189,6 +220,16 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
189
220
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
190
221
|
}, z.core.$strip>>>;
|
|
191
222
|
reasoning_content: z.ZodOptional<z.ZodString>;
|
|
223
|
+
reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
224
|
+
id: z.ZodOptional<z.ZodString>;
|
|
225
|
+
index: z.ZodInt;
|
|
226
|
+
type: z.ZodString;
|
|
227
|
+
text: z.ZodOptional<z.ZodString>;
|
|
228
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
229
|
+
data: z.ZodOptional<z.ZodString>;
|
|
230
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
231
|
+
format: z.ZodOptional<z.ZodString>;
|
|
232
|
+
}, z.core.$strip>>>;
|
|
192
233
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
193
234
|
}, z.core.$strip>, z.ZodObject<{
|
|
194
235
|
role: z.ZodLiteral<"tool">;
|
|
@@ -265,6 +306,16 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
265
306
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
266
307
|
}, z.core.$strip>>>;
|
|
267
308
|
reasoning_content: z.ZodOptional<z.ZodString>;
|
|
309
|
+
reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
310
|
+
id: z.ZodOptional<z.ZodString>;
|
|
311
|
+
index: z.ZodInt;
|
|
312
|
+
type: z.ZodString;
|
|
313
|
+
text: z.ZodOptional<z.ZodString>;
|
|
314
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
315
|
+
data: z.ZodOptional<z.ZodString>;
|
|
316
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
317
|
+
format: z.ZodOptional<z.ZodString>;
|
|
318
|
+
}, z.core.$strip>>>;
|
|
268
319
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
269
320
|
}, z.core.$strip>, z.ZodObject<{
|
|
270
321
|
role: z.ZodLiteral<"tool">;
|
|
@@ -322,6 +373,16 @@ export declare const ChatCompletionsChoiceSchema: z.ZodObject<{
|
|
|
322
373
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
323
374
|
}, z.core.$strip>>>;
|
|
324
375
|
reasoning_content: z.ZodOptional<z.ZodString>;
|
|
376
|
+
reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
377
|
+
id: z.ZodOptional<z.ZodString>;
|
|
378
|
+
index: z.ZodInt;
|
|
379
|
+
type: z.ZodString;
|
|
380
|
+
text: z.ZodOptional<z.ZodString>;
|
|
381
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
382
|
+
data: z.ZodOptional<z.ZodString>;
|
|
383
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
384
|
+
format: z.ZodOptional<z.ZodString>;
|
|
385
|
+
}, z.core.$strip>>>;
|
|
325
386
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
326
387
|
}, z.core.$strip>;
|
|
327
388
|
finish_reason: z.ZodUnion<readonly [z.ZodLiteral<"stop">, z.ZodLiteral<"length">, z.ZodLiteral<"content_filter">, z.ZodLiteral<"tool_calls">]>;
|
|
@@ -361,6 +422,16 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
361
422
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
362
423
|
}, z.core.$strip>>>;
|
|
363
424
|
reasoning_content: z.ZodOptional<z.ZodString>;
|
|
425
|
+
reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
426
|
+
id: z.ZodOptional<z.ZodString>;
|
|
427
|
+
index: z.ZodInt;
|
|
428
|
+
type: z.ZodString;
|
|
429
|
+
text: z.ZodOptional<z.ZodString>;
|
|
430
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
431
|
+
data: z.ZodOptional<z.ZodString>;
|
|
432
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
433
|
+
format: z.ZodOptional<z.ZodString>;
|
|
434
|
+
}, z.core.$strip>>>;
|
|
364
435
|
extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
365
436
|
}, z.core.$strip>;
|
|
366
437
|
finish_reason: z.ZodUnion<readonly [z.ZodLiteral<"stop">, z.ZodLiteral<"length">, z.ZodLiteral<"content_filter">, z.ZodLiteral<"tool_calls">]>;
|
|
@@ -396,6 +467,16 @@ export declare const ChatCompletionsAssistantMessageDeltaSchema: z.ZodObject<{
|
|
|
396
467
|
content: z.ZodOptional<z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>>;
|
|
397
468
|
name: z.ZodOptional<z.ZodOptional<z.ZodString>>;
|
|
398
469
|
reasoning_content: z.ZodOptional<z.ZodOptional<z.ZodString>>;
|
|
470
|
+
reasoning_details: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
471
|
+
id: z.ZodOptional<z.ZodString>;
|
|
472
|
+
index: z.ZodInt;
|
|
473
|
+
type: z.ZodString;
|
|
474
|
+
text: z.ZodOptional<z.ZodString>;
|
|
475
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
476
|
+
data: z.ZodOptional<z.ZodString>;
|
|
477
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
478
|
+
format: z.ZodOptional<z.ZodString>;
|
|
479
|
+
}, z.core.$strip>>>>;
|
|
399
480
|
extra_content: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>>;
|
|
400
481
|
tool_calls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
401
482
|
type: z.ZodOptional<z.ZodLiteral<"function">>;
|
|
@@ -416,6 +497,16 @@ export declare const ChatCompletionsChoiceDeltaSchema: z.ZodObject<{
|
|
|
416
497
|
content: z.ZodOptional<z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>>;
|
|
417
498
|
name: z.ZodOptional<z.ZodOptional<z.ZodString>>;
|
|
418
499
|
reasoning_content: z.ZodOptional<z.ZodOptional<z.ZodString>>;
|
|
500
|
+
reasoning_details: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
501
|
+
id: z.ZodOptional<z.ZodString>;
|
|
502
|
+
index: z.ZodInt;
|
|
503
|
+
type: z.ZodString;
|
|
504
|
+
text: z.ZodOptional<z.ZodString>;
|
|
505
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
506
|
+
data: z.ZodOptional<z.ZodString>;
|
|
507
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
508
|
+
format: z.ZodOptional<z.ZodString>;
|
|
509
|
+
}, z.core.$strip>>>>;
|
|
419
510
|
extra_content: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>>;
|
|
420
511
|
tool_calls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
421
512
|
type: z.ZodOptional<z.ZodLiteral<"function">>;
|
|
@@ -444,6 +535,16 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
444
535
|
content: z.ZodOptional<z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>>;
|
|
445
536
|
name: z.ZodOptional<z.ZodOptional<z.ZodString>>;
|
|
446
537
|
reasoning_content: z.ZodOptional<z.ZodOptional<z.ZodString>>;
|
|
538
|
+
reasoning_details: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
539
|
+
id: z.ZodOptional<z.ZodString>;
|
|
540
|
+
index: z.ZodInt;
|
|
541
|
+
type: z.ZodString;
|
|
542
|
+
text: z.ZodOptional<z.ZodString>;
|
|
543
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
544
|
+
data: z.ZodOptional<z.ZodString>;
|
|
545
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
546
|
+
format: z.ZodOptional<z.ZodString>;
|
|
547
|
+
}, z.core.$strip>>>>;
|
|
447
548
|
extra_content: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>>;
|
|
448
549
|
tool_calls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
449
550
|
type: z.ZodOptional<z.ZodLiteral<"function">>;
|
|
@@ -44,6 +44,16 @@ export const ChatCompletionsUserMessageSchema = z.object({
|
|
|
44
44
|
]),
|
|
45
45
|
name: z.string().optional(),
|
|
46
46
|
});
|
|
47
|
+
export const ChatCompletionsReasoningDetailSchema = z.object({
|
|
48
|
+
id: z.string().optional(),
|
|
49
|
+
index: z.int().nonnegative(),
|
|
50
|
+
type: z.string(),
|
|
51
|
+
text: z.string().optional(),
|
|
52
|
+
signature: z.string().optional(),
|
|
53
|
+
data: z.string().optional(),
|
|
54
|
+
summary: z.string().optional(),
|
|
55
|
+
format: z.string().optional(),
|
|
56
|
+
});
|
|
47
57
|
export const ChatCompletionsAssistantMessageSchema = z.object({
|
|
48
58
|
role: z.literal("assistant"),
|
|
49
59
|
// FUTURE: this should support arrays of TextContentPart and RefusalContentPart
|
|
@@ -53,6 +63,10 @@ export const ChatCompletionsAssistantMessageSchema = z.object({
|
|
|
53
63
|
tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
|
|
54
64
|
// Extensions
|
|
55
65
|
reasoning_content: z.string().optional().meta({ extension: true }),
|
|
66
|
+
reasoning_details: z
|
|
67
|
+
.array(ChatCompletionsReasoningDetailSchema)
|
|
68
|
+
.optional()
|
|
69
|
+
.meta({ extension: true }),
|
|
56
70
|
extra_content: z.record(z.string(), z.any()).optional().meta({ extension: true }),
|
|
57
71
|
});
|
|
58
72
|
export const ChatCompletionsToolMessageSchema = z.object({
|
package/dist/types.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { ProviderV3 } from "@ai-sdk/provider";
|
|
|
2
2
|
import type { Tracer } from "@opentelemetry/api";
|
|
3
3
|
import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
|
|
4
4
|
import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
|
|
5
|
+
import type { Model, ModelList } from "./endpoints/models";
|
|
5
6
|
import type { OpenAIError } from "./errors/openai";
|
|
6
7
|
import type { Logger, LoggerConfig } from "./logger";
|
|
7
8
|
import type { ModelCatalog, ModelId } from "./models/types";
|
|
@@ -66,7 +67,7 @@ export type GatewayContext = {
|
|
|
66
67
|
/**
|
|
67
68
|
* Result returned by the handler (pre-response).
|
|
68
69
|
*/
|
|
69
|
-
result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings |
|
|
70
|
+
result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | Model | ModelList;
|
|
70
71
|
/**
|
|
71
72
|
* Final response returned by the lifecycle.
|
|
72
73
|
*/
|
package/dist/utils/request.js
CHANGED
|
@@ -5,9 +5,7 @@ export const prepareRequestHeaders = (request) => {
|
|
|
5
5
|
const existingRequestId = request.headers.get(REQUEST_ID_HEADER);
|
|
6
6
|
if (existingRequestId)
|
|
7
7
|
return;
|
|
8
|
-
const requestId =
|
|
9
|
-
request.headers.get("x-trace-id") ??
|
|
10
|
-
crypto.randomUUID();
|
|
8
|
+
const requestId = "req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
|
|
11
9
|
const headers = new Headers(request.headers);
|
|
12
10
|
headers.set(REQUEST_ID_HEADER, requestId);
|
|
13
11
|
return headers;
|
|
@@ -27,7 +25,6 @@ export const prepareForwardHeaders = (request) => {
|
|
|
27
25
|
? `${userAgent} @hebo-ai/gateway/${GATEWAY_VERSION}`
|
|
28
26
|
: `@hebo-ai/gateway/${GATEWAY_VERSION}`;
|
|
29
27
|
return {
|
|
30
|
-
[REQUEST_ID_HEADER]: request.headers.get(REQUEST_ID_HEADER),
|
|
31
28
|
"user-agent": appendedUserAgent,
|
|
32
29
|
};
|
|
33
30
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.4.0-alpha.2",
|
|
3
|
+
"version": "0.4.0-alpha.3",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
package/src/config.ts
CHANGED
|
@@ -41,16 +41,16 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
41
41
|
const parsedModels = {} as typeof models;
|
|
42
42
|
const warnings = new Set<string>();
|
|
43
43
|
for (const id in models) {
|
|
44
|
-
const model = models[id
|
|
44
|
+
const model = models[id]!;
|
|
45
45
|
|
|
46
46
|
const kept: string[] = [];
|
|
47
47
|
|
|
48
|
-
for (const p of model
|
|
48
|
+
for (const p of model.providers) {
|
|
49
49
|
if (p in parsedProviders) kept.push(p);
|
|
50
50
|
else warnings.add(p);
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
if (kept.length > 0) parsedModels[id] = { ...model
|
|
53
|
+
if (kept.length > 0) parsedModels[id] = { ...model, providers: kept };
|
|
54
54
|
}
|
|
55
55
|
for (const warning of warnings) {
|
|
56
56
|
logger.warn(`[config] ${warning} provider removed (not configured)`);
|
|
@@ -2,7 +2,11 @@ import type { GenerateTextResult, ToolSet, Output } from "ai";
|
|
|
2
2
|
|
|
3
3
|
import { describe, expect, test } from "bun:test";
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {
|
|
6
|
+
convertToTextCallOptions,
|
|
7
|
+
toChatCompletionsAssistantMessage,
|
|
8
|
+
fromChatCompletionsAssistantMessage,
|
|
9
|
+
} from "./converters";
|
|
6
10
|
|
|
7
11
|
describe("Chat Completions Converters", () => {
|
|
8
12
|
describe("toChatCompletionsAssistantMessage", () => {
|
|
@@ -52,6 +56,152 @@ describe("Chat Completions Converters", () => {
|
|
|
52
56
|
vertex: { thought_signature: "tool-signature" },
|
|
53
57
|
});
|
|
54
58
|
});
|
|
59
|
+
|
|
60
|
+
test("should extract reasoning_details from reasoning parts", () => {
|
|
61
|
+
const mockResult: GenerateTextResult<ToolSet, Output.Output> = {
|
|
62
|
+
content: [
|
|
63
|
+
{
|
|
64
|
+
type: "reasoning",
|
|
65
|
+
text: "I am thinking...",
|
|
66
|
+
providerMetadata: {
|
|
67
|
+
anthropic: {
|
|
68
|
+
signature: "sig-123",
|
|
69
|
+
},
|
|
70
|
+
},
|
|
71
|
+
} as any,
|
|
72
|
+
{
|
|
73
|
+
type: "text",
|
|
74
|
+
text: "Final answer.",
|
|
75
|
+
} as any,
|
|
76
|
+
],
|
|
77
|
+
reasoningText: "I am thinking...",
|
|
78
|
+
toolCalls: [],
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
const message = toChatCompletionsAssistantMessage(mockResult);
|
|
82
|
+
|
|
83
|
+
expect(message.reasoning_content).toBe("I am thinking...");
|
|
84
|
+
expect(message.reasoning_details![0]).toMatchObject({
|
|
85
|
+
type: "reasoning.text",
|
|
86
|
+
text: "I am thinking...",
|
|
87
|
+
signature: "sig-123",
|
|
88
|
+
format: "unknown",
|
|
89
|
+
index: 0,
|
|
90
|
+
});
|
|
91
|
+
expect(message.reasoning_details![0].id).toStartWith("reasoning-");
|
|
92
|
+
expect(message.content).toBe("Final answer.");
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test("should fallback to reasoningText if no reasoning parts in content", () => {
|
|
96
|
+
const mockResult: GenerateTextResult<ToolSet, Output.Output> = {
|
|
97
|
+
content: [
|
|
98
|
+
{
|
|
99
|
+
type: "text",
|
|
100
|
+
text: "Hello",
|
|
101
|
+
} as any,
|
|
102
|
+
],
|
|
103
|
+
reasoningText: "Thinking via text...",
|
|
104
|
+
toolCalls: [],
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
const message = toChatCompletionsAssistantMessage(mockResult);
|
|
108
|
+
|
|
109
|
+
expect(message.reasoning_content).toBe("Thinking via text...");
|
|
110
|
+
expect(message.reasoning_details![0]).toMatchObject({
|
|
111
|
+
type: "reasoning.text",
|
|
112
|
+
text: "Thinking via text...",
|
|
113
|
+
index: 0,
|
|
114
|
+
});
|
|
115
|
+
expect(message.reasoning_details![0].id).toStartWith("reasoning-");
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
test("should handle redacted/encrypted reasoning", () => {
|
|
119
|
+
const mockResult: GenerateTextResult<ToolSet, Output.Output> = {
|
|
120
|
+
content: [
|
|
121
|
+
{
|
|
122
|
+
type: "reasoning",
|
|
123
|
+
text: "",
|
|
124
|
+
providerMetadata: {
|
|
125
|
+
anthropic: {
|
|
126
|
+
redactedData: "encrypted-content",
|
|
127
|
+
},
|
|
128
|
+
},
|
|
129
|
+
} as any,
|
|
130
|
+
],
|
|
131
|
+
toolCalls: [],
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
const message = toChatCompletionsAssistantMessage(mockResult);
|
|
135
|
+
|
|
136
|
+
expect(message.reasoning_details![0]).toMatchObject({
|
|
137
|
+
type: "reasoning.encrypted",
|
|
138
|
+
data: "encrypted-content",
|
|
139
|
+
});
|
|
140
|
+
expect((message.reasoning_details![0] as any).text).toBeUndefined();
|
|
141
|
+
expect(message.reasoning_details![0].signature).toBeUndefined();
|
|
142
|
+
});
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
describe("fromChatCompletionsAssistantMessage", () => {
|
|
146
|
+
test("should convert reasoning_details back to reasoning parts with unknown providerOptions", () => {
|
|
147
|
+
const message = fromChatCompletionsAssistantMessage({
|
|
148
|
+
role: "assistant",
|
|
149
|
+
content: "The result is 42.",
|
|
150
|
+
reasoning_details: [
|
|
151
|
+
{
|
|
152
|
+
type: "reasoning.text",
|
|
153
|
+
text: "Thinking hard...",
|
|
154
|
+
signature: "sig-xyz",
|
|
155
|
+
format: "unknown",
|
|
156
|
+
index: 0,
|
|
157
|
+
},
|
|
158
|
+
],
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
expect(Array.isArray(message.content)).toBe(true);
|
|
162
|
+
const content = message.content as any[];
|
|
163
|
+
expect(content).toHaveLength(2);
|
|
164
|
+
expect(content[0]).toEqual({
|
|
165
|
+
type: "reasoning",
|
|
166
|
+
text: "Thinking hard...",
|
|
167
|
+
providerOptions: {
|
|
168
|
+
unknown: {
|
|
169
|
+
signature: "sig-xyz",
|
|
170
|
+
},
|
|
171
|
+
},
|
|
172
|
+
});
|
|
173
|
+
expect(content[1]).toEqual({
|
|
174
|
+
type: "text",
|
|
175
|
+
text: "The result is 42.",
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
test("should convert reasoning.encrypted back to reasoning parts", () => {
|
|
180
|
+
const message = fromChatCompletionsAssistantMessage({
|
|
181
|
+
role: "assistant",
|
|
182
|
+
content: "Hello",
|
|
183
|
+
reasoning_details: [
|
|
184
|
+
{
|
|
185
|
+
type: "reasoning.encrypted",
|
|
186
|
+
data: "secret-data",
|
|
187
|
+
format: "unknown",
|
|
188
|
+
index: 0,
|
|
189
|
+
},
|
|
190
|
+
],
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
expect(Array.isArray(message.content)).toBe(true);
|
|
194
|
+
const content = message.content as any[];
|
|
195
|
+
expect(content[0]).toEqual({
|
|
196
|
+
type: "reasoning",
|
|
197
|
+
text: "",
|
|
198
|
+
providerOptions: {
|
|
199
|
+
unknown: {
|
|
200
|
+
redactedData: "secret-data",
|
|
201
|
+
},
|
|
202
|
+
},
|
|
203
|
+
});
|
|
204
|
+
});
|
|
55
205
|
});
|
|
56
206
|
|
|
57
207
|
describe("convertToTextCallOptions", () => {
|
|
@@ -9,9 +9,11 @@ import type {
|
|
|
9
9
|
ToolSet,
|
|
10
10
|
ModelMessage,
|
|
11
11
|
UserContent,
|
|
12
|
+
AssistantContent,
|
|
12
13
|
LanguageModelUsage,
|
|
13
14
|
Output,
|
|
14
15
|
TextStreamPart,
|
|
16
|
+
ReasoningOutput,
|
|
15
17
|
AssistantModelMessage,
|
|
16
18
|
ToolModelMessage,
|
|
17
19
|
UserModelMessage,
|
|
@@ -41,6 +43,7 @@ import type {
|
|
|
41
43
|
ChatCompletionsToolCallDelta,
|
|
42
44
|
ChatCompletionsReasoningEffort,
|
|
43
45
|
ChatCompletionsReasoningConfig,
|
|
46
|
+
ChatCompletionsReasoningDetail,
|
|
44
47
|
} from "./schema";
|
|
45
48
|
|
|
46
49
|
import { GatewayError } from "../../errors/gateway";
|
|
@@ -147,35 +150,71 @@ export function fromChatCompletionsUserMessage(
|
|
|
147
150
|
export function fromChatCompletionsAssistantMessage(
|
|
148
151
|
message: ChatCompletionsAssistantMessage,
|
|
149
152
|
): AssistantModelMessage {
|
|
150
|
-
const { tool_calls, role, content, extra_content } = message;
|
|
153
|
+
const { tool_calls, role, content, extra_content, reasoning_details } = message;
|
|
154
|
+
|
|
155
|
+
const parts: AssistantContent = [];
|
|
156
|
+
|
|
157
|
+
if (Array.isArray(parts)) {
|
|
158
|
+
if (reasoning_details?.length) {
|
|
159
|
+
for (const detail of reasoning_details) {
|
|
160
|
+
if (detail.text && detail.type === "reasoning.text") {
|
|
161
|
+
parts.push({
|
|
162
|
+
type: "reasoning",
|
|
163
|
+
text: detail.text,
|
|
164
|
+
providerOptions: detail.signature
|
|
165
|
+
? {
|
|
166
|
+
unknown: {
|
|
167
|
+
signature: detail.signature,
|
|
168
|
+
},
|
|
169
|
+
}
|
|
170
|
+
: undefined,
|
|
171
|
+
});
|
|
172
|
+
} else if (detail.type === "reasoning.encrypted" && detail.data) {
|
|
173
|
+
parts.push({
|
|
174
|
+
type: "reasoning",
|
|
175
|
+
text: "",
|
|
176
|
+
providerOptions: {
|
|
177
|
+
unknown: {
|
|
178
|
+
redactedData: detail.data,
|
|
179
|
+
},
|
|
180
|
+
},
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
151
185
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
186
|
+
if (tool_calls?.length) {
|
|
187
|
+
for (const tc of tool_calls) {
|
|
188
|
+
const { id, function: fn, extra_content } = tc;
|
|
189
|
+
const out: ToolCallPart = {
|
|
190
|
+
type: "tool-call",
|
|
191
|
+
toolCallId: id,
|
|
192
|
+
toolName: fn.name,
|
|
193
|
+
input: parseToolOutput(fn.arguments).value,
|
|
194
|
+
};
|
|
195
|
+
if (extra_content) {
|
|
196
|
+
out.providerOptions = extra_content;
|
|
197
|
+
}
|
|
198
|
+
parts.push(out);
|
|
199
|
+
}
|
|
200
|
+
} else if (content !== undefined && content !== null) {
|
|
201
|
+
parts.push({
|
|
202
|
+
type: "text",
|
|
203
|
+
text: content,
|
|
204
|
+
});
|
|
159
205
|
}
|
|
160
|
-
return out;
|
|
161
206
|
}
|
|
162
207
|
|
|
163
|
-
|
|
208
|
+
const out: AssistantModelMessage = {
|
|
164
209
|
role: role,
|
|
165
|
-
content:
|
|
166
|
-
const { id, function: fn, extra_content } = tc;
|
|
167
|
-
const out: ToolCallPart = {
|
|
168
|
-
type: "tool-call",
|
|
169
|
-
toolCallId: id,
|
|
170
|
-
toolName: fn.name,
|
|
171
|
-
input: parseToolOutput(fn.arguments).value,
|
|
172
|
-
};
|
|
173
|
-
if (extra_content) {
|
|
174
|
-
out.providerOptions = extra_content;
|
|
175
|
-
}
|
|
176
|
-
return out;
|
|
177
|
-
}),
|
|
210
|
+
content: Array.isArray(parts) && parts.length > 0 ? parts : (content ?? ""),
|
|
178
211
|
};
|
|
212
|
+
|
|
213
|
+
if (extra_content) {
|
|
214
|
+
out.providerOptions = extra_content;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
return out;
|
|
179
218
|
}
|
|
180
219
|
|
|
181
220
|
export function fromChatCompletionsToolResultMessage(
|
|
@@ -388,6 +427,7 @@ export class ChatCompletionsStream extends TransformStream<
|
|
|
388
427
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
389
428
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
390
429
|
let toolCallIndexCounter = 0;
|
|
430
|
+
const reasoningIdToIndex = new Map<string, number>();
|
|
391
431
|
|
|
392
432
|
const createChunk = (
|
|
393
433
|
delta: ChatCompletionsAssistantMessageDelta,
|
|
@@ -425,8 +465,30 @@ export class ChatCompletionsStream extends TransformStream<
|
|
|
425
465
|
}
|
|
426
466
|
|
|
427
467
|
case "reasoning-delta": {
|
|
468
|
+
let index = reasoningIdToIndex.get(part.id);
|
|
469
|
+
if (index === undefined) {
|
|
470
|
+
index = reasoningIdToIndex.size;
|
|
471
|
+
reasoningIdToIndex.set(part.id, index);
|
|
472
|
+
}
|
|
473
|
+
|
|
428
474
|
controller.enqueue(
|
|
429
|
-
createChunk(
|
|
475
|
+
createChunk(
|
|
476
|
+
{
|
|
477
|
+
reasoning_content: part.text,
|
|
478
|
+
reasoning_details: [
|
|
479
|
+
toReasoningDetail(
|
|
480
|
+
{
|
|
481
|
+
type: "reasoning",
|
|
482
|
+
text: part.text,
|
|
483
|
+
providerMetadata: part.providerMetadata,
|
|
484
|
+
},
|
|
485
|
+
part.id,
|
|
486
|
+
index,
|
|
487
|
+
),
|
|
488
|
+
],
|
|
489
|
+
},
|
|
490
|
+
part.providerMetadata,
|
|
491
|
+
),
|
|
430
492
|
);
|
|
431
493
|
break;
|
|
432
494
|
}
|
|
@@ -502,23 +564,85 @@ export const toChatCompletionsAssistantMessage = (
|
|
|
502
564
|
);
|
|
503
565
|
}
|
|
504
566
|
|
|
567
|
+
const reasoningDetails: ChatCompletionsReasoningDetail[] = [];
|
|
568
|
+
|
|
505
569
|
for (const part of result.content) {
|
|
506
570
|
if (part.type === "text") {
|
|
507
|
-
message.content
|
|
508
|
-
|
|
509
|
-
|
|
571
|
+
if (message.content === null) {
|
|
572
|
+
message.content = part.text;
|
|
573
|
+
if (part.providerMetadata) {
|
|
574
|
+
message.extra_content = part.providerMetadata;
|
|
575
|
+
}
|
|
510
576
|
}
|
|
511
|
-
|
|
577
|
+
} else if (part.type === "reasoning") {
|
|
578
|
+
reasoningDetails.push(
|
|
579
|
+
toReasoningDetail(part, `reasoning-${crypto.randomUUID()}`, reasoningDetails.length),
|
|
580
|
+
);
|
|
512
581
|
}
|
|
513
582
|
}
|
|
514
583
|
|
|
515
584
|
if (result.reasoningText) {
|
|
516
585
|
message.reasoning_content = result.reasoningText;
|
|
586
|
+
|
|
587
|
+
if (reasoningDetails.length === 0) {
|
|
588
|
+
reasoningDetails.push(
|
|
589
|
+
toReasoningDetail(
|
|
590
|
+
{ type: "reasoning", text: result.reasoningText },
|
|
591
|
+
`reasoning-${crypto.randomUUID()}`,
|
|
592
|
+
0,
|
|
593
|
+
),
|
|
594
|
+
);
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
if (reasoningDetails.length > 0) {
|
|
599
|
+
message.reasoning_details = reasoningDetails;
|
|
517
600
|
}
|
|
518
601
|
|
|
519
602
|
return message;
|
|
520
603
|
};
|
|
521
604
|
|
|
605
|
+
export function toReasoningDetail(
|
|
606
|
+
reasoning: ReasoningOutput,
|
|
607
|
+
id: string,
|
|
608
|
+
index: number,
|
|
609
|
+
): ChatCompletionsReasoningDetail {
|
|
610
|
+
const providerMetadata = reasoning.providerMetadata ?? {};
|
|
611
|
+
|
|
612
|
+
let redactedData: string | undefined;
|
|
613
|
+
let signature: string | undefined;
|
|
614
|
+
|
|
615
|
+
for (const metadata of Object.values(providerMetadata)) {
|
|
616
|
+
if (metadata && typeof metadata === "object") {
|
|
617
|
+
if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
|
|
618
|
+
redactedData = metadata["redactedData"];
|
|
619
|
+
}
|
|
620
|
+
if ("signature" in metadata && typeof metadata["signature"] === "string") {
|
|
621
|
+
signature = metadata["signature"];
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
if (redactedData) {
|
|
627
|
+
return {
|
|
628
|
+
id,
|
|
629
|
+
index,
|
|
630
|
+
type: "reasoning.encrypted",
|
|
631
|
+
data: redactedData,
|
|
632
|
+
format: "unknown",
|
|
633
|
+
};
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
return {
|
|
637
|
+
id,
|
|
638
|
+
index,
|
|
639
|
+
type: "reasoning.text",
|
|
640
|
+
text: reasoning.text,
|
|
641
|
+
signature,
|
|
642
|
+
format: "unknown",
|
|
643
|
+
};
|
|
644
|
+
}
|
|
645
|
+
|
|
522
646
|
export function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletionsUsage {
|
|
523
647
|
const out: ChatCompletionsUsage = {};
|
|
524
648
|
|
|
@@ -62,6 +62,18 @@ export const ChatCompletionsUserMessageSchema = z.object({
|
|
|
62
62
|
});
|
|
63
63
|
export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
|
|
64
64
|
|
|
65
|
+
export const ChatCompletionsReasoningDetailSchema = z.object({
|
|
66
|
+
id: z.string().optional(),
|
|
67
|
+
index: z.int().nonnegative(),
|
|
68
|
+
type: z.string(),
|
|
69
|
+
text: z.string().optional(),
|
|
70
|
+
signature: z.string().optional(),
|
|
71
|
+
data: z.string().optional(),
|
|
72
|
+
summary: z.string().optional(),
|
|
73
|
+
format: z.string().optional(),
|
|
74
|
+
});
|
|
75
|
+
export type ChatCompletionsReasoningDetail = z.infer<typeof ChatCompletionsReasoningDetailSchema>;
|
|
76
|
+
|
|
65
77
|
export const ChatCompletionsAssistantMessageSchema = z.object({
|
|
66
78
|
role: z.literal("assistant"),
|
|
67
79
|
// FUTURE: this should support arrays of TextContentPart and RefusalContentPart
|
|
@@ -71,6 +83,10 @@ export const ChatCompletionsAssistantMessageSchema = z.object({
|
|
|
71
83
|
tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
|
|
72
84
|
// Extensions
|
|
73
85
|
reasoning_content: z.string().optional().meta({ extension: true }),
|
|
86
|
+
reasoning_details: z
|
|
87
|
+
.array(ChatCompletionsReasoningDetailSchema)
|
|
88
|
+
.optional()
|
|
89
|
+
.meta({ extension: true }),
|
|
74
90
|
extra_content: z.record(z.string(), z.any()).optional().meta({ extension: true }),
|
|
75
91
|
});
|
|
76
92
|
export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
|
package/src/lifecycle.ts
CHANGED
|
@@ -37,7 +37,7 @@ export const winterCgHandler = (
|
|
|
37
37
|
if (onResponse) ctx.response = onResponse;
|
|
38
38
|
} catch (error) {
|
|
39
39
|
logger.error({
|
|
40
|
-
requestId: resolveRequestId(ctx.request)
|
|
40
|
+
requestId: resolveRequestId(ctx.request),
|
|
41
41
|
err: error instanceof Error ? error : new Error(String(error)),
|
|
42
42
|
});
|
|
43
43
|
ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
|
package/src/types.ts
CHANGED
|
@@ -7,6 +7,7 @@ import type {
|
|
|
7
7
|
ChatCompletionsChunk,
|
|
8
8
|
} from "./endpoints/chat-completions/schema";
|
|
9
9
|
import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
|
|
10
|
+
import type { Model, ModelList } from "./endpoints/models";
|
|
10
11
|
import type { OpenAIError } from "./errors/openai";
|
|
11
12
|
import type { Logger, LoggerConfig } from "./logger";
|
|
12
13
|
import type { ModelCatalog, ModelId } from "./models/types";
|
|
@@ -77,7 +78,8 @@ export type GatewayContext = {
|
|
|
77
78
|
| ChatCompletions
|
|
78
79
|
| ReadableStream<ChatCompletionsChunk | OpenAIError>
|
|
79
80
|
| Embeddings
|
|
80
|
-
|
|
|
81
|
+
| Model
|
|
82
|
+
| ModelList;
|
|
81
83
|
/**
|
|
82
84
|
* Final response returned by the lifecycle.
|
|
83
85
|
*/
|
package/src/utils/request.ts
CHANGED
|
@@ -10,9 +10,7 @@ export const prepareRequestHeaders = (request: Request) => {
|
|
|
10
10
|
if (existingRequestId) return;
|
|
11
11
|
|
|
12
12
|
const requestId =
|
|
13
|
-
|
|
14
|
-
request.headers.get("x-trace-id") ??
|
|
15
|
-
crypto.randomUUID();
|
|
13
|
+
"req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
|
|
16
14
|
|
|
17
15
|
const headers = new Headers(request.headers);
|
|
18
16
|
headers.set(REQUEST_ID_HEADER, requestId);
|
|
@@ -38,7 +36,6 @@ export const prepareForwardHeaders = (request: Request): Record<string, string>
|
|
|
38
36
|
: `@hebo-ai/gateway/${GATEWAY_VERSION}`;
|
|
39
37
|
|
|
40
38
|
return {
|
|
41
|
-
[REQUEST_ID_HEADER]: request.headers.get(REQUEST_ID_HEADER)!,
|
|
42
39
|
"user-agent": appendedUserAgent,
|
|
43
40
|
};
|
|
44
41
|
};
|