@hebo-ai/gateway 0.4.0-alpha.2 → 0.4.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -349,8 +349,8 @@ const gw = gateway({
349
349
  * @returns Modified result, or undefined to keep original.
350
350
  */
351
351
  after: async (ctx: {
352
- result: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | object
353
- }): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | object | void> => {
352
+ result: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings
353
+ }): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | void> => {
354
354
  // Example Use Cases:
355
355
  // - Transform result
356
356
  // - Result logging
@@ -555,6 +555,15 @@ Reasoning output is surfaced as extension to the `completion` object.
555
555
 
556
556
  Most SDKs handle these fields out-of-the-box.
557
557
 
558
+ #### Thinking Blocks & Context Preservation
559
+
560
+ Advanced models (like Anthropic Claude 3.7 or Gemini 3) surface structured reasoning steps and signatures that act as a "save state" for the model's internal reasoning process. To maintain this context across multi-turn conversations and tool-calling workflows, you should pass back the following extensions in subsequent messages:
561
+
562
+ - **reasoning_details**: Standardized array of reasoning steps and generic signatures.
563
+ - **extra_content**: Provider-specific extensions, such as **Google's thought signatures** on Vertex AI.
564
+
565
+ For **Gemini 3** models, returning the thought signature via `extra_content` is mandatory to resume the chain-of-thought; failing to do so may result in errors or degraded performance.
566
+
558
567
  ## 🧪 Advanced Usage
559
568
 
560
569
  ### Logger Settings
@@ -573,7 +582,6 @@ const gw = gateway({
573
582
  ```
574
583
 
575
584
  If you provide a custom logger, it must implement `trace`, `debug`, `info`, `warn`, and `error` methods.
576
- For production workloads, we recommend `pino` for better logging performance and lower overhead.
577
585
 
578
586
  Example with **pino**:
579
587
 
@@ -591,6 +599,9 @@ const gw = gateway({
591
599
  });
592
600
  ```
593
601
 
602
+ > [!TIP]
603
+ > For production workloads, we recommend `pino` for better logging performance and lower overhead.
604
+
594
605
  ### Telemetry Settings
595
606
 
596
607
  Hebo Gateway can forward telemetry settings via the `telemetry` config field.
@@ -610,6 +621,9 @@ const gw = gateway({
610
621
  });
611
622
  ```
612
623
 
624
+ > [!TIP]
625
+ > For observability integrations that are not OpenTelemetry-compliant (for example, Langfuse), you can disable the built-in telemetry and manually instrument requests in the `before` / `after` hooks.
626
+
613
627
  ### Passing Framework State to Hooks
614
628
 
615
629
  You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
@@ -1,6 +1,6 @@
1
1
  import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
2
- import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
3
- import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk } from "./schema";
2
+ import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, Output, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
3
+ import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
4
4
  import { OpenAIError } from "../../errors/openai";
5
5
  export type TextCallOptions = {
6
6
  messages: ModelMessage[];
@@ -31,6 +31,7 @@ export declare class ChatCompletionsStream extends TransformStream<TextStreamPar
31
31
  constructor(model: string);
32
32
  }
33
33
  export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
34
+ export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
34
35
  export declare function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletionsUsage;
35
36
  export declare function toChatCompletionsToolCall(id: string, name: string, args: unknown, providerMetadata?: SharedV3ProviderMetadata): ChatCompletionsToolCall;
36
37
  export declare const toChatCompletionsFinishReason: (finishReason: FinishReason) => ChatCompletionsFinishReason;
@@ -61,33 +61,67 @@ export function fromChatCompletionsUserMessage(message) {
61
61
  };
62
62
  }
63
63
  export function fromChatCompletionsAssistantMessage(message) {
64
- const { tool_calls, role, content, extra_content } = message;
65
- if (!tool_calls?.length) {
66
- const out = {
67
- role: role,
68
- content: content ?? "",
69
- };
70
- if (extra_content) {
71
- out.providerOptions = extra_content;
64
+ const { tool_calls, role, content, extra_content, reasoning_details } = message;
65
+ const parts = [];
66
+ if (Array.isArray(parts)) {
67
+ if (reasoning_details?.length) {
68
+ for (const detail of reasoning_details) {
69
+ if (detail.text && detail.type === "reasoning.text") {
70
+ parts.push({
71
+ type: "reasoning",
72
+ text: detail.text,
73
+ providerOptions: detail.signature
74
+ ? {
75
+ unknown: {
76
+ signature: detail.signature,
77
+ },
78
+ }
79
+ : undefined,
80
+ });
81
+ }
82
+ else if (detail.type === "reasoning.encrypted" && detail.data) {
83
+ parts.push({
84
+ type: "reasoning",
85
+ text: "",
86
+ providerOptions: {
87
+ unknown: {
88
+ redactedData: detail.data,
89
+ },
90
+ },
91
+ });
92
+ }
93
+ }
94
+ }
95
+ if (tool_calls?.length) {
96
+ for (const tc of tool_calls) {
97
+ const { id, function: fn, extra_content } = tc;
98
+ const out = {
99
+ type: "tool-call",
100
+ toolCallId: id,
101
+ toolName: fn.name,
102
+ input: parseToolOutput(fn.arguments).value,
103
+ };
104
+ if (extra_content) {
105
+ out.providerOptions = extra_content;
106
+ }
107
+ parts.push(out);
108
+ }
109
+ }
110
+ else if (content !== undefined && content !== null) {
111
+ parts.push({
112
+ type: "text",
113
+ text: content,
114
+ });
72
115
  }
73
- return out;
74
116
  }
75
- return {
117
+ const out = {
76
118
  role: role,
77
- content: tool_calls.map((tc) => {
78
- const { id, function: fn, extra_content } = tc;
79
- const out = {
80
- type: "tool-call",
81
- toolCallId: id,
82
- toolName: fn.name,
83
- input: parseToolOutput(fn.arguments).value,
84
- };
85
- if (extra_content) {
86
- out.providerOptions = extra_content;
87
- }
88
- return out;
89
- }),
119
+ content: Array.isArray(parts) && parts.length > 0 ? parts : (content ?? ""),
90
120
  };
121
+ if (extra_content) {
122
+ out.providerOptions = extra_content;
123
+ }
124
+ return out;
91
125
  }
92
126
  export function fromChatCompletionsToolResultMessage(message, toolById) {
93
127
  const toolCalls = message.tool_calls ?? [];
@@ -251,6 +285,7 @@ export class ChatCompletionsStream extends TransformStream {
251
285
  const streamId = `chatcmpl-${crypto.randomUUID()}`;
252
286
  const creationTime = Math.floor(Date.now() / 1000);
253
287
  let toolCallIndexCounter = 0;
288
+ const reasoningIdToIndex = new Map();
254
289
  const createChunk = (delta, provider_metadata, finish_reason, usage) => {
255
290
  if (provider_metadata) {
256
291
  delta.extra_content = provider_metadata;
@@ -278,7 +313,21 @@ export class ChatCompletionsStream extends TransformStream {
278
313
  break;
279
314
  }
280
315
  case "reasoning-delta": {
281
- controller.enqueue(createChunk({ reasoning_content: part.text }, part.providerMetadata));
316
+ let index = reasoningIdToIndex.get(part.id);
317
+ if (index === undefined) {
318
+ index = reasoningIdToIndex.size;
319
+ reasoningIdToIndex.set(part.id, index);
320
+ }
321
+ controller.enqueue(createChunk({
322
+ reasoning_content: part.text,
323
+ reasoning_details: [
324
+ toReasoningDetail({
325
+ type: "reasoning",
326
+ text: part.text,
327
+ providerMetadata: part.providerMetadata,
328
+ }, part.id, index),
329
+ ],
330
+ }, part.providerMetadata));
282
331
  break;
283
332
  }
284
333
  case "tool-call": {
@@ -316,20 +365,63 @@ export const toChatCompletionsAssistantMessage = (result) => {
316
365
  if (result.toolCalls && result.toolCalls.length > 0) {
317
366
  message.tool_calls = result.toolCalls.map((toolCall) => toChatCompletionsToolCall(toolCall.toolCallId, toolCall.toolName, toolCall.input, toolCall.providerMetadata));
318
367
  }
368
+ const reasoningDetails = [];
319
369
  for (const part of result.content) {
320
370
  if (part.type === "text") {
321
- message.content = part.text;
322
- if (part.providerMetadata) {
323
- message.extra_content = part.providerMetadata;
371
+ if (message.content === null) {
372
+ message.content = part.text;
373
+ if (part.providerMetadata) {
374
+ message.extra_content = part.providerMetadata;
375
+ }
324
376
  }
325
- break;
377
+ }
378
+ else if (part.type === "reasoning") {
379
+ reasoningDetails.push(toReasoningDetail(part, `reasoning-${crypto.randomUUID()}`, reasoningDetails.length));
326
380
  }
327
381
  }
328
382
  if (result.reasoningText) {
329
383
  message.reasoning_content = result.reasoningText;
384
+ if (reasoningDetails.length === 0) {
385
+ reasoningDetails.push(toReasoningDetail({ type: "reasoning", text: result.reasoningText }, `reasoning-${crypto.randomUUID()}`, 0));
386
+ }
387
+ }
388
+ if (reasoningDetails.length > 0) {
389
+ message.reasoning_details = reasoningDetails;
330
390
  }
331
391
  return message;
332
392
  };
393
+ export function toReasoningDetail(reasoning, id, index) {
394
+ const providerMetadata = reasoning.providerMetadata ?? {};
395
+ let redactedData;
396
+ let signature;
397
+ for (const metadata of Object.values(providerMetadata)) {
398
+ if (metadata && typeof metadata === "object") {
399
+ if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
400
+ redactedData = metadata["redactedData"];
401
+ }
402
+ if ("signature" in metadata && typeof metadata["signature"] === "string") {
403
+ signature = metadata["signature"];
404
+ }
405
+ }
406
+ }
407
+ if (redactedData) {
408
+ return {
409
+ id,
410
+ index,
411
+ type: "reasoning.encrypted",
412
+ data: redactedData,
413
+ format: "unknown",
414
+ };
415
+ }
416
+ return {
417
+ id,
418
+ index,
419
+ type: "reasoning.text",
420
+ text: reasoning.text,
421
+ signature,
422
+ format: "unknown",
423
+ };
424
+ }
333
425
  export function toChatCompletionsUsage(usage) {
334
426
  const out = {};
335
427
  const prompt = usage.inputTokens;
@@ -57,6 +57,17 @@ export declare const ChatCompletionsUserMessageSchema: z.ZodObject<{
57
57
  name: z.ZodOptional<z.ZodString>;
58
58
  }, z.core.$strip>;
59
59
  export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
60
+ export declare const ChatCompletionsReasoningDetailSchema: z.ZodObject<{
61
+ id: z.ZodOptional<z.ZodString>;
62
+ index: z.ZodInt;
63
+ type: z.ZodString;
64
+ text: z.ZodOptional<z.ZodString>;
65
+ signature: z.ZodOptional<z.ZodString>;
66
+ data: z.ZodOptional<z.ZodString>;
67
+ summary: z.ZodOptional<z.ZodString>;
68
+ format: z.ZodOptional<z.ZodString>;
69
+ }, z.core.$strip>;
70
+ export type ChatCompletionsReasoningDetail = z.infer<typeof ChatCompletionsReasoningDetailSchema>;
60
71
  export declare const ChatCompletionsAssistantMessageSchema: z.ZodObject<{
61
72
  role: z.ZodLiteral<"assistant">;
62
73
  content: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>;
@@ -71,6 +82,16 @@ export declare const ChatCompletionsAssistantMessageSchema: z.ZodObject<{
71
82
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
72
83
  }, z.core.$strip>>>;
73
84
  reasoning_content: z.ZodOptional<z.ZodString>;
85
+ reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
86
+ id: z.ZodOptional<z.ZodString>;
87
+ index: z.ZodInt;
88
+ type: z.ZodString;
89
+ text: z.ZodOptional<z.ZodString>;
90
+ signature: z.ZodOptional<z.ZodString>;
91
+ data: z.ZodOptional<z.ZodString>;
92
+ summary: z.ZodOptional<z.ZodString>;
93
+ format: z.ZodOptional<z.ZodString>;
94
+ }, z.core.$strip>>>;
74
95
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
75
96
  }, z.core.$strip>;
76
97
  export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
@@ -118,6 +139,16 @@ export declare const ChatCompletionsMessageSchema: z.ZodUnion<readonly [z.ZodObj
118
139
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
119
140
  }, z.core.$strip>>>;
120
141
  reasoning_content: z.ZodOptional<z.ZodString>;
142
+ reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
143
+ id: z.ZodOptional<z.ZodString>;
144
+ index: z.ZodInt;
145
+ type: z.ZodString;
146
+ text: z.ZodOptional<z.ZodString>;
147
+ signature: z.ZodOptional<z.ZodString>;
148
+ data: z.ZodOptional<z.ZodString>;
149
+ summary: z.ZodOptional<z.ZodString>;
150
+ format: z.ZodOptional<z.ZodString>;
151
+ }, z.core.$strip>>>;
121
152
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
122
153
  }, z.core.$strip>, z.ZodObject<{
123
154
  role: z.ZodLiteral<"tool">;
@@ -189,6 +220,16 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
189
220
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
190
221
  }, z.core.$strip>>>;
191
222
  reasoning_content: z.ZodOptional<z.ZodString>;
223
+ reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
224
+ id: z.ZodOptional<z.ZodString>;
225
+ index: z.ZodInt;
226
+ type: z.ZodString;
227
+ text: z.ZodOptional<z.ZodString>;
228
+ signature: z.ZodOptional<z.ZodString>;
229
+ data: z.ZodOptional<z.ZodString>;
230
+ summary: z.ZodOptional<z.ZodString>;
231
+ format: z.ZodOptional<z.ZodString>;
232
+ }, z.core.$strip>>>;
192
233
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
193
234
  }, z.core.$strip>, z.ZodObject<{
194
235
  role: z.ZodLiteral<"tool">;
@@ -265,6 +306,16 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
265
306
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
266
307
  }, z.core.$strip>>>;
267
308
  reasoning_content: z.ZodOptional<z.ZodString>;
309
+ reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
310
+ id: z.ZodOptional<z.ZodString>;
311
+ index: z.ZodInt;
312
+ type: z.ZodString;
313
+ text: z.ZodOptional<z.ZodString>;
314
+ signature: z.ZodOptional<z.ZodString>;
315
+ data: z.ZodOptional<z.ZodString>;
316
+ summary: z.ZodOptional<z.ZodString>;
317
+ format: z.ZodOptional<z.ZodString>;
318
+ }, z.core.$strip>>>;
268
319
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
269
320
  }, z.core.$strip>, z.ZodObject<{
270
321
  role: z.ZodLiteral<"tool">;
@@ -322,6 +373,16 @@ export declare const ChatCompletionsChoiceSchema: z.ZodObject<{
322
373
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
323
374
  }, z.core.$strip>>>;
324
375
  reasoning_content: z.ZodOptional<z.ZodString>;
376
+ reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
377
+ id: z.ZodOptional<z.ZodString>;
378
+ index: z.ZodInt;
379
+ type: z.ZodString;
380
+ text: z.ZodOptional<z.ZodString>;
381
+ signature: z.ZodOptional<z.ZodString>;
382
+ data: z.ZodOptional<z.ZodString>;
383
+ summary: z.ZodOptional<z.ZodString>;
384
+ format: z.ZodOptional<z.ZodString>;
385
+ }, z.core.$strip>>>;
325
386
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
326
387
  }, z.core.$strip>;
327
388
  finish_reason: z.ZodUnion<readonly [z.ZodLiteral<"stop">, z.ZodLiteral<"length">, z.ZodLiteral<"content_filter">, z.ZodLiteral<"tool_calls">]>;
@@ -361,6 +422,16 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
361
422
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
362
423
  }, z.core.$strip>>>;
363
424
  reasoning_content: z.ZodOptional<z.ZodString>;
425
+ reasoning_details: z.ZodOptional<z.ZodArray<z.ZodObject<{
426
+ id: z.ZodOptional<z.ZodString>;
427
+ index: z.ZodInt;
428
+ type: z.ZodString;
429
+ text: z.ZodOptional<z.ZodString>;
430
+ signature: z.ZodOptional<z.ZodString>;
431
+ data: z.ZodOptional<z.ZodString>;
432
+ summary: z.ZodOptional<z.ZodString>;
433
+ format: z.ZodOptional<z.ZodString>;
434
+ }, z.core.$strip>>>;
364
435
  extra_content: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
365
436
  }, z.core.$strip>;
366
437
  finish_reason: z.ZodUnion<readonly [z.ZodLiteral<"stop">, z.ZodLiteral<"length">, z.ZodLiteral<"content_filter">, z.ZodLiteral<"tool_calls">]>;
@@ -396,6 +467,16 @@ export declare const ChatCompletionsAssistantMessageDeltaSchema: z.ZodObject<{
396
467
  content: z.ZodOptional<z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>>;
397
468
  name: z.ZodOptional<z.ZodOptional<z.ZodString>>;
398
469
  reasoning_content: z.ZodOptional<z.ZodOptional<z.ZodString>>;
470
+ reasoning_details: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
471
+ id: z.ZodOptional<z.ZodString>;
472
+ index: z.ZodInt;
473
+ type: z.ZodString;
474
+ text: z.ZodOptional<z.ZodString>;
475
+ signature: z.ZodOptional<z.ZodString>;
476
+ data: z.ZodOptional<z.ZodString>;
477
+ summary: z.ZodOptional<z.ZodString>;
478
+ format: z.ZodOptional<z.ZodString>;
479
+ }, z.core.$strip>>>>;
399
480
  extra_content: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>>;
400
481
  tool_calls: z.ZodOptional<z.ZodArray<z.ZodObject<{
401
482
  type: z.ZodOptional<z.ZodLiteral<"function">>;
@@ -416,6 +497,16 @@ export declare const ChatCompletionsChoiceDeltaSchema: z.ZodObject<{
416
497
  content: z.ZodOptional<z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>>;
417
498
  name: z.ZodOptional<z.ZodOptional<z.ZodString>>;
418
499
  reasoning_content: z.ZodOptional<z.ZodOptional<z.ZodString>>;
500
+ reasoning_details: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
501
+ id: z.ZodOptional<z.ZodString>;
502
+ index: z.ZodInt;
503
+ type: z.ZodString;
504
+ text: z.ZodOptional<z.ZodString>;
505
+ signature: z.ZodOptional<z.ZodString>;
506
+ data: z.ZodOptional<z.ZodString>;
507
+ summary: z.ZodOptional<z.ZodString>;
508
+ format: z.ZodOptional<z.ZodString>;
509
+ }, z.core.$strip>>>>;
419
510
  extra_content: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>>;
420
511
  tool_calls: z.ZodOptional<z.ZodArray<z.ZodObject<{
421
512
  type: z.ZodOptional<z.ZodLiteral<"function">>;
@@ -444,6 +535,16 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
444
535
  content: z.ZodOptional<z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNull]>>>;
445
536
  name: z.ZodOptional<z.ZodOptional<z.ZodString>>;
446
537
  reasoning_content: z.ZodOptional<z.ZodOptional<z.ZodString>>;
538
+ reasoning_details: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
539
+ id: z.ZodOptional<z.ZodString>;
540
+ index: z.ZodInt;
541
+ type: z.ZodString;
542
+ text: z.ZodOptional<z.ZodString>;
543
+ signature: z.ZodOptional<z.ZodString>;
544
+ data: z.ZodOptional<z.ZodString>;
545
+ summary: z.ZodOptional<z.ZodString>;
546
+ format: z.ZodOptional<z.ZodString>;
547
+ }, z.core.$strip>>>>;
447
548
  extra_content: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>>;
448
549
  tool_calls: z.ZodOptional<z.ZodArray<z.ZodObject<{
449
550
  type: z.ZodOptional<z.ZodLiteral<"function">>;
@@ -44,6 +44,16 @@ export const ChatCompletionsUserMessageSchema = z.object({
44
44
  ]),
45
45
  name: z.string().optional(),
46
46
  });
47
+ export const ChatCompletionsReasoningDetailSchema = z.object({
48
+ id: z.string().optional(),
49
+ index: z.int().nonnegative(),
50
+ type: z.string(),
51
+ text: z.string().optional(),
52
+ signature: z.string().optional(),
53
+ data: z.string().optional(),
54
+ summary: z.string().optional(),
55
+ format: z.string().optional(),
56
+ });
47
57
  export const ChatCompletionsAssistantMessageSchema = z.object({
48
58
  role: z.literal("assistant"),
49
59
  // FUTURE: this should support arrays of TextContentPart and RefusalContentPart
@@ -53,6 +63,10 @@ export const ChatCompletionsAssistantMessageSchema = z.object({
53
63
  tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
54
64
  // Extensions
55
65
  reasoning_content: z.string().optional().meta({ extension: true }),
66
+ reasoning_details: z
67
+ .array(ChatCompletionsReasoningDetailSchema)
68
+ .optional()
69
+ .meta({ extension: true }),
56
70
  extra_content: z.record(z.string(), z.any()).optional().meta({ extension: true }),
57
71
  });
58
72
  export const ChatCompletionsToolMessageSchema = z.object({
package/dist/types.d.ts CHANGED
@@ -2,6 +2,7 @@ import type { ProviderV3 } from "@ai-sdk/provider";
2
2
  import type { Tracer } from "@opentelemetry/api";
3
3
  import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
4
4
  import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
5
+ import type { Model, ModelList } from "./endpoints/models";
5
6
  import type { OpenAIError } from "./errors/openai";
6
7
  import type { Logger, LoggerConfig } from "./logger";
7
8
  import type { ModelCatalog, ModelId } from "./models/types";
@@ -66,7 +67,7 @@ export type GatewayContext = {
66
67
  /**
67
68
  * Result returned by the handler (pre-response).
68
69
  */
69
- result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | object;
70
+ result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | Model | ModelList;
70
71
  /**
71
72
  * Final response returned by the lifecycle.
72
73
  */
@@ -5,9 +5,7 @@ export const prepareRequestHeaders = (request) => {
5
5
  const existingRequestId = request.headers.get(REQUEST_ID_HEADER);
6
6
  if (existingRequestId)
7
7
  return;
8
- const requestId = request.headers.get("x-correlation-id") ??
9
- request.headers.get("x-trace-id") ??
10
- crypto.randomUUID();
8
+ const requestId = "req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
11
9
  const headers = new Headers(request.headers);
12
10
  headers.set(REQUEST_ID_HEADER, requestId);
13
11
  return headers;
@@ -27,7 +25,6 @@ export const prepareForwardHeaders = (request) => {
27
25
  ? `${userAgent} @hebo-ai/gateway/${GATEWAY_VERSION}`
28
26
  : `@hebo-ai/gateway/${GATEWAY_VERSION}`;
29
27
  return {
30
- [REQUEST_ID_HEADER]: request.headers.get(REQUEST_ID_HEADER),
31
28
  "user-agent": appendedUserAgent,
32
29
  };
33
30
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.4.0-alpha.2",
3
+ "version": "0.4.0-alpha.3",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
5
5
  "keywords": [
6
6
  "ai",
package/src/config.ts CHANGED
@@ -41,16 +41,16 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
41
41
  const parsedModels = {} as typeof models;
42
42
  const warnings = new Set<string>();
43
43
  for (const id in models) {
44
- const model = models[id!];
44
+ const model = models[id]!;
45
45
 
46
46
  const kept: string[] = [];
47
47
 
48
- for (const p of model!.providers) {
48
+ for (const p of model.providers) {
49
49
  if (p in parsedProviders) kept.push(p);
50
50
  else warnings.add(p);
51
51
  }
52
52
 
53
- if (kept.length > 0) parsedModels[id] = { ...model!, providers: kept };
53
+ if (kept.length > 0) parsedModels[id] = { ...model, providers: kept };
54
54
  }
55
55
  for (const warning of warnings) {
56
56
  logger.warn(`[config] ${warning} provider removed (not configured)`);
@@ -2,7 +2,11 @@ import type { GenerateTextResult, ToolSet, Output } from "ai";
2
2
 
3
3
  import { describe, expect, test } from "bun:test";
4
4
 
5
- import { convertToTextCallOptions, toChatCompletionsAssistantMessage } from "./converters";
5
+ import {
6
+ convertToTextCallOptions,
7
+ toChatCompletionsAssistantMessage,
8
+ fromChatCompletionsAssistantMessage,
9
+ } from "./converters";
6
10
 
7
11
  describe("Chat Completions Converters", () => {
8
12
  describe("toChatCompletionsAssistantMessage", () => {
@@ -52,6 +56,152 @@ describe("Chat Completions Converters", () => {
52
56
  vertex: { thought_signature: "tool-signature" },
53
57
  });
54
58
  });
59
+
60
+ test("should extract reasoning_details from reasoning parts", () => {
61
+ const mockResult: GenerateTextResult<ToolSet, Output.Output> = {
62
+ content: [
63
+ {
64
+ type: "reasoning",
65
+ text: "I am thinking...",
66
+ providerMetadata: {
67
+ anthropic: {
68
+ signature: "sig-123",
69
+ },
70
+ },
71
+ } as any,
72
+ {
73
+ type: "text",
74
+ text: "Final answer.",
75
+ } as any,
76
+ ],
77
+ reasoningText: "I am thinking...",
78
+ toolCalls: [],
79
+ };
80
+
81
+ const message = toChatCompletionsAssistantMessage(mockResult);
82
+
83
+ expect(message.reasoning_content).toBe("I am thinking...");
84
+ expect(message.reasoning_details![0]).toMatchObject({
85
+ type: "reasoning.text",
86
+ text: "I am thinking...",
87
+ signature: "sig-123",
88
+ format: "unknown",
89
+ index: 0,
90
+ });
91
+ expect(message.reasoning_details![0].id).toStartWith("reasoning-");
92
+ expect(message.content).toBe("Final answer.");
93
+ });
94
+
95
+ test("should fallback to reasoningText if no reasoning parts in content", () => {
96
+ const mockResult: GenerateTextResult<ToolSet, Output.Output> = {
97
+ content: [
98
+ {
99
+ type: "text",
100
+ text: "Hello",
101
+ } as any,
102
+ ],
103
+ reasoningText: "Thinking via text...",
104
+ toolCalls: [],
105
+ };
106
+
107
+ const message = toChatCompletionsAssistantMessage(mockResult);
108
+
109
+ expect(message.reasoning_content).toBe("Thinking via text...");
110
+ expect(message.reasoning_details![0]).toMatchObject({
111
+ type: "reasoning.text",
112
+ text: "Thinking via text...",
113
+ index: 0,
114
+ });
115
+ expect(message.reasoning_details![0].id).toStartWith("reasoning-");
116
+ });
117
+
118
+ test("should handle redacted/encrypted reasoning", () => {
119
+ const mockResult: GenerateTextResult<ToolSet, Output.Output> = {
120
+ content: [
121
+ {
122
+ type: "reasoning",
123
+ text: "",
124
+ providerMetadata: {
125
+ anthropic: {
126
+ redactedData: "encrypted-content",
127
+ },
128
+ },
129
+ } as any,
130
+ ],
131
+ toolCalls: [],
132
+ };
133
+
134
+ const message = toChatCompletionsAssistantMessage(mockResult);
135
+
136
+ expect(message.reasoning_details![0]).toMatchObject({
137
+ type: "reasoning.encrypted",
138
+ data: "encrypted-content",
139
+ });
140
+ expect((message.reasoning_details![0] as any).text).toBeUndefined();
141
+ expect(message.reasoning_details![0].signature).toBeUndefined();
142
+ });
143
+ });
144
+
145
+ describe("fromChatCompletionsAssistantMessage", () => {
146
+ test("should convert reasoning_details back to reasoning parts with unknown providerOptions", () => {
147
+ const message = fromChatCompletionsAssistantMessage({
148
+ role: "assistant",
149
+ content: "The result is 42.",
150
+ reasoning_details: [
151
+ {
152
+ type: "reasoning.text",
153
+ text: "Thinking hard...",
154
+ signature: "sig-xyz",
155
+ format: "unknown",
156
+ index: 0,
157
+ },
158
+ ],
159
+ });
160
+
161
+ expect(Array.isArray(message.content)).toBe(true);
162
+ const content = message.content as any[];
163
+ expect(content).toHaveLength(2);
164
+ expect(content[0]).toEqual({
165
+ type: "reasoning",
166
+ text: "Thinking hard...",
167
+ providerOptions: {
168
+ unknown: {
169
+ signature: "sig-xyz",
170
+ },
171
+ },
172
+ });
173
+ expect(content[1]).toEqual({
174
+ type: "text",
175
+ text: "The result is 42.",
176
+ });
177
+ });
178
+
179
+ test("should convert reasoning.encrypted back to reasoning parts", () => {
180
+ const message = fromChatCompletionsAssistantMessage({
181
+ role: "assistant",
182
+ content: "Hello",
183
+ reasoning_details: [
184
+ {
185
+ type: "reasoning.encrypted",
186
+ data: "secret-data",
187
+ format: "unknown",
188
+ index: 0,
189
+ },
190
+ ],
191
+ });
192
+
193
+ expect(Array.isArray(message.content)).toBe(true);
194
+ const content = message.content as any[];
195
+ expect(content[0]).toEqual({
196
+ type: "reasoning",
197
+ text: "",
198
+ providerOptions: {
199
+ unknown: {
200
+ redactedData: "secret-data",
201
+ },
202
+ },
203
+ });
204
+ });
55
205
  });
56
206
 
57
207
  describe("convertToTextCallOptions", () => {
@@ -9,9 +9,11 @@ import type {
9
9
  ToolSet,
10
10
  ModelMessage,
11
11
  UserContent,
12
+ AssistantContent,
12
13
  LanguageModelUsage,
13
14
  Output,
14
15
  TextStreamPart,
16
+ ReasoningOutput,
15
17
  AssistantModelMessage,
16
18
  ToolModelMessage,
17
19
  UserModelMessage,
@@ -41,6 +43,7 @@ import type {
41
43
  ChatCompletionsToolCallDelta,
42
44
  ChatCompletionsReasoningEffort,
43
45
  ChatCompletionsReasoningConfig,
46
+ ChatCompletionsReasoningDetail,
44
47
  } from "./schema";
45
48
 
46
49
  import { GatewayError } from "../../errors/gateway";
@@ -147,35 +150,71 @@ export function fromChatCompletionsUserMessage(
147
150
  export function fromChatCompletionsAssistantMessage(
148
151
  message: ChatCompletionsAssistantMessage,
149
152
  ): AssistantModelMessage {
150
- const { tool_calls, role, content, extra_content } = message;
153
+ const { tool_calls, role, content, extra_content, reasoning_details } = message;
154
+
155
+ const parts: AssistantContent = [];
156
+
157
+ if (Array.isArray(parts)) {
158
+ if (reasoning_details?.length) {
159
+ for (const detail of reasoning_details) {
160
+ if (detail.text && detail.type === "reasoning.text") {
161
+ parts.push({
162
+ type: "reasoning",
163
+ text: detail.text,
164
+ providerOptions: detail.signature
165
+ ? {
166
+ unknown: {
167
+ signature: detail.signature,
168
+ },
169
+ }
170
+ : undefined,
171
+ });
172
+ } else if (detail.type === "reasoning.encrypted" && detail.data) {
173
+ parts.push({
174
+ type: "reasoning",
175
+ text: "",
176
+ providerOptions: {
177
+ unknown: {
178
+ redactedData: detail.data,
179
+ },
180
+ },
181
+ });
182
+ }
183
+ }
184
+ }
151
185
 
152
- if (!tool_calls?.length) {
153
- const out: AssistantModelMessage = {
154
- role: role,
155
- content: content ?? "",
156
- };
157
- if (extra_content) {
158
- out.providerOptions = extra_content;
186
+ if (tool_calls?.length) {
187
+ for (const tc of tool_calls) {
188
+ const { id, function: fn, extra_content } = tc;
189
+ const out: ToolCallPart = {
190
+ type: "tool-call",
191
+ toolCallId: id,
192
+ toolName: fn.name,
193
+ input: parseToolOutput(fn.arguments).value,
194
+ };
195
+ if (extra_content) {
196
+ out.providerOptions = extra_content;
197
+ }
198
+ parts.push(out);
199
+ }
200
+ } else if (content !== undefined && content !== null) {
201
+ parts.push({
202
+ type: "text",
203
+ text: content,
204
+ });
159
205
  }
160
- return out;
161
206
  }
162
207
 
163
- return {
208
+ const out: AssistantModelMessage = {
164
209
  role: role,
165
- content: tool_calls.map((tc: ChatCompletionsToolCall) => {
166
- const { id, function: fn, extra_content } = tc;
167
- const out: ToolCallPart = {
168
- type: "tool-call",
169
- toolCallId: id,
170
- toolName: fn.name,
171
- input: parseToolOutput(fn.arguments).value,
172
- };
173
- if (extra_content) {
174
- out.providerOptions = extra_content;
175
- }
176
- return out;
177
- }),
210
+ content: Array.isArray(parts) && parts.length > 0 ? parts : (content ?? ""),
178
211
  };
212
+
213
+ if (extra_content) {
214
+ out.providerOptions = extra_content;
215
+ }
216
+
217
+ return out;
179
218
  }
180
219
 
181
220
  export function fromChatCompletionsToolResultMessage(
@@ -388,6 +427,7 @@ export class ChatCompletionsStream extends TransformStream<
388
427
  const streamId = `chatcmpl-${crypto.randomUUID()}`;
389
428
  const creationTime = Math.floor(Date.now() / 1000);
390
429
  let toolCallIndexCounter = 0;
430
+ const reasoningIdToIndex = new Map<string, number>();
391
431
 
392
432
  const createChunk = (
393
433
  delta: ChatCompletionsAssistantMessageDelta,
@@ -425,8 +465,30 @@ export class ChatCompletionsStream extends TransformStream<
425
465
  }
426
466
 
427
467
  case "reasoning-delta": {
468
+ let index = reasoningIdToIndex.get(part.id);
469
+ if (index === undefined) {
470
+ index = reasoningIdToIndex.size;
471
+ reasoningIdToIndex.set(part.id, index);
472
+ }
473
+
428
474
  controller.enqueue(
429
- createChunk({ reasoning_content: part.text }, part.providerMetadata),
475
+ createChunk(
476
+ {
477
+ reasoning_content: part.text,
478
+ reasoning_details: [
479
+ toReasoningDetail(
480
+ {
481
+ type: "reasoning",
482
+ text: part.text,
483
+ providerMetadata: part.providerMetadata,
484
+ },
485
+ part.id,
486
+ index,
487
+ ),
488
+ ],
489
+ },
490
+ part.providerMetadata,
491
+ ),
430
492
  );
431
493
  break;
432
494
  }
@@ -502,23 +564,85 @@ export const toChatCompletionsAssistantMessage = (
502
564
  );
503
565
  }
504
566
 
567
+ const reasoningDetails: ChatCompletionsReasoningDetail[] = [];
568
+
505
569
  for (const part of result.content) {
506
570
  if (part.type === "text") {
507
- message.content = part.text;
508
- if (part.providerMetadata) {
509
- message.extra_content = part.providerMetadata;
571
+ if (message.content === null) {
572
+ message.content = part.text;
573
+ if (part.providerMetadata) {
574
+ message.extra_content = part.providerMetadata;
575
+ }
510
576
  }
511
- break;
577
+ } else if (part.type === "reasoning") {
578
+ reasoningDetails.push(
579
+ toReasoningDetail(part, `reasoning-${crypto.randomUUID()}`, reasoningDetails.length),
580
+ );
512
581
  }
513
582
  }
514
583
 
515
584
  if (result.reasoningText) {
516
585
  message.reasoning_content = result.reasoningText;
586
+
587
+ if (reasoningDetails.length === 0) {
588
+ reasoningDetails.push(
589
+ toReasoningDetail(
590
+ { type: "reasoning", text: result.reasoningText },
591
+ `reasoning-${crypto.randomUUID()}`,
592
+ 0,
593
+ ),
594
+ );
595
+ }
596
+ }
597
+
598
+ if (reasoningDetails.length > 0) {
599
+ message.reasoning_details = reasoningDetails;
517
600
  }
518
601
 
519
602
  return message;
520
603
  };
521
604
 
605
+ export function toReasoningDetail(
606
+ reasoning: ReasoningOutput,
607
+ id: string,
608
+ index: number,
609
+ ): ChatCompletionsReasoningDetail {
610
+ const providerMetadata = reasoning.providerMetadata ?? {};
611
+
612
+ let redactedData: string | undefined;
613
+ let signature: string | undefined;
614
+
615
+ for (const metadata of Object.values(providerMetadata)) {
616
+ if (metadata && typeof metadata === "object") {
617
+ if ("redactedData" in metadata && typeof metadata["redactedData"] === "string") {
618
+ redactedData = metadata["redactedData"];
619
+ }
620
+ if ("signature" in metadata && typeof metadata["signature"] === "string") {
621
+ signature = metadata["signature"];
622
+ }
623
+ }
624
+ }
625
+
626
+ if (redactedData) {
627
+ return {
628
+ id,
629
+ index,
630
+ type: "reasoning.encrypted",
631
+ data: redactedData,
632
+ format: "unknown",
633
+ };
634
+ }
635
+
636
+ return {
637
+ id,
638
+ index,
639
+ type: "reasoning.text",
640
+ text: reasoning.text,
641
+ signature,
642
+ format: "unknown",
643
+ };
644
+ }
645
+
522
646
  export function toChatCompletionsUsage(usage: LanguageModelUsage): ChatCompletionsUsage {
523
647
  const out: ChatCompletionsUsage = {};
524
648
 
@@ -62,6 +62,18 @@ export const ChatCompletionsUserMessageSchema = z.object({
62
62
  });
63
63
  export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
64
64
 
65
+ export const ChatCompletionsReasoningDetailSchema = z.object({
66
+ id: z.string().optional(),
67
+ index: z.int().nonnegative(),
68
+ type: z.string(),
69
+ text: z.string().optional(),
70
+ signature: z.string().optional(),
71
+ data: z.string().optional(),
72
+ summary: z.string().optional(),
73
+ format: z.string().optional(),
74
+ });
75
+ export type ChatCompletionsReasoningDetail = z.infer<typeof ChatCompletionsReasoningDetailSchema>;
76
+
65
77
  export const ChatCompletionsAssistantMessageSchema = z.object({
66
78
  role: z.literal("assistant"),
67
79
  // FUTURE: this should support arrays of TextContentPart and RefusalContentPart
@@ -71,6 +83,10 @@ export const ChatCompletionsAssistantMessageSchema = z.object({
71
83
  tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
72
84
  // Extensions
73
85
  reasoning_content: z.string().optional().meta({ extension: true }),
86
+ reasoning_details: z
87
+ .array(ChatCompletionsReasoningDetailSchema)
88
+ .optional()
89
+ .meta({ extension: true }),
74
90
  extra_content: z.record(z.string(), z.any()).optional().meta({ extension: true }),
75
91
  });
76
92
  export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
package/src/lifecycle.ts CHANGED
@@ -37,7 +37,7 @@ export const winterCgHandler = (
37
37
  if (onResponse) ctx.response = onResponse;
38
38
  } catch (error) {
39
39
  logger.error({
40
- requestId: resolveRequestId(ctx.request)!,
40
+ requestId: resolveRequestId(ctx.request),
41
41
  err: error instanceof Error ? error : new Error(String(error)),
42
42
  });
43
43
  ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
package/src/types.ts CHANGED
@@ -7,6 +7,7 @@ import type {
7
7
  ChatCompletionsChunk,
8
8
  } from "./endpoints/chat-completions/schema";
9
9
  import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
10
+ import type { Model, ModelList } from "./endpoints/models";
10
11
  import type { OpenAIError } from "./errors/openai";
11
12
  import type { Logger, LoggerConfig } from "./logger";
12
13
  import type { ModelCatalog, ModelId } from "./models/types";
@@ -77,7 +78,8 @@ export type GatewayContext = {
77
78
  | ChatCompletions
78
79
  | ReadableStream<ChatCompletionsChunk | OpenAIError>
79
80
  | Embeddings
80
- | object;
81
+ | Model
82
+ | ModelList;
81
83
  /**
82
84
  * Final response returned by the lifecycle.
83
85
  */
@@ -10,9 +10,7 @@ export const prepareRequestHeaders = (request: Request) => {
10
10
  if (existingRequestId) return;
11
11
 
12
12
  const requestId =
13
- request.headers.get("x-correlation-id") ??
14
- request.headers.get("x-trace-id") ??
15
- crypto.randomUUID();
13
+ "req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
16
14
 
17
15
  const headers = new Headers(request.headers);
18
16
  headers.set(REQUEST_ID_HEADER, requestId);
@@ -38,7 +36,6 @@ export const prepareForwardHeaders = (request: Request): Record<string, string>
38
36
  : `@hebo-ai/gateway/${GATEWAY_VERSION}`;
39
37
 
40
38
  return {
41
- [REQUEST_ID_HEADER]: request.headers.get(REQUEST_ID_HEADER)!,
42
39
  "user-agent": appendedUserAgent,
43
40
  };
44
41
  };